[otbn,dv] Add loop warping support to UVM environment

This is analogous to the previous commit (which added support to the
Verilator-based environment), but can use uvm_hdl_deposit to force
signals, which allows us to do the forcing in SystemVerilog, rather
than having to grub around in C++.

Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/hw/ip/otbn/dv/uvm/env/otbn_env_pkg.sv b/hw/ip/otbn/dv/uvm/env/otbn_env_pkg.sv
index 086d072..62546c3 100644
--- a/hw/ip/otbn/dv/uvm/env/otbn_env_pkg.sv
+++ b/hw/ip/otbn/dv/uvm/env/otbn_env_pkg.sv
@@ -10,6 +10,7 @@
   import dv_lib_pkg::*;
   import tl_agent_pkg::*;
   import cip_base_pkg::*;
+  import otbn_model_pkg::*;
   import otbn_model_agent_pkg::*;
   import otbn_memutil_pkg::*;
 
diff --git a/hw/ip/otbn/dv/uvm/env/otbn_loop_if.sv b/hw/ip/otbn/dv/uvm/env/otbn_loop_if.sv
index cae8fb7..14c1af5 100644
--- a/hw/ip/otbn/dv/uvm/env/otbn_loop_if.sv
+++ b/hw/ip/otbn/dv/uvm/env/otbn_loop_if.sv
@@ -24,7 +24,9 @@
 
   input logic [31:0] current_loop_start,
   input logic [31:0] current_loop_end,
-  input logic [31:0] next_loop_end
+  input logic [31:0] next_loop_end,
+
+  input logic [31:0] current_loop_d_iterations
 );
 
   function automatic otbn_env_pkg::stack_fullness_e get_fullness();
@@ -98,4 +100,40 @@
           !((current_loop_start <= insn_addr_i) && (insn_addr_i <= current_loop_end))) ##1
          (insn_addr_i == current_loop_end))
 
+  // Loop length tracking. If we want to convert between the current "iteration count" as stored by
+  // the RTL (which counts down from the initial count to 1) and the iteration count as used in the
+  // ISS or spec, we need to know the total number of iterations for this loop. Of course, the RTL
+  // doesn't store that (since it doesn't need it), so we have to reconstruct it here.
+  logic [31:0] lengths[$];  // Total iteration counts; entry 0 belongs to the current loop
+  always @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      lengths.delete();
+    end else begin
+      if (current_loop_finish && lengths.size()) begin  // Never pop from an empty queue
+        void'(lengths.pop_front());  // Popped length is unused: discard it explicitly
+      end
+      if (loop_start_req_i && loop_start_commit_i) begin  // New loop becomes entry 0
+        lengths.push_front(loop_iterations_i);
+      end
+    end
+  end
+
+  // Convert from the RTL-level view of the iteration counter (starting at the number of iterations
+  // and counting down to 1) to the ISA-level view (starting at zero and counting up). If iters is
+  // greater than or equal to the surrounding loop count, returns 0: the index of the first
+  // iteration. Also returns 0 if no loop length is currently being tracked.
+  function logic [31:0] loop_iters_to_count(logic [31:0] iters);
+    if (!lengths.size()) return 0;
+    return (iters < lengths[0]) ? lengths[0] - iters : 32'd0;
+  endfunction
+
+  // Convert from the ISA-level view (starting at zero and counting up) of the iteration counter to
+  // the RTL-level view (starting at the number of iterations and counting down to 1). If count is
+  // greater than or equal to the surrounding loop count, returns 1: the index of the last
+  // iteration. Returns 0 (not 1) if no loop length is currently being tracked.
+  function logic [31:0] loop_count_to_iters(logic [31:0] count);
+    if (!lengths.size()) return 0;
+    return (count < lengths[0]) ? lengths[0] - count : 32'd1;
+  endfunction
+
 endinterface
diff --git a/hw/ip/otbn/dv/uvm/env/seq_lib/otbn_base_vseq.sv b/hw/ip/otbn/dv/uvm/env/seq_lib/otbn_base_vseq.sv
index 24d7dd8..806024a 100644
--- a/hw/ip/otbn/dv/uvm/env/seq_lib/otbn_base_vseq.sv
+++ b/hw/ip/otbn/dv/uvm/env/seq_lib/otbn_base_vseq.sv
@@ -19,13 +19,15 @@
   protected bit running_ = 1'b0;
 
   // Load the contents of an ELF file into the DUT's memories, either by a DPI backdoor (if backdoor
-  // is true) or with TL transactions.
+  // is true) or with TL transactions. Also, pass loop warp rules to the ISS through the model.
   protected task load_elf(string path, bit backdoor);
     if (backdoor) begin
       load_elf_backdoor(path);
     end else begin
       load_elf_over_bus(path);
     end
+    // On either path, pass the loop warp rules just loaded into otbn_memutil to the model.
+    otbn_take_loop_warps(cfg.model_agent_cfg.vif.handle, cfg.mem_util);
   endtask
 
   // Load the contents of an ELF file into the DUT's memories by a DPI backdoor
@@ -107,7 +109,6 @@
   // If the block gets reset, this task will exit early.
   protected task run_otbn();
     int exp_end_addr;
-    uvm_reg_data_t cmd_val;
 
     // Check that we haven't been called re-entrantly. This could happen if there's a bug in the
     // reset sequence, which relies on run_otbn() to exit properly when it sees a device reset.
@@ -116,15 +117,15 @@
     `DV_CHECK_FATAL(!running_)
     running_ = 1'b1;
 
-    // Start OTBN by writing EXECUTE to the CMD register.
-    `uvm_info(`gfn, $sformatf("\n\t ----| Starting OTBN"), UVM_MEDIUM)
-    csr_utils_pkg::csr_wr(ral.cmd, otbn_pkg::CmdExecute);
-
-    // Now wait until OTBN has finished
-    `uvm_info(`gfn, $sformatf("\n\t ----| Waiting for OTBN to finish"), UVM_MEDIUM)
-    csr_utils_pkg::csr_spinwait(.ptr(ral.status), .exp_data(otbn_pkg::StatusIdle));
-
-    `uvm_info(`gfn, $sformatf("\n\t ----| OTBN finished"), UVM_MEDIUM)
+    fork : isolation_fork
+      begin
+        fork
+          _run_otbn();
+          _run_loop_warps();
+        join_any
+        disable fork;
+      end
+    join
 
     // Post-run checks
     //
@@ -143,6 +144,62 @@
     running_ = 1'b0;
    endtask
 
+  // The guts of the run_otbn task. Writes CmdExecute to the CMD register to start OTBN, then polls
+  // the STATUS register until it reads StatusIdle. On reset, this should return immediately.
+  protected task _run_otbn();
+    // Start OTBN by writing EXECUTE to the CMD register.
+    `uvm_info(`gfn, $sformatf("\n\t ----| Starting OTBN"), UVM_MEDIUM)
+    csr_utils_pkg::csr_wr(ral.cmd, otbn_pkg::CmdExecute);
+
+    // Now wait until OTBN has finished
+    `uvm_info(`gfn, $sformatf("\n\t ----| Waiting for OTBN to finish"), UVM_MEDIUM)
+    csr_utils_pkg::csr_spinwait(.ptr(ral.status), .exp_data(otbn_pkg::StatusIdle));
+
+    `uvm_info(`gfn, $sformatf("\n\t ----| OTBN finished"), UVM_MEDIUM)
+
+  endtask
+
+  // Monitor the bound-in loop controller interface to take action on loop warp events. Runs
+  // forever, but is spawned by run_otbn(), which will kill it when the OTBN run completes or the
+  // block is reset.
+  protected task _run_loop_warps();
+    logic [31:0] addr, old_iters, old_count;  // Sampled from / derived from the loop interface
+    bit [31:0]   new_count, new_iters;        // new_count is filled in by the DPI lookup below
+
+    forever begin
+      // Run on the negative edge of the clock: we want to force a "_d" value, so should make sure
+      // we get in after the DUT's logic that runs on posedge.
+      @(negedge cfg.clk_rst_vif.clk);
+
+      // If the loop stack is empty (so we don't have a current loop), there's nothing to do here.
+      if (cfg.loop_vif.get_fullness() == StackEmpty)
+        continue;
+
+      // Get the current address and iteration counter.
+      addr = cfg.loop_vif.insn_addr_i;
+      old_iters = cfg.loop_vif.current_loop_d_iterations;
+
+      // Convert from the "RTL view" of the iteration count (counting down to 1) to the "ISA view"
+      // (counting up from zero).
+      old_count = cfg.loop_vif.loop_iters_to_count(old_iters);
+
+      // Do a DPI call to otbn_memutil to look up whether there is a loop warp that we should be
+      // taking. This returns 1'b1 and fills in new_count if there is a warp that does something.
+      if (!OtbnMemUtilGetLoopWarp(cfg.mem_util, addr, old_count, new_count))
+        continue;
+
+      // Convert this back to the "RTL view"
+      new_iters = cfg.loop_vif.loop_count_to_iters(new_count);
+
+      // Override the _d signal. The path is absolute, so this depends on the tb/dut instance names.
+      if (uvm_hdl_deposit({"tb.dut.u_otbn_core.u_otbn_controller.",
+                           "u_otbn_loop_controller.current_loop_d.loop_iterations"},
+                          new_iters) != 1) begin
+        `dv_fatal("Failed to override loop_iterations for loop warp.")
+      end
+    end
+  endtask
+
   virtual protected function string pick_elf_path();
     chandle helper;
     int     num_files;
diff --git a/hw/ip/otbn/dv/uvm/otbn_model_agent/otbn_model_if.sv b/hw/ip/otbn/dv/uvm/otbn_model_agent/otbn_model_if.sv
index 2d579cc..2931e40 100644
--- a/hw/ip/otbn/dv/uvm/otbn_model_agent/otbn_model_if.sv
+++ b/hw/ip/otbn/dv/uvm/otbn_model_agent/otbn_model_if.sv
@@ -20,6 +20,7 @@
   bit                       done;         // Operation done
   bit                       err;          // Something went wrong
   bit [31:0]                stop_pc;      // PC at end of operation
+  chandle                   handle;       // Handle for DPI calls to C model
 
   // Wait until done goes high. Stops early on reset
   task automatic wait_done();
diff --git a/hw/ip/otbn/dv/uvm/tb.sv b/hw/ip/otbn/dv/uvm/tb.sv
index 36bad48..ffa8f60 100644
--- a/hw/ip/otbn/dv/uvm/tb.sv
+++ b/hw/ip/otbn/dv/uvm/tb.sv
@@ -136,12 +136,16 @@
       .loop_start_commit_i,
       .loop_iterations_i,
       .otbn_stall_i,
+
       // These addresses are start/end addresses for entries in the loop stack. As with insn_addr_i,
       // we expand them to 32 bits. Also the loop stack entries have a type that's not exposed
       // outside of the loop controller module so we need to extract the fields here.
       .current_loop_start (32'(current_loop_q.loop_start)),
       .current_loop_end   (32'(current_loop_q.loop_end)),
-      .next_loop_end      (32'(next_loop.loop_end))
+      .next_loop_end      (32'(next_loop.loop_end)),
+
+      // This count is used by the loop warping code.
+      .current_loop_d_iterations (current_loop_d.loop_iterations)
     );
 
   bind dut.u_otbn_core.u_otbn_alu_bignum otbn_alu_bignum_if i_otbn_alu_bignum_if (.*);
@@ -191,8 +195,14 @@
     .err_o        (model_if.err)
   );
 
-  // Pull the final PC out of the DUT
+  // Pull the final PC and the OtbnModel handle out of the SV model wrapper.
   assign model_if.stop_pc = u_model.stop_pc_q;
+  // The always_ff is because the spec doesn't allow continuous assignments for chandles. The value
+  // is populated in an init block and the vseq reads it after loading an ELF file (in load_elf),
+  // which should be much more than 1 cycle later, so we shouldn't need to worry about a stale value.
+  always_ff @(posedge model_if.clk_i) begin
+    model_if.handle <= u_model.model_handle;
+  end
 
   otbn_insn_cnt_if insn_cnt_if (
    .clk_i            (clk),