[otbn,rtl] Shuffle timing of some internal signals

The point of this is to allow us to flop the idle_o signal (through a
prim_mubi4_sender) and the intr_done_o signal (by adding a flop to the
prim_intr_hw instance).

It turns out that we can do this without changing the external timing
at all. The following signals move forward a cycle (that is, they now
change one cycle earlier than before):

    locked                           (now called locking)
    otbn_dmem_scramble_key_req_busy
    otbn_imem_scramble_key_req_busy
    idle
    done

and we rename the is_not_running signals to have explicit _d and _q
suffixes. Note that these don't quite line up with the is_not_running
and is_not_running_r that we had before: is_not_running_q has the same
timing as the old is_not_running.

Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/hw/ip/otbn/rtl/otbn_controller.sv b/hw/ip/otbn/rtl/otbn_controller.sv
index 8389ce2..a3cf232 100644
--- a/hw/ip/otbn/rtl/otbn_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_controller.sv
@@ -21,8 +21,8 @@
   input logic clk_i,
   input logic rst_ni,
 
-  input  logic start_i,  // start the processing at address zero
-  output logic locked_o, // OTBN in locked state and must be reset to perform any further actions
+  input  logic start_i,   // start the processing at address zero
+  output logic locking_o, // Controller is in or is entering the locked state
 
   input prim_mubi_pkg::mubi4_t escalate_en_i,
   output controller_err_bits_t err_bits_o,
@@ -308,7 +308,7 @@
   assign executing = (state_q == OtbnStateRun) ||
                      (state_q == OtbnStateStall);
 
-  assign locked_o = (state_q == OtbnStateLocked) & ~secure_wipe_running_i;
+  assign locking_o = (state_d == OtbnStateLocked) & ~secure_wipe_running_i;
   assign start_secure_wipe_o = executing & (done_complete | err) & ~secure_wipe_running_i;
 
   assign jump_or_branch = (insn_valid_i &
@@ -499,7 +499,7 @@
     if (!rst_ni) begin
       err_bits_q <= '0;
     end else begin
-      if (start_i && !locked_o) begin
+      if (start_i && !locking_o) begin
         err_bits_q <= '0;
       end else begin
         err_bits_q <= err_bits_q | err_bits_d;