[otbn, rtl] Add prim_buf and prim_flop

These add needed optimization barriers around security hardening
features.

Signed-off-by: Greg Chadwick <gac@lowrisc.org>
diff --git a/hw/ip/otbn/rtl/otbn_controller.sv b/hw/ip/otbn/rtl/otbn_controller.sv
index 50ee1a2..85d01b1 100644
--- a/hw/ip/otbn/rtl/otbn_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_controller.sv
@@ -235,6 +235,13 @@
 
   logic [ExtWLEN-1:0] selection_result;
 
+  logic [1:0] rf_bignum_wr_en_unbuf;
+  logic [4:0] rf_bignum_wr_addr_unbuf;
+  logic [4:0] rf_bignum_rd_addr_a_unbuf;
+  logic       rf_bignum_rd_en_a_unbuf;
+  logic [4:0] rf_bignum_rd_addr_b_unbuf;
+  logic       rf_bignum_rd_en_b_unbuf;
+
   logic rf_bignum_rd_a_indirect_en;
   logic rf_bignum_rd_b_indirect_en;
   logic rf_bignum_wr_indirect_en;
@@ -816,13 +823,47 @@
     assign rf_bignum_rd_data_b_no_intg[i*32+:32] = rf_bignum_rd_data_b_intg_i[i*39+:32];
   end
 
-  assign rf_bignum_rd_addr_a_o = insn_dec_bignum_i.rf_a_indirect ? insn_bignum_rd_addr_a_q :
-                                                                   insn_dec_bignum_i.a;
-  assign rf_bignum_rd_en_a_o = insn_dec_bignum_i.rf_ren_a & insn_valid_i & ~stall;
+  // Bignum RF control signals from the controller aren't actually used, instead the predecoded
+  // one-hot versions are. The predecoded versions get checked against the signals produced here.
+  // Buffer them to ensure they don't get optimised away (with a functionaly correct OTBN they will
+  // always be identical).
+  assign rf_bignum_rd_addr_a_unbuf = insn_dec_bignum_i.rf_a_indirect ? insn_bignum_rd_addr_a_q :
+                                                                       insn_dec_bignum_i.a;
 
-  assign rf_bignum_rd_addr_b_o = insn_dec_bignum_i.rf_b_indirect ? insn_bignum_rd_addr_b_q :
-                                                                   insn_dec_bignum_i.b;
-  assign rf_bignum_rd_en_b_o = insn_dec_bignum_i.rf_ren_b & insn_valid_i & ~stall;
+  prim_buf #(
+    .Width(WdrAw)
+  ) u_rf_bignum_rd_addr_a_buf (
+    .in_i (rf_bignum_rd_addr_a_unbuf),
+    .out_o(rf_bignum_rd_addr_a_o)
+  );
+
+  assign rf_bignum_rd_en_a_unbuf = insn_dec_bignum_i.rf_ren_a & insn_valid_i & ~stall;
+
+  prim_buf #(
+    .Width(1)
+  ) u_rf_bignum_rd_en_a_buf (
+    .in_i (rf_bignum_rd_en_a_unbuf),
+    .out_o(rf_bignum_rd_en_a_o)
+  );
+
+  assign rf_bignum_rd_addr_b_unbuf = insn_dec_bignum_i.rf_b_indirect ? insn_bignum_rd_addr_b_q :
+                                                                       insn_dec_bignum_i.b;
+
+  prim_buf #(
+    .Width(WdrAw)
+  ) u_rf_bignum_rd_addr_b_buf (
+    .in_i (rf_bignum_rd_addr_b_unbuf),
+    .out_o(rf_bignum_rd_addr_b_o)
+  );
+
+  assign rf_bignum_rd_en_b_unbuf = insn_dec_bignum_i.rf_ren_b & insn_valid_i & ~stall;
+
+  prim_buf #(
+    .Width(1)
+  ) u_rf_bignum_rd_en_b_buf (
+    .in_i (rf_bignum_rd_en_b_unbuf),
+    .out_o(rf_bignum_rd_en_b_o)
+  );
 
   assign alu_bignum_operation_o.operand_a = rf_bignum_rd_data_a_no_intg;
 
@@ -872,7 +913,7 @@
 
   always_comb begin
     // By default write nothing
-    rf_bignum_wr_en_o     = 2'b00;
+    rf_bignum_wr_en_unbuf = 2'b00;
 
     // Only write if valid instruction wants a bignum rf write and it isn't stalled. If instruction
     // doesn't execute (e.g. due to an error) the write won't commit.
@@ -880,14 +921,26 @@
       if (insn_dec_bignum_i.mac_en && insn_dec_bignum_i.mac_shift_out) begin
         // Special handling for BN.MULQACC.SO, only enable upper or lower half depending on
         // mac_wr_hw_sel_upper.
-        rf_bignum_wr_en_o = insn_dec_bignum_i.mac_wr_hw_sel_upper ? 2'b10 : 2'b01;
+        rf_bignum_wr_en_unbuf = insn_dec_bignum_i.mac_wr_hw_sel_upper ? 2'b10 : 2'b01;
       end else begin
         // For everything else write both halves immediately.
-        rf_bignum_wr_en_o = 2'b11;
+        rf_bignum_wr_en_unbuf = 2'b11;
       end
     end
   end
 
+  // Bignum RF control signals from the controller aren't actually used, instead the predecoded
+  // one-hot versions are. The predecoded versions get checked against the signals produced here.
+  // Buffer them to ensure they don't get optimised away (with a functionaly correct OTBN they will
+  // always be identical).
+  prim_buf #(
+    .Width(2)
+  ) u_bignum_wr_en_buf (
+    .in_i (rf_bignum_wr_en_unbuf),
+    .out_o(rf_bignum_wr_en_o)
+  );
+
+
   assign rf_bignum_wr_commit_o = |rf_bignum_wr_en_o & insn_executing & !stall;
 
   assign rf_bignum_indirect_en_o    = insn_executing & rf_indirect_stall;
@@ -935,8 +988,19 @@
     end
   end
 
-  assign rf_bignum_wr_addr_o = insn_dec_bignum_i.rf_d_indirect ? insn_bignum_wr_addr_q :
-                                                                 insn_dec_bignum_i.d;
+  // Bignum RF control signals from the controller aren't actually used, instead the predecoded
+  // one-hot versions are. The predecoded versions get checked against the signals produced here.
+  // Buffer them to ensure they don't get optimised away (with a functionaly correct OTBN they will
+  // always be identical).
+  assign rf_bignum_wr_addr_unbuf = insn_dec_bignum_i.rf_d_indirect ? insn_bignum_wr_addr_q :
+                                                                     insn_dec_bignum_i.d;
+
+  prim_buf #(
+    .Width(WdrAw)
+  ) u_rf_bignum_wr_addr_buf (
+    .in_i (rf_bignum_wr_addr_unbuf),
+    .out_o(rf_bignum_wr_addr_o)
+  );
 
   // For the shift-out variant of BN.MULQACC the bottom half of the MAC result is written to one
   // half of a desintation register specified by the instruction (mac_wr_hw_sel_upper). The bottom
diff --git a/hw/ip/otbn/rtl/otbn_instruction_fetch.sv b/hw/ip/otbn/rtl/otbn_instruction_fetch.sv
index c3beb37..d4bcc91 100644
--- a/hw/ip/otbn/rtl/otbn_instruction_fetch.sv
+++ b/hw/ip/otbn/rtl/otbn_instruction_fetch.sv
@@ -70,7 +70,7 @@
   endfunction
 
   logic [ImemAddrWidth-1:0] insn_prefetch_addr;
-  logic [38:0]              insn_fetch_resp_data_intg_q;
+  logic [38:0]              insn_fetch_resp_data_intg_q, insn_fetch_resp_data_intg_d;
   logic [ImemAddrWidth-1:0] insn_fetch_resp_addr_q;
   logic                     insn_fetch_resp_valid_q, insn_fetch_resp_valid_d;
   logic [1:0]               insn_fetch_resp_intg_error_vec;
@@ -81,18 +81,16 @@
 
   rf_predec_bignum_t   rf_predec_bignum_indirect, rf_predec_bignum_sec_wipe;
   rf_predec_bignum_t   rf_predec_bignum_q, rf_predec_bignum_d, rf_predec_bignum_insn;
-  alu_predec_bignum_t  alu_predec_bignum_q, alu_predec_bignum_d;
+  alu_predec_bignum_t  alu_predec_bignum, alu_predec_bignum_q, alu_predec_bignum_d;
   ispr_predec_bignum_t ispr_predec_bignum_q, ispr_predec_bignum_d;
   ispr_predec_bignum_t ispr_predec_bignum;
-  mac_predec_bignum_t  mac_predec_bignum_q, mac_predec_bignum_d;
+  mac_predec_bignum_t  mac_predec_bignum, mac_predec_bignum_q, mac_predec_bignum_d;
   logic                lsu_addr_en_predec_q, lsu_addr_en_predec_d;
   logic                lsu_addr_en_predec_insn;
   logic                insn_addr_err_unbuf;
 
   logic [NWdr-1:0] rf_bignum_wr_sec_wipe_onehot;
 
-  logic rf_predec_bignum_en, other_predec_en;
-
   // The prefetch has failed if a fetch is requested and either no prefetch has done or was done to
   // the wrong address.
   assign insn_prefetch_fail = insn_fetch_req_valid_i &
@@ -121,9 +119,9 @@
     .imem_rvalid_i,
 
     .rf_predec_bignum_o   (rf_predec_bignum_insn),
-    .alu_predec_bignum_o  (alu_predec_bignum_d),
+    .alu_predec_bignum_o  (alu_predec_bignum),
     .ispr_predec_bignum_o (ispr_predec_bignum),
-    .mac_predec_bignum_o  (mac_predec_bignum_d),
+    .mac_predec_bignum_o  (mac_predec_bignum),
     .lsu_addr_en_predec_o (lsu_addr_en_predec_insn)
   );
 
@@ -145,60 +143,103 @@
                                        rf_we    : rf_bignum_wr_indirect_onehot_i};
 
   assign rf_predec_bignum_sec_wipe = '{rf_ren_a : '0,
-                                            rf_ren_b : '0,
-                                            rf_we    : rf_bignum_wr_sec_wipe_onehot};
+                                       rf_ren_b : '0,
+                                       rf_we    : rf_bignum_wr_sec_wipe_onehot};
 
   // Register enables for bignum come from precode unless indirect register accesses are used
   assign rf_predec_bignum_d = sec_wipe_wdr_en_i       ? rf_predec_bignum_sec_wipe :
                               rf_bignum_indirect_en_i ? rf_predec_bignum_indirect :
                               insn_fetch_en           ? rf_predec_bignum_insn     :
-                                                        '0;
+                              insn_fetch_resp_clear_i ? '0                        :
+                                                        rf_predec_bignum_q;
 
-  assign rf_predec_bignum_en =
-    insn_fetch_en | rf_bignum_indirect_en_i | sec_wipe_wdr_en_i | insn_fetch_resp_clear_i;
+  assign ispr_predec_bignum_d = insn_fetch_en           ? ispr_predec_bignum :
+                                insn_fetch_resp_clear_i ? '0                 :
+                                                          ispr_predec_bignum_q;
 
-  assign ispr_predec_bignum_d = insn_fetch_en ? ispr_predec_bignum : '0;
-
-  assign lsu_addr_en_predec_d = lsu_addr_en_predec_insn & insn_fetch_en;
-
-  assign other_predec_en = insn_fetch_en | insn_fetch_resp_clear_i;
+  assign lsu_addr_en_predec_d = insn_fetch_en           ? lsu_addr_en_predec_insn :
+                                insn_fetch_resp_clear_i ? 1'b0:
+                                                          lsu_addr_en_predec_q;
 
   assign insn_fetch_en = imem_rvalid_i & insn_fetch_req_valid_i;
 
+  assign insn_fetch_resp_data_intg_d = insn_fetch_en ? imem_rdata_i :
+                                                       insn_fetch_resp_data_intg_q;
+
+  prim_flop #(
+    .Width(39),
+    .ResetValue('0)
+  ) u_insn_fetch_resp_data_intg_flop (
+    .clk_i,
+    .rst_ni,
+
+    .d_i(insn_fetch_resp_data_intg_d),
+    .q_o(insn_fetch_resp_data_intg_q)
+  );
+
   always_ff @(posedge clk_i) begin
     if (insn_fetch_en) begin
-      insn_fetch_resp_data_intg_q <= imem_rdata_i;
       insn_fetch_resp_addr_q      <= insn_prefetch_addr;
     end
   end
 
-  always_ff @(posedge clk_i or negedge rst_ni) begin
-    if (!rst_ni) begin
-      alu_predec_bignum_q <= '0;
-      mac_predec_bignum_q <= '0;
-    end else if (insn_fetch_en) begin
-      alu_predec_bignum_q <= alu_predec_bignum_d;
-      mac_predec_bignum_q <= mac_predec_bignum_d;
-    end
-  end
+  assign alu_predec_bignum_d = insn_fetch_en ? alu_predec_bignum : alu_predec_bignum_q;
+  assign mac_predec_bignum_d = insn_fetch_en ? mac_predec_bignum : mac_predec_bignum_q;
 
-  always_ff @(posedge clk_i or negedge rst_ni) begin
-    if (!rst_ni) begin
-      rf_predec_bignum_q <= '0;
-    end else if (rf_predec_bignum_en) begin
-      rf_predec_bignum_q <= rf_predec_bignum_d;
-    end
-  end
+  prim_flop #(
+    .Width($bits(alu_predec_bignum_t)),
+    .ResetValue('0)
+  ) u_alu_predec_bignum_flop(
+    .clk_i,
+    .rst_ni,
 
-  always_ff @(posedge clk_i or negedge rst_ni) begin
-    if (!rst_ni) begin
-      ispr_predec_bignum_q <= '0;
-      lsu_addr_en_predec_q <= 1'b0;
-    end else if (other_predec_en) begin
-      ispr_predec_bignum_q <= ispr_predec_bignum_d;
-      lsu_addr_en_predec_q <= lsu_addr_en_predec_d;
-    end
-  end
+    .d_i(alu_predec_bignum_d),
+    .q_o(alu_predec_bignum_q)
+  );
+
+  prim_flop #(
+    .Width($bits(mac_predec_bignum_t)),
+    .ResetValue('0)
+  ) u_mac_predec_bignum_flop (
+    .clk_i,
+    .rst_ni,
+
+    .d_i(mac_predec_bignum_d),
+    .q_o(mac_predec_bignum_q)
+  );
+
+  prim_flop #(
+    .Width($bits(rf_predec_bignum_t)),
+    .ResetValue('0)
+  ) u_rf_predec_bignum_flop (
+    .clk_i,
+    .rst_ni,
+
+    .d_i(rf_predec_bignum_d),
+    .q_o(rf_predec_bignum_q)
+  );
+
+  prim_flop #(
+    .Width($bits(ispr_predec_bignum_t)),
+    .ResetValue('0)
+  ) u_ispr_predec_bignum_flop (
+    .clk_i,
+    .rst_ni,
+
+    .d_i(ispr_predec_bignum_d),
+    .q_o(ispr_predec_bignum_q)
+  );
+
+  prim_flop #(
+    .Width(1),
+    .ResetValue(1'b0)
+  ) u_lsu_addr_en_predec_flop (
+    .clk_i,
+    .rst_ni,
+
+    .d_i(lsu_addr_en_predec_d),
+    .q_o(lsu_addr_en_predec_q)
+  );
 
   always_ff @(posedge clk_i) begin
     if (insn_prefetch) begin
@@ -242,7 +283,6 @@
     .err_o     (insn_fetch_resp_intg_error_vec)
   );
 
-
   assign imem_req_o = insn_prefetch;
 
   assign insn_fetch_resp_valid_o = insn_fetch_resp_valid_q;
diff --git a/hw/ip/otbn/rtl/otbn_loop_controller.sv b/hw/ip/otbn/rtl/otbn_loop_controller.sv
index 85f426b..018c948 100644
--- a/hw/ip/otbn/rtl/otbn_loop_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_loop_controller.sv
@@ -62,7 +62,7 @@
   logic        at_current_loop_end_insn;
   logic        current_loop_finish;
   logic        current_loop_counter_dec;
-  logic [38:0] current_loop_addrs_padded_intg;
+  logic [38:0] current_loop_addrs_padded_intg_unbuf, current_loop_addrs_padded_intg_buf;
   logic [1:0]  current_loop_intg_err;
 
   loop_info_t                  new_loop;
@@ -237,15 +237,22 @@
 
   assign current_loop.loop_iterations = loop_counters[loop_stack_rd_idx];
 
-  assign current_loop_addrs_padded_intg =
+  assign current_loop_addrs_padded_intg_unbuf =
     {current_loop.loop_addr_info.loop_addrs_intg,
      {(32 - (ImemAddrWidth * 2) - 1){1'b0}},
      current_loop.loop_addr_info.loop_start,
      current_loop.loop_addr_info.loop_end};
 
+  prim_buf #(
+    .Width(39)
+  ) u_current_loop_addrs_padded_intg_buf (
+    .in_i (current_loop_addrs_padded_intg_unbuf),
+    .out_o(current_loop_addrs_padded_intg_buf)
+  );
+
   //SEC_CM: LOOP_STACK.ADDR.INTEGRITY
   prim_secded_inv_39_32_dec u_loop_addrs_intg_dec (
-    .data_i     (current_loop_addrs_padded_intg),
+    .data_i     (current_loop_addrs_padded_intg_buf),
     .data_o     (),
     .syndrome_o (),
     .err_o      (current_loop_intg_err)