[prim_alert*/prim_esc*] Rework placement of size_only bufs/flops

This slightly reworks the placement of size_only bufs and flops.
In particular, all differential inputs and outputs should now go through
either a size_only flop or a size_only buf.

Signed-off-by: Michael Schaffner <msf@opentitan.org>
diff --git a/hw/ip/prim/prim_alert.core b/hw/ip/prim/prim_alert.core
index 01cfdf3..e1befe3 100644
--- a/hw/ip/prim/prim_alert.core
+++ b/hw/ip/prim/prim_alert.core
@@ -11,6 +11,7 @@
       - lowrisc:prim:assert
       - lowrisc:prim:diff_decode
       - lowrisc:prim:buf
+      - lowrisc:prim:flop
     files:
       - rtl/prim_alert_pkg.sv
       - rtl/prim_alert_receiver.sv
diff --git a/hw/ip/prim/rtl/prim_alert_receiver.sv b/hw/ip/prim/rtl/prim_alert_receiver.sv
index bfc1baf..338387c 100644
--- a/hw/ip/prim/rtl/prim_alert_receiver.sv
+++ b/hw/ip/prim/rtl/prim_alert_receiver.sv
@@ -56,15 +56,25 @@
   /////////////////////////////////
   // decode differential signals //
   /////////////////////////////////
-  logic alert_level, alert_sigint;
+  logic alert_level, alert_sigint, alert_p, alert_n;
+
+  // Buffer the incoming alert diff pair before decoding to prevent further tool optimizations.
+  prim_buf #(
+    .Width(2) // bit 1: alert_n, bit 0: alert_p
+  ) u_prim_buf (
+    .in_i({alert_tx_i.alert_n,
+           alert_tx_i.alert_p}),
+    .out_o({alert_n,
+            alert_p})
+  );
 
   prim_diff_decode #(
     .AsyncOn(AsyncOn)
-  ) i_decode_alert (
+  ) u_decode_alert (
     .clk_i,
     .rst_ni,
-    .diff_pi  ( alert_tx_i.alert_p ),
-    .diff_ni  ( alert_tx_i.alert_n ),
+    .diff_pi  ( alert_p            ),
+    .diff_ni  ( alert_n            ),
     .level_o  ( alert_level        ),
     .rise_o   (                    ),
     .fall_o   (                    ),
@@ -78,33 +88,43 @@
   typedef enum logic [1:0] {Idle, HsAckWait, Pause0, Pause1} state_e;
   state_e state_d, state_q;
   logic ping_rise;
-  logic ping_tog, ping_tog_dp, ping_tog_qp, ping_tog_dn, ping_tog_qn;
-  logic ack, ack_dp, ack_qp, ack_dn, ack_qn;
+  logic ping_tog_pd, ping_tog_pq, ping_tog_nd, ping_tog_nq;
+  logic ack_pd, ack_pq, ack_nd, ack_nq;
   logic ping_req_d, ping_req_q;
   logic ping_pending_d, ping_pending_q;
 
   // signal ping request upon positive transition on ping_req_i
   // signalling is performed by a level change event on the diff output
   assign ping_req_d  = ping_req_i;
-  assign ping_rise  = ping_req_i && !ping_req_q;
-  assign ping_tog = (ping_rise) ? ~ping_tog_qp : ping_tog_qp;
+  assign ping_rise   = ping_req_i && !ping_req_q;
+  assign ping_tog_pd = (ping_rise) ? ~ping_tog_pq : ping_tog_pq;
+
+  assign ack_nd      = ~ack_pd;      // negative rail is the complement of the positive rail
+  assign ping_tog_nd = ~ping_tog_pd; // negative rail is the complement of the positive rail
 
   // This prevents further tool optimizations of the differential signal.
-  prim_buf u_prim_buf_ack_p (
-    .in_i(ack),
-    .out_o(ack_dp)
+  prim_flop #(
+    .Width     (2),
+    .ResetValue(2'b10) // reset state: ack_n = 1'b1, ack_p = 1'b0
+  ) u_prim_flop_ack (
+    .clk_i,
+    .rst_ni,
+    .d_i({ack_nd,
+          ack_pd}),
+    .q_o({ack_nq,
+          ack_pq})
   );
-  prim_buf u_prim_buf_ack_n (
-    .in_i(~ack),
-    .out_o(ack_dn)
-  );
-  prim_buf u_prim_buf_ping_p (
-    .in_i(ping_tog),
-    .out_o(ping_tog_dp)
-  );
-  prim_buf u_prim_buf_ping_n (
-    .in_i(~ping_tog),
-    .out_o(ping_tog_dn)
+
+  prim_flop #(
+    .Width     (2),
+    .ResetValue(2'b10) // reset state: ping_n = 1'b1, ping_p = 1'b0
+  ) u_prim_flop_ping (
+    .clk_i,
+    .rst_ni,
+    .d_i({ping_tog_nd,
+          ping_tog_pd}),
+    .q_o({ping_tog_nq,
+          ping_tog_pq})
   );
 
   // the ping pending signal is used to in the FSM to distinguish whether the
@@ -115,11 +135,11 @@
   assign ping_pending_d = ping_rise | ((~ping_ok_o) & ping_req_i & ping_pending_q);
 
   // diff pair outputs
-  assign alert_rx_o.ack_p = ack_qp;
-  assign alert_rx_o.ack_n = ack_qn;
+  assign alert_rx_o.ack_p = ack_pq;
+  assign alert_rx_o.ack_n = ack_nq;
 
-  assign alert_rx_o.ping_p = ping_tog_qp;
-  assign alert_rx_o.ping_n = ping_tog_qn;
+  assign alert_rx_o.ping_p = ping_tog_pq;
+  assign alert_rx_o.ping_n = ping_tog_nq;
 
   // this FSM receives the four phase handshakes from the alert receiver
   // note that the latency of the alert_p/n input diff pair is at least one
@@ -128,7 +148,7 @@
   always_comb begin : p_fsm
     // default
     state_d      = state_q;
-    ack          = 1'b0;
+    ack_pd       = 1'b0;
     ping_ok_o    = 1'b0;
     integ_fail_o = 1'b0;
     alert_o      = 1'b0;
@@ -138,7 +158,7 @@
         // wait for handshake to be initiated
         if (alert_level) begin
           state_d = HsAckWait;
-          ack     = 1'b1;
+          ack_pd  = 1'b1;
           // signal either an alert or ping received on the output
           if (ping_pending_q) begin
             ping_ok_o = 1'b1;
@@ -152,7 +172,7 @@
         if (!alert_level) begin
           state_d  = Pause0;
         end else begin
-          ack      = 1'b1;
+          ack_pd = 1'b1;
         end
       end
       // pause cycles between back-to-back handshakes
@@ -164,7 +184,7 @@
     // override in case of sigint
     if (alert_sigint) begin
       state_d      = Idle;
-      ack          = 1'b0;
+      ack_pd       = 1'b0;
       ping_ok_o    = 1'b0;
       integ_fail_o = 1'b1;
       alert_o      = 1'b0;
@@ -174,18 +194,10 @@
   always_ff @(posedge clk_i or negedge rst_ni) begin : p_reg
     if (!rst_ni) begin
       state_q        <= Idle;
-      ack_qp         <= 1'b0;
-      ack_qn         <= 1'b1;
-      ping_tog_qp    <= 1'b0;
-      ping_tog_qn    <= 1'b1;
       ping_req_q     <= 1'b0;
       ping_pending_q <= 1'b0;
     end else begin
       state_q        <= state_d;
-      ack_qp         <= ack_dp;
-      ack_qn         <= ack_dn;
-      ping_tog_qp    <= ping_tog_dp;
-      ping_tog_qn    <= ping_tog_dn;
       ping_req_q     <= ping_req_d;
       ping_pending_q <= ping_pending_d;
     end
diff --git a/hw/ip/prim/rtl/prim_alert_sender.sv b/hw/ip/prim/rtl/prim_alert_sender.sv
index c1b385b..f3d7d5b 100644
--- a/hw/ip/prim/rtl/prim_alert_sender.sv
+++ b/hw/ip/prim/rtl/prim_alert_sender.sv
@@ -69,36 +69,56 @@
   /////////////////////////////////
   // decode differential signals //
   /////////////////////////////////
-  logic ping_sigint, ping_event;
+  logic ping_sigint, ping_event, ping_n, ping_p;
 
-  prim_diff_decode #(
-    .AsyncOn(AsyncOn)
-  ) i_decode_ping (
-    .clk_i,
-    .rst_ni,
-    .diff_pi  ( alert_rx_i.ping_p ),
-    .diff_ni  ( alert_rx_i.ping_n ),
-    .level_o  (                   ),
-    .rise_o   (                   ),
-    .fall_o   (                   ),
-    .event_o  ( ping_event        ),
-    .sigint_o ( ping_sigint       )
+  // Buffer the incoming ping diff pair before decoding to prevent further tool optimizations.
+  prim_buf #(
+    .Width(2) // bit 1: ping_n, bit 0: ping_p
+  ) u_prim_buf_ping (
+    .in_i({alert_rx_i.ping_n,
+           alert_rx_i.ping_p}),
+    .out_o({ping_n,
+            ping_p})
   );
 
-  logic ack_sigint, ack_level;
+  prim_diff_decode #(
+    .AsyncOn(AsyncOn)
+  ) u_decode_ping (
+    .clk_i,
+    .rst_ni,
+    .diff_pi  ( ping_p      ),
+    .diff_ni  ( ping_n      ),
+    .level_o  (             ),
+    .rise_o   (             ),
+    .fall_o   (             ),
+    .event_o  ( ping_event  ),
+    .sigint_o ( ping_sigint )
+  );
+
+  logic ack_sigint, ack_level, ack_n, ack_p;
+
+  // Buffer the incoming ack diff pair before decoding to prevent further tool optimizations.
+  prim_buf #(
+    .Width(2) // bit 1: ack_n, bit 0: ack_p
+  ) u_prim_buf_ack (
+    .in_i({alert_rx_i.ack_n,
+           alert_rx_i.ack_p}),
+    .out_o({ack_n,
+            ack_p})
+  );
 
   prim_diff_decode #(
     .AsyncOn(AsyncOn)
-  ) i_decode_ack (
+  ) u_decode_ack (
     .clk_i,
     .rst_ni,
-    .diff_pi  ( alert_rx_i.ack_p ),
-    .diff_ni  ( alert_rx_i.ack_n ),
-    .level_o  ( ack_level        ),
-    .rise_o   (                  ),
-    .fall_o   (                  ),
-    .event_o  (                  ),
-    .sigint_o ( ack_sigint       )
+    .diff_pi  ( ack_p      ),
+    .diff_ni  ( ack_n      ),
+    .level_o  ( ack_level  ),
+    .rise_o   (            ),
+    .fall_o   (            ),
+    .event_o  (            ),
+    .sigint_o ( ack_sigint )
   );
 
 
@@ -116,7 +136,7 @@
     Pause1
     } state_e;
   state_e state_d, state_q;
-  logic alert_p, alert_n, alert_pq, alert_nq, alert_pd, alert_nd;
+  logic alert_pq, alert_nq, alert_pd, alert_nd;
   logic sigint_detected;
 
   assign sigint_detected = ack_sigint | ping_sigint;
@@ -164,8 +184,8 @@
   always_comb begin : p_fsm
     // default
     state_d   = state_q;
-    alert_p   = 1'b0;
-    alert_n   = 1'b1;
+    alert_pd  = 1'b0;
+    alert_nd  = 1'b1;
     ping_clr  = 1'b0;
     alert_clr = 1'b0;
 
@@ -174,8 +194,8 @@
         // alert always takes precedence
         if (alert_trigger || ping_trigger) begin
           state_d = (alert_trigger) ? AlertHsPhase1 : PingHsPhase1;
-          alert_p = 1'b1;
-          alert_n = 1'b0;
+          alert_pd = 1'b1;
+          alert_nd = 1'b0;
         end
       end
       // waiting for ack from receiver
@@ -183,8 +203,8 @@
         if (ack_level) begin
           state_d  = AlertHsPhase2;
         end else begin
-          alert_p  = 1'b1;
-          alert_n  = 1'b0;
+          alert_pd = 1'b1;
+          alert_nd = 1'b0;
         end
       end
       // wait for deassertion of ack
@@ -199,8 +219,8 @@
         if (ack_level) begin
           state_d  = PingHsPhase2;
         end else begin
-          alert_p  = 1'b1;
-          alert_n  = 1'b0;
+          alert_pd = 1'b1;
+          alert_nd = 1'b0;
         end
       end
       // wait for deassertion of ack
@@ -229,8 +249,8 @@
         state_d  = Idle;
         if (sigint_detected) begin
           state_d  = SigInt;
-          alert_p  = ~alert_pq;
-          alert_n  = ~alert_pq;
+          alert_pd = ~alert_pq;
+          alert_nd = ~alert_pq;
         end
       end
       // catch parasitic states
@@ -239,35 +259,32 @@
     // bail out if a signal integrity issue has been detected
     if (sigint_detected && (state_q != SigInt)) begin
       state_d   = SigInt;
-      alert_p   = 1'b0;
-      alert_n   = 1'b0;
+      alert_pd  = 1'b0;
+      alert_nd  = 1'b0;
       ping_clr  = 1'b0;
       alert_clr = 1'b0;
     end
   end
 
   // This prevents further tool optimizations of the differential signal.
-  prim_buf u_prim_buf_p (
-    .in_i(alert_p),
-    .out_o(alert_pd)
-  );
-  prim_buf u_prim_buf_n (
-    .in_i(alert_n),
-    .out_o(alert_nd)
+  prim_flop #(
+    .Width     (2),
+    .ResetValue(2'b10) // reset state: alert_n = 1'b1, alert_p = 1'b0
+  ) u_prim_flop (
+    .clk_i,
+    .rst_ni,
+    .d_i({alert_nd, alert_pd}),
+    .q_o({alert_nq, alert_pq})
   );
 
   always_ff @(posedge clk_i or negedge rst_ni) begin : p_reg
     if (!rst_ni) begin
       state_q          <= Idle;
-      alert_pq         <= 1'b0;
-      alert_nq         <= 1'b1;
       alert_set_q      <= 1'b0;
       alert_test_set_q <= 1'b0;
       ping_set_q       <= 1'b0;
     end else begin
       state_q          <= state_d;
-      alert_pq         <= alert_pd;
-      alert_nq         <= alert_nd;
       alert_set_q      <= alert_set_d;
       alert_test_set_q <= alert_test_set_d;
       ping_set_q       <= ping_set_d;
diff --git a/hw/ip/prim/rtl/prim_esc_receiver.sv b/hw/ip/prim/rtl/prim_esc_receiver.sv
index 2ffe702..f3590c7 100644
--- a/hw/ip/prim/rtl/prim_esc_receiver.sv
+++ b/hw/ip/prim/rtl/prim_esc_receiver.sv
@@ -35,15 +35,25 @@
   // decode differential signals //
   /////////////////////////////////
 
-  logic esc_level, sigint_detected;
+  logic esc_level, esc_p, esc_n, sigint_detected;
+
+  // Buffer the incoming esc diff pair before decoding to prevent further tool optimizations.
+  prim_buf #(
+    .Width(2) // bit 1: esc_n, bit 0: esc_p
+  ) u_prim_buf_esc (
+    .in_i({esc_tx_i.esc_n,
+           esc_tx_i.esc_p}),
+    .out_o({esc_n,
+            esc_p})
+  );
 
   prim_diff_decode #(
     .AsyncOn(1'b0)
-  ) i_decode_esc (
+  ) u_decode_esc (
     .clk_i,
     .rst_ni,
-    .diff_pi  ( esc_tx_i.esc_p  ),
-    .diff_ni  ( esc_tx_i.esc_n  ),
+    .diff_pi  ( esc_p           ),
+    .diff_ni  ( esc_n           ),
     .level_o  ( esc_level       ),
     .rise_o   (                 ),
     .fall_o   (                 ),
@@ -57,17 +67,18 @@
 
   typedef enum logic [2:0] {Idle, Check, PingResp, EscResp, SigInt} state_e;
   state_e state_d, state_q;
-  logic resp_p, resp_pd, resp_pq;
-  logic resp_n, resp_nd, resp_nq;
+  logic resp_pd, resp_pq;
+  logic resp_nd, resp_nq;
 
   // This prevents further tool optimizations of the differential signal.
-  prim_buf u_prim_buf_p (
-    .in_i(resp_p),
-    .out_o(resp_pd)
-  );
-  prim_buf u_prim_buf_n (
-    .in_i(resp_n),
-    .out_o(resp_nd)
+  prim_flop #(
+    .Width     (2),
+    .ResetValue(2'b10) // reset state: resp_n = 1'b1, resp_p = 1'b0
+  ) u_prim_flop (
+    .clk_i,
+    .rst_ni,
+    .d_i({resp_nd, resp_pd}),
+    .q_o({resp_nq, resp_pq})
   );
 
   assign esc_rx_o.resp_p = resp_pq;
@@ -76,8 +87,8 @@
   always_comb begin : p_fsm
     // default
     state_d  = state_q;
-    resp_p   = 1'b0;
-    resp_n   = 1'b1;
+    resp_pd  = 1'b0;
+    resp_nd  = 1'b1;
     esc_en_o = 1'b0;
 
     unique case (state_q)
@@ -85,8 +96,8 @@
       Idle: begin
         if (esc_level) begin
           state_d = Check;
-          resp_p  = 1'b1;
-          resp_n  = 1'b0;
+          resp_pd = 1'b1;
+          resp_nd = 1'b0;
         end
       end
       // we decide here whether this is only a ping request or
@@ -102,8 +113,8 @@
       // we got an escalation signal (pings cannot occur back to back)
       PingResp: begin
         state_d = Idle;
-        resp_p  = 1'b1;
-        resp_n  = 1'b0;
+        resp_pd = 1'b1;
+        resp_nd = 1'b0;
         if (esc_level) begin
           state_d  = EscResp;
           esc_en_o = 1'b1;
@@ -115,8 +126,8 @@
         state_d = Idle;
         if (esc_level) begin
           state_d  = EscResp;
-          resp_p   = ~resp_pq;
-          resp_n   = resp_pq;
+          resp_pd  = ~resp_pq;
+          resp_nd  = resp_pq;
           esc_en_o = 1'b1;
         end
       end
@@ -129,8 +140,8 @@
         state_d = Idle;
         if (sigint_detected) begin
           state_d = SigInt;
-          resp_p  = ~resp_pq;
-          resp_n  = ~resp_pq;
+          resp_pd = ~resp_pq;
+          resp_nd = ~resp_pq;
         end
       end
       default : state_d = Idle;
@@ -139,8 +150,8 @@
     // bail out if a signal integrity issue has been detected
     if (sigint_detected && (state_q != SigInt)) begin
       state_d  = SigInt;
-      resp_p   = 1'b0;
-      resp_n   = 1'b0;
+      resp_pd  = 1'b0;
+      resp_nd  = 1'b0;
     end
   end
 
@@ -152,12 +163,8 @@
   always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
     if (!rst_ni) begin
       state_q <= Idle;
-      resp_pq <= 1'b0;
-      resp_nq <= 1'b1;
     end else begin
       state_q <= state_d;
-      resp_pq <= resp_pd;
-      resp_nq <= resp_nd;
     end
   end
 
diff --git a/hw/ip/prim/rtl/prim_esc_sender.sv b/hw/ip/prim/rtl/prim_esc_sender.sv
index a738420..aae622e 100644
--- a/hw/ip/prim/rtl/prim_esc_sender.sv
+++ b/hw/ip/prim/rtl/prim_esc_sender.sv
@@ -43,15 +43,25 @@
   // decode differential signals //
   /////////////////////////////////
 
-  logic resp, sigint_detected;
+  logic resp, resp_n, resp_p, sigint_detected;
+
+  // Buffer the incoming resp diff pair before decoding to prevent further tool optimizations.
+  prim_buf #(
+    .Width(2) // bit 1: resp_n, bit 0: resp_p
+  ) u_prim_buf_resp (
+    .in_i({esc_rx_i.resp_n,
+           esc_rx_i.resp_p}),
+    .out_o({resp_n,
+            resp_p})
+  );
 
   prim_diff_decode #(
     .AsyncOn(1'b0)
-  ) i_decode_resp (
+  ) u_decode_resp (
     .clk_i,
     .rst_ni,
-    .diff_pi  ( esc_rx_i.resp_p ),
-    .diff_ni  ( esc_rx_i.resp_n ),
+    .diff_pi  ( resp_p          ),
+    .diff_ni  ( resp_n          ),
     .level_o  ( resp            ),
     .rise_o   (                 ),
     .fall_o   (                 ),
@@ -75,13 +85,13 @@
   assign esc_p = esc_req_i | esc_req_q | (ping_req_d & ~ping_req_q);
 
   // This prevents further tool optimizations of the differential signal.
-  prim_buf u_prim_buf_p (
-    .in_i(esc_p),
-    .out_o(esc_tx_o.esc_p)
-  );
-  prim_buf u_prim_buf_n (
-    .in_i(~esc_p),
-    .out_o(esc_tx_o.esc_n)
+  prim_buf #(
+    .Width(2) // bit 1: esc_n, bit 0: esc_p
+  ) u_prim_buf_esc (
+    .in_i({~esc_p, // esc_n is driven as the complement of esc_p
+           esc_p}),
+    .out_o({esc_tx_o.esc_n,
+            esc_tx_o.esc_p})
   );
 
   //////////////