[mubi] Enhance mubi_sync with stability check

- Stability check is not needed for most of the design and defaults
  to 0.

- With stability enabled, a 3rd stage is compared to the synchronized
  output.  If they match, the 3rd stage result is used; if not, the
  ResetValue is used.

- This addresses some of the concerns raised with the original mubi_sync
  in #8848.

Signed-off-by: Timothy Chen <timothytim@google.com>
diff --git a/hw/ip/prim/rtl/prim_mubi12_sync.sv b/hw/ip/prim/rtl/prim_mubi12_sync.sv
index fba6c86..453cc20 100644
--- a/hw/ip/prim/rtl/prim_mubi12_sync.sv
+++ b/hw/ip/prim/rtl/prim_mubi12_sync.sv
@@ -23,6 +23,12 @@
   // In special cases where the receiver is in the same clock domain as the sender,
   // this can be set to 0. However, it is recommended to leave this at 1.
   parameter bit AsyncOn = 1,
+  // This controls whether the mubi module institutes stability checks when
+  // AsyncOn is set.  If stability checks are on, a 3rd stage of storage is
+  // added after the synchronizers and the outputs only updated if the 3rd
+  // stage and synchronizer agree.  If they do not agree, the ResetValue
+  // is output instead.
+  parameter bit StabilityCheck = 0,
   // Reset value for the sync flops
   parameter mubi12_t ResetValue = MuBi12False
 ) (
@@ -36,6 +42,7 @@
 
   logic [MuBi12Width-1:0] mubi;
   if (AsyncOn) begin : gen_flops
+    logic [MuBi12Width-1:0] mubi_sync;
     prim_flop_2sync #(
       .Width(MuBi12Width),
       .ResetValue(MuBi12Width'(ResetValue))
@@ -43,8 +50,60 @@
       .clk_i,
       .rst_ni,
       .d_i(MuBi12Width'(mubi_i)),
-      .q_o(mubi)
+      .q_o(mubi_sync)
     );
+
+    if (StabilityCheck) begin : gen_stable_chks
+      logic [MuBi12Width-1:0] mubi_q;
+      prim_flop #(
+        .Width(MuBi12Width),
+        .ResetValue(MuBi12Width'(ResetValue))
+      ) u_prim_flop_3rd_stage (
+        .clk_i,
+        .rst_ni,
+        .d_i(mubi_sync),
+        .q_o(mubi_q)
+      );
+
+      logic [MuBi12Width-1:0] sig_unstable;
+      prim_xor2 #(
+        .Width(MuBi12Width)
+      ) u_mubi_xor (
+        .in0_i(mubi_sync),
+        .in1_i(mubi_q),
+        .out_o(sig_unstable)
+      );
+
+      logic [MuBi12Width-1:0] reset_value;
+      assign reset_value = ResetValue;
+
+      for (genvar k = 0; k < MuBi12Width; k++) begin : gen_bufs_muxes
+        logic [MuBi12Width-1:0] sig_unstable_buf;
+
+        // each mux gets its own buffered output, this ensures the OR-ing
+        // cannot be defeated in one place.
+        prim_sec_anchor_buf #(
+          .Width(MuBi12Width)
+        ) u_sig_unstable_buf (
+          .in_i(sig_unstable),
+          .out_o(sig_unstable_buf)
+        );
+
+        // if any xor indicates signal is unstable, output the reset
+        // value.
+        prim_clock_mux2 #(
+          .NoFpgaBufG(1'b1)
+        ) u_mux (
+          .clk0_i(mubi_q[k]),
+          .clk1_i(reset_value[k]),
+          .sel_i(|sig_unstable_buf),
+          .clk_o(mubi[k])
+        );
+      end
+
+    end else begin : gen_no_stable_chks
+      assign mubi = mubi_sync;
+    end
   end else begin : gen_no_flops
     logic unused_clk;
     logic unused_rst;
diff --git a/hw/ip/prim/rtl/prim_mubi16_sync.sv b/hw/ip/prim/rtl/prim_mubi16_sync.sv
index cb5fd5a..0b052dd 100644
--- a/hw/ip/prim/rtl/prim_mubi16_sync.sv
+++ b/hw/ip/prim/rtl/prim_mubi16_sync.sv
@@ -23,6 +23,12 @@
   // In special cases where the receiver is in the same clock domain as the sender,
   // this can be set to 0. However, it is recommended to leave this at 1.
   parameter bit AsyncOn = 1,
+  // This controls whether the mubi module institutes stability checks when
+  // AsyncOn is set.  If stability checks are on, a 3rd stage of storage is
+  // added after the synchronizers and the outputs only updated if the 3rd
+  // stage and synchronizer agree.  If they do not agree, the ResetValue
+  // is output instead.
+  parameter bit StabilityCheck = 0,
   // Reset value for the sync flops
   parameter mubi16_t ResetValue = MuBi16False
 ) (
@@ -36,6 +42,7 @@
 
   logic [MuBi16Width-1:0] mubi;
   if (AsyncOn) begin : gen_flops
+    logic [MuBi16Width-1:0] mubi_sync;
     prim_flop_2sync #(
       .Width(MuBi16Width),
       .ResetValue(MuBi16Width'(ResetValue))
@@ -43,8 +50,60 @@
       .clk_i,
       .rst_ni,
       .d_i(MuBi16Width'(mubi_i)),
-      .q_o(mubi)
+      .q_o(mubi_sync)
     );
+
+    if (StabilityCheck) begin : gen_stable_chks
+      logic [MuBi16Width-1:0] mubi_q;
+      prim_flop #(
+        .Width(MuBi16Width),
+        .ResetValue(MuBi16Width'(ResetValue))
+      ) u_prim_flop_3rd_stage (
+        .clk_i,
+        .rst_ni,
+        .d_i(mubi_sync),
+        .q_o(mubi_q)
+      );
+
+      logic [MuBi16Width-1:0] sig_unstable;
+      prim_xor2 #(
+        .Width(MuBi16Width)
+      ) u_mubi_xor (
+        .in0_i(mubi_sync),
+        .in1_i(mubi_q),
+        .out_o(sig_unstable)
+      );
+
+      logic [MuBi16Width-1:0] reset_value;
+      assign reset_value = ResetValue;
+
+      for (genvar k = 0; k < MuBi16Width; k++) begin : gen_bufs_muxes
+        logic [MuBi16Width-1:0] sig_unstable_buf;
+
+        // each mux gets its own buffered output, this ensures the OR-ing
+        // cannot be defeated in one place.
+        prim_sec_anchor_buf #(
+          .Width(MuBi16Width)
+        ) u_sig_unstable_buf (
+          .in_i(sig_unstable),
+          .out_o(sig_unstable_buf)
+        );
+
+        // if any xor indicates signal is unstable, output the reset
+        // value.
+        prim_clock_mux2 #(
+          .NoFpgaBufG(1'b1)
+        ) u_mux (
+          .clk0_i(mubi_q[k]),
+          .clk1_i(reset_value[k]),
+          .sel_i(|sig_unstable_buf),
+          .clk_o(mubi[k])
+        );
+      end
+
+    end else begin : gen_no_stable_chks
+      assign mubi = mubi_sync;
+    end
   end else begin : gen_no_flops
     logic unused_clk;
     logic unused_rst;
diff --git a/hw/ip/prim/rtl/prim_mubi4_sync.sv b/hw/ip/prim/rtl/prim_mubi4_sync.sv
index fa56ad6..b328cd1 100644
--- a/hw/ip/prim/rtl/prim_mubi4_sync.sv
+++ b/hw/ip/prim/rtl/prim_mubi4_sync.sv
@@ -23,6 +23,12 @@
   // In special cases where the receiver is in the same clock domain as the sender,
   // this can be set to 0. However, it is recommended to leave this at 1.
   parameter bit AsyncOn = 1,
+  // This controls whether the mubi module institutes stability checks when
+  // AsyncOn is set.  If stability checks are on, a 3rd stage of storage is
+  // added after the synchronizers and the outputs only updated if the 3rd
+  // stage and synchronizer agree.  If they do not agree, the ResetValue
+  // is output instead.
+  parameter bit StabilityCheck = 0,
   // Reset value for the sync flops
   parameter mubi4_t ResetValue = MuBi4False
 ) (
@@ -36,6 +42,7 @@
 
   logic [MuBi4Width-1:0] mubi;
   if (AsyncOn) begin : gen_flops
+    logic [MuBi4Width-1:0] mubi_sync;
     prim_flop_2sync #(
       .Width(MuBi4Width),
       .ResetValue(MuBi4Width'(ResetValue))
@@ -43,8 +50,60 @@
       .clk_i,
       .rst_ni,
       .d_i(MuBi4Width'(mubi_i)),
-      .q_o(mubi)
+      .q_o(mubi_sync)
     );
+
+    if (StabilityCheck) begin : gen_stable_chks
+      logic [MuBi4Width-1:0] mubi_q;
+      prim_flop #(
+        .Width(MuBi4Width),
+        .ResetValue(MuBi4Width'(ResetValue))
+      ) u_prim_flop_3rd_stage (
+        .clk_i,
+        .rst_ni,
+        .d_i(mubi_sync),
+        .q_o(mubi_q)
+      );
+
+      logic [MuBi4Width-1:0] sig_unstable;
+      prim_xor2 #(
+        .Width(MuBi4Width)
+      ) u_mubi_xor (
+        .in0_i(mubi_sync),
+        .in1_i(mubi_q),
+        .out_o(sig_unstable)
+      );
+
+      logic [MuBi4Width-1:0] reset_value;
+      assign reset_value = ResetValue;
+
+      for (genvar k = 0; k < MuBi4Width; k++) begin : gen_bufs_muxes
+        logic [MuBi4Width-1:0] sig_unstable_buf;
+
+        // each mux gets its own buffered output, this ensures the OR-ing
+        // cannot be defeated in one place.
+        prim_sec_anchor_buf #(
+          .Width(MuBi4Width)
+        ) u_sig_unstable_buf (
+          .in_i(sig_unstable),
+          .out_o(sig_unstable_buf)
+        );
+
+        // if any xor indicates signal is unstable, output the reset
+        // value.
+        prim_clock_mux2 #(
+          .NoFpgaBufG(1'b1)
+        ) u_mux (
+          .clk0_i(mubi_q[k]),
+          .clk1_i(reset_value[k]),
+          .sel_i(|sig_unstable_buf),
+          .clk_o(mubi[k])
+        );
+      end
+
+    end else begin : gen_no_stable_chks
+      assign mubi = mubi_sync;
+    end
   end else begin : gen_no_flops
     logic unused_clk;
     logic unused_rst;
diff --git a/hw/ip/prim/rtl/prim_mubi8_sync.sv b/hw/ip/prim/rtl/prim_mubi8_sync.sv
index 533e20f..6ddfea1 100644
--- a/hw/ip/prim/rtl/prim_mubi8_sync.sv
+++ b/hw/ip/prim/rtl/prim_mubi8_sync.sv
@@ -23,6 +23,12 @@
   // In special cases where the receiver is in the same clock domain as the sender,
   // this can be set to 0. However, it is recommended to leave this at 1.
   parameter bit AsyncOn = 1,
+  // This controls whether the mubi module institutes stability checks when
+  // AsyncOn is set.  If stability checks are on, a 3rd stage of storage is
+  // added after the synchronizers and the outputs only updated if the 3rd
+  // stage and synchronizer agree.  If they do not agree, the ResetValue
+  // is output instead.
+  parameter bit StabilityCheck = 0,
   // Reset value for the sync flops
   parameter mubi8_t ResetValue = MuBi8False
 ) (
@@ -36,6 +42,7 @@
 
   logic [MuBi8Width-1:0] mubi;
   if (AsyncOn) begin : gen_flops
+    logic [MuBi8Width-1:0] mubi_sync;
     prim_flop_2sync #(
       .Width(MuBi8Width),
       .ResetValue(MuBi8Width'(ResetValue))
@@ -43,8 +50,60 @@
       .clk_i,
       .rst_ni,
       .d_i(MuBi8Width'(mubi_i)),
-      .q_o(mubi)
+      .q_o(mubi_sync)
     );
+
+    if (StabilityCheck) begin : gen_stable_chks
+      logic [MuBi8Width-1:0] mubi_q;
+      prim_flop #(
+        .Width(MuBi8Width),
+        .ResetValue(MuBi8Width'(ResetValue))
+      ) u_prim_flop_3rd_stage (
+        .clk_i,
+        .rst_ni,
+        .d_i(mubi_sync),
+        .q_o(mubi_q)
+      );
+
+      logic [MuBi8Width-1:0] sig_unstable;
+      prim_xor2 #(
+        .Width(MuBi8Width)
+      ) u_mubi_xor (
+        .in0_i(mubi_sync),
+        .in1_i(mubi_q),
+        .out_o(sig_unstable)
+      );
+
+      logic [MuBi8Width-1:0] reset_value;
+      assign reset_value = ResetValue;
+
+      for (genvar k = 0; k < MuBi8Width; k++) begin : gen_bufs_muxes
+        logic [MuBi8Width-1:0] sig_unstable_buf;
+
+        // each mux gets its own buffered output, this ensures the OR-ing
+        // cannot be defeated in one place.
+        prim_sec_anchor_buf #(
+          .Width(MuBi8Width)
+        ) u_sig_unstable_buf (
+          .in_i(sig_unstable),
+          .out_o(sig_unstable_buf)
+        );
+
+        // if any xor indicates signal is unstable, output the reset
+        // value.
+        prim_clock_mux2 #(
+          .NoFpgaBufG(1'b1)
+        ) u_mux (
+          .clk0_i(mubi_q[k]),
+          .clk1_i(reset_value[k]),
+          .sel_i(|sig_unstable_buf),
+          .clk_o(mubi[k])
+        );
+      end
+
+    end else begin : gen_no_stable_chks
+      assign mubi = mubi_sync;
+    end
   end else begin : gen_no_flops
     logic unused_clk;
     logic unused_rst;
diff --git a/hw/ip/prim/rtl/prim_mubi_pkg.sv b/hw/ip/prim/rtl/prim_mubi_pkg.sv
index a92aa7f..dafff61 100644
--- a/hw/ip/prim/rtl/prim_mubi_pkg.sv
+++ b/hw/ip/prim/rtl/prim_mubi_pkg.sv
@@ -27,6 +27,11 @@
     return ~(val inside {MuBi4True, MuBi4False});
   endfunction : mubi4_test_invalid
 
+  // Convert a 1-bit input value to a mubi output (True if set, False otherwise)
+  function automatic mubi4_e mubi4_bool_to_mubi(logic val);
+    return (val ? MuBi4True : MuBi4False);
+  endfunction : mubi4_bool_to_mubi
+
   // Test whether the multibit value signals an "enabled" condition.
   // The strict version of this function requires
   // the multibit value to equal True.
@@ -151,6 +156,11 @@
     return ~(val inside {MuBi8True, MuBi8False});
   endfunction : mubi8_test_invalid
 
+  // Convert a 1-bit input value to a mubi output (True if set, False otherwise)
+  function automatic mubi8_e mubi8_bool_to_mubi(logic val);
+    return (val ? MuBi8True : MuBi8False);
+  endfunction : mubi8_bool_to_mubi
+
   // Test whether the multibit value signals an "enabled" condition.
   // The strict version of this function requires
   // the multibit value to equal True.
@@ -275,6 +285,11 @@
     return ~(val inside {MuBi12True, MuBi12False});
   endfunction : mubi12_test_invalid
 
+  // Convert a 1-bit input value to a mubi output (True if set, False otherwise)
+  function automatic mubi12_e mubi12_bool_to_mubi(logic val);
+    return (val ? MuBi12True : MuBi12False);
+  endfunction : mubi12_bool_to_mubi
+
   // Test whether the multibit value signals an "enabled" condition.
   // The strict version of this function requires
   // the multibit value to equal True.
@@ -399,6 +414,11 @@
     return ~(val inside {MuBi16True, MuBi16False});
   endfunction : mubi16_test_invalid
 
+  // Convert a 1-bit input value to a mubi output (True if set, False otherwise)
+  function automatic mubi16_e mubi16_bool_to_mubi(logic val);
+    return (val ? MuBi16True : MuBi16False);
+  endfunction : mubi16_bool_to_mubi
+
   // Test whether the multibit value signals an "enabled" condition.
   // The strict version of this function requires
   // the multibit value to equal True.
diff --git a/util/design/data/prim_mubi_pkg.sv.tpl b/util/design/data/prim_mubi_pkg.sv.tpl
index d75a02f..94d0b01 100644
--- a/util/design/data/prim_mubi_pkg.sv.tpl
+++ b/util/design/data/prim_mubi_pkg.sv.tpl
@@ -34,6 +34,11 @@
     return ~(val inside {MuBi${nbits}True, MuBi${nbits}False});
   endfunction : mubi${nbits}_test_invalid
 
+  // Convert a 1-bit input value to a mubi output (True if set, False otherwise)
+  function automatic mubi${nbits}_e mubi${nbits}_bool_to_mubi(logic val);
+    return (val ? MuBi${nbits}True : MuBi${nbits}False);
+  endfunction : mubi${nbits}_bool_to_mubi
+
   // Test whether the multibit value signals an "enabled" condition.
   // The strict version of this function requires
   // the multibit value to equal True.
diff --git a/util/design/data/prim_mubi_sync.sv.tpl b/util/design/data/prim_mubi_sync.sv.tpl
index aadd75d..e44a0ce 100644
--- a/util/design/data/prim_mubi_sync.sv.tpl
+++ b/util/design/data/prim_mubi_sync.sv.tpl
@@ -23,6 +23,12 @@
   // In special cases where the receiver is in the same clock domain as the sender,
   // this can be set to 0. However, it is recommended to leave this at 1.
   parameter bit AsyncOn = 1,
+  // This controls whether the mubi module institutes stability checks when
+  // AsyncOn is set.  If stability checks are on, a 3rd stage of storage is
+  // added after the synchronizers and the outputs only updated if the 3rd
+  // stage and synchronizer agree.  If they do not agree, the ResetValue
+  // is output instead.
+  parameter bit StabilityCheck = 0,
   // Reset value for the sync flops
   parameter mubi${n_bits}_t ResetValue = MuBi${n_bits}False
 ) (
@@ -36,6 +42,7 @@
 
   logic [MuBi${n_bits}Width-1:0] mubi;
   if (AsyncOn) begin : gen_flops
+    logic [MuBi${n_bits}Width-1:0] mubi_sync;
     prim_flop_2sync #(
       .Width(MuBi${n_bits}Width),
       .ResetValue(MuBi${n_bits}Width'(ResetValue))
@@ -43,8 +50,60 @@
       .clk_i,
       .rst_ni,
       .d_i(MuBi${n_bits}Width'(mubi_i)),
-      .q_o(mubi)
+      .q_o(mubi_sync)
     );
+
+    if (StabilityCheck) begin : gen_stable_chks
+      logic [MuBi${n_bits}Width-1:0] mubi_q;
+      prim_flop #(
+        .Width(MuBi${n_bits}Width),
+        .ResetValue(MuBi${n_bits}Width'(ResetValue))
+      ) u_prim_flop_3rd_stage (
+        .clk_i,
+        .rst_ni,
+        .d_i(mubi_sync),
+        .q_o(mubi_q)
+      );
+
+      logic [MuBi${n_bits}Width-1:0] sig_unstable;
+      prim_xor2 #(
+        .Width(MuBi${n_bits}Width)
+      ) u_mubi_xor (
+        .in0_i(mubi_sync),
+        .in1_i(mubi_q),
+        .out_o(sig_unstable)
+      );
+
+      logic [MuBi${n_bits}Width-1:0] reset_value;
+      assign reset_value = ResetValue;
+
+      for (genvar k = 0; k < MuBi${n_bits}Width; k++) begin : gen_bufs_muxes
+        logic [MuBi${n_bits}Width-1:0] sig_unstable_buf;
+
+        // each mux gets its own buffered output, this ensures the OR-ing
+        // cannot be defeated in one place.
+        prim_sec_anchor_buf #(
+          .Width(MuBi${n_bits}Width)
+        ) u_sig_unstable_buf (
+          .in_i(sig_unstable),
+          .out_o(sig_unstable_buf)
+        );
+
+        // if any xor indicates signal is unstable, output the reset
+        // value.
+        prim_clock_mux2 #(
+          .NoFpgaBufG(1'b1)
+        ) u_mux (
+          .clk0_i(mubi_q[k]),
+          .clk1_i(reset_value[k]),
+          .sel_i(|sig_unstable_buf),
+          .clk_o(mubi[k])
+        );
+      end
+
+    end else begin : gen_no_stable_chks
+      assign mubi = mubi_sync;
+    end
   end else begin : gen_no_flops
     logic unused_clk;
     logic unused_rst;