[flash] Fixes to host_gnt_err handling for more graceful handling

- currently if the conditions to host_gnt_err trigger, the flash controller
  will most definitely lock-up.  While this is somewhat acceptable
  behvaior since thse conditions occur only when attacked, it is still a
  little difficult for software to use.

- This commit massages the logic around host_gnt_err to make it slightly
  more recoverable.  It is not 100% proof, since fault attacks in certain
  parts of the logic will just be unrecoverable.

- Specifically, do not use ctrl_fsm_idle to qualify host returns, instead
  use host_outstanding.  This eliminates a single point of failure that
  could lead to lock-ups.

- Better handling for host grants to info partitions.  When such an error
  occurs, future host transactions are not accepted until the existing
  pipeline of reads are fully flushed.  This ensures we get back to a clean
  state.

Signed-off-by: Timothy Chen <timothytim@google.com>
diff --git a/hw/ip/flash_ctrl/data/flash_ctrl.sv.tpl b/hw/ip/flash_ctrl/data/flash_ctrl.sv.tpl
index 9d51d02..91b3f70 100644
--- a/hw/ip/flash_ctrl/data/flash_ctrl.sv.tpl
+++ b/hw/ip/flash_ctrl/data/flash_ctrl.sv.tpl
@@ -1450,6 +1450,9 @@
      `ASSERT_PRIM_COUNT_ERROR_TRIGGER_ALERT(PhyRdDataFifoRPtr_A,
        `PHY_CORE.u_rd.u_rd_storage.gen_normal_fifo.u_fifo_cnt.gen_secure_ptrs.u_rptr,
        alert_tx_o[1])
+
+     `ASSERT_PRIM_COUNT_ERROR_TRIGGER_ALERT(PhyHostCnt_A,
+       `PHY_CORE.u_host_outstanding_cnt, alert_tx_o[1])
    end
    `endif
 
diff --git a/hw/ip/flash_ctrl/rtl/flash_ctrl.sv b/hw/ip/flash_ctrl/rtl/flash_ctrl.sv
index 1d041f8..7dfa97e 100644
--- a/hw/ip/flash_ctrl/rtl/flash_ctrl.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_ctrl.sv
@@ -1451,6 +1451,9 @@
      `ASSERT_PRIM_COUNT_ERROR_TRIGGER_ALERT(PhyRdDataFifoRPtr_A,
        `PHY_CORE.u_rd.u_rd_storage.gen_normal_fifo.u_fifo_cnt.gen_secure_ptrs.u_rptr,
        alert_tx_o[1])
+
+     `ASSERT_PRIM_COUNT_ERROR_TRIGGER_ALERT(PhyHostCnt_A,
+       `PHY_CORE.u_host_outstanding_cnt, alert_tx_o[1])
    end
    `endif
 
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy.sv b/hw/ip/flash_ctrl/rtl/flash_phy.sv
index 6b8b52b..b6fa63d 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy.sv
@@ -110,6 +110,9 @@
   logic [NumBanks-1:0]     rsp_fifo_err;
   logic [NumBanks-1:0]     core_fifo_err;
 
+  // outstanding count error per bank
+  logic [NumBanks-1:0]     cnt_err;
+
   // select which bank each is operating on
   assign host_bank_sel = host_req_i ? host_addr_i[BusAddrW-1 -: BankW] : '0;
   assign ctrl_bank_sel = flash_ctrl_i.addr[BusAddrW-1 -: BankW];
@@ -136,9 +139,10 @@
   assign flash_ctrl_o.fsm_err = |fsm_err;
   assign flash_ctrl_o.spurious_ack = |spurious_acks;
   assign flash_ctrl_o.arb_err = |arb_err;
-  assign flash_ctrl_o.host_gnt_err = |host_gnt_err;
+  assign flash_ctrl_o.host_gnt_err = |{host_gnt_err, cnt_err} ;
   assign flash_ctrl_o.fifo_err = |{rsp_fifo_err, core_fifo_err};
 
+
   // This fifo holds the expected return order
   prim_fifo_sync #(
     .Width   (BankW),
@@ -295,7 +299,8 @@
       .spurious_ack_o(spurious_acks[bank]),
       .arb_err_o(arb_err[bank]),
       .host_gnt_err_o(host_gnt_err[bank]),
-      .fifo_err_o(core_fifo_err[bank])
+      .fifo_err_o(core_fifo_err[bank]),
+      .cnt_err_o(cnt_err[bank])
     );
   end // block: gen_flash_banks
 
diff --git a/hw/ip/flash_ctrl/rtl/flash_phy_core.sv b/hw/ip/flash_ctrl/rtl/flash_phy_core.sv
index 92ddf0f..7e3d6bd 100644
--- a/hw/ip/flash_ctrl/rtl/flash_phy_core.sv
+++ b/hw/ip/flash_ctrl/rtl/flash_phy_core.sv
@@ -60,7 +60,8 @@
   output logic                       spurious_ack_o,
   output logic                       arb_err_o,
   output logic                       host_gnt_err_o,
-  output logic                       fifo_err_o
+  output logic                       fifo_err_o,
+  output logic                       cnt_err_o
 );
 
 
@@ -186,58 +187,108 @@
     .mubi_o(flash_disable)
   );
 
-  logic ctrl_fsm_idle;
-  logic host_req;
-  assign host_req = host_req_i & (arb_cnt < ArbCnt[CntWidth-1:0]) & ctrl_fsm_idle &
-                    mubi4_test_false_strict(flash_disable[HostDisableIdx]);
-  assign host_sel = host_req;
-  assign host_gnt = host_req & host_req_rdy_o;
-  assign host_req_done_o = ctrl_fsm_idle & rd_stage_data_valid;
-
-  // oustanding width is slightly larger to ensure a faulty increment is able to reach
+  // Oustanding width is slightly larger to ensure a faulty increment is able to reach
   // the higher value. For example if RspOrderDepth were 3, a clog2 of 3 would still be 2
   // and not allow the counter to increment to 4.
   localparam int OutstandingRdWidth = $clog2(RspOrderDepth+2);
   logic [OutstandingRdWidth-1:0] host_outstanding;
+  logic ctrl_fsm_idle;
+  logic host_req;
+  // SEC_CM: PHY_HOST_GRANT.CTRL.CONSISTENCY
+  // A host transaction was granted to the muxed partition, this is illegal
+  logic host_gnt_err_event;
+  assign host_gnt_err_event = (host_gnt && muxed_part != flash_ctrl_pkg::FlashPartData);
+  // Controller fsm became non idle when there are pending host transactions, this is
+  // illegal.
+  logic host_outstanding_err_event;
+  assign host_outstanding_err_event = |host_outstanding & !ctrl_fsm_idle;
+
   always_ff @(posedge clk_i or negedge rst_ni) begin
     if (!rst_ni) begin
-      host_outstanding <= '0;
-    end else if (host_gnt && !host_req_done_o && (host_outstanding <= RspOrderDepth)) begin
-      host_outstanding <= host_outstanding + 1'b1;
-    end else if (!host_gnt && host_req_done_o && |host_outstanding) begin
-      host_outstanding <= host_outstanding - 1'b1;
+      host_gnt_err_o <= '0;
+    end else if (host_gnt_err_event | host_outstanding_err_event) begin
+      host_gnt_err_o <= 1'b1;
     end
   end
-  `ASSERT(RdTxnCheck_A, host_outstanding <= RspOrderDepth)
 
-  // a host transaction granted in the previous cycle
-  logic host_gnt_q;
+  // When host grant errors occur, also create in band error responses.
+  // The error condition is held until all existing host transactions are
+  // processed.
+  logic host_gnt_rd_err;
   always_ff @(posedge clk_i or negedge rst_ni) begin
     if (!rst_ni) begin
-      host_gnt_q <= '0;
-    end else begin
-      host_gnt_q <= host_gnt;
+      host_gnt_rd_err <= '0;
+    end else if (host_outstanding == '0) begin
+      host_gnt_rd_err <= '0;
+    end else if (host_gnt_err_event) begin
+      host_gnt_rd_err <= 1'b1;
+    end
+  end
+
+  // When host outstanding errors occur, also create in band error responses.
+  // The error condition is held until all existing host and controller
+  // transactions are processed.
+  logic host_outstanding_rd_err;
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      host_outstanding_rd_err <= '0;
+    end else if (host_outstanding == '0 && ctrl_fsm_idle) begin
+      host_outstanding_rd_err <= '0;
+    end else if (host_outstanding_err_event) begin
+      host_outstanding_rd_err <= 1'b1;
     end
   end
 
   // SEC_CM: PHY_HOST_GRANT.CTRL.CONSISTENCY
-  // two error conditions
-  // 1. a host transaction was granted to the muxed partition, this is illegal
-  // 2. a host transaction was granted last cycle but somehow controller
-  //    operations have been kicked off, this is illegal
-  always_ff @(posedge clk_i or negedge rst_ni) begin
-    if (!rst_ni) begin
-      host_gnt_err_o <= '0;
-    end else if ((host_gnt && muxed_part != flash_ctrl_pkg::FlashPartData) ||
-                 (host_gnt_q && !ctrl_fsm_idle)) begin
-      host_gnt_err_o <= 1'b1;
-    end
-  end
+  prim_count #(
+    .Width(OutstandingRdWidth),
+    .ResetValue('0)
+  ) u_host_outstanding_cnt (
+    .clk_i,
+    .rst_ni,
+    .clr_i('0),
+    .set_i('0),
+    .set_cnt_i('0),
+    .incr_en_i(host_gnt && !host_req_done_o && (host_outstanding <= RspOrderDepth)),
+    .decr_en_i(!host_gnt && host_req_done_o && |host_outstanding),
+    .step_i(OutstandingRdWidth'(1'b1)),
+    .cnt_o(host_outstanding),
+    .cnt_next_o(),
+    .err_o(cnt_err_o)
+  );
+
+  // If host_outstanding is non-zero, the controller fsm must be idle..
+  // This assertion needs to be disabled for sec_cm testing
+  `ASSERT(HostTransIdleChk_A, |host_outstanding |-> ctrl_fsm_idle)
+
+  //always_ff @(posedge clk_i or negedge rst_ni) begin
+  //  if (!rst_ni) begin
+  //    host_outstanding <= '0;
+  //  end else if (host_gnt && !host_req_done_o && (host_outstanding <= RspOrderDepth)) begin
+  //    host_outstanding <= host_outstanding + 1'b1;
+  //  end else if (!host_gnt && host_req_done_o && |host_outstanding) begin
+  //    host_outstanding <= host_outstanding - 1'b1;
+  //  end
+  //end
+
+  `ASSERT(RdTxnCheck_A, host_outstanding <= RspOrderDepth)
+
+  // The host request is suppressed under a variety of conditions:
+  // 1. If a controller transaction is already ongoing.
+  // 2. If a grant or outstanding error has already been observed but not yet
+  //    fully processed.
+  assign host_req = host_req_i & (arb_cnt < ArbCnt[CntWidth-1:0]) & ctrl_fsm_idle &
+                    !host_gnt_rd_err & !host_outstanding_rd_err &
+                    mubi4_test_false_strict(flash_disable[HostDisableIdx]);
+  assign host_sel = host_req;
+  assign host_gnt = host_req & host_req_rdy_o;
+  assign host_req_done_o = |host_outstanding & rd_stage_data_valid;
 
   // controller request can only win after the entire read pipeline
   // clears
   logic ctrl_req;
   assign ctrl_req = req_i & rd_stage_idle &
+                    !host_gnt_rd_err & !host_outstanding_rd_err &
                     mubi4_test_false_strict(flash_disable[CtrlDisableIdx]);
 
   logic [1:0] data_tie_off [2];
@@ -366,6 +417,21 @@
   logic [DataWidth-1:0] rd_scrambled_data;
   logic [DataWidth-1:0] rd_descrambled_data;
 
+  // if host grant is encountered, transactions return in-band
+  // error until all transactions are flushed.
+  logic phy_rd_err;
+  assign rd_err_o = phy_rd_err |
+                    // After host_gnt_rd_err asserts, no more host requests
+                    // are granted until all transactions are flushed. This means
+                    // the last outstanding transaction is by definition the "error".
+                    (host_gnt_rd_err & host_req_done_o & host_outstanding == 1'b1) |
+                    // If ctrl_fsm_idle inexplicably goes low while there are host transactions
+                    // the transaction handling may be irreversibly broken.
+                    // The host_oustanding_rd_err makes a best effort attempt to cleanly
+                    // recover.  It responds with in-band error controller transactions until the
+                    // all pending transactions are flushed.
+                    (host_outstanding_rd_err & rd_done_o);
+
   flash_phy_rd u_rd (
     .clk_i,
     .rst_ni,
@@ -383,7 +449,7 @@
     .info_sel_i(info_sel_i),
     .rdy_o(rd_stage_rdy),
     .data_valid_o(rd_stage_data_valid),
-    .data_err_o(rd_err_o),
+    .data_err_o(phy_rd_err),
     .data_o(rd_data_o),
     .idle_o(rd_stage_idle),
     .req_o(flash_rd_req),
diff --git a/hw/top_earlgrey/ip/flash_ctrl/rtl/autogen/flash_ctrl.sv b/hw/top_earlgrey/ip/flash_ctrl/rtl/autogen/flash_ctrl.sv
index 72a9257..99e897e 100644
--- a/hw/top_earlgrey/ip/flash_ctrl/rtl/autogen/flash_ctrl.sv
+++ b/hw/top_earlgrey/ip/flash_ctrl/rtl/autogen/flash_ctrl.sv
@@ -1457,6 +1457,9 @@
      `ASSERT_PRIM_COUNT_ERROR_TRIGGER_ALERT(PhyRdDataFifoRPtr_A,
        `PHY_CORE.u_rd.u_rd_storage.gen_normal_fifo.u_fifo_cnt.gen_secure_ptrs.u_rptr,
        alert_tx_o[1])
+
+     `ASSERT_PRIM_COUNT_ERROR_TRIGGER_ALERT(PhyHostCnt_A,
+       `PHY_CORE.u_host_outstanding_cnt, alert_tx_o[1])
    end
    `endif