[i2c, rtl] Loopback test and other changes

1. Enabled line loopback test
2. Implemented target waiting for host to pull SCL low before stretching the clock
3. Increased depth from 32 to 64 in all four FIFOs
4. Fixed event_nak implementation
5. Added synchronizers for SCL/SDA inputs
6. Modified clock stretching in target mode: when both ENABLEADDR and STOP are set, the first address match is not stretched; hardware then deasserts STOP so that the second address match is stretched
7. Other minor changes to the target mode

Signed-off-by: Igor Kouznetsov <igor.kouznetsov@wdc.com>
diff --git a/hw/ip/i2c/data/i2c.hjson b/hw/ip/i2c/data/i2c.hjson
index b14ce8a..f432c1b 100644
--- a/hw/ip/i2c/data/i2c.hjson
+++ b/hw/ip/i2c/data/i2c.hjson
@@ -87,6 +87,14 @@
                 Enable Target I2C functionality
                 '''
         }
+        { bits: "2"
+          resval: "0"
+          name: "LLPBK"
+          desc: '''
+                Enable I2C line loopback test
+                If line loopback is enabled, the internal design sees ACQ and RX data as "1"
+                '''
+        }
       ]
     }
     { name:     "STATUS"
@@ -500,21 +508,24 @@
     { name: "STRETCH_CTRL"
       desc: "I2C target clock stretching control"
       swaccess: "rw"
-      hwaccess: "hro"
       fields: [
         { bits: "0"
+          hwaccess: "hro"
           name: "ENABLEADDR"
           desc: "Enable clock stretching after address matching completes"
         }
         { bits: "1"
+          hwaccess: "hro"
           name: "ENABLETX"
           desc: "Enable clock stretching after ongoing transmit (read) transaction completes"
         }
         { bits: "2"
+          hwaccess: "hro"
           name: "ENABLEACQ"
           desc: "Enable clock stretching after ongoing acquire (write) transaction completes"
         }
         { bits: "3"
+          hwaccess: "hrw"
           name: "STOP"
           desc: "Stop clock stretching and resume normal operation"
         }
diff --git a/hw/ip/i2c/rtl/i2c_core.sv b/hw/ip/i2c/rtl/i2c_core.sv
index f4ba525..f5c1f36 100644
--- a/hw/ip/i2c/rtl/i2c_core.sv
+++ b/hw/ip/i2c/rtl/i2c_core.sv
@@ -50,8 +50,11 @@
   logic        stretch_en_tx;
   logic        stretch_en_acq;
   logic        stretch_stop;
+  logic        stretch_stop_clr;
   logic [31:0] host_timeout;
 
+  logic scl_sync;
+  logic sda_sync;
   logic scl_out_fsm;
   logic sda_out_fsm;
 
@@ -133,6 +136,7 @@
 
   logic        host_enable;
   logic        target_enable;
+  logic        line_loopback;
 
   logic [6:0]  target_address0;
   logic [6:0]  target_mask0;
@@ -152,7 +156,7 @@
   assign hw2reg.status.hostidle.d = host_idle;
   assign hw2reg.status.targetidle.d = target_idle;
   assign hw2reg.status.rxempty.d = ~rx_fifo_rvalid;
-  assign hw2reg.rdata.d = rx_fifo_rdata;
+  assign hw2reg.rdata.d = line_loopback ? 8'hff : rx_fifo_rdata;
   assign hw2reg.fifo_status.fmtlvl.d = fmt_fifo_depth;
   assign hw2reg.fifo_status.rxlvl.d = rx_fifo_depth;
   assign hw2reg.val.scl_rx.d = scl_rx_val;
@@ -164,8 +168,10 @@
   assign hw2reg.status.acqempty.d = ~acq_fifo_rvalid;
   assign hw2reg.fifo_status.txlvl.d = tx_fifo_depth;
   assign hw2reg.fifo_status.acqlvl.d = acq_fifo_depth;
-  assign hw2reg.acqdata.abyte.d = acq_fifo_rdata[7:0];
-  assign hw2reg.acqdata.signal.d = acq_fifo_rdata[9:8];
+  assign hw2reg.acqdata.abyte.d = line_loopback ? 8'hff : acq_fifo_rdata[7:0];
+  assign hw2reg.acqdata.signal.d = line_loopback ? 2'b11 : acq_fifo_rdata[9:8];
+  assign hw2reg.stretch_ctrl.stop.d = 1'b0;
+  assign hw2reg.stretch_ctrl.stop.de = stretch_stop_clr;
 
   assign override = reg2hw.ovrd.txovrden;
 
@@ -174,6 +180,7 @@
 
   assign host_enable = reg2hw.ctrl.enablehost.q;
   assign target_enable = reg2hw.ctrl.enabletarget.q;
+  assign line_loopback = reg2hw.ctrl.llpbk.q;
 
   assign target_address0 = reg2hw.target_id.address0.q;
   assign target_mask0 = reg2hw.target_id.mask0.q;
@@ -257,18 +264,19 @@
   // The fifo write enable is controlled by fbyte, start, stop, read, rcont,
   // and nakok field qe bits.
   // When all qe bits are asserted, fdata is injected into the fifo.
-  assign fmt_fifo_wvalid     = reg2hw.fdata.fbyte.qe &
+  assign fmt_fifo_wvalid     = line_loopback ? 1'b1 :
+                               reg2hw.fdata.fbyte.qe &
                                reg2hw.fdata.start.qe &
                                reg2hw.fdata.stop.qe  &
                                reg2hw.fdata.read.qe  &
                                reg2hw.fdata.rcont.qe &
                                reg2hw.fdata.nakok.qe;
-  assign fmt_fifo_wdata[7:0] = reg2hw.fdata.fbyte.q;
-  assign fmt_fifo_wdata[8]   = reg2hw.fdata.start.q;
-  assign fmt_fifo_wdata[9]   = reg2hw.fdata.stop.q;
-  assign fmt_fifo_wdata[10]  = reg2hw.fdata.read.q;
-  assign fmt_fifo_wdata[11]  = reg2hw.fdata.rcont.q;
-  assign fmt_fifo_wdata[12]  = reg2hw.fdata.nakok.q;
+  assign fmt_fifo_wdata[7:0] = line_loopback ? rx_fifo_rdata : reg2hw.fdata.fbyte.q;
+  assign fmt_fifo_wdata[8]   = line_loopback ? 1'b0 : reg2hw.fdata.start.q;
+  assign fmt_fifo_wdata[9]   = line_loopback ? 1'b0 : reg2hw.fdata.stop.q;
+  assign fmt_fifo_wdata[10]  = line_loopback ? 1'b0 : reg2hw.fdata.read.q;
+  assign fmt_fifo_wdata[11]  = line_loopback ? 1'b0 : reg2hw.fdata.rcont.q;
+  assign fmt_fifo_wdata[12]  = line_loopback ? 1'b1 : reg2hw.fdata.nakok.q;
 
   assign fmt_byte               = fmt_fifo_rvalid ? fmt_fifo_rdata[7:0] : '0;
   assign fmt_flag_start_before  = fmt_fifo_rvalid ? fmt_fifo_rdata[8] : '0;
@@ -287,7 +295,7 @@
   prim_fifo_sync #(
     .Width   (13),
     .Pass    (1'b1),
-    .Depth   (32)
+    .Depth   (64)
   ) u_i2c_fmtfifo (
     .clk_i,
     .rst_ni,
@@ -307,7 +315,7 @@
   prim_fifo_sync #(
     .Width   (8),
     .Pass    (1'b0),
-    .Depth   (32)
+    .Depth   (64)
   ) u_i2c_rxfifo (
     .clk_i,
     .rst_ni,
@@ -326,13 +334,13 @@
   assign event_tx_overflow = tx_fifo_wvalid & ~tx_fifo_wready;
   assign event_acq_overflow = acq_fifo_wvalid & ~acq_fifo_wready;
 
-  assign tx_fifo_wvalid = reg2hw.txdata.qe;
-  assign tx_fifo_wdata  = reg2hw.txdata.q;
+  assign tx_fifo_wvalid = line_loopback ? 1'b1 : reg2hw.txdata.qe;
+  assign tx_fifo_wdata  = line_loopback ? acq_fifo_rdata[7:0] : reg2hw.txdata.q;
 
   prim_fifo_sync #(
     .Width(8),
     .Pass(1'b1),
-    .Depth(32)
+    .Depth(64)
   ) u_i2c_txfifo (
     .clk_i,
     .rst_ni,
@@ -352,7 +360,7 @@
   prim_fifo_sync #(
     .Width(10),
     .Pass(1'b0),
-    .Depth(32)
+    .Depth(64)
   ) u_i2c_acqfifo (
     .clk_i,
     .rst_ni,
@@ -367,13 +375,34 @@
     .full_o  ()
   );
 
+  // Pass incoming SCL and SDA through prim_flop_2sync before the FSM; reset value is 1 (high)
+  prim_flop_2sync #(
+    .Width(1),
+    .ResetValue(1'b1)
+  ) u_i2c_sync_scl (
+    .clk_i,
+    .rst_ni,
+    .d_i (scl_i),
+    .q_o (scl_sync)
+  );
+
+  prim_flop_2sync #(
+    .Width(1),
+    .ResetValue(1'b1)
+  ) u_i2c_sync_sda (
+    .clk_i,
+    .rst_ni,
+    .d_i (sda_i),
+    .q_o (sda_sync)
+  );
+
   i2c_fsm u_i2c_fsm (
     .clk_i,
     .rst_ni,
 
-    .scl_i,
+    .scl_i                   (scl_sync),
     .scl_o                   (scl_out_fsm),
-    .sda_i,
+    .sda_i                   (sda_sync),
     .sda_o                   (sda_out_fsm),
 
     .host_enable_i           (host_enable),
@@ -430,6 +459,8 @@
     .target_address1_i       (target_address1),
     .target_mask1_i          (target_mask1),
 
+    .stretch_stop_clr_o      (stretch_stop_clr),
+
     .event_nak_o             (event_nak),
     .event_scl_interference_o(event_scl_interference),
     .event_sda_interference_o(event_sda_interference),
diff --git a/hw/ip/i2c/rtl/i2c_fsm.sv b/hw/ip/i2c/rtl/i2c_fsm.sv
index 18a318a..64a947b 100644
--- a/hw/ip/i2c/rtl/i2c_fsm.sv
+++ b/hw/ip/i2c/rtl/i2c_fsm.sv
@@ -66,6 +66,8 @@
   input logic [6:0] target_address1_i,
   input logic [6:0] target_mask1_i,
 
+  output logic stretch_stop_clr_o,       // hardware to deassert stretch_stop bit
+
   output logic event_nak_o,              // target didn't Ack when expected
   output logic event_scl_interference_o, // other device forcing SCL low
   output logic event_sda_interference_o, // other device forcing SDA low
@@ -115,12 +117,14 @@
   logic [7:0]  input_byte;    // register for reads from host
   logic        input_byte_clr;// clear input_byte contents
   logic [31:0] scl_high_cnt;  // counter for continuously released scl_i
+  logic        addr_stop;     // indicates stretch_stop and stretch_en_addr are both asserted
+  logic        stretch_stop_clr;
 
   // Target bit counter variables
   logic [3:0]  bit_idx;       // bit index including ack/nack
   logic        bit_ack;       // indicates ACK bit been sent or received
   logic        rw_bit;        // indicates host wants to read (1) or write (0)
-  logic        host_ack;      // indicates host acqnowledged transmitted byte
+  logic        host_ack;      // indicates host acknowledged transmitted byte
 
   // Clock counter implementation
   typedef enum logic [3:0] {
@@ -266,16 +270,17 @@
   end
 
   // Bit counter on the target side
-  assign bit_ack = (bit_idx == 4'd8) && !start_det; // ack
+  assign bit_ack = (bit_idx == 4'd9) && !start_det; // ack
 
   // Increment counter on negative SCL edge
   always_ff @ (posedge clk_i or negedge rst_ni) begin : tgt_bit_counter
     if (!rst_ni) begin
       bit_idx <= 4'd0;
-    end else if (start_det || bit_ack) begin
+    end else if (start_det) begin
       bit_idx <= 4'd0;
     end else if (scl_i_q && !scl_i) begin
-      bit_idx <= bit_idx + 1'b1;
+      if (bit_ack) bit_idx <= 4'd0;
+      else bit_idx <= bit_idx + 1'b1;
     end else begin
       bit_idx <= bit_idx;
     end
@@ -318,6 +323,17 @@
     end
   end
 
+  // Request a clear of the stretch_stop bit: follows addr_stop with one cycle of delay
+  always_ff @ (posedge clk_i or negedge rst_ni) begin : stretch_addr_sp
+    if (!rst_ni) begin
+      stretch_stop_clr <= 1'b0;
+    end else begin
+      stretch_stop_clr <= addr_stop; // not sticky: a held clear would keep overriding SW writes
+    end
+  end
+
+  assign stretch_stop_clr_o = stretch_stop_clr;
+
   // State definitions
   typedef enum logic [5:0] {
     Idle, PopFmtFifo, SetupStart, HoldStart, SetupStop, HoldStop,
@@ -326,11 +342,11 @@
         ReadClockLow, ReadSetupBit, ReadClockPulse, ReadHoldBit,
         HostClockLowAck, HostSetupBitAck, HostClockPulseAck, HostHoldBitAck,
         Active, ClockStart, ClockStop,
-        AddrRead, AddrAckWait, AddrAckSetup, AddrAckPulse, AddrAckHold,
+        AcquireStart, AddrRead, AddrAckWait, AddrAckSetup, AddrAckPulse, AddrAckHold,
         TransmitWait, TransmitSetup, TransmitPulse, TransmitHold, TransmitAck,
         AcquireByte, AcquireAckWait, AcquireAckSetup, AcquireAckPulse, AcquireAckHold,
         PopTxFifo, AcquireSrP, StretchTxEmpty, StretchAcqFull, StretchAddr,
-        StretchAcquire, StretchTransmit
+        StretchAcquire, StretchTransmit, StretchTransmitWait
   } state_e;
 
   state_e state_q, state_d;
@@ -433,7 +449,7 @@
         host_idle_o = 1'b0;
         sda_temp = 1'b1;
         scl_temp = 1'b1;
-        if (!sda_i && !fmt_flag_nak_ok_i) event_nak_o = 1'b1;
+        if (sda_i && !fmt_flag_nak_ok_i) event_nak_o = 1'b1;
         if ((stretch > stretch_timeout_i) && timeout_enable_i) begin
           event_stretch_timeout_o = 1'b1;
         end
@@ -546,6 +562,10 @@
         else scl_temp = 1'b0;
         fmt_fifo_rready_o = 1'b1;
       end
+      // AcquireStart: hold start condition
+      AcquireStart : begin
+        target_idle_o = 1'b0;
+      end
       // AddrRead: read and compare target address
       AddrRead : begin
         target_idle_o = 1'b0;
@@ -648,6 +668,10 @@
         target_idle_o = 1'b0;
         scl_temp = 1'b0;
       end
+      // StretchTransmitWait: target waits for host to pull SCL low before stretching the clock
+      StretchTransmitWait : begin
+        target_idle_o = 1'b0;
+      end
       // StretchTransmit: target stretches the clock after transmitting a byte
       StretchTransmit : begin
         target_idle_o = 1'b0;
@@ -705,6 +729,7 @@
     log_stop = 1'b0;
     restart = 1'b0;
     input_byte_clr = 1'b0;
+    addr_stop = 1'b0;
 
     unique case (state_q)
       // Idle: initial state, SDA and SCL are released (high)
@@ -715,10 +740,7 @@
           else state_d = Active;
         end else if (target_enable_i) begin
           if (!start_det) state_d = Idle;
-          else begin
-            state_d = AddrRead;
-            input_byte_clr = 1'b1;
-          end
+          else state_d = AcquireStart;
         end
       end
 
@@ -981,6 +1003,14 @@
         end
       end
 
+      // AcquireStart: after a start, wait for SCL to fall before moving on to AddrRead
+      AcquireStart : begin
+        if (scl_i_q && !scl_i) begin
+          state_d = AddrRead;
+          input_byte_clr = 1'b1;
+        end
+      end
+
       // AddrRead: read and compare target address
       AddrRead : begin
         if (bit_ack) begin
@@ -995,8 +1025,13 @@
       // AddrAckWait: pause before acknowledging
       AddrAckWait : begin
         if (tcount_q == 20'd1) begin
-          if (stretch_en_addr_i) state_d = StretchAddr;
-          else state_d = AddrAckSetup;
+          if (!scl_i) begin // no transition is selected here while SCL is still high
+            if (stretch_en_addr_i && !stretch_stop_i) state_d = StretchAddr;
+            else if (stretch_en_addr_i && stretch_stop_i) begin
+              state_d = AddrAckSetup;
+              addr_stop = 1'b1; // skip stretching this time and flag that stop should be cleared
+            end else state_d = AddrAckSetup;
+          end
         end
       end
       // AddrAckSetup: target pulls SDA low while SCL is low
@@ -1061,7 +1096,7 @@
       TransmitAck : begin
         if (scl_i) begin
           if (host_ack) begin
-            if (stretch_en_tx_i) state_d = StretchTransmit;
+            if (stretch_en_tx_i) state_d = StretchTransmitWait;
             else state_d = PopTxFifo;
           end else begin
             if (start_det || stop_det) state_d = AcquireSrP;
@@ -1094,8 +1129,10 @@
       // AcquireAckWait: pause before acknowledging
       AcquireAckWait : begin
         if (tcount_q == 20'd1) begin
-          if (stretch_en_acq_i) state_d = StretchAcquire;
-          else state_d = AcquireAckSetup;
+          if (!scl_i) begin // no transition is selected here while SCL is still high
+            if (stretch_en_acq_i) state_d = StretchAcquire;
+            else state_d = AcquireAckSetup;
+          end
         end
       end
       // AcquireAckSetup: target pulls SDA low while SCL is low
@@ -1137,6 +1174,11 @@
         else state_d = AcquireAckSetup;
       end
 
+      // StretchTransmitWait: target waits for host to pull SCL low before stretching the clock
+      StretchTransmitWait : begin
+        if (!scl_i) state_d = StretchTransmit;
+      end
+
       // StretchTransmit: target stretches the clock after transmitting a byte
       StretchTransmit : begin
         if (!stretch_stop_i) state_d = StretchTransmit;
@@ -1175,6 +1217,7 @@
         log_stop = 1'b0;
         restart = 1'b0;
         input_byte_clr = 1'b0;
+        addr_stop = 1'b0;
       end
     endcase
   end
diff --git a/hw/ip/i2c/rtl/i2c_reg_pkg.sv b/hw/ip/i2c/rtl/i2c_reg_pkg.sv
index 607635a..9eceb8a 100644
--- a/hw/ip/i2c/rtl/i2c_reg_pkg.sv
+++ b/hw/ip/i2c/rtl/i2c_reg_pkg.sv
@@ -189,6 +189,9 @@
     struct packed {
       logic        q;
     } enabletarget;
+    struct packed {
+      logic        q;
+    } llpbk;
   } i2c_reg2hw_ctrl_reg_t;
 
   typedef struct packed {
@@ -503,12 +506,19 @@
     } signal;
   } i2c_hw2reg_acqdata_reg_t;
 
+  typedef struct packed {
+    struct packed {
+      logic        d;
+      logic        de;
+    } stop;
+  } i2c_hw2reg_stretch_ctrl_reg_t;
+
   // Register -> HW type
   typedef struct packed {
-    i2c_reg2hw_intr_state_reg_t intr_state; // [388:373]
-    i2c_reg2hw_intr_enable_reg_t intr_enable; // [372:357]
-    i2c_reg2hw_intr_test_reg_t intr_test; // [356:325]
-    i2c_reg2hw_ctrl_reg_t ctrl; // [324:323]
+    i2c_reg2hw_intr_state_reg_t intr_state; // [389:374]
+    i2c_reg2hw_intr_enable_reg_t intr_enable; // [373:358]
+    i2c_reg2hw_intr_test_reg_t intr_test; // [357:326]
+    i2c_reg2hw_ctrl_reg_t ctrl; // [325:323]
     i2c_reg2hw_rdata_reg_t rdata; // [322:314]
     i2c_reg2hw_fdata_reg_t fdata; // [313:295]
     i2c_reg2hw_fifo_ctrl_reg_t fifo_ctrl; // [294:280]
@@ -528,12 +538,13 @@
 
   // HW -> register type
   typedef struct packed {
-    i2c_hw2reg_intr_state_reg_t intr_state; // [115:84]
-    i2c_hw2reg_status_reg_t status; // [83:74]
-    i2c_hw2reg_rdata_reg_t rdata; // [73:66]
-    i2c_hw2reg_fifo_status_reg_t fifo_status; // [65:42]
-    i2c_hw2reg_val_reg_t val; // [41:10]
-    i2c_hw2reg_acqdata_reg_t acqdata; // [9:0]
+    i2c_hw2reg_intr_state_reg_t intr_state; // [117:86]
+    i2c_hw2reg_status_reg_t status; // [85:76]
+    i2c_hw2reg_rdata_reg_t rdata; // [75:68]
+    i2c_hw2reg_fifo_status_reg_t fifo_status; // [67:44]
+    i2c_hw2reg_val_reg_t val; // [43:12]
+    i2c_hw2reg_acqdata_reg_t acqdata; // [11:2]
+    i2c_hw2reg_stretch_ctrl_reg_t stretch_ctrl; // [1:0]
   } i2c_hw2reg_t;
 
   // Register offsets
diff --git a/hw/ip/i2c/rtl/i2c_reg_top.sv b/hw/ip/i2c/rtl/i2c_reg_top.sv
index 6e98f64..1665b78 100644
--- a/hw/ip/i2c/rtl/i2c_reg_top.sv
+++ b/hw/ip/i2c/rtl/i2c_reg_top.sv
@@ -238,6 +238,9 @@
   logic ctrl_enabletarget_qs;
   logic ctrl_enabletarget_wd;
   logic ctrl_enabletarget_we;
+  logic ctrl_llpbk_qs;
+  logic ctrl_llpbk_wd;
+  logic ctrl_llpbk_we;
   logic status_fmtfull_qs;
   logic status_fmtfull_re;
   logic status_rxfull_qs;
@@ -1510,6 +1513,32 @@
   );
 
 
+  //   F[llpbk]: 2:2
+  prim_subreg #(
+    .DW      (1),
+    .SWACCESS("RW"),
+    .RESVAL  (1'h0)
+  ) u_ctrl_llpbk (
+    .clk_i   (clk_i    ),
+    .rst_ni  (rst_ni  ),
+
+    // from register interface
+    .we     (ctrl_llpbk_we),
+    .wd     (ctrl_llpbk_wd),
+
+    // from internal hardware
+    .de     (1'b0),
+    .d      ('0  ),
+
+    // to internal hardware
+    .qe     (),
+    .q      (reg2hw.ctrl.llpbk.q ),
+
+    // to register interface (read)
+    .qs     (ctrl_llpbk_qs)
+  );
+
+
   // R[status]: V(True)
 
   //   F[fmtfull]: 0:0
@@ -2740,8 +2769,8 @@
     .wd     (stretch_ctrl_stop_wd),
 
     // from internal hardware
-    .de     (1'b0),
-    .d      ('0  ),
+    .de     (hw2reg.stretch_ctrl.stop.de),
+    .d      (hw2reg.stretch_ctrl.stop.d ),
 
     // to internal hardware
     .qe     (),
@@ -2987,6 +3016,9 @@
   assign ctrl_enabletarget_we = addr_hit[3] & reg_we & !reg_error;
   assign ctrl_enabletarget_wd = reg_wdata[1];
 
+  assign ctrl_llpbk_we = addr_hit[3] & reg_we & !reg_error;
+  assign ctrl_llpbk_wd = reg_wdata[2];
+
   assign status_fmtfull_re = addr_hit[4] & reg_re & !reg_error;
 
   assign status_rxfull_re = addr_hit[4] & reg_re & !reg_error;
@@ -3200,6 +3232,7 @@
       addr_hit[3]: begin
         reg_rdata_next[0] = ctrl_enablehost_qs;
         reg_rdata_next[1] = ctrl_enabletarget_qs;
+        reg_rdata_next[2] = ctrl_llpbk_qs;
       end
 
       addr_hit[4]: begin