[dv/spi_device] Add dummy spi transfer and random delay

1. Add dummy sck and dummy csb
2. Add random delay between sck and word
3. Add a test for these 2 items
Signed-off-by: Weicai Yang <weicai@google.com>
diff --git a/hw/dv/sv/spi_agent/seq_lib/spi_host_dummy_seq.sv b/hw/dv/sv/spi_agent/seq_lib/spi_host_dummy_seq.sv
new file mode 100755
index 0000000..f1fde38
--- /dev/null
+++ b/hw/dv/sv/spi_agent/seq_lib/spi_host_dummy_seq.sv
@@ -0,0 +1,20 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+class spi_host_dummy_seq extends spi_base_seq; // sends a single dummy (non-normal) spi item
+  `uvm_object_utils(spi_host_dummy_seq)
+  `uvm_object_new
+
+  virtual task body();
+    req = spi_item::type_id::create("req");
+    start_item(req);
+    `DV_CHECK_RANDOMIZE_WITH_FATAL(req,
+                                   item_type inside {SpiTransSckNoCsb, SpiTransCsbNoScb};
+                                   data.size() == 1; // not used, set to 1 to simplify randomization
+                                   )
+    finish_item(req);
+    get_response(rsp); // the driver returns a rsp for every item, so consume it here
+  endtask
+
+endclass
diff --git a/hw/dv/sv/spi_agent/seq_lib/spi_host_seq.sv b/hw/dv/sv/spi_agent/seq_lib/spi_host_seq.sv
index de16e0a..5afb3bf 100755
--- a/hw/dv/sv/spi_agent/seq_lib/spi_host_seq.sv
+++ b/hw/dv/sv/spi_agent/seq_lib/spi_host_seq.sv
@@ -18,6 +18,7 @@
     req = spi_item::type_id::create("req");
     start_item(req);
     `DV_CHECK_RANDOMIZE_WITH_FATAL(req,
+                                   item_type == SpiTransNormal;
                                    data.size() == local::data.size();
                                    foreach (data[i]) {
                                      data[i] == local::data[i];
diff --git a/hw/dv/sv/spi_agent/seq_lib/spi_seq_list.sv b/hw/dv/sv/spi_agent/seq_lib/spi_seq_list.sv
index 9c939ec..6169475 100644
--- a/hw/dv/sv/spi_agent/seq_lib/spi_seq_list.sv
+++ b/hw/dv/sv/spi_agent/seq_lib/spi_seq_list.sv
@@ -4,3 +4,4 @@
 
 `include "spi_base_seq.sv"
 `include "spi_host_seq.sv"
+`include "spi_host_dummy_seq.sv"
diff --git a/hw/dv/sv/spi_agent/spi_agent.core b/hw/dv/sv/spi_agent/spi_agent.core
index 2e3ac51..1dc7027 100644
--- a/hw/dv/sv/spi_agent/spi_agent.core
+++ b/hw/dv/sv/spi_agent/spi_agent.core
@@ -24,6 +24,7 @@
       - seq_lib/spi_seq_list.sv: {is_include_file: true}
       - seq_lib/spi_base_seq.sv: {is_include_file: true}
       - seq_lib/spi_host_seq.sv: {is_include_file: true}
+      - seq_lib/spi_host_dummy_seq.sv: {is_include_file: true}
     file_type: systemVerilogSource
 
 targets:
diff --git a/hw/dv/sv/spi_agent/spi_agent_cfg.sv b/hw/dv/sv/spi_agent/spi_agent_cfg.sv
index 6b959ed..3436c23 100755
--- a/hw/dv/sv/spi_agent/spi_agent_cfg.sv
+++ b/hw/dv/sv/spi_agent/spi_agent_cfg.sv
@@ -21,6 +21,16 @@
   // how many bytes monitor samples per transaction
   int             num_bytes_per_trans_in_mon = 4;
 
+  // enable randomly injecting extra delay between 2 sck/word
+  bit  en_extra_dly_btw_sck;
+  uint max_extra_dly_ns_btw_sck     = 100;  // small delay to avoid transfer timeout
+  uint extra_dly_chance_pc_btw_sck  = 5;    // percentage of extra delay btw each spi clock edge
+  // Note: can't handle word delay if a word is split across multiple csb.
+  // In that case, control the delay at the seq level
+  bit  en_extra_dly_btw_word;
+  uint max_extra_dly_ns_btw_word    = 1000; // no timeout btw word
+  uint extra_dly_chance_pc_btw_word = 5;    // percentage of extra delay btw each word
+
   // interface handle used by driver, monitor & the sequencer
   virtual spi_if  vif;
 
@@ -33,6 +43,12 @@
     `uvm_field_int (sck_phase,        UVM_DEFAULT)
     `uvm_field_int (host_bit_dir,     UVM_DEFAULT)
     `uvm_field_int (device_bit_dir,   UVM_DEFAULT)
+    `uvm_field_int (en_extra_dly_btw_sck,         UVM_DEFAULT)
+    `uvm_field_int (max_extra_dly_ns_btw_sck,     UVM_DEFAULT)
+    `uvm_field_int (extra_dly_chance_pc_btw_sck,  UVM_DEFAULT)
+    `uvm_field_int (en_extra_dly_btw_word,        UVM_DEFAULT)
+    `uvm_field_int (max_extra_dly_ns_btw_word,    UVM_DEFAULT)
+    `uvm_field_int (extra_dly_chance_pc_btw_word, UVM_DEFAULT)
   `uvm_object_utils_end
 
   `uvm_object_new
diff --git a/hw/dv/sv/spi_agent/spi_agent_pkg.sv b/hw/dv/sv/spi_agent/spi_agent_pkg.sv
index d52781a..f1bc2b4 100755
--- a/hw/dv/sv/spi_agent/spi_agent_pkg.sv
+++ b/hw/dv/sv/spi_agent/spi_agent_pkg.sv
@@ -13,6 +13,13 @@
   `include "dv_macros.svh"
 
   // local types
+  // transaction type
+  typedef enum {
+    SpiTransNormal,    // normal SPI trans
+    SpiTransSckNoCsb,  // bad SPI trans with clk but no csb
+    SpiTransCsbNoScb   // bad SPI trans with csb but no clk; NOTE(review): "Scb" looks like a typo of "Sck"
+  } spi_trans_type_e;
+
   // sck edge type - used by driver and monitor to wait for the right edge based on CPOL / CPHA
   typedef enum {
     LeadingEdge,
diff --git a/hw/dv/sv/spi_agent/spi_host_driver.sv b/hw/dv/sv/spi_agent/spi_host_driver.sv
index 208c0ef..75225d1 100644
--- a/hw/dv/sv/spi_agent/spi_host_driver.sv
+++ b/hw/dv/sv/spi_agent/spi_host_driver.sv
@@ -35,10 +35,12 @@
       forever begin
         if (sck_pulses > 0 || cfg.sck_on) begin
           cfg.vif.sck <= ~cfg.vif.sck;
-          #(cfg.sck_period_ps / 2 * 1ps);
+          #((cfg.sck_period_ps / 2 + get_rand_extra_delay_ns_btw_sck() * 1000) * 1ps);
           cfg.vif.sck <= ~cfg.vif.sck;
-          #(cfg.sck_period_ps / 2 * 1ps);
+          #((cfg.sck_period_ps / 2 + get_rand_extra_delay_ns_btw_sck() * 1000) * 1ps);
           if (sck_pulses > 0) sck_pulses--;
+          // dly after a word transfer is completed
+          if (sck_pulses % 32 == 0) #(get_rand_extra_delay_ns_btw_word() * 1ns);
         end else begin
           @(cfg.sck_on, sck_pulses);
           if (sck_pulses > 0) begin
@@ -67,38 +69,77 @@
       $cast(rsp, req.clone());
       rsp.set_id_info(req);
       `uvm_info(`gfn, $sformatf("spi_host_driver: rcvd item:\n%0s", req.sprint()), UVM_HIGH)
-      cfg.vif.csb <= 1'b0;
-      sck_pulses = req.data.size() * 8;
-
-      // for mode 1 and 3, get the leading edges out of the way
-      cfg.wait_sck_edge(LeadingEdge);
-
-      // drive data
-      for (int i = 0; i < req.data.size(); i++) begin
-        logic [7:0] host_byte;
-        logic [7:0] device_byte;
-        int         which_bit;
-        host_byte = req.data[i];
-        for (int j = 0; j < 8; j++) begin
-          // drive mosi early so that it is stable at the sampling edge
-          which_bit = cfg.host_bit_dir ? j : 7 - j;
-          cfg.vif.mosi <= host_byte[which_bit];
-          // wait for sampling edge to sample miso (half cycle)
-          cfg.wait_sck_edge(SamplingEdge);
-          which_bit = cfg.device_bit_dir ? j : 7 - j;
-          device_byte[which_bit] = cfg.vif.miso;
-          // wait for driving edge to complete 1 cycle
-          if (i != req.data.size() - 1 || j != 7) cfg.wait_sck_edge(DrivingEdge);
-        end
-        rsp.data[i] = device_byte;
-      end
-
-      wait(sck_pulses == 0);
-      cfg.vif.csb <= 1'b1;
-      cfg.vif.mosi <= 1'bx;
+      case (req.item_type)
+        SpiTransNormal:   drive_normal_item();
+        SpiTransSckNoCsb: drive_sck_no_csb_item();
+        SpiTransCsbNoScb: drive_csb_no_sck_item();
+      endcase
       `uvm_info(`gfn, "spi_host_driver: item sent", UVM_HIGH)
       seq_item_port.item_done(rsp);
     end
   endtask
 
+  task drive_normal_item(); // drive one normal transfer: assert csb, shift req.data, deassert csb
+    cfg.vif.csb <= 1'b0;
+    sck_pulses = req.data.size() * 8; // request 8 sck pulses per data byte
+
+    // for mode 1 and 3, get the leading edges out of the way
+    cfg.wait_sck_edge(LeadingEdge);
+
+    // drive data
+    for (int i = 0; i < req.data.size(); i++) begin
+      logic [7:0] host_byte;
+      logic [7:0] device_byte;
+      int         which_bit;
+      host_byte = req.data[i];
+      for (int j = 0; j < 8; j++) begin
+        // drive mosi early so that it is stable at the sampling edge
+        which_bit = cfg.host_bit_dir ? j : 7 - j;
+        cfg.vif.mosi <= host_byte[which_bit];
+        // wait for sampling edge to sample miso (half cycle)
+        cfg.wait_sck_edge(SamplingEdge);
+        which_bit = cfg.device_bit_dir ? j : 7 - j;
+        device_byte[which_bit] = cfg.vif.miso;
+        // wait for driving edge to complete 1 cycle (skipped after the last bit of the last byte)
+        if (i != req.data.size() - 1 || j != 7) cfg.wait_sck_edge(DrivingEdge);
+      end
+      rsp.data[i] = device_byte; // the byte sampled from miso is returned in the response
+    end
+
+    wait(sck_pulses == 0); // wait until every requested sck pulse has been generated
+    cfg.vif.csb <= 1'b1;
+    cfg.vif.mosi <= 1'bx;
+  endtask
+
+  task drive_sck_no_csb_item(); // toggle sck without driving csb
+    repeat (req.dummy_clk_cnt) begin // dummy_clk_cnt counts sck toggles (edges), not full cycles
+      #($urandom_range(1, 100) * 1ns);
+      cfg.vif.sck <= ~cfg.vif.sck;
+    end
+    cfg.vif.sck <= cfg.sck_polarity;
+    #1ps; // make sure sck and csb (for next item) do not change at the same time
+  endtask
+
+  task drive_csb_no_sck_item(); // pulse csb low without toggling sck
+    cfg.vif.csb <= 1'b0;
+    #(req.dummy_sck_length_ns * 1ns); // despite its name, this field sets how long csb stays low
+    cfg.vif.csb <= 1'b1;
+  endtask
+
+  function uint get_rand_extra_delay_ns_btw_sck(); // extra delay in ns per sck half period
+    if (cfg.en_extra_dly_btw_sck && ($urandom % 100) < cfg.extra_dly_chance_pc_btw_sck) begin
+      return $urandom_range(1, cfg.max_extra_dly_ns_btw_sck);
+    end else begin
+      return 0; // feature disabled, or this roll fell outside the configured chance
+    end
+  endfunction
+
+  function uint get_rand_extra_delay_ns_btw_word(); // extra delay in ns after a word transfer
+    if (cfg.en_extra_dly_btw_word && ($urandom % 100) < cfg.extra_dly_chance_pc_btw_word) begin
+      return $urandom_range(1, cfg.max_extra_dly_ns_btw_word);
+    end else begin
+      return 0; // feature disabled, or this roll fell outside the configured chance
+    end
+  endfunction
+
 endclass
diff --git a/hw/dv/sv/spi_agent/spi_item.sv b/hw/dv/sv/spi_agent/spi_item.sv
index 094bbdb..ddcf0ac 100755
--- a/hw/dv/sv/spi_agent/spi_item.sv
+++ b/hw/dv/sv/spi_agent/spi_item.sv
@@ -4,16 +4,31 @@
 
 class spi_item extends uvm_sequence_item;
 
+  rand spi_trans_type_e item_type;
   // byte of data sent or received
   rand logic [7:0] data[$];
 
+  rand uint dummy_clk_cnt;
+  rand uint dummy_sck_length_ns;
+
   // constrain size of data sent / received to be at most 64kB
   constraint data_size_c {
     data.size() inside {[1:65536]};
   }
 
+  constraint dummy_clk_cnt_c {
+    dummy_clk_cnt inside {[1:1000]};
+  }
+
+  constraint dummy_sck_length_c {
+    dummy_sck_length_ns inside {[1:1000]};
+  }
+
   `uvm_object_utils_begin(spi_item)
-    `uvm_field_queue_int(data, UVM_DEFAULT)
+    `uvm_field_enum(spi_trans_type_e, item_type, UVM_DEFAULT)
+    `uvm_field_queue_int(data,              UVM_DEFAULT)
+    `uvm_field_int(dummy_clk_cnt,           UVM_DEFAULT)
+    `uvm_field_int(dummy_sck_length_ns,     UVM_DEFAULT)
   `uvm_object_utils_end
 
   `uvm_object_new
diff --git a/hw/dv/sv/spi_agent/spi_monitor.sv b/hw/dv/sv/spi_agent/spi_monitor.sv
index 21d8205..fc2c510 100755
--- a/hw/dv/sv/spi_agent/spi_monitor.sv
+++ b/hw/dv/sv/spi_agent/spi_monitor.sv
@@ -42,8 +42,6 @@
   endtask
 
   virtual protected task collect_curr_trans();
-    // for mode 1 and 3, get the leading edges out of the way
-    cfg.wait_sck_edge(LeadingEdge);
 
     fork
       begin: isolation_thread
@@ -51,47 +49,51 @@
           begin: csb_deassert_thread
             wait(cfg.vif.csb == 1'b1);
           end
-          forever begin: sample_thread
-            logic [7:0] host_byte;    // from mosi
-            logic [7:0] device_byte;  // from miso
-            int         which_bit;
-            for (int i = 0; i < 8; i++) begin
-              // wait for the sampling edge
-              cfg.wait_sck_edge(SamplingEdge);
-              // check mosi/miso not x or z
-              if (cfg.en_monitor_checks) begin
-                `DV_CHECK_CASE_NE(cfg.vif.mosi, 1'bx)
-                `DV_CHECK_CASE_NE(cfg.vif.mosi, 1'bz)
-                `DV_CHECK_CASE_NE(cfg.vif.miso, 1'bx)
-                `DV_CHECK_CASE_NE(cfg.vif.miso, 1'bz)
+          begin: sample_thread
+            // for mode 1 and 3, get the leading edges out of the way
+            cfg.wait_sck_edge(LeadingEdge);
+            forever begin
+              logic [7:0] host_byte;    // from mosi
+              logic [7:0] device_byte;  // from miso
+              int         which_bit;
+              for (int i = 0; i < 8; i++) begin
+                // wait for the sampling edge
+                cfg.wait_sck_edge(SamplingEdge);
+                // check mosi/miso not x or z
+                if (cfg.en_monitor_checks) begin
+                  `DV_CHECK_CASE_NE(cfg.vif.mosi, 1'bx)
+                  `DV_CHECK_CASE_NE(cfg.vif.mosi, 1'bz)
+                  `DV_CHECK_CASE_NE(cfg.vif.miso, 1'bx)
+                  `DV_CHECK_CASE_NE(cfg.vif.miso, 1'bz)
+                end
+                // sample mosi
+                which_bit = cfg.host_bit_dir ? i : 7 - i;
+                host_byte[which_bit] = cfg.vif.mosi;
+                cfg.vif.host_bit = which_bit;
+                cfg.vif.host_byte = host_byte;
+                // sample miso
+                which_bit = cfg.device_bit_dir ? i : 7 - i;
+                device_byte[which_bit] = cfg.vif.miso;
+                cfg.vif.device_bit = which_bit;
+                cfg.vif.device_byte = device_byte;
               end
-              // sample mosi
-              which_bit = cfg.host_bit_dir ? i : 7 - i;
-              host_byte[which_bit] = cfg.vif.mosi;
-              cfg.vif.host_bit = which_bit;
-              cfg.vif.host_byte = host_byte;
-              // sample miso
-              which_bit = cfg.device_bit_dir ? i : 7 - i;
-              device_byte[which_bit] = cfg.vif.miso;
-              cfg.vif.device_bit = which_bit;
-              cfg.vif.device_byte = device_byte;
-            end
-            host_item.data.push_back(host_byte);
-            device_item.data.push_back(device_byte);
+              host_item.data.push_back(host_byte);
+              device_item.data.push_back(device_byte);
 
-            // sending transactions when collect a word data
-            if (host_item.data.size == cfg.num_bytes_per_trans_in_mon &&
-                device_item.data.size == cfg.num_bytes_per_trans_in_mon) begin
-              `uvm_info(`gfn, $sformatf("spi_monitor: host packet:\n%0s", host_item.sprint()),
-                        UVM_HIGH)
-              `uvm_info(`gfn, $sformatf("spi_monitor: device packet:\n%0s", device_item.sprint()),
-                        UVM_HIGH)
-              host_analysis_port.write(host_item);
-              device_analysis_port.write(device_item);
-              host_item   = spi_item::type_id::create("host_item", this);
-              device_item = spi_item::type_id::create("device_item", this);
-            end
-          end
+              // sending transactions when collect a word data
+              if (host_item.data.size == cfg.num_bytes_per_trans_in_mon &&
+                  device_item.data.size == cfg.num_bytes_per_trans_in_mon) begin
+                `uvm_info(`gfn, $sformatf("spi_monitor: host packet:\n%0s", host_item.sprint()),
+                          UVM_HIGH)
+                `uvm_info(`gfn, $sformatf("spi_monitor: device packet:\n%0s", device_item.sprint()),
+                          UVM_HIGH)
+                host_analysis_port.write(host_item);
+                device_analysis_port.write(device_item);
+                host_item   = spi_item::type_id::create("host_item", this);
+                device_item = spi_item::type_id::create("device_item", this);
+              end
+            end // forever
+          end: sample_thread
         join_any
         disable fork;
       end
diff --git a/hw/ip/spi_device/data/spi_device_testplan.hjson b/hw/ip/spi_device/data/spi_device_testplan.hjson
index 1414498..e3251f0 100644
--- a/hw/ip/spi_device/data/spi_device_testplan.hjson
+++ b/hw/ip/spi_device/data/spi_device_testplan.hjson
@@ -28,7 +28,7 @@
             - Send SPI transfer unless TX is empty or RX is full
             - Read RX memory unless RX is empty'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_txrx"]
     }
     {
       name: fifo_full
@@ -37,7 +37,7 @@
             - Reduce delay to write TX memory
             - Increase delay to read RX memory'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_fifo_full"]
     }
     {
       name: fifo_underflow_overflow
@@ -49,14 +49,31 @@
               mis-aligned
             - Ensure underflow/overflow is triggered correctly'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_fifo_underflow_overflow"]
+    }
+    {
+      name: dummy_sck_and_dummy_csb
+      desc: '''
+            Drive dummy sck without csb or drive dummy csb without sck, and check that there is
+            no impact on the design'''
+      milestone: V2
+      tests: ["spi_device_dummy_item_extra_dly"]
+    }
+    {
+      name: extra_delay_on_spi
+      desc: '''
+            Add extra delay between spi clock edges or extra delay between 2 words of data.
+            This is to test that the host can pause a transfer for a while without turning off
+            csb and then stream in data again'''
+      milestone: V2
+      tests: ["spi_device_dummy_item_extra_dly"]
     }
     {
       name: async_fifo_reset
       desc: '''Reset async fifo when SPI interface is idle
             TODO: fifo may be fetching data from SRAM? What is the actual usage?'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_async_fifo_reset"]
     }
     {
       name: interrupts
@@ -67,20 +84,20 @@
             - rx error
             - overflow/underflow'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_interrupts"]
     }
     {
       name: abort
       desc: '''
             TODO: Need to clarify the behavior in spec'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_abort"]
     }
     {
       name: byte_transfer_on_spi
       desc: '''send spi transfer on byte granularity, and make sure the timer never expires'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_byte_transfer"]
     }
     {
       name: rx_timeout
@@ -90,7 +107,7 @@
               model the timer feature
             - Note: Timeout only for RX'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_rx_timeout"]
     }
     {
       name: bit_transfer_on_spi
@@ -99,19 +116,19 @@
             - If TX drives < 7 bits, this byte will be sent in next CSB.
             - If TX drives 7 bits and set CSB to high, this byte won't be sent in next CSB'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_bit_transfer"]
     }
     {
       name: extreme_fifo_setting
       desc: '''Set fifo size to 4 bytes(minimum), 2k-4bytes(maximum) and others'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_extreme_fifo_size"]
     }
     {
       name: mode
       desc: '''TODO :only support fw mode now'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_mode"]
     }
     {
       name: mem_ecc
@@ -120,27 +137,13 @@
             - Just cover basic functionality and connectivity
             - Complete verification will be done by PFV'''
       milestone: V2
-      tests: [""]
+      tests: ["spi_device_mem_ecc"]
     }
     {
       name: perf
       desc: '''Run spi_device_fifi_full_vseq with very small delays'''
       milestone: V2
-      tests: [""]
-    }
-    {
-      name: stress_all
-      desc: '''
-            - Combine above sequences in one test to run sequentially, except csr sequence
-            - Randomly add reset between each sequence'''
-      milestone: V2
-      tests: [""]
-    }
-    {
-      name: stress_all_with_reset
-      desc: '''Have random reset in parallel with stress_all and tl_errors sequences'''
-      milestone: V2
-      tests: [""]
+      tests: ["spi_device_perf"]
     }
   ]
 }
diff --git a/hw/ip/spi_device/dv/Makefile b/hw/ip/spi_device/dv/Makefile
index 6c968df..030983e 100644
--- a/hw/ip/spi_device/dv/Makefile
+++ b/hw/ip/spi_device/dv/Makefile
@@ -49,6 +49,10 @@
   UVM_TEST_SEQ   = spi_device_extreme_fifo_size_vseq
 endif
 
+ifeq (${TEST_NAME},spi_device_dummy_item_extra_dly)
+  UVM_TEST_SEQ   = spi_device_dummy_item_extra_dly_vseq
+endif
+
 ####################################################################################################
 ## Include the tool Makefile below                                                                ##
 ## Dont add anything else below it!                                                               ##
diff --git a/hw/ip/spi_device/dv/env/seq_lib/spi_device_base_vseq.sv b/hw/ip/spi_device/dv/env/seq_lib/spi_device_base_vseq.sv
index 31b4fd5..d5aad8e 100644
--- a/hw/ip/spi_device/dv/env/seq_lib/spi_device_base_vseq.sv
+++ b/hw/ip/spi_device/dv/env/seq_lib/spi_device_base_vseq.sv
@@ -131,7 +131,7 @@
 
   // set a byte of data via host agent, receive a byte of data from spi_device
   virtual task spi_host_xfer_byte(logic [7:0] host_data, ref logic [7:0] device_data);
-    spi_host_seq m_spi_host_seq = spi_host_seq::type_id::create("m_spi_host_seq");
+    spi_host_seq m_spi_host_seq;
     `uvm_create_on(m_spi_host_seq, p_sequencer.spi_sequencer_h)
     `DV_CHECK_RANDOMIZE_WITH_FATAL(m_spi_host_seq,
                                    data.size() == 1;
@@ -142,7 +142,7 @@
 
   // set a word (32 bits) of data via host agent, receive a word of data from spi_device
   virtual task spi_host_xfer_word(logic [31:0] host_data, ref logic [31:0] device_data);
-    spi_host_seq m_spi_host_seq = spi_host_seq::type_id::create("m_spi_host_seq");
+    spi_host_seq m_spi_host_seq;
     byte data_bytes[SRAM_WORD_SIZE];
     {<<8{data_bytes}} = host_data;
     `uvm_create_on(m_spi_host_seq, p_sequencer.spi_sequencer_h)
@@ -156,7 +156,7 @@
   // set a random chunk of bytes of data via host agent and receive same number of data from device
   virtual task spi_host_xfer_bytes(int num_bytes = $urandom_range(1, 512),
                                    ref logic [7:0] device_data[$]);
-    spi_host_seq m_spi_host_seq = spi_host_seq::type_id::create("m_spi_host_seq");
+    spi_host_seq m_spi_host_seq;
     `uvm_create_on(m_spi_host_seq, p_sequencer.spi_sequencer_h)
     `DV_CHECK_RANDOMIZE_WITH_FATAL(m_spi_host_seq, data.size() == num_bytes;)
     `uvm_send(m_spi_host_seq)
diff --git a/hw/ip/spi_device/dv/env/seq_lib/spi_device_dummy_item_extra_dly_vseq.sv b/hw/ip/spi_device/dv/env/seq_lib/spi_device_dummy_item_extra_dly_vseq.sv
new file mode 100644
index 0000000..1101bd6
--- /dev/null
+++ b/hw/ip/spi_device/dv/env/seq_lib/spi_device_dummy_item_extra_dly_vseq.sv
@@ -0,0 +1,36 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// test with more dummy sck/csb and more extra delay btw sck/word
+class spi_device_dummy_item_extra_dly_vseq extends spi_device_txrx_vseq;
+  `uvm_object_utils(spi_device_dummy_item_extra_dly_vseq)
+  `uvm_object_new
+
+  constraint en_dummy_host_xfer_c { // same name as the parent's dist constraint, so it overrides it
+    en_dummy_host_xfer == 1;
+  }
+
+  constraint en_extra_dly_c { // always enable the extra delay btw sck and btw word
+    en_extra_dly == 1;
+  }
+
+  virtual task spi_device_init();
+    super.spi_device_init(); // parent copies en_extra_dly into the spi agent cfg enables
+    // use more aggressive delay, but if higher than below values, timeout may happen
+    randcase // both settings are equally likely
+      1: begin
+        cfg.m_spi_agent_cfg.max_extra_dly_ns_btw_sck    = 100;
+        cfg.m_spi_agent_cfg.extra_dly_chance_pc_btw_sck = 20;
+      end
+      1: begin
+        cfg.m_spi_agent_cfg.max_extra_dly_ns_btw_sck    = 50;
+        cfg.m_spi_agent_cfg.extra_dly_chance_pc_btw_sck = 50;
+      end
+    endcase
+    // no timeout concern for delay between word
+    cfg.m_spi_agent_cfg.max_extra_dly_ns_btw_word = 10000;
+    cfg.m_spi_agent_cfg.extra_dly_chance_pc_btw_word = 20;
+  endtask
+
+endclass : spi_device_dummy_item_extra_dly_vseq
diff --git a/hw/ip/spi_device/dv/env/seq_lib/spi_device_fifo_underflow_overflow_vseq.sv b/hw/ip/spi_device/dv/env/seq_lib/spi_device_fifo_underflow_overflow_vseq.sv
index 8d6a0b4..16514a3 100644
--- a/hw/ip/spi_device/dv/env/seq_lib/spi_device_fifo_underflow_overflow_vseq.sv
+++ b/hw/ip/spi_device/dv/env/seq_lib/spi_device_fifo_underflow_overflow_vseq.sv
@@ -29,13 +29,12 @@
     end
     // there are some underflow data in fifo, clean them up
     // repeat twice in case some data in async_fifo when sram fifo is full
-    repeat (2) begin
+    for (uint i = 0; i < 2; i++) begin
       read_rx_avail_bytes(SramDataAvail, rx_avail_bytes);
       if (rx_avail_bytes == 0) break;
       read_host_words_rcvd(rx_avail_bytes / SRAM_WORD_SIZE, device_words_q);
-      // if sram fifo was full, data in async fifo will transfer to sram after sram fifo ptr
-      // is updated. Wait until data transfer is done
-      if (rx_avail_bytes == `get_rx_allocated_sram_size_bytes) begin
+      // in case data is transferred from async fifo, wait until transfer is done
+      if (i == 0) begin
         csr_spinwait(.ptr(ral.async_fifo_level.rxlvl), .exp_data(0));
         cfg.clk_rst_vif.wait_clks(2); // 2 cycle for fifo ptr to be updated
       end
diff --git a/hw/ip/spi_device/dv/env/seq_lib/spi_device_txrx_vseq.sv b/hw/ip/spi_device/dv/env/seq_lib/spi_device_txrx_vseq.sv
index 5dea488..9d3ba24 100644
--- a/hw/ip/spi_device/dv/env/seq_lib/spi_device_txrx_vseq.sv
+++ b/hw/ip/spi_device/dv/env/seq_lib/spi_device_txrx_vseq.sv
@@ -24,6 +24,9 @@
   rand uint host_sram_word_size;
   rand uint device_sram_word_size;
 
+  rand bit  en_dummy_host_xfer;
+  rand bit  en_extra_dly;
+
   // semaphores to avoid updating fifo ptr when over/underflow is happening. Issue #103
   semaphore tx_ptr_sema, rx_ptr_sema;
   bit       allow_underflow_overflow;
@@ -65,6 +68,13 @@
     num_trans == 5;
   }
 
+  constraint en_dummy_host_xfer_c {
+    en_dummy_host_xfer dist {
+      0 :/ 4,
+      1 :/ 1 // 20% enable dummy transfer
+    };
+  }
+
   // lower 2 bits are ignored, use word granularity to contrain the sram setting
   constraint sram_constraints_c {
     // if limit is 0, it means 1 word
@@ -95,12 +105,17 @@
       0 :/ 1
     };
   }
+  virtual task spi_device_init();
+    super.spi_device_init();
+    cfg.m_spi_agent_cfg.en_extra_dly_btw_sck  = en_extra_dly;
+    cfg.m_spi_agent_cfg.en_extra_dly_btw_word = en_extra_dly;
+  endtask
 
   virtual task body();
     tx_ptr_sema = new(1);
     rx_ptr_sema = new(1);
     for (int i = 1; i <= num_trans; i++) begin
-      bit done_tx_write, done_rx_read;
+      bit done_tx_write, done_rx_read, done_xfer;
       `uvm_info(`gfn, $sformatf("starting sequence %0d/%0d", i, num_trans), UVM_LOW)
       `DV_CHECK_RANDOMIZE_FATAL(this)
       spi_device_init();
@@ -115,6 +130,14 @@
         end
         begin
           while (!done_tx_write || !done_rx_read) process_spi_xfer();
+          done_xfer = 1;
+        end
+        begin // drive dummy host item
+          while (!done_xfer && en_dummy_host_xfer) begin
+            `DV_CHECK_MEMBER_RANDOMIZE_FATAL(tx_delay)
+            cfg.clk_rst_vif.wait_clks(tx_delay);
+            spi_host_xfer_dummy_item();
+          end
         end
       join
       check_for_tx_rx_idle();
@@ -232,4 +255,11 @@
     `uvm_info(`gfn, "done process_spi_xfer", UVM_MEDIUM)
   endtask : process_spi_xfer
 
+  // send one dummy item (sck without csb, or csb without sck) on the spi host sequencer
+  virtual task spi_host_xfer_dummy_item();
+    spi_host_dummy_seq m_spi_host_seq;
+    `uvm_create_on(m_spi_host_seq, p_sequencer.spi_sequencer_h)
+    `uvm_send(m_spi_host_seq)
+  endtask
+
 endclass : spi_device_txrx_vseq
diff --git a/hw/ip/spi_device/dv/env/seq_lib/spi_device_vseq_list.sv b/hw/ip/spi_device/dv/env/seq_lib/spi_device_vseq_list.sv
index 643e7bf..c98998e 100644
--- a/hw/ip/spi_device/dv/env/seq_lib/spi_device_vseq_list.sv
+++ b/hw/ip/spi_device/dv/env/seq_lib/spi_device_vseq_list.sv
@@ -9,3 +9,4 @@
 `include "spi_device_fifo_full_vseq.sv"
 `include "spi_device_fifo_underflow_overflow_vseq.sv"
 `include "spi_device_extreme_fifo_size_vseq.sv"
+`include "spi_device_dummy_item_extra_dly_vseq.sv"
diff --git a/hw/ip/spi_device/dv/env/spi_device_env.core b/hw/ip/spi_device/dv/env/spi_device_env.core
index c0e49a7..2ab1222 100644
--- a/hw/ip/spi_device/dv/env/spi_device_env.core
+++ b/hw/ip/spi_device/dv/env/spi_device_env.core
@@ -25,6 +25,7 @@
       - seq_lib/spi_device_extreme_fifo_size_vseq.sv: {is_include_file: true}
       - seq_lib/spi_device_txrx_vseq.sv: {is_include_file: true}
       - seq_lib/spi_device_fifo_full_vseq.sv: {is_include_file: true}
+      - seq_lib/spi_device_dummy_item_extra_dly_vseq.sv: {is_include_file: true}
     file_type: systemVerilogSource
 
 targets:
diff --git a/hw/ip/spi_device/dv/spi_device_sim_cfg.hjson b/hw/ip/spi_device/dv/spi_device_sim_cfg.hjson
index 88e06ff..7e3fc4a 100644
--- a/hw/ip/spi_device/dv/spi_device_sim_cfg.hjson
+++ b/hw/ip/spi_device/dv/spi_device_sim_cfg.hjson
@@ -25,9 +25,11 @@
                 "{proj_root}/hw/dv/data/common_sim_cfg.hjson",
                 // Common CIP test lists
                 "{proj_root}/hw/dv/data/tests/csr_tests.hjson",
+                "{proj_root}/hw/dv/data/tests/mem_tests.hjson",
                 "{proj_root}/hw/dv/data/tests/intr_test.hjson",
-                "{proj_root}/hw/dv/data/tests/tl_access_tests.hjson",
-                "{proj_root}/hw/dv/data/tests/stress_tests.hjson"]
+                "{proj_root}/hw/dv/data/tests/tl_access_tests.hjson"
+                //"{proj_root}/hw/dv/data/tests/stress_tests.hjson" // no test yet
+                ]
 
   // Add additional tops for simulation.
   sim_tops: ["-top spi_device_bind"]
@@ -65,5 +67,10 @@
       name: spi_device_extreme_fifo_size
       uvm_test_seq: spi_device_extreme_fifo_size_vseq
     }
+
+    {
+      name: spi_device_dummy_item_extra_dly
+      uvm_test_seq: spi_device_dummy_item_extra_dly_vseq
+    }
   ]
 }