[dv/top-level] use SPI to load bootstrap

Load the SW image over SPI (bootstrap) instead of via backdoor. Simulation time
is about 10x longer than with backdoor loading, so this is only applied to the
UART test for now.

Signed-off-by: Weicai Yang <weicai@google.com>
diff --git a/hw/dv/data/sim.mk b/hw/dv/data/sim.mk
index 72d111a..d69f8f8 100644
--- a/hw/dv/data/sim.mk
+++ b/hw/dv/data/sim.mk
@@ -76,6 +76,15 @@
 	# Compile the sw test code and generate the image.
 	${LOCK_SW_BUILD} "ninja -C ${sw_build_dir}/build-out \
 		${sw_test}_export_${sw_build_device}"
+	# Convert sw image to frame format
+	# TODO only needed for loading sw image through SPI. Can enhance this later
+	${LOCK_SW_BUILD} "ninja -C ${sw_build_dir}/build-out sw/host/spiflash/spiflash_export"
+	${LOCK_SW_BUILD} "${sw_build_dir}/build-bin/sw/host/spiflash/spiflash --input \
+		${sw_build_dir}/build-bin/${sw_test}_${sw_build_device}.bin \
+		--dump-frames=${run_dir}/sw.frames.bin"
+	${LOCK_SW_BUILD} "srec_cat ${run_dir}/sw.frames.bin --binary \
+		--offset 0x0 --byte-swap 4 --fill 0xff -within ${run_dir}/sw.frames.bin -binary -range-pad 4 \
+		--output ${run_dir}/sw.frames.vmem --vmem"
 	# Extract the sw test logs.
 	${proj_root}/util/device_sw_utils/extract_sw_logs.py \
 		-e "${sw_build_dir}/build-out/${sw_test}_${sw_build_device}.elf" \
diff --git a/hw/dv/sv/spi_agent/spi_agent_cfg.sv b/hw/dv/sv/spi_agent/spi_agent_cfg.sv
index 3436c23..cf8e025 100755
--- a/hw/dv/sv/spi_agent/spi_agent_cfg.sv
+++ b/hw/dv/sv/spi_agent/spi_agent_cfg.sv
@@ -23,11 +23,13 @@
 
   // enable randomly injecting extra delay between 2 sck/word
   bit  en_extra_dly_btw_sck;
+  uint min_extra_dly_ns_btw_sck     = 1;
   uint max_extra_dly_ns_btw_sck     = 100;  // small delay to avoid transfer timeout
   uint extra_dly_chance_pc_btw_sck  = 5;    // percentage of extra delay btw each spi clock edge
   // Note: can't handle word delay, if a word is splitted into multiple csb.
   // In that case, control delay in seq level
   bit  en_extra_dly_btw_word;
+  uint min_extra_dly_ns_btw_word    = 1;
   uint max_extra_dly_ns_btw_word    = 1000; // no timeout btw word
   uint extra_dly_chance_pc_btw_word = 5;    // percentage of extra delay btw each word
 
diff --git a/hw/dv/sv/spi_agent/spi_host_driver.sv b/hw/dv/sv/spi_agent/spi_host_driver.sv
index e01f613..6263214 100644
--- a/hw/dv/sv/spi_agent/spi_host_driver.sv
+++ b/hw/dv/sv/spi_agent/spi_host_driver.sv
@@ -128,7 +128,7 @@
 
   function uint get_rand_extra_delay_ns_btw_sck();
     if (cfg.en_extra_dly_btw_sck && ($urandom % 100) < cfg.extra_dly_chance_pc_btw_sck) begin
-      return $urandom_range(1, cfg.max_extra_dly_ns_btw_sck);
+      return $urandom_range(cfg.min_extra_dly_ns_btw_sck, cfg.max_extra_dly_ns_btw_sck);
     end else begin
       return 0;
     end
@@ -136,7 +136,7 @@
 
   function uint get_rand_extra_delay_ns_btw_word();
     if (cfg.en_extra_dly_btw_word && ($urandom % 100) < cfg.extra_dly_chance_pc_btw_word) begin
-      return $urandom_range(1, cfg.max_extra_dly_ns_btw_word);
+      return $urandom_range(cfg.min_extra_dly_ns_btw_word, cfg.max_extra_dly_ns_btw_word);
     end else begin
       return 0;
     end
diff --git a/hw/top_earlgrey/dv/chip_sim_cfg.hjson b/hw/top_earlgrey/dv/chip_sim_cfg.hjson
index 9e896b1..6e683cc 100644
--- a/hw/top_earlgrey/dv/chip_sim_cfg.hjson
+++ b/hw/top_earlgrey/dv/chip_sim_cfg.hjson
@@ -138,6 +138,12 @@
       sw_test: sw/device/tests/uart_tx_rx_test
     }
     {
+      name: chip_uart_tx_rx_bootstrap
+      uvm_test_seq: chip_sw_uart_tx_rx_vseq
+      sw_test: sw/device/tests/uart_tx_rx_test
+      run_opts: ["+use_spi_load_bootstrap=1"]
+    }
+    {
       name: chip_aes_encr
       uvm_test_seq: chip_sw_base_vseq
       sw_test: sw/device/tests/aes_test
diff --git a/hw/top_earlgrey/dv/env/chip_env_cfg.sv b/hw/top_earlgrey/dv/env/chip_env_cfg.sv
index 4c61ff3..c486987 100644
--- a/hw/top_earlgrey/dv/env/chip_env_cfg.sv
+++ b/hw/top_earlgrey/dv/env/chip_env_cfg.sv
@@ -14,6 +14,9 @@
   // Write logs from sw test to separate log file as well, in addition to the simulator log file.
   bit                 write_sw_logs_to_file = 1'b1;
 
+  // 1: load the sw image over SPI (bootstrap); 0 (default): load it via backdoor
+  bit                 use_spi_load_bootstrap = 0;
+
   // chip top interfaces
   virtual clk_rst_if  usb_clk_rst_vif;
   gpio_vif            gpio_vif;
@@ -28,6 +31,7 @@
   sw_logger_vif             sw_logger_vif;
   int sw_image_widths[]     = '{32, 64};
   string                    sw_images[string];
+  string                    sw_frame_image = "sw.frames.vmem";
   virtual sw_test_status_if sw_test_status_vif;
   uint                      sw_test_timeout_ns = 5_000_000; // 5ms
 
diff --git a/hw/top_earlgrey/dv/env/chip_env_pkg.sv b/hw/top_earlgrey/dv/env/chip_env_pkg.sv
index 50d7370..49c161d 100644
--- a/hw/top_earlgrey/dv/env/chip_env_pkg.sv
+++ b/hw/top_earlgrey/dv/env/chip_env_pkg.sv
@@ -27,6 +27,7 @@
 
   // local parameters and types
   parameter uint NUM_GPIOS = 16;
+  parameter uint SPI_FRAME_BYTE_SIZE = 1024;
 
   // SW constants
   parameter bit [TL_AW-1:0] SW_DV_LOG_ADDR = 32'h1000fffc;
diff --git a/hw/top_earlgrey/dv/env/seq_lib/chip_base_vseq.sv b/hw/top_earlgrey/dv/env/seq_lib/chip_base_vseq.sv
index f04b74b..3b5c69c 100644
--- a/hw/top_earlgrey/dv/env/seq_lib/chip_base_vseq.sv
+++ b/hw/top_earlgrey/dv/env/seq_lib/chip_base_vseq.sv
@@ -70,7 +70,7 @@
     if (do_strap_pins_init) begin
       cfg.srst_n_vif.drive(1'b1);
       cfg.jtag_spi_n_vif.drive(1'b1); // Select JTAG.
-      cfg.bootstrap_vif.drive(1'b0);
+      cfg.bootstrap_vif.drive(cfg.use_spi_load_bootstrap);
     end
 
     // Now safe to do DUT init.
diff --git a/hw/top_earlgrey/dv/env/seq_lib/chip_sw_base_vseq.sv b/hw/top_earlgrey/dv/env/seq_lib/chip_sw_base_vseq.sv
index 6bebea0..993a510 100644
--- a/hw/top_earlgrey/dv/env/seq_lib/chip_sw_base_vseq.sv
+++ b/hw/top_earlgrey/dv/env/seq_lib/chip_sw_base_vseq.sv
@@ -39,7 +39,11 @@
     // Backdoor load memories with sw images.
     cfg.mem_bkdr_vifs[Rom].load_mem_from_file(cfg.sw_images["rom"]);
     // TODO: the location of the main execution image should be randomized for either bank in future
-    cfg.mem_bkdr_vifs[FlashBank0].load_mem_from_file(cfg.sw_images["sw"]);
+    if (cfg.use_spi_load_bootstrap) begin
+      spi_device_load_bootstrap();
+    end else begin
+      cfg.mem_bkdr_vifs[FlashBank0].load_mem_from_file(cfg.sw_images["sw"]);
+    end
     cfg.sw_test_status_vif.sw_test_status = SwTestStatusBooted;
   endtask
 
@@ -82,4 +86,60 @@
     endcase
   endfunction
 
+  // Sends the frame image read by read_sw_frames() over SPI, SPI_FRAME_BYTE_SIZE bytes per seq.
+  virtual task spi_device_load_bootstrap();
+    spi_host_seq m_spi_host_seq;
+    byte sw_byte_q[$];
+    uint byte_cnt = 0; // number of image bytes sent so far
+
+    // wait until spi init is done
+    // TODO: this relies on the SW logger; some tests might use the UART logger instead
+    wait(cfg.sw_logger_vif.printed_log == "HW initialisation completed, waiting for SPI input...");
+    cfg.jtag_spi_n_vif.drive(0); // Select SPI
+
+    // for the first frame of data, sdo from chip is unknown, ignore checking that
+    cfg.m_spi_agent_cfg.en_monitor_checks = 0;
+
+    read_sw_frames(sw_byte_q);
+    // An empty image passes the alignment check and skips the loop below, loading nothing.
+    if (sw_byte_q.size == 0) `uvm_fatal(get_full_name(), "No SPI frame data was read")
+    `DV_CHECK_EQ_FATAL((sw_byte_q.size % SPI_FRAME_BYTE_SIZE), 0,
+                       "SPI data isn't aligned with frame size")
+
+    while (sw_byte_q.size > byte_cnt) begin
+      `uvm_create_on(m_spi_host_seq, p_sequencer.spi_sequencer_h)
+      `DV_CHECK_RANDOMIZE_WITH_FATAL(m_spi_host_seq,
+                                     data.size() == SPI_FRAME_BYTE_SIZE;
+                                     foreach (data[i]) {data[i] == sw_byte_q[byte_cnt+i];})
+      `uvm_send(m_spi_host_seq)
+      if (byte_cnt == 0) begin
+        // SW erase flash after receiving 1st frame
+        wait(cfg.sw_logger_vif.printed_log == "Flash erase successful");
+        // sdo for next frame shouldn't be unknown
+        cfg.m_spi_agent_cfg.en_monitor_checks = 1;
+      end
+      // NOTE(review): presumably gives SW time to process the frame - confirm the 20k cycles
+      cfg.clk_rst_vif.wait_clks(20_000);
+      byte_cnt += SPI_FRAME_BYTE_SIZE;
+    end
+  endtask
+
+  // Appends the bytes of the file cfg.sw_frame_image to sw_byte_q, LSB of each word first.
+  virtual function void read_sw_frames(ref byte sw_byte_q[$]);
+    int num_returns;
+    int mem_fd = $fopen(cfg.sw_frame_image, "r");
+    bit [31:0] word_data[7];
+    string addr;
+
+    // $fopen returns 0 on failure; stop here instead of scanning an invalid descriptor.
+    if (mem_fd == 0) `uvm_fatal(get_full_name(), {"Failed to open ", cfg.sw_frame_image})
+    while (!$feof(mem_fd)) begin
+      // Format: address token + up to 7 hex words; num_returns <= 1 (no words) adds no bytes.
+      num_returns = $fscanf(mem_fd, "%s %h %h %h %h %h %h %h", addr, word_data[0], word_data[1],
+                            word_data[2], word_data[3], word_data[4], word_data[5], word_data[6]);
+      for (int i = 0; i < num_returns - 1; i++) begin
+        for (int b = 0; b < 4; b++) sw_byte_q.push_back(word_data[i][8 * b +: 8]);
+      end
+    end
+    $fclose(mem_fd);
+  endfunction
 endclass : chip_sw_base_vseq
diff --git a/hw/top_earlgrey/dv/tests/chip_base_test.sv b/hw/top_earlgrey/dv/tests/chip_base_test.sv
index ba1460d..5b33c56 100644
--- a/hw/top_earlgrey/dv/tests/chip_base_test.sv
+++ b/hw/top_earlgrey/dv/tests/chip_base_test.sv
@@ -39,6 +39,10 @@
 
     // Knob to pre-initialize RAM to 0s (disabled by default).
     void'($value$plusargs("initialize_ram=%0b", cfg.initialize_ram));
+
+    // Knob to use spi or backdoor to load bootstrap
+    void'($value$plusargs("use_spi_load_bootstrap=%0b", cfg.use_spi_load_bootstrap));
+
   endfunction : build_phase
 
 endclass : chip_base_test