[spi_device/dv] randomize sram setting and add test for extreme size

1. Randomly allocate the host and device address ranges within the sram
2. Add an extreme test that covers the minimum and maximum sram fifo sizes
diff --git a/hw/ip/spi_device/dv/Makefile b/hw/ip/spi_device/dv/Makefile
index 78f11b9..808862a 100644
--- a/hw/ip/spi_device/dv/Makefile
+++ b/hw/ip/spi_device/dv/Makefile
@@ -77,6 +77,10 @@
   UVM_TEST_SEQ   = spi_device_fifo_underflow_overflow_vseq
 endif
 
+ifeq (${TEST_NAME},spi_device_extreme_fifo_size)
+  UVM_TEST_SEQ   = spi_device_extreme_fifo_size_vseq
+endif
+
 ####################################################################################################
 ## Include the tool Makefile below                                                                ##
 ## Dont add anything else below it!                                                               ##
diff --git a/hw/ip/spi_device/dv/env/seq_lib/spi_device_base_vseq.sv b/hw/ip/spi_device/dv/env/seq_lib/spi_device_base_vseq.sv
index 30ac945..2ca6cc5 100644
--- a/hw/ip/spi_device/dv/env/seq_lib/spi_device_base_vseq.sv
+++ b/hw/ip/spi_device/dv/env/seq_lib/spi_device_base_vseq.sv
@@ -10,7 +10,8 @@
     );
   `uvm_object_utils(spi_device_base_vseq)
 
-  bit do_spi_device_init = 1'b0;
+  bit do_spi_device_init    = 1'b0;
+  bit do_spi_device_mem_cfg = 1'b1;
 
   bit [1:0] spi_mode = 0; // TODO fixed value in spec now
 
@@ -19,13 +20,13 @@
   rand bit host_bit_dir;
   rand bit device_bit_dir;
 
-  rand bit [31:0] sram_host_base_addr;
-  rand bit [31:0] sram_host_limit_addr;
-  rand bit [31:0] sram_device_base_addr;
-  rand bit [31:0] sram_device_limit_addr;
+  rand uint sram_host_base_addr;
+  rand uint sram_host_limit_addr;
+  rand uint sram_device_base_addr;
+  rand uint sram_device_limit_addr;
 
-  // TODO: remove this eventually
-  constraint sanity_constraints_c {
+  // override it in random seq
+  constraint sram_constraints_c {
     // host and device addr space within sram should not overlap
     sram_host_base_addr == 32'h0;
     sram_host_limit_addr == 32'h1ff; // 512 bytes
@@ -45,10 +46,6 @@
     if (do_spi_device_init) spi_device_init();
   endtask
 
-  virtual task dut_shutdown();
-    super.dut_shutdown();
-  endtask
-
   // check if any remaining data
   virtual task check_for_tx_rx_idle();
     uint tx_avail_bytes, rx_avail_bytes;
@@ -79,9 +76,12 @@
     ral.cfg.rx_order.set(host_bit_dir);
     //ral.cfg.timer_v.set(rx_timer); TODO do it later
     csr_update(.csr(ral.cfg));
-
-    set_sram_host_addr_range(sram_host_base_addr, sram_host_limit_addr);
-    set_sram_device_addr_range(sram_device_base_addr, sram_device_limit_addr);
+    if (do_spi_device_mem_cfg) begin
+      set_sram_host_addr_range(sram_host_base_addr, sram_host_limit_addr);
+      set_sram_device_addr_range(sram_device_base_addr, sram_device_limit_addr);
+      // only configure sram once
+      do_spi_device_mem_cfg = 0;
+    end
   endtask
 
   virtual task reset_fifo(bit txfifo, bit rxfifo);
@@ -151,8 +151,9 @@
     tx_wptr = ral.txf_ptr.wptr.get_mirrored_value();
     foreach (device_data[i]) begin
       bit [TL_DW-1:0] tx_wptr_addr;
-      tx_wptr_addr = cfg.sram_start_addr + ral.txf_addr.base.get_mirrored_value()
-                     + tx_wptr[SRAM_MSB:0];
+      bit [TL_DW-1:0] tx_base_addr = ral.txf_addr.base.get_mirrored_value();
+      tx_base_addr[1:0] = 0; // ignore lower 2 bits
+      tx_wptr_addr = cfg.sram_start_addr + tx_base_addr + tx_wptr[SRAM_MSB:0];
       `uvm_info(`gfn, $sformatf({"tx_wptr[SRAM_MSB:0] = 0x%0h, tx_wptr_phase_bit = 0x%0h, ",
                                  "tx_sram_size_bytes = 0x%0h, tx_wptr_addr = 0x%0h"},
                                  tx_wptr[SRAM_MSB:0], tx_wptr[SRAM_PTR_PHASE_BIT],
@@ -181,8 +182,9 @@
     repeat (num_words) begin
       bit   [TL_DW-1:0] rx_rptr_addr;
       logic [TL_DW-1:0] word_data;
-      rx_rptr_addr = cfg.sram_start_addr + ral.rxf_addr.base.get_mirrored_value()
-                     + rx_rptr[SRAM_MSB:0];
+      bit   [TL_DW-1:0] rx_base_addr = ral.rxf_addr.base.get_mirrored_value();
+      rx_base_addr[1:0] = 0; // ignore lower 2 bits
+      rx_rptr_addr = cfg.sram_start_addr + rx_base_addr + rx_rptr[SRAM_MSB:0];
       `uvm_info(`gfn, $sformatf({"rx_rptr[SRAM_MSB:0] = 0x%0h, rx_rptr_phase_bit = 0x%0h, ",
                                  "rx_sram_size_bytes = 0x%0h, rx_rptr_addr = 0x%0h"},
                                  rx_rptr[SRAM_MSB:0], rx_rptr[SRAM_PTR_PHASE_BIT],
diff --git a/hw/ip/spi_device/dv/env/seq_lib/spi_device_extreme_fifo_size_vseq.sv b/hw/ip/spi_device/dv/env/seq_lib/spi_device_extreme_fifo_size_vseq.sv
new file mode 100644
index 0000000..9c2ec58
--- /dev/null
+++ b/hw/ip/spi_device/dv/env/seq_lib/spi_device_extreme_fifo_size_vseq.sv
@@ -0,0 +1,35 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+// test tx/rx sram fifo with extreme sizes: 1 word, half of sram and SRAM_SIZE[31:2]-1 words
+class spi_device_extreme_fifo_size_vseq extends spi_device_txrx_vseq;
+  `uvm_object_utils(spi_device_extreme_fifo_size_vseq)
+  `uvm_object_new
+
+  constraint sram_size_constraints_c { // sizes are in words, SRAM_SIZE[31:2] is SRAM_SIZE >> 2
+    host_sram_word_size dist {
+      1 :/ 1,                     // 1 word
+      SRAM_SIZE[31:2]/2     :/ 1, // half of the total mem
+      SRAM_SIZE[31:2]-1     :/ 1, // max size, 1 word less than the total mem
+      [2:SRAM_SIZE[31:2]-2] :/ 1  // any other size, weight 1 is shared by the whole range
+    };
+    device_sram_word_size dist {
+      1 :/ 1,                     // 1 word
+      SRAM_SIZE[31:2]/2     :/ 1, // half of the total mem
+      SRAM_SIZE[31:2]-1     :/ 1, // max size, 1 word less than the total mem
+      [2:SRAM_SIZE[31:2]-2] :/ 1  // any other size, weight 1 is shared by the whole range
+    };
+  }
+
+  // send less total data than the parent sequence to keep sim time down, since transfers
+  // take much longer to finish when the fifo is very small
+  constraint tx_total_bytes_c {
+    tx_total_bytes inside {[SRAM_SIZE/2 : SRAM_SIZE*2]};
+    tx_total_bytes[1:0] == 0; // word aligned
+  }
+
+  constraint num_trans_c { // fewer transactions than the parent sequence
+    num_trans == 2;
+  }
+endclass : spi_device_extreme_fifo_size_vseq
diff --git a/hw/ip/spi_device/dv/env/seq_lib/spi_device_sanity_vseq.sv b/hw/ip/spi_device/dv/env/seq_lib/spi_device_sanity_vseq.sv
index 8adc8cd..a353353 100644
--- a/hw/ip/spi_device/dv/env/seq_lib/spi_device_sanity_vseq.sv
+++ b/hw/ip/spi_device/dv/env/seq_lib/spi_device_sanity_vseq.sv
@@ -8,7 +8,6 @@
   `uvm_object_utils(spi_device_sanity_vseq)
   `uvm_object_new
 
-
   constraint num_trans_c {
     num_trans inside {[1:6]};
   }
diff --git a/hw/ip/spi_device/dv/env/seq_lib/spi_device_txrx_vseq.sv b/hw/ip/spi_device/dv/env/seq_lib/spi_device_txrx_vseq.sv
index 81d806b..c4e04de 100644
--- a/hw/ip/spi_device/dv/env/seq_lib/spi_device_txrx_vseq.sv
+++ b/hw/ip/spi_device/dv/env/seq_lib/spi_device_txrx_vseq.sv
@@ -20,6 +20,10 @@
   rand uint rx_delay;
   rand uint spi_delay;
 
+  // helper variables for sram randomization
+  rand uint host_sram_word_size;
+  rand uint device_sram_word_size;
+
   constraint tx_total_bytes_c {
     tx_total_bytes inside {[SRAM_SIZE : 10 * SRAM_SIZE]};
     tx_total_bytes[1:0] == 0; // word aligned
@@ -54,7 +58,38 @@
   }
 
   constraint num_trans_c {
-    num_trans inside {[5:8]};
+    num_trans == 5;
+  }
+
+  // lower 2 bits are ignored, use word granularity to constrain the sram setting
+  constraint sram_constraints_c {
+    // limit is an inclusive word index, so limit == base (e.g. both 0) means 1 word
+    sram_host_limit_addr[31:2]   < (SRAM_SIZE/SRAM_WORD_SIZE);
+    sram_device_limit_addr[31:2] < (SRAM_SIZE/SRAM_WORD_SIZE);
+
+    sram_host_base_addr   <= sram_host_limit_addr;
+    sram_device_base_addr <= sram_device_limit_addr;
+    // host and device addr space within sram should not overlap, compared at word granularity
+    if (sram_host_limit_addr < sram_device_base_addr) { // host range sits below device range
+      sram_host_limit_addr[31:2] < sram_device_base_addr[31:2];
+      sram_device_limit_addr < SRAM_SIZE;
+    } else { // otherwise device range must sit below host range
+      sram_device_limit_addr[31:2] < sram_host_base_addr[31:2];
+      sram_host_limit_addr < SRAM_SIZE;
+    }
+    host_sram_word_size   == sram_host_limit_addr[31:2] - sram_host_base_addr[31:2] + 1;
+    device_sram_word_size == sram_device_limit_addr[31:2] - sram_device_base_addr[31:2] + 1;
+  }
+
+  // each fifo is 25 words or more; as the two must not overlap, neither exceeds total-25 words
+  // override it to test extreme cases
+  constraint sram_size_constraints_c {
+    host_sram_word_size   inside {[25:SRAM_SIZE/SRAM_WORD_SIZE]};
+    device_sram_word_size inside {[25:SRAM_SIZE/SRAM_WORD_SIZE]};
+    host_sram_word_size == device_sram_word_size dist {
+      1 :/ 2, // host and device fifo have the same size
+      0 :/ 1  // host and device fifo sizes differ
+    };
+  }
 
   virtual task body();
diff --git a/hw/ip/spi_device/dv/env/seq_lib/spi_device_vseq_list.sv b/hw/ip/spi_device/dv/env/seq_lib/spi_device_vseq_list.sv
index 26bdba1..643e7bf 100644
--- a/hw/ip/spi_device/dv/env/seq_lib/spi_device_vseq_list.sv
+++ b/hw/ip/spi_device/dv/env/seq_lib/spi_device_vseq_list.sv
@@ -8,3 +8,4 @@
 `include "spi_device_txrx_vseq.sv"
 `include "spi_device_fifo_full_vseq.sv"
 `include "spi_device_fifo_underflow_overflow_vseq.sv"
+`include "spi_device_extreme_fifo_size_vseq.sv"
diff --git a/hw/ip/spi_device/dv/env/spi_device_scoreboard.sv b/hw/ip/spi_device/dv/env/spi_device_scoreboard.sv
index 115f92b..666b5c0 100644
--- a/hw/ip/spi_device/dv/env/spi_device_scoreboard.sv
+++ b/hw/ip/spi_device/dv/env/spi_device_scoreboard.sv
@@ -82,6 +82,8 @@
       uint tx_limit = ral.txf_addr.limit.get_mirrored_value();
       uint rx_limit = ral.rxf_addr.limit.get_mirrored_value();
       uint mem_addr = item.a_addr - cfg.sram_start_addr;
+      tx_base[1:0] = 0;
+      rx_base[1:0] = 0;
       if (mem_addr inside {[tx_base : tx_base + tx_limit]}) begin // TX address
         if (write && channel == AddrChannel) begin
           tx_mem.write(mem_addr - tx_base, item.a_data);
diff --git a/hw/ip/spi_device/dv/tests/spi_device_base_test.sv b/hw/ip/spi_device/dv/tests/spi_device_base_test.sv
index ebbe910..51d99a7 100644
--- a/hw/ip/spi_device/dv/tests/spi_device_base_test.sv
+++ b/hw/ip/spi_device/dv/tests/spi_device_base_test.sv
@@ -9,7 +9,7 @@
 
   virtual function void build_phase(uvm_phase phase);
     max_quit_count  = 50;
-    test_timeout_ns = 400_000_000; // 400ms
+    test_timeout_ns = 600_000_000; // 600ms
     super.build_phase(phase);
     // configure the spi agent to be in Host mode
     cfg.m_spi_agent_cfg.mode = Host;