[dv] Fix 2 issues in top-level mem seq

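1. Limit the number of mem accesses in run_mem_partial_access_vseq (default cap of 100k, overridable via the max_accesses_for_partial_mem_access_vseq plusarg)
2. Wait until all outstanding accesses are done before the task returns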

Signed-off-by: Weicai Yang <weicai@google.com>
diff --git a/hw/dv/sv/cip_lib/cip_base_vseq.sv b/hw/dv/sv/cip_lib/cip_base_vseq.sv
index 1aa2236..041c20e 100644
--- a/hw/dv/sv/cip_lib/cip_base_vseq.sv
+++ b/hw/dv/sv/cip_lib/cip_base_vseq.sv
@@ -461,69 +461,78 @@
 
   // test partial mem read with non-blocking random read/write
   virtual task run_mem_partial_access_vseq(int num_times);
-      int  num_words;
+    uint num_accesses;
+    // limit to 100k accesses if mem is very big
+    uint max_accesses = 100_000;
 
-      foreach (cfg.mem_ranges[i]) begin
-        if (get_mem_access_by_addr(ral, cfg.mem_ranges[i].start_addr) != "RO") begin
-          num_words += cfg.mem_ranges[i].end_addr - cfg.mem_ranges[i].start_addr;
+    void'($value$plusargs("max_accesses_for_partial_mem_access_vseq=%0d", max_accesses));
+
+    // calculate how many accesses to run based on mem size, capped at max_accesses
+    foreach (cfg.mem_ranges[i]) begin
+      if (get_mem_access_by_addr(ral, cfg.mem_ranges[i].start_addr) != "RO") begin
+        num_accesses += (cfg.mem_ranges[i].end_addr - cfg.mem_ranges[i].start_addr) >> 2;
+        if (num_accesses >= max_accesses) begin
+          num_accesses = max_accesses;
+          break;
         end
       end
-      num_words = num_words >> 2;
+    end
 
-      repeat (num_words * num_times * 10) begin
-        fork
-          begin
-            bit [TL_AW-1:0]  addr;
-            bit [TL_DW-1:0]  data;
-            bit [TL_DBW-1:0] mask;
-            randcase
-              1: begin // write
-                dv_base_mem mem;
-                int mem_idx = $urandom_range(0, cfg.mem_ranges.size - 1);
+    repeat (num_accesses * num_times) begin
+      fork
+        begin
+          bit [TL_AW-1:0]  addr;
+          bit [TL_DW-1:0]  data;
+          bit [TL_DBW-1:0] mask;
+          randcase
+            1: begin // write
+              dv_base_mem mem;
+              int mem_idx = $urandom_range(0, cfg.mem_ranges.size - 1);
 
-                `DV_CHECK_STD_RANDOMIZE_WITH_FATAL(addr,
-                    addr inside {[cfg.mem_ranges[mem_idx].start_addr :
-                                  cfg.mem_ranges[mem_idx].end_addr]};)
+              `DV_CHECK_STD_RANDOMIZE_WITH_FATAL(addr,
+                  addr inside {[cfg.mem_ranges[mem_idx].start_addr :
+                                cfg.mem_ranges[mem_idx].end_addr]};)
 
-                if (get_mem_access_by_addr(ral, addr) != "RO") begin
-                  `downcast(mem, get_mem_by_addr(ral, cfg.mem_ranges[mem_idx].start_addr))
-                  if (mem.get_mem_partial_write_support()) mask = get_rand_contiguous_mask();
-                  else                                     mask = '1;
-                  data = $urandom;
-                  tl_access(.addr(addr), .write(1), .data(data), .mask(mask), .blocking(1));
+              if (get_mem_access_by_addr(ral, addr) != "RO") begin
+                `downcast(mem, get_mem_by_addr(ral, cfg.mem_ranges[mem_idx].start_addr))
+                if (mem.get_mem_partial_write_support()) mask = get_rand_contiguous_mask();
+                else                                     mask = '1;
+                data = $urandom;
+                tl_access(.addr(addr), .write(1), .data(data), .mask(mask), .blocking(1));
 
-                  if (!cfg.under_reset) begin
-                    addr[1:0] = 0;
-                    exp_mem[addr] = data;
-                    mem_exist_addr_q.push_back(addr);
-                  end
+                if (!cfg.under_reset) begin
+                  addr[1:0] = 0;
+                  exp_mem[addr] = data;
+                  mem_exist_addr_q.push_back(addr);
                 end
               end
-              // Randomly pick a previously written address for partial read.
-              exp_mem.size > 0: begin // read
-                // get all the programmed addresses and randomly pick one
-                addr = mem_exist_addr_q[$urandom_range(0, mem_exist_addr_q.size - 1)];
-                if (get_mem_access_by_addr(ral, addr) != "WO") begin;
-                  mask = get_rand_contiguous_mask();
-                  tl_access(.addr(addr), .write(0), .data(data), .mask(mask), .blocking(1));
+            end
+            // Randomly pick a previously written address for partial read.
+            exp_mem.size > 0: begin // read
+              // get all the programmed addresses and randomly pick one
+              addr = mem_exist_addr_q[$urandom_range(0, mem_exist_addr_q.size - 1)];
+              if (get_mem_access_by_addr(ral, addr) != "WO") begin;
+                mask = get_rand_contiguous_mask();
+                tl_access(.addr(addr), .write(0), .data(data), .mask(mask), .blocking(1));
 
-                  if (!cfg.under_reset) begin
-                    bit [TL_DW-1:0]  compare_mask;
-                    bit [TL_DW-1:0]  act_data, exp_data;
-                    // calculate compare_mask which is data width wide
-                    foreach (mask[i]) compare_mask[i*8+:8] = {8{mask[i]}};
-                    act_data = data & compare_mask;
-                    exp_data = exp_mem[addr] & compare_mask;
-                    `DV_CHECK_EQ(act_data, exp_data, $sformatf("addr 0x%0h read out mismatch", addr))
-                  end
+                if (!cfg.under_reset) begin
+                  bit [TL_DW-1:0]  compare_mask;
+                  bit [TL_DW-1:0]  act_data, exp_data;
+                  // calculate compare_mask which is data width wide
+                  foreach (mask[i]) compare_mask[i*8+:8] = {8{mask[i]}};
+                  act_data = data & compare_mask;
+                  exp_data = exp_mem[addr] & compare_mask;
+                  `DV_CHECK_EQ(act_data, exp_data, $sformatf("addr 0x%0h read out mismatch", addr))
                 end
               end
-            endcase
-          end
-        join_none
-        #0; // for outstanding_accesses to be updated
-        wait_if_max_outstanding_accesses_reached();
-      end
+            end
+          endcase
+        end
+      join_none
+      #0; // for outstanding_accesses to be updated
+      csr_utils_pkg::wait_if_max_outstanding_accesses_reached();
+    end
+    csr_utils_pkg::wait_no_outstanding_access();
   endtask
 
   // This task runs random csr and mem accesses in parallel, which can be used to cross with