[dv] Improve sim performace for tl_errors

Constraint took 99.5% of time and found tl_access_unmapped_addr took most of the time.
Use 2 ways to improve perf
1. Move for-loop calculation out of the constraint and use `inside` instead
2. Split the randomization into 2 parts and randomize addr first.
Sim time is reduced from 530s to 7s in worst case (running all tl_access_unmapped_addr)
and from 38s to 10s in normal case (running mixed tl error cases)

Signed-off-by: Weicai Yang <weicai@google.com>
diff --git a/hw/dv/sv/cip_lib/cip_base_vseq__tl_errors.svh b/hw/dv/sv/cip_lib/cip_base_vseq__tl_errors.svh
index efdbac9..fb8bba4 100644
--- a/hw/dv/sv/cip_lib/cip_base_vseq__tl_errors.svh
+++ b/hw/dv/sv/cip_lib/cip_base_vseq__tl_errors.svh
@@ -21,27 +21,34 @@
   end
 
 virtual task tl_access_unmapped_addr();
+  bit [TL_AW-1:0] normalized_csr_addrs[] = new[cfg.csr_addrs.size()];
+  bit [TL_AW-1:0] addr_mask = cfg.csr_addr_map_size - 1;
+  addr_mask[1:0] = 0;
+  // calculate normalized address outside the loop to improve perf
+  foreach (cfg.csr_addrs[i]) normalized_csr_addrs[i] = cfg.csr_addrs[i] - cfg.csr_base_addr;
+  // randomize unmapped_addr first to improve perf
   repeat ($urandom_range(10, 100)) begin
+    bit [TL_AW-1:0] unmapped_addr;
+    `DV_CHECK_STD_RANDOMIZE_WITH_FATAL(unmapped_addr, {
+        !((unmapped_addr & addr_mask) inside {normalized_csr_addrs});
+        foreach (cfg.mem_ranges[i]) {
+          !((unmapped_addr & addr_mask)
+              inside {[cfg.mem_ranges[i].start_addr : cfg.mem_ranges[i].end_addr]});}
+        })
     `create_tl_access_error_case(
         tl_access_unmapped_addr,
-        foreach (local::cfg.csr_addrs[i]) {
-          {addr[TL_AW-1:2], 2'b00} % local::cfg.csr_addr_map_size !=
-              local::cfg.csr_addrs[i] - local::cfg.csr_base_addr;
-        }
-        foreach (local::cfg.mem_ranges[i]) {
-          !(addr % local::cfg.csr_addr_map_size
-              inside {[local::cfg.mem_ranges[i].start_addr : local::cfg.mem_ranges[i].end_addr]});
-        })
+        addr == unmapped_addr;)
   end
 endtask
 
 virtual task tl_write_csr_word_unaligned_addr();
+  bit [TL_AW-1:0] addr_mask = cfg.csr_addr_map_size - 1;
   repeat ($urandom_range(10, 100)) begin
     `create_tl_access_error_case(
         tl_write_csr_word_unaligned_addr,
         opcode inside {tlul_pkg::PutFullData, tlul_pkg::PutPartialData};
         foreach (local::cfg.mem_ranges[i]) {
-          !(addr % local::cfg.csr_addr_map_size
+          !((addr & addr_mask)
               inside {[local::cfg.mem_ranges[i].start_addr : local::cfg.mem_ranges[i].end_addr]});
         }
         addr[1:0] != 2'b00;)
@@ -125,20 +132,26 @@
       cfg.devmode_vif.drive($urandom_range(0, 1));
     end
     // use multiple thread to create outstanding access
-    repeat ($urandom_range(5, 10)) fork
-      begin
-        randcase
-          1: tl_access_unmapped_addr();
-          1: tl_write_csr_word_unaligned_addr();
-          1: tl_write_less_than_csr_width();
-          1: tl_protocol_err();
-          // only run this task when the mem supports error response
-          test_mem_err_byte_write: tl_write_mem_less_than_word();
-          test_mem_err_read:       tl_read_mem_err();
-        endcase
-      end
-    join_none
-    wait fork;
+    fork
+      begin: isolation_fork
+        repeat ($urandom_range(10, 20)) begin
+          fork
+            begin
+              randcase
+                1: tl_access_unmapped_addr();
+                1: tl_write_csr_word_unaligned_addr();
+                1: tl_write_less_than_csr_width();
+                1: tl_protocol_err();
+                // only run this task when the mem supports error response
+                test_mem_err_byte_write: tl_write_mem_less_than_word();
+                test_mem_err_read:       tl_read_mem_err();
+              endcase
+            end
+          join_none
+        end
+        wait fork;
+      end: isolation_fork
+    join
     if (do_wait_clk) cfg.clk_rst_vif.wait_clks($urandom_range(500, 10_000));
   end // for
   cfg.tlul_assert_ctrl_vif.drive(1'b1);