diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv
index 6856bb2..cb8baa0 100644
--- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv
+++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv
@@ -97,7 +97,8 @@
 
 task rvs_monitor::rx_monitor();
   rvs_transaction tr;
-  logic [`VLEN-1:0] rt_vrf_strobe;
+  logic [`VLENB-1:0] rt_vrf_byte_strobe;
+  logic [`VLEN-1:0] rt_vrf_bit_strobe;
   bit vrf_overlap;
   tr = new("tr");
   forever begin
@@ -114,20 +115,21 @@
           // VRF
           if(rvs_if.rt_vrf_valid_rob2rt[rt_idx]) begin
             vrf_overlap = 0;
+            rt_vrf_byte_strobe = rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_strobe;
             for(int i=0; i<`VLENB; i++) begin
-              rt_vrf_strobe[i*8 +: 8] = {8{rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_strobe[i]}}; 
-            end
+              rt_vrf_bit_strobe[i*8 +: 8] = {8{rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_strobe[i]}}; 
+            end 
             foreach(tr.rt_vrf_index[i]) begin
               if(tr.rt_vrf_index[i] == rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index) begin
-                tr.rt_vrf_strobe[i] |= rt_vrf_strobe;
-                tr.rt_vrf_data[i]   |= rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_data;
+                tr.rt_vrf_strobe[i] |= rt_vrf_byte_strobe;
+                tr.rt_vrf_data[i]   = rt_vrf_bit_strobe & rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_data | ~rt_vrf_bit_strobe & tr.rt_vrf_data[i];
                 vrf_overlap = 1;
                 `uvm_info(get_type_name(), $sformatf("Uops %0d also write vrf[%0d].", rt_idx, rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index), UVM_HIGH)
               end
             end
             if(!vrf_overlap) begin
               tr.rt_vrf_index.push_back(rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index);
-              tr.rt_vrf_strobe.push_back(rt_vrf_strobe);
+              tr.rt_vrf_strobe.push_back(rt_vrf_byte_strobe);
               tr.rt_vrf_data.push_back(rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_data);
             end
           end
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv
index c51b28a..7e2f95d 100644
--- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv
+++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv
@@ -12,6 +12,7 @@
   rand vtype_t           vtype;
   rand logic [`XLEN-1:0] vl;
        logic [`XLEN-1:0] vlmax;
+       logic [`XLEN-1:0] vlmax_max;
   rand logic [`XLEN-1:0] vstart;
   rand vxrm_e            vxrm;
 
@@ -60,7 +61,7 @@
 
   /* Write back info */
        reg_idx_t  rt_vrf_index  [$];
-       vrf_t      rt_vrf_strobe [$];
+       vrf_byte_t rt_vrf_strobe [$];
        vrf_t      rt_vrf_data   [$];
 
        reg_idx_t  rt_xrf_index [$];
@@ -85,6 +86,8 @@
     else  
       vl <= (`VLENB << vtype.vlmul) >> vtype.vsew;
     vstart <= vl;
+    vstart <= vlmax_max-1;
+    vl <= vlmax_max;
   }
 
   constraint c_vm {
@@ -269,6 +272,7 @@
   `uvm_object_utils_end
 
   extern function new(string name = "Trans");
+  extern function void pre_randomize();
   extern function void post_randomize();
   extern function void asm_string_gen();
 
@@ -279,7 +283,13 @@
   super.new(name);
 endfunction: new
 
+function void rvs_transaction::pre_randomize(); // Pre-randomization hook: sets vlmax_max, which the vl/vstart constraint reads as an upper bound.
+  super.pre_randomize();
+  vlmax_max = 8 * `VLEN / 8; // NOTE(review): presumably LMUL_max(8) * VLEN / SEW_min(8), i.e. the largest possible VLMAX - confirm.
+endfunction: pre_randomize
+
 function void rvs_transaction::post_randomize();
+  super.post_randomize();
   if(inst_type == ALU && (alu_inst inside {VADC, VSBC, VMADC, VMSBC, VMERGE_VMVV}))
     use_vm_to_cal = 1;
   else
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv
index 173b560..834f2a8 100644
--- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv
+++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv
@@ -36,7 +36,8 @@
   vrf_t [31:0] vrf;
   vrf_t [31:0] vrf_delay;
   vrf_t [31:0] vrf_temp;
-  vrf_t [31:0] vrf_strobe_temp;
+  vrf_t [31:0] vrf_bit_strobe_temp;
+  vrf_byte_t [31:0] vrf_byte_strobe_temp;
 
   logic [`XLEN-1:0] vlmax;
   logic [`XLEN-1:0] imm_data;
@@ -513,7 +514,7 @@
 
 
         vrf_temp = vrf;
-        vrf_strobe_temp = '0;
+        vrf_bit_strobe_temp = '0;
         `uvm_info("MDL",$sformatf("Prepare done!\nelm_idx_max=%0d\ndest_eew=%0d\nsrc2_eew=%0d\nsrc1_eew=%0d\ndest_emul=%2.4f\nsrc2_emul=%2.4f\nsrc1_emul=%2.4f\n",elm_idx_max,dest_eew,src2_eew,src1_eew,dest_emul,src2_emul,src1_emul),UVM_LOW)
         // --------------------------------------------------
         // 3. Operate elements
@@ -665,18 +666,29 @@
 
         // Writeback whole vrf
         vrf = vrf_temp;
+        for(int i=0; i<32; i++) begin
+          for(int j=0; j<`VLENB; j++) begin
+            vrf_byte_strobe_temp[i][j] = |vrf_bit_strobe_temp[i][j*8 +: 8];
+          end
+        end
         // --------------------------------------------------
         // 4. Retire transaction gen
         rt_tr   = new("rt_tr");
         rt_tr.copy(inst_tr);
         rt_tr.is_rt = 1;
         // VRF
+        // if(rt_tr.dest_type == VRF && !(|vrf_bit_strobe_temp)) begin
+        //   `uvm_warning("MDL/INST_CHECKER", $sformatf("pc=0x%8x: Instruction with no valid vrf write strobe will be ignored.",pc));
+        //   continue;
+        // end
         if(rt_tr.dest_type == VRF) begin
           for(int reg_idx=dest_reg_idx_base; reg_idx<dest_reg_idx_base+int'($ceil(dest_emul)); reg_idx++) begin
+            // FIXME: A vrf write with an all-0 strobe will not be executed in the DUT.
+            // if(|vrf_byte_strobe_temp[reg_idx]) begin
             // All pre-start vreg will not be retired
             if((reg_idx - dest_reg_idx_base) >= (vstart / (`VLEN / dest_eew))) begin
               rt_tr.rt_vrf_index.push_back(reg_idx);
-              rt_tr.rt_vrf_strobe.push_back(vrf_strobe_temp[reg_idx]);
+              rt_tr.rt_vrf_strobe.push_back(vrf_byte_strobe_temp[reg_idx]);
               rt_tr.rt_vrf_data.push_back(vrf_temp[reg_idx]);
             end
           end
@@ -742,7 +754,7 @@
         elm_idx = elm_idx % (`VLEN / eew);
         for(int i=0; i<bit_count; i++) begin
           this.vrf_temp[reg_idx][elm_idx*bit_count + i] = result[i];
-          this.vrf_strobe_temp[reg_idx][elm_idx*bit_count + i] = 1'b1;
+          this.vrf_bit_strobe_temp[reg_idx][elm_idx*bit_count + i] = 1'b1;
         end
       end
       XRF: begin
@@ -1323,10 +1335,12 @@
     if(overflow) _vsaddu = '1;
   endfunction : _vsaddu
   function TD _vsadd(T2 src2, T1 src1);
-    logic signed [$bits(TD)-1:0] dest;
+    logic signed [$bits(TD):0] dest;
     dest = $signed(src2) + $signed(src1);
-    overflow  = dest[$bits(TD)-1] & ~src2[$bits(T2)-1] & ~src1[$bits(T1)-1]; 
-    underflow = ~dest[$bits(TD)-1] & src2[$bits(T2)-1] & src1[$bits(T1)-1]; 
+    // overflow  = dest[$bits(TD)-1] & ~src2[$bits(T2)-1] & ~src1[$bits(T1)-1]; 
+    // underflow = ~dest[$bits(TD)-1] & src2[$bits(T2)-1] & src1[$bits(T1)-1]; 
+    overflow  = dest[$bits(TD):$bits(TD)-1] == 2'b01;
+    underflow = dest[$bits(TD):$bits(TD)-1] == 2'b10;
     if(overflow)  begin _vsadd = '1; _vsadd[$bits(TD)-1] = 1'b0; end
     else if(underflow) begin _vsadd = '0; _vsadd[$bits(TD)-1] = 1'b1; end
     else begin _vsadd = dest; end
@@ -1336,10 +1350,12 @@
     if(underflow) _vssubu = '0;
   endfunction : _vssubu
   function TD _vssub(T2 src2, T1 src1);
-    logic unsigned [$bits(TD)-1:0] dest;
-    dest = $unsigned(src2) - $signed(src1);
-    overflow = dest[$bits(TD)-1] & ~src2[$bits(T2)-1] & src1[$bits(T1)-1]; 
-    underflow  = ~dest[$bits(TD)-1] & src2[$bits(T2)-1] & ~src1[$bits(T1)-1]; 
+    logic signed [$bits(TD):0] dest;
+    dest = $signed(src2) - $signed(src1);
+    // overflow = dest[$bits(TD)-1] & ~src2[$bits(T2)-1] & src1[$bits(T1)-1]; 
+    // underflow  = ~dest[$bits(TD)-1] & src2[$bits(T2)-1] & ~src1[$bits(T1)-1]; 
+    overflow  = dest[$bits(TD):$bits(TD)-1] == 2'b01;
+    underflow = dest[$bits(TD):$bits(TD)-1] == 2'b10;
     if(overflow)  begin _vssub = '1; _vssub[$bits(TD)-1] = 1'b0; end
     else if(underflow) begin _vssub = '0; _vssub[$bits(TD)-1] = 1'b1; end
     else begin _vssub = dest; end
@@ -1374,9 +1390,10 @@
     logic signed [$bits(TD)*2-1:0] dest;
     dest = $signed(src2) * $signed(src1);
     dest = _roundoff_signed(dest, $bits(TD)-1);
-    // FIXME: check
-    overflow = ^dest[$bits(TD):$bits(TD)-1];
+    overflow  = dest[$bits(TD):$bits(TD)-1] == 2'b01;
+    underflow = dest[$bits(TD):$bits(TD)-1] == 2'b10;
     if(overflow) begin _vsmul = '1; _vsmul[$bits(TD)-1] = 1'b0; end
+    else if(underflow) begin _vsmul = '0; _vsmul[$bits(TD)-1] = 1'b1; end
     else begin _vsmul = dest; end
   endfunction : _vsmul
 
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv
index 02863a4..67255c0 100644
--- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv
+++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv
@@ -103,7 +103,8 @@
 
   int        rt_vrf_num;
   reg_idx_t  rvs_rt_vrf_index , mdl_rt_vrf_index ;
-  vrf_t      rvs_rt_vrf_strobe, mdl_rt_vrf_strobe;
+  vrf_byte_t rvs_rt_vrf_byte_strobe, mdl_rt_vrf_byte_strobe;
+  vrf_t      rvs_rt_vrf_bit_strobe,  mdl_rt_vrf_bit_strobe;
   vrf_t      rvs_rt_vrf_data  , mdl_rt_vrf_data  ;
 
   string vreg_dut_val;
@@ -141,39 +142,46 @@
         rt_vrf_num = rvs_tr.rt_vrf_index.size();
         for(int i=0; i<rt_vrf_num; i++) begin
           rvs_rt_vrf_index  = rvs_tr.rt_vrf_index.pop_front();
-          rvs_rt_vrf_strobe = rvs_tr.rt_vrf_strobe.pop_front();
+          rvs_rt_vrf_byte_strobe = rvs_tr.rt_vrf_strobe.pop_front();
           rvs_rt_vrf_data   = rvs_tr.rt_vrf_data.pop_front();
           mdl_rt_vrf_index  = mdl_tr.rt_vrf_index.pop_front();
-          mdl_rt_vrf_strobe = mdl_tr.rt_vrf_strobe.pop_front();
+          mdl_rt_vrf_byte_strobe = mdl_tr.rt_vrf_strobe.pop_front();
           mdl_rt_vrf_data   = mdl_tr.rt_vrf_data.pop_front();
+          // Since the RTL uses byte_strobe for retire,
+          //   the MDL should generate byte_strobe,
+          //   and the SCB also needs to expand byte_strobe to bit_strobe to compare data.
+          for(int bit_idx=0; bit_idx<`VLENB; bit_idx++) begin
+            rvs_rt_vrf_bit_strobe[bit_idx*8 +: 8] = {8{rvs_rt_vrf_byte_strobe[bit_idx]}}; 
+            mdl_rt_vrf_bit_strobe[bit_idx*8 +: 8] = {8{mdl_rt_vrf_byte_strobe[bit_idx]}}; 
+          end 
           if(rvs_rt_vrf_index !== mdl_rt_vrf_index) begin
             `uvm_error("RT_CHECKER", $sformatf("Retire VRF index mismatch:\nDUT retired vrf[%0d],\nMDL retired vrf[%0d].", 
                                                 rvs_rt_vrf_index, 
                                                 mdl_rt_vrf_index));
-          end else if(rvs_rt_vrf_strobe !== mdl_rt_vrf_strobe) begin
+          end else if(rvs_rt_vrf_byte_strobe !== mdl_rt_vrf_byte_strobe) begin
             vreg_dut_val = "0x";
             vreg_mdl_val = "0x";
             for(int i=`VLEN-1;i>=0;i-=16) begin
-              vreg_dut_val = $sformatf("%s%4h_",vreg_dut_val,rvs_rt_vrf_strobe[i-:16]);
-              vreg_mdl_val = $sformatf("%s%4h_",vreg_mdl_val,mdl_rt_vrf_strobe[i-:16]);
+              vreg_dut_val = $sformatf("%s%4h_",vreg_dut_val,rvs_rt_vrf_byte_strobe[i-:16]);
+              vreg_mdl_val = $sformatf("%s%4h_",vreg_mdl_val,mdl_rt_vrf_byte_strobe[i-:16]);
             end
             vreg_dut_val = vreg_dut_val.substr(0,vreg_dut_val.len()-2);
             vreg_mdl_val = vreg_mdl_val.substr(0,vreg_mdl_val.len()-2);
             `uvm_error("RT_CHECKER", $sformatf("Retire VRF strobe(bit) mismatch:\nDUT retired vrf_strobe[%0d] = %s,\nMDL retired vrf_strobe[%0d] = %s.", 
                                                rvs_rt_vrf_index, vreg_dut_val,
                                                mdl_rt_vrf_index, vreg_mdl_val));
-          end else if((rvs_rt_vrf_strobe & rvs_rt_vrf_data) !== (mdl_rt_vrf_strobe & mdl_rt_vrf_data)) begin
+          end else if((rvs_rt_vrf_bit_strobe & rvs_rt_vrf_data) !== (mdl_rt_vrf_bit_strobe & mdl_rt_vrf_data)) begin
             vreg_dut_val = "0x";
             vreg_mdl_val = "0x";
             for(int i=`VLEN-1;i>=0;i-=16) begin
-              vreg_dut_val = $sformatf("%s%4h_",vreg_dut_val,{rvs_rt_vrf_strobe & rvs_rt_vrf_data}[i-:16]);
-              vreg_mdl_val = $sformatf("%s%4h_",vreg_mdl_val,{mdl_rt_vrf_strobe & mdl_rt_vrf_data}[i-:16]);
+              vreg_dut_val = $sformatf("%s%4h_",vreg_dut_val,{rvs_rt_vrf_bit_strobe & rvs_rt_vrf_data}[i-:16]);
+              vreg_mdl_val = $sformatf("%s%4h_",vreg_mdl_val,{mdl_rt_vrf_bit_strobe & mdl_rt_vrf_data}[i-:16]);
             end
             vreg_dut_val = vreg_dut_val.substr(0,vreg_dut_val.len()-2);
             vreg_mdl_val = vreg_mdl_val.substr(0,vreg_mdl_val.len()-2);
             // `uvm_error("RT_CHECKER", $sformatf("Retire VRF mismatch:\nDUT retired vrf[%0d] = 0x%0x,\nMDL retired vrf[%0d] = 0x%0x.", 
-            //                                     rvs_rt_vrf_index, (rvs_rt_vrf_strobe & rvs_rt_vrf_data),
-            //                                     mdl_rt_vrf_index, (mdl_rt_vrf_strobe & mdl_rt_vrf_data)));
+            //                                     rvs_rt_vrf_index, (rvs_rt_vrf_bit_strobe & rvs_rt_vrf_data),
+            //                                     mdl_rt_vrf_index, (mdl_rt_vrf_bit_strobe & mdl_rt_vrf_data)));
             `uvm_error("RT_CHECKER", $sformatf("Retire VRF mismatch:\nDUT retired vrf[%0d] = %s,\nMDL retired vrf[%0d] = %s.", 
                                                 rvs_rt_vrf_index, vreg_dut_val,
                                                 mdl_rt_vrf_index, vreg_mdl_val));
