Adjust coding style so that all sub-fields of the same struct signal are assigned in a single always block, for better tool compatibility.

Change-Id: I0587c25d95b0075387691b285198bc3eb035409c
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv
index d09a999..2d6aa0f 100644
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv
@@ -1627,21 +1627,14 @@
 //
 // submit result to ROB
 //
-`ifdef TB_SUPPORT
-  assign result.uop_pc = alu_uop.uop_pc;
-`endif
-
-  assign result.rob_entry = rob_entry;
-
-  // result data 
-  assign result.w_data = result_data;
-
-  // result type and valid signal
-  assign result.w_valid = result_valid;
-
-  // saturate signal
   always_comb begin
     // initial
+  `ifdef TB_SUPPORT
+    result.uop_pc    = alu_uop.uop_pc;
+  `endif
+    result.rob_entry = rob_entry;
+    result.w_data    = result_data;
+    result.w_valid   = result_valid;
     result.vsaturate = 'b0;
 
     case(uop_funct3) 
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv
index 80ad87b..4251810 100644
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv
@@ -58,15 +58,15 @@
 //
 // submit result to ROB
 //
-`ifdef TB_SUPPORT
-  assign  result.uop_pc = alu_uop.uop_pc;
-`endif
-  assign  result.rob_entry = alu_uop.rob_entry;
-
   // get result_uop
   always_comb begin
     // initial the data
-    result.w_data = alu_uop.result_data; 
+    `ifdef TB_SUPPORT
+    result.uop_pc    = alu_uop.uop_pc;
+    `endif
+    result.rob_entry = alu_uop.rob_entry;
+    result.w_valid   = alu_uop_valid;
+    result.w_data    = alu_uop.result_data;
     result.vsaturate = alu_uop.vsaturate;
 
     // calculate result data
@@ -99,7 +99,4 @@
     endcase
   end
 
-  assign  result.w_valid = alu_uop_valid;
-
-
 endmodule
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv
index f1e02f3..a6d88fb 100644
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv
@@ -502,24 +502,21 @@
 //
 // submit result to ROB
 //
-`ifdef TB_SUPPORT
-  assign  result.uop_pc = alu_uop.uop_pc;
-`endif
-  assign  result.rob_entry = rob_entry;
-
-  assign  result.vd_eew = vd_eew;
-
-  assign  result.uop_index = uop_index;
-
-  assign  result.alu_sub_opcode = alu_sub_opcode;
-
-  // result data
   assign vstart_onehot = 1'b1<<vstart;
   assign vstart_onehot_sub1 = vstart_onehot - 1'b1;
 
   always_comb begin
     // initial
-    result.result_data = 'b0;
+    `ifdef TB_SUPPORT
+    result.uop_pc           = alu_uop.uop_pc;
+    `endif
+    result.rob_entry        = rob_entry;
+    result.vd_eew           = vd_eew;
+    result.uop_index        = uop_index;
+    result.alu_sub_opcode   = alu_sub_opcode;
+    result.data_viota_per64 = data_viota_per64;
+    result.vsaturate        = 'b0;
+    result.result_data      = 'b0;
 
     case(uop_funct3)
       OPIVV,
@@ -575,9 +572,4 @@
     endcase
   end   
 
-  assign result.data_viota_per64 = data_viota_per64;
-  
-  // saturate signal
-  assign result.vsaturate = 'b0;
-
 endmodule
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv
index 55ef0ac..a9fd507 100644
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv
@@ -509,10 +509,10 @@
 // submit result to ROB
 //
 `ifdef TB_SUPPORT
-  assign  result.uop_pc = alu_uop.uop_pc;
+  assign result.uop_pc = alu_uop.uop_pc;
 `endif
 
-  assign  result.rob_entry = rob_entry;
+  assign result.rob_entry = rob_entry;
 
   assign result.w_data = result_data;
 
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv
index ff1cb91..ed55d3e 100755
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv
@@ -994,18 +994,15 @@
 //
 // submit result to ROB
 //
-`ifdef TB_SUPPORT
-  assign result.uop_pc = alu_uop.uop_pc;
-`endif
-  assign result.rob_entry = rob_entry;
-
-  assign result.w_data = result_data;
-
-  assign result.w_valid = result_valid;
-
   // saturate signal
   always_comb begin
     // initial
+    `ifdef TB_SUPPORT
+    result.uop_pc    = alu_uop.uop_pc;
+    `endif
+    result.rob_entry = rob_entry;
+    result.w_data    = result_data;
+    result.w_valid   = result_valid;
     result.vsaturate = 'b0;
 
     case(uop_funct3) 
diff --git a/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv b/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
index 3bc17d4..d6232da 100644
--- a/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
@@ -46,10 +46,10 @@
   RVVConfigState                                  vector_csr_ari;
   logic   [`VSTART_WIDTH-1:0]                     csr_vstart;
   logic   [`VL_WIDTH-1:0]                         csr_vl;
-  logic   [`VL_WIDTH-1:0]                         vs_evl;
+  logic   [`VL_WIDTH-1:0]                         evl;
   RVVSEW                                          csr_sew;
   RVVLMUL                                         csr_lmul;
-  logic   [`XLEN-1:0] 	                          rs1_data;
+  logic   [`XLEN-1:0] 	                          rs1;
   EMUL_e                                          emul_vd;          
   EMUL_e                                          emul_vs2;          
   EMUL_e                                          emul_vs1;          
@@ -86,6 +86,42 @@
   // enum/union
   FUNCT6_u                                        funct6_ari;
 
+  // result
+`ifdef TB_SUPPORT
+  logic   [`NUM_DE_UOP-1:0][`PC_WIDTH-1:0]            uop_pc;
+`endif
+  logic   [`NUM_DE_UOP-1:0][`FUNCT3_WIDTH-1:0]        uop_funct3;
+  FUNCT6_u        [`NUM_DE_UOP-1:0]                   uop_funct6;
+  EXE_UNIT_e      [`NUM_DE_UOP-1:0]                   uop_exe_unit; 
+  UOP_CLASS_e     [`NUM_DE_UOP-1:0]                   uop_class;   
+  RVVConfigState  [`NUM_DE_UOP-1:0]                   vector_csr;  
+  logic   [`NUM_DE_UOP-1:0][`VL_WIDTH-1:0]            vs_evl;             
+  logic   [`NUM_DE_UOP-1:0]                           ignore_vma;
+  logic   [`NUM_DE_UOP-1:0]                           ignore_vta;
+  logic   [`NUM_DE_UOP-1:0]                           force_vma_agnostic; 
+  logic   [`NUM_DE_UOP-1:0]                           force_vta_agnostic; 
+  logic   [`NUM_DE_UOP-1:0]                           vm;                 
+  logic   [`NUM_DE_UOP-1:0]                           v0_valid;           
+  logic   [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vd_index;           
+  EEW_e   [`NUM_DE_UOP-1:0]                           vd_eew;  
+  logic   [`NUM_DE_UOP-1:0]                           vd_valid;
+  logic   [`NUM_DE_UOP-1:0]                           vs3_valid;          
+  logic   [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs1;              
+  EEW_e   [`NUM_DE_UOP-1:0]                           vs1_eew;            
+  logic   [`NUM_DE_UOP-1:0]                           vs1_index_valid;
+  logic   [`NUM_DE_UOP-1:0]                           vs1_opcode_valid;
+  logic   [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs2_index; 	        
+  EEW_e   [`NUM_DE_UOP-1:0]                           vs2_eew;
+  logic   [`NUM_DE_UOP-1:0]                           vs2_valid;
+  logic   [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] rd_index; 	        
+  logic   [`NUM_DE_UOP-1:0]                           rd_index_valid; 
+  logic   [`NUM_DE_UOP-1:0][`XLEN-1:0] 	              rs1_data;           
+  logic   [`NUM_DE_UOP-1:0]     	                    rs1_data_valid;     
+  logic   [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0]     uop_index;          
+  logic   [`NUM_DE_UOP-1:0]                           first_uop_valid;    
+  logic   [`NUM_DE_UOP-1:0]                           last_uop_valid;     
+  logic   [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-2:0]     seg_field_index;    
+
   // use for for-loop 
   genvar                                          j;
 
@@ -110,7 +146,7 @@
   assign csr_vl               = inst.arch_state.vl;
   assign csr_sew              = inst.arch_state.sew;
   assign csr_lmul             = inst.arch_state.lmul;
-  assign rs1_data             = inst.rs1;
+  assign rs1                  = inst.rs1;
 
   // decode arithmetic instruction funct6
   assign funct6_ari.ari_funct6 = inst_valid ? inst_funct6 : 'b0;
@@ -3216,7 +3252,7 @@
   
   // get evl
   always_comb begin
-    vs_evl = csr_vl;
+    evl = csr_vl;
   
     case(inst_funct3)
       OPIVI: begin
@@ -3227,52 +3263,52 @@
               EMUL1: begin
                 case(eew_max)
                   EEW8: begin
-                    vs_evl = 1*`VLEN/8;
+                    evl = 1*`VLEN/8;
                   end
                   EEW16: begin
-                    vs_evl = 1*`VLEN/16;
+                    evl = 1*`VLEN/16;
                   end
                   EEW32: begin
-                    vs_evl = 1*`VLEN/32;
+                    evl = 1*`VLEN/32;
                   end
                 endcase
               end
               EMUL2: begin
                 case(eew_max)
                   EEW8: begin
-                    vs_evl = 2*`VLEN/8;
+                    evl = 2*`VLEN/8;
                   end
                   EEW16: begin
-                    vs_evl = 2*`VLEN/16;
+                    evl = 2*`VLEN/16;
                   end
                   EEW32: begin
-                    vs_evl = 2*`VLEN/32;
+                    evl = 2*`VLEN/32;
                   end
                 endcase
               end
               EMUL4: begin
                 case(eew_max)
                   EEW8: begin
-                    vs_evl = 4*`VLEN/8;
+                    evl = 4*`VLEN/8;
                   end
                   EEW16: begin
-                    vs_evl = 4*`VLEN/16;
+                    evl = 4*`VLEN/16;
                   end
                   EEW32: begin
-                    vs_evl = 4*`VLEN/32;
+                    evl = 4*`VLEN/32;
                   end
                 endcase
               end
               EMUL8: begin
                 case(eew_max)
                   EEW8: begin
-                    vs_evl = 8*`VLEN/8;
+                    evl = 8*`VLEN/8;
                   end
                   EEW16: begin
-                    vs_evl = 8*`VLEN/16;
+                    evl = 8*`VLEN/16;
                   end
                   EEW32: begin
-                    vs_evl = 8*`VLEN/32;
+                    evl = 8*`VLEN/32;
                   end
                 endcase
               end
@@ -3285,7 +3321,7 @@
         case(funct6_ari.ari_funct6)
           VWXUNARY0: begin
             if(vs2_opcode_vrxunary==VMV_S_X) begin
-              vs_evl = 'b1;
+              evl = 'b1;
             end
           end
         endcase
@@ -3296,8 +3332,8 @@
   // check evl is not 0
   // check vstart < evl
   always_comb begin
-    check_evl_not_0 = vs_evl!='b0;
-    check_vstart_sle_evl = {1'b0,csr_vstart} < vs_evl;
+    check_evl_not_0 = evl!='b0;
+    check_vstart_sle_evl = {1'b0,csr_vstart} < evl;
     
     // Instructions that write an x register or f register do so even when vstart >= vl, including when vl=0.
     case({valid_opm,funct6_ari.ari_funct6})
@@ -3340,31 +3376,31 @@
 `ifdef TB_SUPPORT
   // assign uop pc
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_PC
-      uop[i].uop_pc = inst.inst_pc;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_PC
+      uop_pc[i] = inst.inst_pc;
     end
   end
 `endif
 
   // update uop funct3
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_FUNCT3
-      uop[i].uop_funct3 = inst_funct3;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_FUNCT3
+      uop_funct3[i] = inst_funct3;
     end
   end
 
   // update uop funct6
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_FUNCT6
-      uop[i].uop_funct6 = funct6_ari;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_FUNCT6
+      uop_funct6[i] = funct6_ari;
     end
   end
 
   // allocate uop to execution unit
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_EXE_UNIT
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_EXE_UNIT
       // initial
-      uop[i].uop_exe_unit = ALU;
+      uop_exe_unit[i] = ALU;
       
       case(1'b1)
         valid_opi: begin
@@ -3396,7 +3432,7 @@
             VSSRA,
             VNCLIPU,
             VNCLIP: begin
-              uop[i].uop_exe_unit     = ALU;
+              uop_exe_unit[i]     = ALU;
             end 
             
             // Although comparison instructions belong to ALU previously, 
@@ -3413,27 +3449,27 @@
             VMSLE,
             VMSGTU,
             VMSGT:begin
-              uop[i].uop_exe_unit = CMP;
+              uop_exe_unit[i] = CMP;
             end
             VWREDSUMU,
             VWREDSUM: begin
-              uop[i].uop_exe_unit = RDT;
+              uop_exe_unit[i] = RDT;
             end
 
             VSLIDEUP_RGATHEREI16,
             VSLIDEDOWN,
             VRGATHER: begin
-              uop[i].uop_exe_unit = PMT;
+              uop_exe_unit[i] = PMT;
             end
 
             VSMUL_VMVNRR: begin
               case(inst_funct3)
                 OPIVV,
                 OPIVX: begin
-                  uop[i].uop_exe_unit = MUL;
+                  uop_exe_unit[i] = MUL;
                 end
                 OPIVI: begin 
-                  uop[i].uop_exe_unit = ALU;
+                  uop_exe_unit[i] = ALU;
                 end
               endcase
             end
@@ -3466,7 +3502,7 @@
             VMXNOR,
             VWXUNARY0,
             VMUNARY0: begin
-              uop[i].uop_exe_unit = ALU;
+              uop_exe_unit[i] = ALU;
             end
 
             VMUL,
@@ -3476,14 +3512,14 @@
             VWMUL,
             VWMULU,
             VWMULSU: begin
-              uop[i].uop_exe_unit = MUL;
+              uop_exe_unit[i] = MUL;
             end
 
             VDIVU,
             VDIV,
             VREMU,
             VREM: begin
-              uop[i].uop_exe_unit = DIV;
+              uop_exe_unit[i] = DIV;
             end
             
             VMACC,
@@ -3494,7 +3530,7 @@
             VWMACC,
             VWMACCSU,
             VWMACCUS: begin
-              uop[i].uop_exe_unit = MAC;
+              uop_exe_unit[i] = MAC;
             end
 
             // reduction
@@ -3506,13 +3542,13 @@
             VREDAND,
             VREDOR,
             VREDXOR: begin
-              uop[i].uop_exe_unit = RDT;
+              uop_exe_unit[i] = RDT;
             end
 
             VSLIDE1UP,
             VSLIDE1DOWN,
             VCOMPRESS: begin
-              uop[i].uop_exe_unit = PMT;
+              uop_exe_unit[i] = PMT;
             end
           endcase
         end
@@ -3569,9 +3605,11 @@
   end
 
   // calculate the uop_index used in decoding uops 
-  for(j=0;j<`NUM_DE_UOP;j=j+1) begin: GET_UOP_INDEX
-    assign uop_index_current[j] = j[`UOP_INDEX_WIDTH:0]+uop_index_base;
-  end
+  generate
+    for(j=0;j<`NUM_DE_UOP;j++) begin: GET_UOP_INDEX
+      assign uop_index_current[j] = j[`UOP_INDEX_WIDTH:0]+uop_index_base;
+    end
+  endgenerate
 
   // get the max uop index 
   always_comb begin
@@ -3595,7 +3633,7 @@
 
   // generate uop valid
   always_comb begin        
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_VALID
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_VALID
       if ((uop_index_current[i]<={1'b0,uop_index_max})&inst_valid) 
         uop_valid[i]  = inst_encoding_correct;
       else
@@ -3605,9 +3643,9 @@
 
   // update uop class
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_CLASS
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_CLASS
       // initial 
-      uop[i].uop_class = XXX;
+      uop_class[i] = XXX;
       
       case(1'b1)
         valid_opi: begin
@@ -3633,11 +3671,11 @@
             VRGATHER: begin
               case(inst_funct3)
                 OPIVV: begin
-                  uop[i].uop_class  = XVV;
+                  uop_class[i]  = XVV;
                 end
                 OPIVX,
                 OPIVI: begin
-                  uop[i].uop_class  = XVX;
+                  uop_class[i]  = XVX;
                 end 
               endcase
             end
@@ -3652,10 +3690,10 @@
             VSSUB: begin
               case(inst_funct3)
                 OPIVV: begin
-                  uop[i].uop_class  = XVV;
+                  uop_class[i]  = XVV;
                 end
                 OPIVX: begin
-                  uop[i].uop_class  = XVX;
+                  uop_class[i]  = XVX;
                 end 
               endcase
             end
@@ -3665,7 +3703,7 @@
               case(inst_funct3)
                 OPIVX,
                 OPIVI: begin
-                  uop[i].uop_class  = XVX;
+                  uop_class[i]  = XVX;
                 end 
               endcase
             end
@@ -3677,11 +3715,11 @@
             VMSLE: begin
               case(inst_funct3)
                 OPIVV: begin
-                  uop[i].uop_class  = VVV;
+                  uop_class[i]  = VVV;
                 end
                 OPIVX,
                 OPIVI: begin
-                  uop[i].uop_class  = VVX;
+                  uop_class[i]  = VVX;
                 end
               endcase
             end
@@ -3691,10 +3729,10 @@
             VMSLT: begin
               case(inst_funct3)
                 OPIVV: begin
-                  uop[i].uop_class  = VVV;
+                  uop_class[i]  = VVV;
                 end
                 OPIVX: begin
-                  uop[i].uop_class  = VVX;
+                  uop_class[i]  = VVX;
                 end
               endcase
             end
@@ -3704,7 +3742,7 @@
               case(inst_funct3)
                 OPIVX,
                 OPIVI: begin
-                  uop[i].uop_class  = VVX;
+                  uop_class[i]  = VVX;
                 end 
               endcase
             end
@@ -3713,16 +3751,16 @@
               case(inst_funct3)
                 OPIVV: begin
                   if (inst_vm==1'b0)
-                    uop[i].uop_class  = XVV;
+                    uop_class[i]  = XVV;
                   else
-                    uop[i].uop_class  = XXV;
+                    uop_class[i]  = XXV;
                 end
                 OPIVX,
                 OPIVI: begin
                   if (inst_vm==1'b0)
-                    uop[i].uop_class  = XVX;
+                    uop_class[i]  = XVX;
                   else
-                    uop[i].uop_class  = XXX;
+                    uop_class[i]  = XXX;
                 end
               endcase
             end
@@ -3731,7 +3769,7 @@
             VWREDSUM: begin
               case(inst_funct3)
                 OPIVV: begin
-                  uop[i].uop_class  = XVV;
+                  uop_class[i]  = XVV;
                 end
               endcase
             end
@@ -3739,11 +3777,11 @@
             VSLIDEUP_RGATHEREI16: begin
               case(inst_funct3)
                 OPIVV: begin
-                  uop[i].uop_class  = XVV;
+                  uop_class[i]  = XVV;
                 end
                 OPIVX,
                 OPIVI: begin
-                  uop[i].uop_class  = VVX;
+                  uop_class[i]  = VVX;
                 end
               endcase
             end
@@ -3778,10 +3816,10 @@
             VASUB: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].uop_class  = XVV;
+                  uop_class[i]  = XVV;
                 end
                 OPMVX: begin
-                  uop[i].uop_class  = XVX;
+                  uop_class[i]  = XVX;
                 end
               endcase
             end 
@@ -3789,7 +3827,7 @@
             VXUNARY0: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].uop_class  = XVX;
+                  uop_class[i]  = XVX;
                 end
               endcase
             end
@@ -3803,10 +3841,10 @@
             VWMACCSU: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].uop_class  = VVV;
+                  uop_class[i]  = VVV;
                 end
                 OPMVX: begin
-                  uop[i].uop_class  = VVX;
+                  uop_class[i]  = VVX;
                 end
               endcase
             end
@@ -3814,7 +3852,7 @@
             VWMACCUS: begin
               case(inst_funct3)
                 OPMVX: begin
-                  uop[i].uop_class  = VVX;
+                  uop_class[i]  = VVX;
                 end
               endcase
             end 
@@ -3830,7 +3868,7 @@
             VREDXOR: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].uop_class  = XVV;
+                  uop_class[i]  = XVV;
                 end
               endcase
             end
@@ -3840,9 +3878,9 @@
               case(inst_funct3)
                 OPMVV: begin
                   if (uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_vstart) 
-                    uop[i].uop_class  = VVV;
+                    uop_class[i]  = VVV;
                   else
-                    uop[i].uop_class  = VVX;
+                    uop_class[i]  = VVX;
                 end
               endcase
             end
@@ -3858,7 +3896,7 @@
             VMXNOR: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].uop_class  = VVV;
+                  uop_class[i]  = VVV;
                 end
               endcase
             end
@@ -3866,10 +3904,10 @@
             VWXUNARY0: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].uop_class  = XVX;
+                  uop_class[i]  = XVX;
                 end
                 OPMVX: begin
-                  uop[i].uop_class  = XXX;
+                  uop_class[i]  = XXX;
                 end
               endcase
             end
@@ -3883,15 +3921,15 @@
                     VMSOF: begin
                       if (inst_vm==1'b0)
                         // need vd as vs3
-                        uop[i].uop_class  = VVX;
+                        uop_class[i]  = VVX;
                       else
-                        uop[i].uop_class  = XVX;
+                        uop_class[i]  = XVX;
                     end
                     VIOTA: begin
-                      uop[i].uop_class  = XVX;
+                      uop_class[i]  = XVX;
                     end
                     VID: begin
-                      uop[i].uop_class  = XXX;
+                      uop_class[i]  = XXX;
                     end
                   endcase
                 end
@@ -3902,7 +3940,7 @@
             VSLIDE1DOWN: begin
               case(inst_funct3)
                 OPMVX: begin
-                  uop[i].uop_class  = XVX;
+                  uop_class[i]  = XVX;
                 end
               endcase
             end 
@@ -3914,8 +3952,8 @@
 
   // update vector_csr and vstart
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_VCSR
-      uop[i].vector_csr = vector_csr_ari;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_VCSR
+      vector_csr[i] = vector_csr_ari;
 
       // update vstart of every uop
       if(uop_index_current[i]>{1'b0,uop_vstart}) begin
@@ -3935,18 +3973,18 @@
               VMSGT,
               VWREDSUMU,
               VWREDSUM: begin
-                uop[i].vector_csr.vstart = vector_csr_ari.vstart;
+                vector_csr[i].vstart = vector_csr_ari.vstart;
               end
               default: begin 
                 case(eew_max)
                   EEW8: begin
-                    uop[i].vector_csr.vstart  = {uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLENB)){1'b0}}};
+                    vector_csr[i].vstart  = {uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLENB)){1'b0}}};
                   end
                   EEW16: begin
-                    uop[i].vector_csr.vstart  = {1'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`HWORD_WIDTH)){1'b0}}};
+                    vector_csr[i].vstart  = {1'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`HWORD_WIDTH)){1'b0}}};
                   end
                   EEW32: begin
-                    uop[i].vector_csr.vstart  = {2'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`WORD_WIDTH)){1'b0}}};
+                    vector_csr[i].vstart  = {2'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`WORD_WIDTH)){1'b0}}};
                   end
                 endcase
               end
@@ -3964,18 +4002,18 @@
               VREDOR,
               VREDXOR,
               VCOMPRESS: begin
-                uop[i].vector_csr.vstart = vector_csr_ari.vstart;
+                vector_csr[i].vstart = vector_csr_ari.vstart;
               end
               default: begin 
                 case(eew_max)
                   EEW8: begin
-                    uop[i].vector_csr.vstart  = {uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLENB)){1'b0}}};
+                    vector_csr[i].vstart  = {uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLENB)){1'b0}}};
                   end
                   EEW16: begin
-                    uop[i].vector_csr.vstart  = {1'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`HWORD_WIDTH)){1'b0}}};
+                    vector_csr[i].vstart  = {1'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`HWORD_WIDTH)){1'b0}}};
                   end
                   EEW32: begin
-                    uop[i].vector_csr.vstart  = {2'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`WORD_WIDTH)){1'b0}}};
+                    vector_csr[i].vstart  = {2'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`WORD_WIDTH)){1'b0}}};
                   end
                 endcase
               end
@@ -3988,8 +4026,8 @@
 
   // update vs_evl
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_EVL
-      uop[i].vs_evl = vs_evl;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_EVL
+      vs_evl[i] = evl;
     end
   end
  
@@ -3997,10 +4035,11 @@
   // some instructions use vm as an extra opcode, so it needs ignore vma policy.
   // the instructions whose EEW_vd=1b can write the result to TAIL elements, so it needs ignore vta policy.
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_IGNORE
-      uop[i].ignore_vma = 'b0;
-      uop[i].ignore_vta = 'b0;
+    // initial 
+    ignore_vma = 'b0;
+    ignore_vta = 'b0;
       
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_IGNORE
       case(inst_funct3) 
         OPIVV,
         OPIVX,
@@ -4008,8 +4047,8 @@
           case(funct6_ari.ari_funct6)
             VADC,
             VSBC: begin
-              uop[i].ignore_vma = 'b1;
-              uop[i].ignore_vta = 'b0;
+              ignore_vma[i] = 1'b1;
+              ignore_vta[i] = 1'b0;
             end
             VMADC,
             VMSBC,
@@ -4021,12 +4060,12 @@
             VMSLE,
             VMSGTU,
             VMSGT: begin
-              uop[i].ignore_vma = 'b1;
-              uop[i].ignore_vta = 'b1;
+              ignore_vma[i] = 1'b1;
+              ignore_vta[i] = 1'b1;
             end
             VMERGE_VMV: begin
               if (inst_vm=='b0) begin
-                uop[i].ignore_vma = 'b1;
+                ignore_vma[i] = 1'b1;
               end
             end
           endcase
@@ -4042,16 +4081,16 @@
             VMNAND,
             VMNOR,
             VMXNOR: begin
-              uop[i].ignore_vma = 'b1;
-              uop[i].ignore_vta = 'b1;
+              ignore_vma[i] = 1'b1;
+              ignore_vta[i] = 1'b1;
             end
             VMUNARY0: begin
               case(vs1_opcode_vmunary)
                 VMSBF,
                 VMSOF,
                 VMSIF: begin
-                  uop[i].ignore_vma = 'b1;
-                  uop[i].ignore_vta = 'b1;
+                  ignore_vma[i] = 1'b1;
+                  ignore_vta[i] = 1'b1;
                 end
               endcase
             end
@@ -4063,36 +4102,36 @@
   
   // update force_vma_agnostic
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_FORCE_VMA
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_FORCE_VMA
       //When source and destination registers overlap and have different EEW, the instruction is mask- and tail-agnostic.
-      uop[i].force_vma_agnostic = ((check_vd_overlap_vs2==1'b0)&(eew_vd!=eew_vs2)&(eew_vd!=EEW_NONE)&(eew_vs2!=EEW_NONE)) |
-                                  ((check_vd_overlap_vs1==1'b0)&(eew_vd!=eew_vs1)&(eew_vd!=EEW_NONE)&(eew_vs1!=EEW_NONE));
+      force_vma_agnostic[i] = ((check_vd_overlap_vs2==1'b0)&(eew_vd!=eew_vs2)&(eew_vd!=EEW_NONE)&(eew_vs2!=EEW_NONE)) |
+                              ((check_vd_overlap_vs1==1'b0)&(eew_vd!=eew_vs1)&(eew_vd!=EEW_NONE)&(eew_vs1!=EEW_NONE));
     end
   end
 
   // update force_vta_agnostic
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_FORCE_VTA
-      uop[i].force_vta_agnostic = (eew_vd==EEW1) |   // Mask destination tail elements are always treated as tail-agnostic
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_FORCE_VTA
+      force_vta_agnostic[i] = (eew_vd==EEW1) |   // Mask destination tail elements are always treated as tail-agnostic
       //When source and destination registers overlap and have different EEW, the instruction is mask- and tail-agnostic.
-                                  ((check_vd_overlap_vs2==1'b0)&(eew_vd!=eew_vs2)&(eew_vd!=EEW_NONE)&(eew_vs2!=EEW_NONE)) |
-                                  ((check_vd_overlap_vs1==1'b0)&(eew_vd!=eew_vs1)&(eew_vd!=EEW_NONE)&(eew_vs1!=EEW_NONE));
+                              ((check_vd_overlap_vs2==1'b0)&(eew_vd!=eew_vs2)&(eew_vd!=EEW_NONE)&(eew_vs2!=EEW_NONE)) |
+                              ((check_vd_overlap_vs1==1'b0)&(eew_vd!=eew_vs1)&(eew_vd!=EEW_NONE)&(eew_vs1!=EEW_NONE));
     end
   end
 
   // update vm field
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_VM
-      uop[i].vm = inst_vm;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_VM
+      vm[i] = inst_vm;
     end
   end
   
   // some uop need v0 as the vector operand
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_V0
-      // initial 
-      uop[i].v0_valid = 'b0;
+    // initial 
+    v0_valid = 'b0;
        
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_V0
       case(1'b1)
         valid_opi: begin
           // OPI*
@@ -4104,7 +4143,7 @@
                 OPIVV,
                 OPIVX,
                 OPIVI: begin
-                  uop[i].v0_valid = !inst_vm;
+                  v0_valid[i] = !inst_vm;
                 end
               endcase
             end 
@@ -4113,7 +4152,7 @@
               case(inst_funct3)
                 OPIVV,
                 OPIVX: begin
-                  uop[i].v0_valid = !inst_vm;
+                  v0_valid[i] = !inst_vm;
                 end
               endcase
             end
@@ -4126,7 +4165,7 @@
               case(vs1_opcode_vwxunary)
                 VCPOP,
                 VFIRST: begin
-                  uop[i].v0_valid = !inst_vm;
+                  v0_valid[i] = !inst_vm;
                 end
               endcase
             end
@@ -4136,7 +4175,7 @@
                 VMSOF,
                 VMSIF,
                 VIOTA: begin
-                  uop[i].v0_valid = !inst_vm;
+                  v0_valid[i] = !inst_vm;
                 end
               endcase
             end
@@ -4148,12 +4187,12 @@
   
   // update vd_index, eew and valid
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_VD
-      // initial
-      uop[i].vd_index = 'b0;
-      uop[i].vd_eew   = EEW_NONE;
-      uop[i].vd_valid = 'b0;
+    // defaults; NOTE(review): vd_eew used to default to EEW_NONE, now 'b0 - confirm EEW_NONE encodes as 0
+    vd_index = 'b0;
+    vd_eew   = 'b0; 
+    vd_valid = 'b0;
       
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD
       case(1'b1)
         valid_opi: begin
           case(funct6_ari.ari_funct6)
@@ -4176,9 +4215,9 @@
                 OPIVV,
                 OPIVX,
                 OPIVI: begin  
-                  uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = 1'b1;
+                  vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = 1'b1;
                 end 
               endcase
             end
@@ -4194,9 +4233,9 @@
               case(inst_funct3)
                 OPIVV,
                 OPIVX: begin  
-                  uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = 1'b1;
+                  vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = 1'b1;
                 end 
               endcase
             end
@@ -4206,9 +4245,9 @@
               case(inst_funct3)
                 OPIVX,
                 OPIVI: begin  
-                  uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = 1'b1;
+                  vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = 1'b1;
                 end 
               endcase
             end
@@ -4222,9 +4261,9 @@
                 OPIVV,
                 OPIVX,
                 OPIVI: begin  
-                  uop[i].vd_index = inst_vd;
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+                  vd_index[i] = inst_vd;
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
                 end
               endcase
             end
@@ -4235,9 +4274,9 @@
               case(inst_funct3)
                 OPIVV,
                 OPIVX: begin  
-                  uop[i].vd_index = inst_vd;
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+                  vd_index[i] = inst_vd;
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
                 end
               endcase
             end
@@ -4247,9 +4286,9 @@
               case(inst_funct3)
                 OPIVX,
                 OPIVI: begin  
-                  uop[i].vd_index = inst_vd;
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+                  vd_index[i] = inst_vd;
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
                 end
               endcase
             end
@@ -4262,9 +4301,9 @@
                 OPIVV,
                 OPIVX,
                 OPIVI: begin
-                  uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = 1'b1;
+                  vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4272,9 +4311,9 @@
             VWREDSUMU,
             VWREDSUM: begin
               if(inst_funct3==OPIVV) begin
-                uop[i].vd_index   = inst_vd;
-                uop[i].vd_eew     = eew_vd;
-                uop[i].vd_valid   = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+                vd_index[i]   = inst_vd;
+                vd_eew[i]     = eew_vd;
+                vd_valid[i]   = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
               end
             end
 
@@ -4286,24 +4325,24 @@
                     {EMUL2,EMUL2},
                     {EMUL4,EMUL4},
                     {EMUL8,EMUL8}: begin
-                      uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                      uop[i].vd_eew   = eew_vd;
-                      uop[i].vd_valid = 1'b1;
+                      vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                      vd_eew[i]   = eew_vd;
+                      vd_valid[i] = 1'b1;
                     end
                     {EMUL2,EMUL1},
                     {EMUL4,EMUL2},
                     {EMUL8,EMUL4}: begin
-                      uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
-                      uop[i].vd_eew   = eew_vd;
-                      uop[i].vd_valid = 1'b1;
+                      vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+                      vd_eew[i]   = eew_vd;
+                      vd_valid[i] = 1'b1;
                     end
                   endcase
                 end
                 OPIVX,
                 OPIVI: begin  
-                  uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = 1'b1;
+                  vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = 1'b1;
                 end 
               endcase
             end
@@ -4346,9 +4385,9 @@
               case(inst_funct3)
                 OPMVV,
                 OPMVX: begin
-                  uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = 1'b1;
+                  vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = 1'b1;
                 end
               endcase
             end   
@@ -4357,9 +4396,9 @@
             VCOMPRESS: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = 1'b1;
+                  vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4369,9 +4408,9 @@
             VSLIDE1DOWN: begin
               case(inst_funct3)
                 OPMVX: begin
-                  uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = 1'b1;
+                  vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = 1'b1;
                 end
               endcase
             end 
@@ -4386,9 +4425,9 @@
             VREDXOR: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].vd_index = inst_vd;
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+                  vd_index[i] = inst_vd;
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
                 end
               endcase
             end
@@ -4403,9 +4442,9 @@
             VMXNOR: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].vd_index = inst_vd;
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = 1'b1;
+                  vd_index[i] = inst_vd;
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4413,9 +4452,9 @@
             VWXUNARY0: begin
               case(inst_funct3)
                 OPMVX: begin
-                  uop[i].vd_index = inst_vd;
-                  uop[i].vd_eew   = eew_vd;
-                  uop[i].vd_valid = 1'b1;
+                  vd_index[i] = inst_vd;
+                  vd_eew[i]   = eew_vd;
+                  vd_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4427,15 +4466,15 @@
                     VMSBF,
                     VMSIF,
                     VMSOF: begin
-                      uop[i].vd_index = inst_vd;
-                      uop[i].vd_eew   = eew_vd;
-                      uop[i].vd_valid = 1'b1;
+                      vd_index[i] = inst_vd;
+                      vd_eew[i]   = eew_vd;
+                      vd_valid[i] = 1'b1;
                     end
                     VIOTA,
                     VID: begin
-                      uop[i].vd_index = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                      uop[i].vd_eew   = eew_vd;
-                      uop[i].vd_valid = 1'b1;
+                      vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                      vd_eew[i]   = eew_vd;
+                      vd_valid[i] = 1'b1;
                     end
                   endcase
                 end
@@ -4449,10 +4488,10 @@
 
   // some uop need vd as the vs3 vector operand
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_VS3_VALID
-      // initial
-      uop[i].vs3_valid = 'b0;
-      
+    // initial
+    vs3_valid = 'b0;
+
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS3_VALID
       case(1'b1)
         valid_opi: begin
           // OPI*
@@ -4462,7 +4501,7 @@
                 OPIVV,
                 OPIVX,
                 OPIVI: begin
-                  uop[i].vs3_valid = 1'b1;
+                  vs3_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4470,7 +4509,7 @@
               case(inst_funct3)
                 OPIVV,
                 OPIVX: begin
-                  uop[i].vs3_valid = 1'b1;
+                  vs3_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4482,7 +4521,7 @@
                 OPIVV,
                 OPIVX,
                 OPIVI: begin
-                  uop[i].vs3_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+                  vs3_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
                 end
               endcase
             end
@@ -4491,7 +4530,7 @@
               case(inst_funct3)
                 OPIVV,
                 OPIVX: begin
-                  uop[i].vs3_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+                  vs3_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
                 end
               endcase
             end
@@ -4500,7 +4539,7 @@
               case(inst_funct3)
                 OPIVX,
                 OPIVI: begin
-                  uop[i].vs3_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+                  vs3_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
                 end
               endcase
             end
@@ -4508,7 +4547,7 @@
               case(inst_funct3)
                 OPIVX,
                 OPIVI: begin
-                  uop[i].vs3_valid = 1'b1;
+                  vs3_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4528,14 +4567,14 @@
               case(inst_funct3)
                 OPMVV,
                 OPMVX: begin
-                  uop[i].vs3_valid = 1'b1;
+                  vs3_valid[i] = 1'b1;
                 end
               endcase
             end
             VWMACCUS: begin
               case(inst_funct3)
                 OPMVX: begin
-                  uop[i].vs3_valid = 1'b1;
+                  vs3_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4550,7 +4589,7 @@
             VCOMPRESS: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].vs3_valid = 1'b1;
+                  vs3_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4562,7 +4601,7 @@
                     VMSIF,
                     VMSOF: begin
                       if (inst_vm==1'b0)
-                        uop[i].vs3_valid = 1'b1;
+                        vs3_valid[i] = 1'b1;
                     end
                   endcase
                 end
@@ -4576,12 +4615,12 @@
   
   // update vs1 
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_VS1
-      // initial
-      uop[i].vs1             = inst_vs1;
-      uop[i].vs1_eew         = eew_vs1;
-      uop[i].vs1_index_valid = 'b0;
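+    // defaults; NOTE(review): vs1/vs1_eew used to default to inst_vs1/eew_vs1 - confirm no unlisted case relied on that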
+    vs1             = 'b0; 
+    vs1_eew         = 'b0; 
+    vs1_index_valid = 'b0;
       
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS1
       case(inst_funct3)
         OPIVV: begin
           case(funct6_ari.ari_funct6)
@@ -4616,22 +4655,25 @@
             VSSRL,
             VSSRA,
             VRGATHER: begin
-              uop[i].vs1              = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-              uop[i].vs1_index_valid  = 1'b1;   
+              vs1[i]              = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+              vs1_eew[i]          = eew_vs1;
+              vs1_index_valid[i]  = 1'b1;   
             end
             
             VNSRL,
             VNSRA,
             VNCLIPU,
             VNCLIP: begin
-              uop[i].vs1              = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
-              uop[i].vs1_index_valid  = 1'b1;
+              vs1[i]              = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+              vs1_eew[i]          = eew_vs1;
+              vs1_index_valid[i]  = 1'b1;
             end
             
             VWREDSUMU,
             VWREDSUM: begin
-              uop[i].vs1              = inst_vs1;
-              uop[i].vs1_index_valid  = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;   
+              vs1[i]              = inst_vs1;
+              vs1_eew[i]          = eew_vs1;
+              vs1_index_valid[i]  = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;   
             end        
             
             VSLIDEUP_RGATHEREI16: begin
@@ -4640,14 +4682,16 @@
                 {EMUL2,EMUL2},
                 {EMUL4,EMUL4},
                 {EMUL8,EMUL8}: begin
-                  uop[i].vs1             = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vs1_index_valid = 1'b1;
+                  vs1[i]              = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vs1_eew[i]          = eew_vs1;
+                  vs1_index_valid[i]  = 1'b1;
                 end
                 {EMUL2,EMUL1},
                 {EMUL4,EMUL2},
                 {EMUL8,EMUL4}: begin
-                  uop[i].vs1             = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
-                  uop[i].vs1_index_valid = 1'b1;
+                  vs1[i]              = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+                  vs1_eew[i]          = eew_vs1;
+                  vs1_index_valid[i]  = 1'b1;
                 end
               endcase
             end
@@ -4670,15 +4714,17 @@
             VWMACCU,
             VWMACC,
             VWMACCSU: begin
-              uop[i].vs1              = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
-              uop[i].vs1_index_valid  = 1'b1;        
+              vs1[i]              = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+              vs1_eew[i]          = eew_vs1;
+              vs1_index_valid[i]  = 1'b1;        
             end
 
             VXUNARY0,
             VWXUNARY0,
             VMUNARY0: begin
-              uop[i].vs1              = inst_vs1; // vs1 is regarded as opcode
-              uop[i].vs1_index_valid  = 'b0;        
+              vs1[i]              = inst_vs1; // vs1 is regarded as opcode
+              vs1_eew[i]          = eew_vs1;
+              vs1_index_valid[i]  = 'b0;        
             end
 
             VMUL,
@@ -4697,8 +4743,9 @@
             VAADD,
             VASUBU,
             VASUB: begin
-              uop[i].vs1              = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-              uop[i].vs1_index_valid  = 1'b1;        
+              vs1[i]              = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+              vs1_eew[i]          = eew_vs1;
+              vs1_index_valid[i]  = 1'b1;        
             end
 
             // reduction
@@ -4710,8 +4757,9 @@
             VREDAND,
             VREDOR,
             VREDXOR: begin
-              uop[i].vs1              = inst_vs1;
-              uop[i].vs1_index_valid  = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+              vs1[i]              = inst_vs1;
+              vs1_eew[i]          = eew_vs1;
+              vs1_index_valid[i]  = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
             end
 
             VMAND,
@@ -4722,14 +4770,16 @@
             VMNOR,
             VMORN,
             VMXNOR: begin
-              uop[i].vs1              = inst_vs1;
-              uop[i].vs1_index_valid  = 1'b1;
+              vs1[i]              = inst_vs1;
+              vs1_eew[i]          = eew_vs1;
+              vs1_index_valid[i]  = 1'b1;
             end
 
             VCOMPRESS: begin
               if (uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_vstart) begin
-                uop[i].vs1              = inst_vs1;
-                uop[i].vs1_index_valid  = 1'b1;        
+                vs1[i]              = inst_vs1;
+                vs1_eew[i]          = eew_vs1;
+                vs1_index_valid[i]  = 1'b1;        
               end
             end
           endcase
@@ -4740,15 +4790,15 @@
 
   // some uop will use vs1 field as an opcode to decode  
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_VS1_OPCODE
-      // initial
-      uop[i].vs1_opcode_valid         = 'b0;
-      
+    // initial
+    vs1_opcode_valid = 'b0;
+    
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS1_OPCODE
       case(inst_funct3)
         OPIVI: begin
           case(funct6_ari.ari_funct6)
             VSMUL_VMVNRR: begin
-              uop[i].vs1_opcode_valid = 1'b1;   // vmvnrr.v's vs1 opcode is 5'b0, which means vmv1r.v
+              vs1_opcode_valid[i] = 1'b1;   // vmvnrr.v's vs1 opcode is 5'b0, which means vmv1r.v
             end
           endcase
         end
@@ -4756,14 +4806,14 @@
         OPMVV: begin
           case(funct6_ari.ari_funct6)
             VXUNARY0: begin
-              uop[i].vs1_opcode_valid = 1'b1;
+              vs1_opcode_valid[i] = 1'b1;
             end
             VWXUNARY0: begin
               case(vs1_opcode_vwxunary)
                 VCPOP,
                 VFIRST,
                 VMV_X_S: begin
-                  uop[i].vs1_opcode_valid = 1'b1;
+                  vs1_opcode_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4773,7 +4823,7 @@
                 VMSIF,
                 VMSOF,
                 VIOTA: begin
-                  uop[i].vs1_opcode_valid = 1'b1;
+                  vs1_opcode_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -4785,12 +4835,12 @@
 
   // update vs2 index, eew and valid  
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_VS2
-      // initial
-      uop[i].vs2_index        = 'b0; 
-      uop[i].vs2_eew          = EEW_NONE; 
-      uop[i].vs2_valid        = 'b0; 
+    // defaults; NOTE(review): vs2_eew used to default to EEW_NONE, now 'b0 - confirm EEW_NONE encodes as 0
+    vs2_index = 'b0; 
+    vs2_eew   = 'b0; 
+    vs2_valid = 'b0; 
       
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2
       case(1'b1)
         valid_opi: begin
           // OPI*
@@ -4822,9 +4872,9 @@
                 OPIVV,
                 OPIVX,
                 OPIVI: begin
-                  uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vs2_eew      = eew_vs2;
-                  uop[i].vs2_valid    = 1'b1;
+                  vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vs2_eew[i]      = eew_vs2;
+                  vs2_valid[i]    = 1'b1;
                 end
               endcase
             end
@@ -4843,9 +4893,9 @@
               case(inst_funct3)
                 OPIVV,
                 OPIVX: begin
-                  uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vs2_eew      = eew_vs2;
-                  uop[i].vs2_valid    = 1'b1;
+                  vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vs2_eew[i]      = eew_vs2;
+                  vs2_valid[i]    = 1'b1;
                 end
               endcase
             end
@@ -4857,9 +4907,9 @@
               case(inst_funct3)
                 OPIVX,
                 OPIVI: begin
-                  uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vs2_eew      = eew_vs2;
-                  uop[i].vs2_valid    = 1'b1;
+                  vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vs2_eew[i]      = eew_vs2;
+                  vs2_valid[i]    = 1'b1;
                 end
               endcase
             end
@@ -4870,9 +4920,9 @@
                 OPIVX,
                 OPIVI: begin
                   if(inst_vm==1'b0) begin
-                    uop[i].vs2_index  = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                    uop[i].vs2_eew    = eew_vs2;
-                    uop[i].vs2_valid  = 1'b1;
+                    vs2_index[i]  = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                    vs2_eew[i]    = eew_vs2;
+                    vs2_valid[i]  = 1'b1;
                   end
                 end
               endcase
@@ -4882,9 +4932,9 @@
             VWREDSUM: begin
               case(inst_funct3)
                 OPIVV: begin
-                  uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vs2_eew      = eew_vs2;
-                  uop[i].vs2_valid    = 1'b1;
+                  vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vs2_eew[i]      = eew_vs2;
+                  vs2_valid[i]    = 1'b1;
                 end
               endcase
             end
@@ -4897,24 +4947,24 @@
                     {EMUL2,EMUL2},
                     {EMUL4,EMUL4},
                     {EMUL8,EMUL8}: begin
-                      uop[i].vs2_index  = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                      uop[i].vs2_eew    = eew_vs2;
-                      uop[i].vs2_valid  = 1'b1;
+                      vs2_index[i]  = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                      vs2_eew[i]    = eew_vs2;
+                      vs2_valid[i]  = 1'b1;
                     end
                     {EMUL2,EMUL1},
                     {EMUL4,EMUL2},
                     {EMUL8,EMUL4}: begin
-                      uop[i].vs2_index  = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
-                      uop[i].vs2_eew    = eew_vs2;
-                      uop[i].vs2_valid  = 1'b1;
+                      vs2_index[i]  = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+                      vs2_eew[i]    = eew_vs2;
+                      vs2_valid[i]  = 1'b1;
                     end
                   endcase
                 end
                 OPIVX,
                 OPIVI: begin  
-                  uop[i].vs2_index = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vs2_eew   = eew_vs2;
-                  uop[i].vs2_valid = 1'b1;
+                  vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vs2_eew[i]   = eew_vs2;
+                  vs2_valid[i] = 1'b1;
                 end 
               endcase
             end
@@ -4937,9 +4987,9 @@
               case(inst_funct3)
                 OPMVV,
                 OPMVX: begin
-                  uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
-                  uop[i].vs2_eew      = eew_vs2;
-                  uop[i].vs2_valid    = 1'b1;        
+                  vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+                  vs2_eew[i]      = eew_vs2;
+                  vs2_valid[i]    = 1'b1;        
                 end
               endcase
             end
@@ -4967,9 +5017,9 @@
               case(inst_funct3)
                 OPMVV,
                 OPMVX: begin
-                  uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vs2_eew      = eew_vs2;
-                  uop[i].vs2_valid    = 1'b1;        
+                  vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vs2_eew[i]      = eew_vs2;
+                  vs2_valid[i]    = 1'b1;        
                 end
               endcase
             end
@@ -4981,20 +5031,20 @@
                     {EMUL1,EMUL1},
                     {EMUL2,EMUL1},
                     {EMUL4,EMUL1}: begin
-                      uop[i].vs2_index    = inst_vs2;
-                      uop[i].vs2_eew      = eew_vs2;
-                      uop[i].vs2_valid    = 1'b1;
+                      vs2_index[i]    = inst_vs2;
+                      vs2_eew[i]      = eew_vs2;
+                      vs2_valid[i]    = 1'b1;
                     end
                     {EMUL4,EMUL2},
                     {EMUL8,EMUL4}: begin
-                      uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
-                      uop[i].vs2_eew      = eew_vs2;
-                      uop[i].vs2_valid    = 1'b1;
+                      vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+                      vs2_eew[i]      = eew_vs2;
+                      vs2_valid[i]    = 1'b1;
                     end
                     {EMUL8,EMUL2}: begin
-                      uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:2];
-                      uop[i].vs2_eew      = eew_vs2;
-                      uop[i].vs2_valid    = 1'b1;
+                      vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:2];
+                      vs2_eew[i]      = eew_vs2;
+                      vs2_valid[i]    = 1'b1;
                     end
                   endcase
                 end
@@ -5004,9 +5054,9 @@
             VWMACCUS: begin
               case(inst_funct3)
                 OPMVX: begin
-                  uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
-                  uop[i].vs2_eew      = eew_vs2;
-                  uop[i].vs2_valid    = 1'b1;        
+                  vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+                  vs2_eew[i]      = eew_vs2;
+                  vs2_valid[i]    = 1'b1;        
                 end
               endcase
             end
@@ -5023,9 +5073,9 @@
             VCOMPRESS: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vs2_eew      = eew_vs2;
-                  uop[i].vs2_valid    = 1'b1;   
+                  vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vs2_eew[i]      = eew_vs2;
+                  vs2_valid[i]    = 1'b1;   
                 end
               endcase
             end
@@ -5040,9 +5090,9 @@
             VMXNOR: begin
               case(inst_funct3)
                 OPMVV: begin
-                  uop[i].vs2_index    = inst_vs2;
-                  uop[i].vs2_eew      = eew_vs2;
-                  uop[i].vs2_valid    = 1'b1;   
+                  vs2_index[i]    = inst_vs2;
+                  vs2_eew[i]      = eew_vs2;
+                  vs2_valid[i]    = 1'b1;   
                 end
               endcase
             end
@@ -5055,9 +5105,9 @@
                     VMSIF,
                     VMSOF,
                     VIOTA: begin
-                      uop[i].vs2_index    = inst_vs2;
-                      uop[i].vs2_eew      = eew_vs2;
-                      uop[i].vs2_valid    = 1'b1;   
+                      vs2_index[i]    = inst_vs2;
+                      vs2_eew[i]      = eew_vs2;
+                      vs2_valid[i]    = 1'b1;   
                     end
                   endcase
                 end
@@ -5068,9 +5118,9 @@
             VSLIDE1DOWN: begin
               case(inst_funct3)
                 OPMVX: begin
-                  uop[i].vs2_index    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
-                  uop[i].vs2_eew      = eew_vs2;
-                  uop[i].vs2_valid    = 1'b1;        
+                  vs2_index[i]    = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vs2_eew[i]      = eew_vs2;
+                  vs2_valid[i]    = 1'b1;        
                 end
               endcase
             end
@@ -5082,11 +5132,11 @@
 
   // update rd_index and valid
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_RD
-      // initial
-      uop[i].rd_index         = 'b0;
-      uop[i].rd_index_valid   = 'b0;
+    // initial
+    rd_index       = 'b0;
+    rd_index_valid = 'b0;
      
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_RD
       case(funct6_ari.ari_funct6)
         VWXUNARY0: begin
           case(inst_funct3)
@@ -5095,8 +5145,8 @@
                 VCPOP,
                 VFIRST,
                 VMV_X_S: begin
-                  uop[i].rd_index         = inst_rd;
-                  uop[i].rd_index_valid   = 1'b1;
+                  rd_index[i]         = inst_rd;
+                  rd_index_valid[i]   = 1'b1;
                 end
               endcase
             end
@@ -5108,11 +5158,11 @@
 
   // update rs1_data and rs1_data_valid 
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_RS1
-      // initial
-      uop[i].rs1_data         = 'b0;
-      uop[i].rs1_data_valid   = 'b0;
+    // initial
+    rs1_data       = 'b0;
+    rs1_data_valid = 'b0;
       
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_RS1
       case(1'b1)
         valid_opi: begin
           // OPI*
@@ -5135,12 +5185,12 @@
             VSADD: begin
               case(inst_funct3)
                 OPIVX: begin
-                  uop[i].rs1_data       = rs1_data;
-                  uop[i].rs1_data_valid = 1'b1;
+                  rs1_data[i]       = rs1;
+                  rs1_data_valid[i] = 1'b1;
                 end
                 OPIVI: begin
-                  uop[i].rs1_data       = {{(`XLEN-`IMM_WIDTH){inst_imm[`IMM_WIDTH-1]}},inst_imm[`IMM_WIDTH-1:0]};
-                  uop[i].rs1_data_valid = 1'b1;
+                  rs1_data[i]       = {{(`XLEN-`IMM_WIDTH){inst_imm[`IMM_WIDTH-1]}},inst_imm[`IMM_WIDTH-1:0]};
+                  rs1_data_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -5159,8 +5209,8 @@
             VSMUL_VMVNRR: begin
               case(inst_funct3)
                 OPIVX: begin
-                  uop[i].rs1_data       = rs1_data;
-                  uop[i].rs1_data_valid = 1'b1;
+                  rs1_data[i]       = rs1;
+                  rs1_data_valid[i] = 1'b1;
                 end
               endcase
             end  
@@ -5179,12 +5229,12 @@
             VRGATHER: begin
               case(inst_funct3)
                 OPIVX: begin
-                  uop[i].rs1_data       = rs1_data;
-                  uop[i].rs1_data_valid = 1'b1;
+                  rs1_data[i]       = rs1;
+                  rs1_data_valid[i] = 1'b1;
                 end
                 OPIVI: begin
-                  uop[i].rs1_data       = {{(`XLEN-`IMM_WIDTH){1'b0}},inst_imm[`IMM_WIDTH-1:0]};
-                  uop[i].rs1_data_valid = 1'b1;
+                  rs1_data[i]       = {{(`XLEN-`IMM_WIDTH){1'b0}},inst_imm[`IMM_WIDTH-1:0]};
+                  rs1_data_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -5230,8 +5280,8 @@
             VSLIDE1DOWN: begin
               case(inst_funct3)
                 OPMVX: begin
-                  uop[i].rs1_data       = rs1_data;
-                  uop[i].rs1_data_valid = 1'b1;
+                  rs1_data[i]       = rs1;
+                  rs1_data_valid[i] = 1'b1;
                 end
               endcase
             end
@@ -5243,22 +5293,25 @@
 
   // update first_uop valid
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_FIRST
-      uop[i].first_uop_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_vstart;
+    // initial 
+    first_uop_valid = 'b0;
+    
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_FIRST
+      first_uop_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_vstart;
 
       case(1'b1)
         valid_opi: begin
           case(funct6_ari.ari_funct6)
             VSLIDEUP_RGATHEREI16,
             VRGATHER: begin
-              uop[i].first_uop_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == 'b0;
+              first_uop_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == 'b0;
             end
           endcase
         end
         valid_opm: begin
           case(funct6_ari.ari_funct6)
             VSLIDE1UP: begin
-              uop[i].first_uop_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == 'b0;
+              first_uop_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == 'b0;
             end
           endcase
         end
@@ -5268,22 +5321,63 @@
 
   // update last_uop valid
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_LAST
-      uop[i].last_uop_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_LAST
+      last_uop_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
     end
   end
 
   // update uop index
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: ASSIGN_UOP_INDEX
-      uop[i].uop_index = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: ASSIGN_UOP_INDEX
+      uop_index[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+    end
+  end
+  
+  // update segment_index: seg_field_index is tied to zero for every uop here
+  always_comb begin
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: ASSIGN_SEG_INDEX
+      seg_field_index[i] = 'b0;
     end
   end
 
-  // update segment_index
-  always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: ASSIGN_SEG_INDEX
-      uop[i].seg_field_index = 'b0;
+  // drive every field of the uop[] output struct from its per-field packed array (one continuous assign per field)
+  generate
+    for(j=0;j<`NUM_DE_UOP;j++) begin: ASSIGN_RES
+    `ifdef TB_SUPPORT
+      assign uop[j].uop_pc              = uop_pc[j];
+    `endif
+      assign uop[j].uop_funct3          = uop_funct3[j];
+      assign uop[j].uop_funct6          = uop_funct6[j];
+      assign uop[j].uop_exe_unit        = uop_exe_unit[j];
+      assign uop[j].uop_class           = uop_class[j];
+      assign uop[j].vector_csr          = vector_csr[j];
+      assign uop[j].vs_evl              = vs_evl[j];
+      assign uop[j].ignore_vma          = ignore_vma[j];
+      assign uop[j].ignore_vta          = ignore_vta[j];
+      assign uop[j].force_vma_agnostic  = force_vma_agnostic[j];
+      assign uop[j].force_vta_agnostic  = force_vta_agnostic[j];
+      assign uop[j].vm                  = vm[j];
+      assign uop[j].v0_valid            = v0_valid[j];
+      assign uop[j].vd_index            = vd_index[j];
+      assign uop[j].vd_eew              = vd_eew[j];
+      assign uop[j].vd_valid            = vd_valid[j];
+      assign uop[j].vs3_valid           = vs3_valid[j];
+      assign uop[j].vs1                 = vs1[j];
+      assign uop[j].vs1_eew             = vs1_eew[j];
+      assign uop[j].vs1_index_valid     = vs1_index_valid[j];
+      assign uop[j].vs1_opcode_valid    = vs1_opcode_valid[j];
+      assign uop[j].vs2_index           = vs2_index[j];
+      assign uop[j].vs2_eew             = vs2_eew[j];
+      assign uop[j].vs2_valid           = vs2_valid[j];
+      assign uop[j].rd_index            = rd_index[j];
+      assign uop[j].rd_index_valid      = rd_index_valid[j];
+      assign uop[j].rs1_data            = rs1_data[j];
+      assign uop[j].rs1_data_valid      = rs1_data_valid[j];
+      assign uop[j].uop_index           = uop_index[j];
+      assign uop[j].first_uop_valid     = first_uop_valid[j];
+      assign uop[j].last_uop_valid      = last_uop_valid[j];
+      assign uop[j].seg_field_index     = seg_field_index[j];
     end
-  end
+  endgenerate
+
 endmodule
diff --git a/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv b/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
index c25b52d..4d20d9f 100644
--- a/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
@@ -40,7 +40,7 @@
   RVVConfigState                                  vector_csr_lsu;
   logic   [`VSTART_WIDTH-1:0]                     csr_vstart;
   logic   [`VL_WIDTH-1:0]                         csr_vl;
-  logic   [`VL_WIDTH-1:0]                         vs_evl;
+  logic   [`VL_WIDTH-1:0]                         evl;
   RVVSEW                                          csr_sew;
   RVVLMUL                                         csr_lmul;
   EMUL_e                                          emul_vd;          
@@ -76,7 +76,43 @@
   logic   [`UOP_INDEX_WIDTH-1:0]                  uop_index_max;         
    
   // convert logic to enum/union
-  FUNCT6_u                                        uop_funct6;
+  FUNCT6_u                                        funct6_lsu;
+
+  // per-uop decode results: one packed entry per decoded uop, indexed [`NUM_DE_UOP-1:0]
+`ifdef TB_SUPPORT
+  logic   [`NUM_DE_UOP-1:0][`PC_WIDTH-1:0]            uop_pc;
+`endif
+  logic   [`NUM_DE_UOP-1:0][`FUNCT3_WIDTH-1:0]        uop_funct3;
+  FUNCT6_u        [`NUM_DE_UOP-1:0]                   uop_funct6;
+  EXE_UNIT_e      [`NUM_DE_UOP-1:0]                   uop_exe_unit;
+  UOP_CLASS_e     [`NUM_DE_UOP-1:0]                   uop_class;
+  RVVConfigState  [`NUM_DE_UOP-1:0]                   vector_csr;
+  logic   [`NUM_DE_UOP-1:0][`VL_WIDTH-1:0]            vs_evl;
+  logic   [`NUM_DE_UOP-1:0]                           ignore_vma;
+  logic   [`NUM_DE_UOP-1:0]                           ignore_vta;
+  logic   [`NUM_DE_UOP-1:0]                           force_vma_agnostic;
+  logic   [`NUM_DE_UOP-1:0]                           force_vta_agnostic;
+  logic   [`NUM_DE_UOP-1:0]                           vm;
+  logic   [`NUM_DE_UOP-1:0]                           v0_valid;
+  logic   [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vd_index;
+  EEW_e   [`NUM_DE_UOP-1:0]                           vd_eew;
+  logic   [`NUM_DE_UOP-1:0]                           vd_valid;
+  logic   [`NUM_DE_UOP-1:0]                           vs3_valid;
+  logic   [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs1;
+  EEW_e   [`NUM_DE_UOP-1:0]                           vs1_eew;
+  logic   [`NUM_DE_UOP-1:0]                           vs1_index_valid;
+  logic   [`NUM_DE_UOP-1:0]                           vs1_opcode_valid;
+  logic   [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs2_index;
+  EEW_e   [`NUM_DE_UOP-1:0]                           vs2_eew;
+  logic   [`NUM_DE_UOP-1:0]                           vs2_valid;
+  logic   [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] rd_index;
+  logic   [`NUM_DE_UOP-1:0]                           rd_index_valid;
+  logic   [`NUM_DE_UOP-1:0][`XLEN-1:0]                rs1_data;
+  logic   [`NUM_DE_UOP-1:0]                           rs1_data_valid;
+  logic   [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0]     uop_index;
+  logic   [`NUM_DE_UOP-1:0]                           first_uop_valid;
+  logic   [`NUM_DE_UOP-1:0]                           last_uop_valid;
+  logic   [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-2:0]     seg_field_index;
 
   // use for for-loop 
   genvar                                          j;
@@ -109,70 +145,69 @@
 
   // identify load or store
   always_comb begin
-    uop_funct6.lsu_funct6.lsu_is_store = IS_LOAD;
+    funct6_lsu.lsu_funct6.lsu_is_store = IS_LOAD;
     valid_lsu_opcode                   = 'b0;
 
     case(inst_opcode)
       LOAD: begin
-        uop_funct6.lsu_funct6.lsu_is_store = IS_LOAD;
+        funct6_lsu.lsu_funct6.lsu_is_store = IS_LOAD;
         valid_lsu_opcode                   = 1'b1;
       end
       STORE: begin
-        uop_funct6.lsu_funct6.lsu_is_store = IS_STORE;
+        funct6_lsu.lsu_funct6.lsu_is_store = IS_STORE;
         valid_lsu_opcode                   = 1'b1;
       end
     endcase
-  end
 
   // lsu_mop distinguishes unit-stride, constant-stride, unordered index, ordered index
   // lsu_umop identifies what unit-stride instruction belong to when lsu_mop=US
-  always_comb begin
-    uop_funct6.lsu_funct6.lsu_mop    = US;
-    uop_funct6.lsu_funct6.lsu_umop   = US_US;
-    uop_funct6.lsu_funct6.lsu_is_seg = NONE;
+    // default mop/umop/is_seg decode and valid flag, overridden by the funct6 case below
+    funct6_lsu.lsu_funct6.lsu_mop    = US;
+    funct6_lsu.lsu_funct6.lsu_umop   = US_US;
+    funct6_lsu.lsu_funct6.lsu_is_seg = NONE;
     valid_lsu_mop                    = 'b0;
     
     case(inst_funct6[2:0])
       UNIT_STRIDE: begin
         case(inst_umop)
           US_REGULAR: begin          
-            uop_funct6.lsu_funct6.lsu_mop    = US;
-            uop_funct6.lsu_funct6.lsu_umop   = US_US;
+            funct6_lsu.lsu_funct6.lsu_mop    = US;
+            funct6_lsu.lsu_funct6.lsu_umop   = US_US;
             valid_lsu_mop                    = 1'b1;
-            uop_funct6.lsu_funct6.lsu_is_seg = (inst_nf!=NF1) ? IS_SEGMENT : NONE;
+            funct6_lsu.lsu_funct6.lsu_is_seg = (inst_nf!=NF1) ? IS_SEGMENT : NONE;
           end
           US_WHOLE_REGISTER: begin
-            uop_funct6.lsu_funct6.lsu_mop    = US;
-            uop_funct6.lsu_funct6.lsu_umop   = US_WR;
+            funct6_lsu.lsu_funct6.lsu_mop    = US;
+            funct6_lsu.lsu_funct6.lsu_umop   = US_WR;
             valid_lsu_mop                    = 1'b1;
           end
           US_MASK: begin
-            uop_funct6.lsu_funct6.lsu_mop    = US;
-            uop_funct6.lsu_funct6.lsu_umop   = US_MK;
+            funct6_lsu.lsu_funct6.lsu_mop    = US;
+            funct6_lsu.lsu_funct6.lsu_umop   = US_MK;
             valid_lsu_mop                    = 1'b1;
           end
           US_FAULT_FIRST: begin
-            uop_funct6.lsu_funct6.lsu_mop    = US;
-            uop_funct6.lsu_funct6.lsu_umop   = US_FF;
+            funct6_lsu.lsu_funct6.lsu_mop    = US;
+            funct6_lsu.lsu_funct6.lsu_umop   = US_FF;
             valid_lsu_mop                    = 1'b1;
-            uop_funct6.lsu_funct6.lsu_is_seg = (inst_nf!=NF1) ? IS_SEGMENT : NONE;
+            funct6_lsu.lsu_funct6.lsu_is_seg = (inst_nf!=NF1) ? IS_SEGMENT : NONE;
           end
         endcase
       end
       UNORDERED_INDEX: begin
-        uop_funct6.lsu_funct6.lsu_mop    = IU;
+        funct6_lsu.lsu_funct6.lsu_mop    = IU;
         valid_lsu_mop                    = 1'b1;
-        uop_funct6.lsu_funct6.lsu_is_seg = (inst_nf!=NF1) ? IS_SEGMENT : NONE;
+        funct6_lsu.lsu_funct6.lsu_is_seg = (inst_nf!=NF1) ? IS_SEGMENT : NONE;
       end
       CONSTANT_STRIDE: begin
-        uop_funct6.lsu_funct6.lsu_mop    = CS;
+        funct6_lsu.lsu_funct6.lsu_mop    = CS;
         valid_lsu_mop                    = 1'b1;
-        uop_funct6.lsu_funct6.lsu_is_seg = (inst_nf!=NF1) ? IS_SEGMENT : NONE;
+        funct6_lsu.lsu_funct6.lsu_is_seg = (inst_nf!=NF1) ? IS_SEGMENT : NONE;
       end
       ORDERED_INDEX: begin
-        uop_funct6.lsu_funct6.lsu_mop    = IO;
+        funct6_lsu.lsu_funct6.lsu_mop    = IO;
         valid_lsu_mop                    = 1'b1;
-        uop_funct6.lsu_funct6.lsu_is_seg = (inst_nf!=NF1) ? IS_SEGMENT : NONE;
+        funct6_lsu.lsu_funct6.lsu_is_seg = (inst_nf!=NF1) ? IS_SEGMENT : NONE;
       end
     endcase
   end
@@ -187,9 +222,9 @@
     emul_max        = EMUL_NONE;
 
     if (valid_lsu) begin  
-      case(uop_funct6.lsu_funct6.lsu_mop)
+      case(funct6_lsu.lsu_funct6.lsu_mop)
         US: begin
-          case(uop_funct6.lsu_funct6.lsu_umop)
+          case(funct6_lsu.lsu_funct6.lsu_umop)
             US_US,
             US_FF: begin
               case(inst_nf)
@@ -2587,9 +2622,9 @@
     eew_max = EEW_NONE;  
 
     if (valid_lsu) begin  
-      case(uop_funct6.lsu_funct6.lsu_mop)
+      case(funct6_lsu.lsu_funct6.lsu_mop)
         US: begin
-          case(uop_funct6.lsu_funct6.lsu_umop)
+          case(funct6_lsu.lsu_funct6.lsu_umop)
             US_US,
             US_WR,
             US_FF: begin
@@ -2921,7 +2956,7 @@
 
   // get evl
   always_comb begin
-    vs_evl = csr_vl;
+    evl = csr_vl;
     
     case(inst_funct6[2:0])
       UNIT_STRIDE: begin
@@ -2932,52 +2967,52 @@
               EMUL1: begin
                 case(eew_max)
                   EEW8: begin
-                    vs_evl = 1*`VLEN/8;
+                    evl = 1*`VLEN/8;
                   end
                   EEW16: begin
-                    vs_evl = 1*`VLEN/16;
+                    evl = 1*`VLEN/16;
                   end
                   EEW32: begin
-                    vs_evl = 1*`VLEN/32;
+                    evl = 1*`VLEN/32;
                   end
                 endcase
               end
               EMUL2: begin
                 case(eew_max)
                   EEW8: begin
-                    vs_evl = 2*`VLEN/8;
+                    evl = 2*`VLEN/8;
                   end
                   EEW16: begin
-                    vs_evl = 2*`VLEN/16;
+                    evl = 2*`VLEN/16;
                   end
                   EEW32: begin
-                    vs_evl = 2*`VLEN/32;
+                    evl = 2*`VLEN/32;
                   end
                 endcase
               end
               EMUL4: begin
                 case(eew_max)
                   EEW8: begin
-                    vs_evl = 4*`VLEN/8;
+                    evl = 4*`VLEN/8;
                   end
                   EEW16: begin
-                    vs_evl = 4*`VLEN/16;
+                    evl = 4*`VLEN/16;
                   end
                   EEW32: begin
-                    vs_evl = 4*`VLEN/32;
+                    evl = 4*`VLEN/32;
                   end
                 endcase
               end
               EMUL8: begin
                 case(eew_max)
                   EEW8: begin
-                    vs_evl = 8*`VLEN/8;
+                    evl = 8*`VLEN/8;
                   end
                   EEW16: begin
-                    vs_evl = 8*`VLEN/16;
+                    evl = 8*`VLEN/16;
                   end
                   EEW32: begin
-                    vs_evl = 8*`VLEN/32;
+                    evl = 8*`VLEN/32;
                   end
                 endcase
               end
@@ -2985,7 +3020,7 @@
           end
           US_MASK: begin       
             // evl = ceil(vl/8)
-            vs_evl = {3'b0,csr_vl[`VL_WIDTH-1:3]} + (csr_vl[2:0]!='b0);
+            evl = {3'b0,csr_vl[`VL_WIDTH-1:3]} + (csr_vl[2:0]!='b0);
           end
         endcase
       end
@@ -2993,10 +3028,10 @@
   end
   
   // check evl is not 0
-  assign check_evl_not_0 = vs_evl!='b0;
+  assign check_evl_not_0 = evl!='b0;
 
   // check vstart < evl
-  assign check_vstart_sle_evl = {1'b0,csr_vstart} < vs_evl;
+  assign check_vstart_sle_evl = {1'b0,csr_vstart} < evl;
 
   `ifdef ASSERT_ON
     `ifdef TB_SUPPORT
@@ -3012,9 +3047,11 @@
   assign uop_index_base = uop_index_remain;
 
   // calculate the uop_index used in decoding uops 
-  for(j=0;j<`NUM_DE_UOP;j=j+1) begin: GET_UOP_INDEX
-    assign uop_index_current[j] = j[`UOP_INDEX_WIDTH-1:0]+uop_index_base;
-  end
+  generate
+    for(j=0;j<`NUM_DE_UOP;j++) begin: GET_UOP_INDEX
+      assign uop_index_current[j] = j[`UOP_INDEX_WIDTH-1:0]+uop_index_base;
+    end
+  endgenerate
 
 //
 // split instruction to uops
@@ -3053,7 +3090,7 @@
 
   // generate uop valid
   always_comb begin        
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_VALID
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_VALID
       if ((uop_index_current[i]<={1'b0,uop_index_max})&valid_lsu) 
         uop_valid[i]  = inst_encoding_correct;
       else
@@ -3064,49 +3101,49 @@
 `ifdef TB_SUPPORT
   // assign uop pc
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_PC
-      uop[i].uop_pc = inst.inst_pc;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_PC
+      uop_pc[i] = inst.inst_pc;
     end
   end
 `endif
 
   // update uop funct3
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_FUNCT3
-      uop[i].uop_funct3 = inst_funct3;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_FUNCT3
+      uop_funct3[i] = inst_funct3;
     end
   end
 
   // update uop funct6
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_FUNCT6
-      uop[i].uop_funct6 = uop_funct6;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_FUNCT6
+      uop_funct6[i] = funct6_lsu;
     end
   end
 
   // allocate uop to execution unit
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_EXE_UNIT
-      uop[i].uop_exe_unit = LSU;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_EXE_UNIT
+      uop_exe_unit[i] = LSU;
     end
   end
 
   // update uop class
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_CLASS
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_CLASS
       // initial 
-      uop[i].uop_class = XXX;
+      uop_class[i] = XXX;
       
       case(inst_opcode) 
         LOAD:begin
           case(inst_funct6[2:0])
             UNIT_STRIDE,
             CONSTANT_STRIDE: begin
-              uop[i].uop_class = XXX;
+              uop_class[i] = XXX;
             end
             UNORDERED_INDEX,
             ORDERED_INDEX: begin
-              uop[i].uop_class = XVX;
+              uop_class[i] = XVX;
             end
           endcase
         end
@@ -3115,11 +3152,11 @@
           case(inst_funct6[2:0])
             UNIT_STRIDE,
             CONSTANT_STRIDE: begin
-              uop[i].uop_class = VXX;
+              uop_class[i] = VXX;
             end
             UNORDERED_INDEX,
             ORDERED_INDEX: begin
-              uop[i].uop_class = VVX;
+              uop_class[i] = VVX;
             end
           endcase
         end
@@ -3129,22 +3166,23 @@
 
   // update vector_csr and vstart
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_VCSR
-      uop[i].vector_csr = vector_csr_lsu;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_VCSR
+      // initial 
+      vector_csr[i] = vector_csr_lsu;
 
       // update vstart of every uop
-      if(uop_funct6.lsu_funct6.lsu_is_seg!=IS_SEGMENT) begin
+      if(funct6_lsu.lsu_funct6.lsu_is_seg!=IS_SEGMENT) begin
         case(eew_max)
           EEW8: begin
-            uop[i].vector_csr.vstart  = {uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLENB)){1'b0}}}<csr_vstart ? csr_vstart : 
+            vector_csr[i].vstart  = {uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLENB)){1'b0}}}<csr_vstart ? csr_vstart : 
                                         {uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLENB)){1'b0}}};
           end
           EEW16: begin
-            uop[i].vector_csr.vstart  = {1'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`HWORD_WIDTH)){1'b0}}}<csr_vstart ? csr_vstart : 
+            vector_csr[i].vstart  = {1'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`HWORD_WIDTH)){1'b0}}}<csr_vstart ? csr_vstart : 
                                         {1'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`HWORD_WIDTH)){1'b0}}};
           end
           EEW32: begin
-            uop[i].vector_csr.vstart  = {2'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`WORD_WIDTH)){1'b0}}}<csr_vstart ? csr_vstart : 
+            vector_csr[i].vstart  = {2'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`WORD_WIDTH)){1'b0}}}<csr_vstart ? csr_vstart : 
                                         {2'b0,uop_index_current[i][`UOP_INDEX_WIDTH-1:0],{($clog2(`VLEN/`WORD_WIDTH)){1'b0}}};
           end
         endcase
@@ -3154,31 +3192,31 @@
   
   // update vs_evl
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_EVL
-      uop[i].vs_evl = vs_evl;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_EVL
+      vs_evl[i] = evl;
     end
   end
 
   // update ignore_vma and ignore_vta
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_IGNORE
-      uop[i].ignore_vma = 'b0;
-      uop[i].ignore_vta = 'b0;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_IGNORE
+      ignore_vma[i] = 'b0;
+      ignore_vta[i] = 'b0;
     end
   end
 
   // update force_vma_agnostic
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_FORCE_VMA
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_FORCE_VMA
       //When source and destination registers overlap and have different EEW, the instruction is mask- and tail-agnostic.
-      uop[i].force_vma_agnostic = (check_vd_overlap_vs2==1'b0)&(eew_vd!=eew_vs2)&(eew_vd!=EEW_NONE)&(eew_vs2!=EEW_NONE);
+      force_vma_agnostic[i] = (check_vd_overlap_vs2==1'b0)&(eew_vd!=eew_vs2)&(eew_vd!=EEW_NONE)&(eew_vs2!=EEW_NONE);
     end
   end
 
   // update force_vta_agnostic
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_FORCE_VTA
-      uop[i].force_vta_agnostic = (eew_vd==EEW1) |   // Mask destination tail elements are always treated as tail-agnostic
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_FORCE_VTA
+      force_vta_agnostic[i] = (eew_vd==EEW1) |   // Mask destination tail elements are always treated as tail-agnostic
       //When source and destination registers overlap and have different EEW, the instruction is mask- and tail-agnostic.
                                   ((check_vd_overlap_vs2==1'b0)&(eew_vd!=eew_vs2)&(eew_vd!=EEW_NONE)&(eew_vs2!=EEW_NONE));
     end
@@ -3186,24 +3224,24 @@
 
   // update vm field
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_VM
-      uop[i].vm = inst_vm;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_VM
+      vm[i] = inst_vm;
     end
   end
   
   // some uop need v0 as the vector operand
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_V0
-      uop[i].v0_valid = 'b1;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_V0
+      v0_valid[i] = 'b1;
     end
   end
 
   // update vd_index and eew 
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_VD
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD
       // initial
-      uop[i].vd_index = 'b0;
-      uop[i].vd_eew   = eew_vd;
+      vd_index[i] = 'b0;
+      vd_eew[i]   = eew_vd;
 
       case(inst_funct6[2:0])
         UNIT_STRIDE: begin
@@ -3211,16 +3249,16 @@
             US_REGULAR,          
             US_FAULT_FIRST,
             US_WHOLE_REGISTER: begin
-              uop[i].vd_index = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+              vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
             end
             US_MASK: begin
-              uop[i].vd_index = inst_vd;
+              vd_index[i] = inst_vd;
             end
           endcase
         end
 
         CONSTANT_STRIDE: begin
-          uop[i].vd_index = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+          vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
         end
         
         UNORDERED_INDEX,
@@ -3235,7 +3273,7 @@
             {SEW_16,SEW32},
             // 1:4
             {SEW_8,SEW32}: begin            
-              uop[i].vd_index = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+              vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
             end
             // 2:1
             {SEW_16,SEW8},
@@ -3244,16 +3282,16 @@
             {SEW_32,SEW8}: begin            
               case({emul_vs2,emul_vd})
                 {EMUL1,EMUL1}: begin
-                  uop[i].vd_index = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+                  vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
                 end
                 {EMUL2,EMUL1},
                 {EMUL4,EMUL2},
                 {EMUL8,EMUL4}: begin
-                  uop[i].vd_index = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+                  vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
                 end
                 {EMUL4,EMUL1},
                 {EMUL8,EMUL2}: begin
-                  uop[i].vd_index = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:2];
+                  vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:2];
                 end
               endcase
             end
@@ -3266,43 +3304,43 @@
   // update vd_valid and vs3_valid
   // some uop need vd as the vs3 vector operand
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_VD_VS3_VALID
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD_VS3_VALID
       // initial
-      uop[i].vs3_valid = 'b0;
-      uop[i].vd_valid  = 'b0;
+      vs3_valid[i] = 'b0;
+      vd_valid[i]  = 'b0;
 
       if(inst_opcode==STORE)
-        uop[i].vs3_valid = 1'b1;
+        vs3_valid[i] = 1'b1;
       else
-        uop[i].vd_valid  = 1'b1;
+        vd_valid[i]  = 1'b1;
     end
   end
 
   // update vs1 
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_VS1
-      uop[i].vs1             = 'b0;
-      uop[i].vs1_eew         = EEW_NONE;
-      uop[i].vs1_index_valid = 'b0;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS1
+      vs1[i]             = 'b0;
+      vs1_eew[i]         = EEW_NONE;
+      vs1_index_valid[i] = 'b0;
     end
   end
 
   // some uop will use vs1 field as an opcode to decode  
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_VS1_OPCODE
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS1_OPCODE
       // initial
-      uop[i].vs1_opcode_valid = 'b0;
+      vs1_opcode_valid[i] = 'b0;
     end
   end
 
   // update vs2 index, eew and valid  
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_VS2
-      // initial
-      uop[i].vs2_index        = 'b0; 
-      uop[i].vs2_eew          = eew_vs2; 
-      uop[i].vs2_valid        = 'b0; 
+    // initial: defaults for all uops; vs2_eew defaults to eew_vs2 as it did before this refactor
+    vs2_index = 'b0;
+    vs2_eew   = {`NUM_DE_UOP{eew_vs2}};
+    vs2_valid = 'b0;
     
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2
       case(inst_funct6[2:0])
         UNORDERED_INDEX,
         ORDERED_INDEX: begin
@@ -3318,40 +3356,45 @@
             {SEW_32,SEW8}: begin    
               case(emul_vs2)
                 EMUL1: begin
-                  uop[i].vs2_index = inst_vs2;
-                  uop[i].vs2_valid = 1'b1; 
+                  vs2_index[i] = inst_vs2;
+                  vs2_eew[i]   = eew_vs2; 
+                  vs2_valid[i] = 1'b1; 
                 end
                 EMUL2: begin
-                  uop[i].vs2_index = inst_vs2+uop_index_current[i][0];
-                  uop[i].vs2_valid = 1'b1; 
+                  vs2_index[i] = inst_vs2+uop_index_current[i][0];
+                  vs2_eew[i]   = eew_vs2; 
+                  vs2_valid[i] = 1'b1; 
                 end
                 EMUL4: begin
-                  uop[i].vs2_index = inst_vs2+uop_index_current[i][1:0];
-                  uop[i].vs2_valid = 1'b1; 
+                  vs2_index[i] = inst_vs2+uop_index_current[i][1:0];
+                  vs2_eew[i]   = eew_vs2; 
+                  vs2_valid[i] = 1'b1; 
                 end
                 EMUL8: begin
-                  uop[i].vs2_index = inst_vs2+uop_index_current[i][2:0];
-                  uop[i].vs2_valid = 1'b1; 
+                  vs2_index[i] = inst_vs2+uop_index_current[i][2:0];
+                  vs2_eew[i]   = eew_vs2; 
+                  vs2_valid[i] = 1'b1; 
                 end
               endcase
-              //uop[i].vs2_index = inst_vs2+uop_index_current[i];
-              //uop[i].vs2_valid = 1'b1; 
             end
             // 1:2
             {SEW_8,SEW16},
             {SEW_16,SEW32}: begin
               case(emul_vs2)
                 EMUL1: begin
-                  uop[i].vs2_index = inst_vs2;
-                  uop[i].vs2_valid = 1'b1; 
+                  vs2_index[i] = inst_vs2;
+                  vs2_eew[i]   = eew_vs2; 
+                  vs2_valid[i] = 1'b1; 
                 end
                 EMUL2: begin
-                  uop[i].vs2_index = inst_vs2+uop_index_current[i][1];
-                  uop[i].vs2_valid = 1'b1; 
+                  vs2_index[i] = inst_vs2+uop_index_current[i][1];
+                  vs2_eew[i]   = eew_vs2; 
+                  vs2_valid[i] = 1'b1; 
                 end
                 EMUL4: begin
-                  uop[i].vs2_index = inst_vs2+uop_index_current[i][2:1];
-                  uop[i].vs2_valid = 1'b1; 
+                  vs2_index[i] = inst_vs2+uop_index_current[i][2:1];
+                  vs2_eew[i]   = eew_vs2; 
+                  vs2_valid[i] = 1'b1; 
                 end
               endcase
             end
@@ -3359,12 +3402,14 @@
             {SEW_8,SEW32}: begin     
               case(emul_vs2)
                 EMUL1: begin
-                  uop[i].vs2_index = inst_vs2;
-                  uop[i].vs2_valid = 1'b1; 
+                  vs2_index[i] = inst_vs2;
+                  vs2_eew[i]   = eew_vs2; 
+                  vs2_valid[i] = 1'b1; 
                 end
                 EMUL2: begin
-                  uop[i].vs2_index = inst_vs2+uop_index_current[i][2];
-                  uop[i].vs2_valid = 1'b1; 
+                  vs2_index[i] = inst_vs2+uop_index_current[i][2];
+                  vs2_eew[i]   = eew_vs2; 
+                  vs2_valid[i] = 1'b1; 
                 end
               endcase
             end
@@ -3376,56 +3421,97 @@
 
   // update rd_index and valid
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_RD
-      uop[i].rd_index         = 'b0;
-      uop[i].rd_index_valid   = 'b0;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_RD  // i runs over 0 .. `NUM_DE_UOP-1
+      rd_index[i]         = 'b0;  // tied to zero for every i in this block
+      rd_index_valid[i]   = 'b0;  // never asserted in this block
     end
   end
 
   // update rs1_data and rs1_data_valid 
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_RS1
-      uop[i].rs1_data         = 'b0;
-      uop[i].rs1_data_valid   = 'b0;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_RS1  // i runs over 0 .. `NUM_DE_UOP-1
+      rs1_data[i]         = 'b0;  // tied to zero for every i in this block
+      rs1_data_valid[i]   = 'b0;  // never asserted in this block
     end
   end
 
   // update uop index
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: ASSIGN_UOP_INDEX
-      uop[i].uop_index = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: ASSIGN_UOP_INDEX  // i runs over 0 .. `NUM_DE_UOP-1
+      uop_index[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];  // keep only the low `UOP_INDEX_WIDTH bits
     end
   end
 
   // update last_uop valid
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_UOP_LAST
-      uop[i].first_uop_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == 'b0;
-      uop[i].last_uop_valid = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_UOP_LAST  // i runs over 0 .. `NUM_DE_UOP-1
+      first_uop_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == 'b0;  // set when the truncated index is 0
+      last_uop_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;  // set when the truncated index equals uop_index_max
     end
   end
 
   // update segment_index valid
   always_comb begin
-    for(int i=0;i<`NUM_DE_UOP;i=i+1) begin: GET_SEG_INDEX
-      uop[i].seg_field_index = 'b0;
+    for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_SEG_INDEX
+      // default value: stays 0 unless one of the IS_SEGMENT cases below overrides it
+      seg_field_index[i] = 'b0;
 
-      if(uop_funct6.lsu_funct6.lsu_is_seg==IS_SEGMENT) begin
+      if(funct6_lsu.lsu_funct6.lsu_is_seg==IS_SEGMENT) begin  // non-zero values are only produced under IS_SEGMENT
         case(inst_nf)
           NF2: begin
             case(emul_max_vd_vs2)
-              EMUL2: uop[i].seg_field_index = {1'b0,uop_index_current[i][0]};
-              EMUL4: uop[i].seg_field_index = uop_index_current[i][1:0];
+              EMUL2: seg_field_index[i] = {1'b0,uop_index_current[i][0]};  // bit 0 of the index with a leading zero bit
+              EMUL4: seg_field_index[i] = uop_index_current[i][1:0];  // low two bits of the index
             endcase
           end
           NF3,
           NF4: begin
             if (emul_max_vd_vs2==EMUL2)
-              uop[i].seg_field_index = {1'b0,uop_index_current[i][0]};
+              seg_field_index[i] = {1'b0,uop_index_current[i][0]};  // same encoding as the NF2/EMUL2 case
           end
         endcase
       end
     end
   end
 
+  // drive every field of uop[j] from its per-field intermediate array, one continuous assign per field
+  generate
+    for(j=0;j<`NUM_DE_UOP;j++) begin: ASSIGN_RES  // j runs over 0 .. `NUM_DE_UOP-1
+    `ifdef TB_SUPPORT
+      assign uop[j].uop_pc              = uop_pc[j];
+    `endif  
+      assign uop[j].uop_funct3          = uop_funct3[j];
+      assign uop[j].uop_funct6          = uop_funct6[j];
+      assign uop[j].uop_exe_unit        = uop_exe_unit[j]; 
+      assign uop[j].uop_class           = uop_class[j];   
+      assign uop[j].vector_csr          = vector_csr[j];  
+      assign uop[j].vs_evl              = vs_evl[j];            
+      assign uop[j].ignore_vma          = ignore_vma[j];
+      assign uop[j].ignore_vta          = ignore_vta[j];
+      assign uop[j].force_vma_agnostic  = force_vma_agnostic[j];
+      assign uop[j].force_vta_agnostic  = force_vta_agnostic[j];
+      assign uop[j].vm                  = vm[j];                
+      assign uop[j].v0_valid            = v0_valid[j];          
+      assign uop[j].vd_index            = vd_index[j];          
+      assign uop[j].vd_eew              = vd_eew[j];  
+      assign uop[j].vd_valid            = vd_valid[j];
+      assign uop[j].vs3_valid           = vs3_valid[j];         
+      assign uop[j].vs1                 = vs1[j];              
+      assign uop[j].vs1_eew             = vs1_eew[j];           
+      assign uop[j].vs1_index_valid     = vs1_index_valid[j];
+      assign uop[j].vs1_opcode_valid    = vs1_opcode_valid[j];
+      assign uop[j].vs2_index 	        = vs2_index[j]; 	       
+      assign uop[j].vs2_eew             = vs2_eew[j];
+      assign uop[j].vs2_valid           = vs2_valid[j];
+      assign uop[j].rd_index 	          = rd_index[j]; 	       
+      assign uop[j].rd_index_valid      = rd_index_valid[j]; 
+      assign uop[j].rs1_data            = rs1_data[j];           
+      assign uop[j].rs1_data_valid      = rs1_data_valid[j];    
+      assign uop[j].uop_index           = uop_index[j];         
+      assign uop[j].first_uop_valid     = first_uop_valid[j];   
+      assign uop[j].last_uop_valid      = last_uop_valid[j];    
+      assign uop[j].seg_field_index     = seg_field_index[j];   
+    end
+  endgenerate
+
 endmodule
diff --git a/hdl/verilog/rvv/design/rvv_backend_dispatch.sv b/hdl/verilog/rvv/design/rvv_backend_dispatch.sv
index f55baec..ed76dc5 100755
--- a/hdl/verilog/rvv/design/rvv_backend_dispatch.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_dispatch.sv
@@ -65,47 +65,47 @@
     input  logic           rst_n;

 

 // Uops Queue to Dispatch unit

-    input  logic        [`NUM_DP_UOP-1:0] uop_valid_uop2dp;

-    input  UOP_QUEUE_t  [`NUM_DP_UOP-1:0] uop_uop2dp;

-    output logic        [`NUM_DP_UOP-1:0] uop_ready_dp2uop;

+    input  logic        [`NUM_DP_UOP-1:0]         uop_valid_uop2dp;

+    input  UOP_QUEUE_t  [`NUM_DP_UOP-1:0]         uop_uop2dp;

+    output logic        [`NUM_DP_UOP-1:0]         uop_ready_dp2uop;

 

 // Dispatch unit sends oprations to reservation stations

 // Dispatch unit to ALU reservation station

 // rs_*: reservation station

-    output logic          [`NUM_DP_UOP-1:0]   rs_valid_dp2alu;

-    output ALU_RS_t       [`NUM_DP_UOP-1:0]   rs_dp2alu;

-    input  logic          [`NUM_DP_UOP-1:0]   rs_ready_alu2dp;

+    output logic          [`NUM_DP_UOP-1:0]       rs_valid_dp2alu;

+    output ALU_RS_t       [`NUM_DP_UOP-1:0]       rs_dp2alu;

+    input  logic          [`NUM_DP_UOP-1:0]       rs_ready_alu2dp;

 

 // Dispatch unit to PMT+RDT reservation station

-    output logic          [`NUM_DP_UOP-1:0]   rs_valid_dp2pmtrdt;

-    output PMT_RDT_RS_t   [`NUM_DP_UOP-1:0]   rs_dp2pmtrdt;

-    input  logic          [`NUM_DP_UOP-1:0]   rs_ready_pmtrdt2dp;

+    output logic          [`NUM_DP_UOP-1:0]       rs_valid_dp2pmtrdt;

+    output PMT_RDT_RS_t   [`NUM_DP_UOP-1:0]       rs_dp2pmtrdt;

+    input  logic          [`NUM_DP_UOP-1:0]       rs_ready_pmtrdt2dp;

 

 // Dispatch unit to MUL reservation station

-    output logic          [`NUM_DP_UOP-1:0]   rs_valid_dp2mul;

-    output MUL_RS_t       [`NUM_DP_UOP-1:0]   rs_dp2mul;

-    input  logic          [`NUM_DP_UOP-1:0]   rs_ready_mul2dp;

+    output logic          [`NUM_DP_UOP-1:0]       rs_valid_dp2mul;

+    output MUL_RS_t       [`NUM_DP_UOP-1:0]       rs_dp2mul;

+    input  logic          [`NUM_DP_UOP-1:0]       rs_ready_mul2dp;

 

 // Dispatch unit to DIV reservation station

-    output logic          [`NUM_DP_UOP-1:0]   rs_valid_dp2div;

-    output DIV_RS_t       [`NUM_DP_UOP-1:0]   rs_dp2div;

-    input  logic          [`NUM_DP_UOP-1:0]   rs_ready_div2dp;

+    output logic          [`NUM_DP_UOP-1:0]       rs_valid_dp2div;

+    output DIV_RS_t       [`NUM_DP_UOP-1:0]       rs_dp2div;

+    input  logic          [`NUM_DP_UOP-1:0]       rs_ready_div2dp;

 

 // Dispatch unit to LSU 

     // to LSU RS

-    output logic          [`NUM_DP_UOP-1:0]   rs_valid_dp2lsu;

-    output UOP_RVV2LSU_t  [`NUM_DP_UOP-1:0]   rs_dp2lsu;

-    input  logic          [`NUM_DP_UOP-1:0]   rs_ready_lsu2dp;

+    output logic          [`NUM_DP_UOP-1:0]       rs_valid_dp2lsu;

+    output UOP_RVV2LSU_t  [`NUM_DP_UOP-1:0]       rs_dp2lsu;

+    input  logic          [`NUM_DP_UOP-1:0]       rs_ready_lsu2dp;

     // to LSU MAP INFO

-    output logic          [`NUM_DP_UOP-1:0]   mapinfo_valid_dp2lsu;

-    output LSU_MAP_INFO_t [`NUM_DP_UOP-1:0]   mapinfo_dp2lsu;

-    input  logic          [`NUM_DP_UOP-1:0]   mapinfo_ready_lsu2dp;

+    output logic          [`NUM_DP_UOP-1:0]       mapinfo_valid_dp2lsu;

+    output LSU_MAP_INFO_t [`NUM_DP_UOP-1:0]       mapinfo_dp2lsu;

+    input  logic          [`NUM_DP_UOP-1:0]       mapinfo_ready_lsu2dp;

 

-// Dispatch unit pushes operations to ROB unit

-    output logic          [`NUM_DP_UOP-1:0]   uop_valid_dp2rob;

-    output DP2ROB_t       [`NUM_DP_UOP-1:0]   uop_dp2rob;

-    input  logic          [`NUM_DP_UOP-1:0]   uop_ready_rob2dp;

-    input  logic          [`ROB_DEPTH_WIDTH-1:0] uop_index_rob2dp;

+// Dispatch unit pushes operations to ROB unit    

+    output logic          [`NUM_DP_UOP-1:0]       uop_valid_dp2rob;

+    output DP2ROB_t       [`NUM_DP_UOP-1:0]       uop_dp2rob;

+    input  logic          [`NUM_DP_UOP-1:0]       uop_ready_rob2dp;

+    input  logic          [`ROB_DEPTH_WIDTH-1:0]  uop_index_rob2dp;

 

 // Dispatch unit sends read request to VRF for vector data.

 // Dispatch unit to VRF unit

@@ -116,32 +116,32 @@
 

 // Dispatch unit accept all ROB entry to determine if vs_data of RS is from ROB or not

 // ROB unit to Dispatch unit

-    input  ROB2DP_t [`ROB_DEPTH-1:0]      rob_entry;

+    input  ROB2DP_t     [`ROB_DEPTH-1:0]          rob_entry;

 

 // ---internal signal definition--------------------------------------

-    SUC_UOP_RAW_t [`NUM_DP_UOP-1:0]   suc_uop;

-    PRE_UOP_RAW_t [`ROB_DEPTH-1:0]    pre_uop_rob;

-    PRE_UOP_RAW_t [`NUM_DP_UOP-2:0]   pre_uop_uop;

-    RAW_UOP_ROB_t [`NUM_DP_UOP-1:0]   raw_uop_rob; 

+    SUC_UOP_RAW_t       [`NUM_DP_UOP-1:0]   suc_uop;

+    PRE_UOP_RAW_t       [`ROB_DEPTH-1:0]    pre_uop_rob;

+    PRE_UOP_RAW_t       [`NUM_DP_UOP-2:0]   pre_uop_uop;

+    RAW_UOP_ROB_t       [`NUM_DP_UOP-1:0]   raw_uop_rob; 

     // uop0 is the first uop so no need raw check between uops for it

-    RAW_UOP_UOP_t [`NUM_DP_UOP-1:1]   raw_uop_uop; 

+    RAW_UOP_UOP_t       [`NUM_DP_UOP-1:1]   raw_uop_uop; 

 

-    STRCT_UOP_t   [`NUM_DP_UOP-1:0]   strct_uop;

-    ARCH_HAZARD_t                     arch_hazard;

+    STRCT_UOP_t         [`NUM_DP_UOP-1:0]   strct_uop;

+    ARCH_HAZARD_t                           arch_hazard;

 

-    UOP_OPN_t     [`NUM_DP_UOP-1:0]   uop_operand;

-    UOP_OPN_t     [`NUM_DP_UOP-1:0]   vrf_byp;

-    ROB_BYP_t     [`ROB_DEPTH-1:0]    rob_byp;

+    UOP_OPN_t           [`NUM_DP_UOP-1:0]   uop_operand;

+    UOP_OPN_t           [`NUM_DP_UOP-1:0]   vrf_byp;

+    ROB_BYP_t           [`ROB_DEPTH-1:0]    rob_byp;

 

-    UOP_CTRL_t    [`NUM_DP_UOP-1:0]   uop_ctrl;

+    UOP_CTRL_t          [`NUM_DP_UOP-1:0]   uop_ctrl;

 

-    UOP_INFO_t    [`NUM_DP_UOP-1:0]   uop_info;

-    UOP_OPN_BYTE_TYPE_t [`NUM_DP_UOP-1:0] uop_operand_byte_type;

+    UOP_INFO_t          [`NUM_DP_UOP-1:0]   uop_info;

+    UOP_OPN_BYTE_TYPE_t [`NUM_DP_UOP-1:0]   uop_operand_byte_type;

 

-    logic         [`NUM_DP_UOP-1:0][`VL_WIDTH-1:0]             vlmax;

-    logic         [`NUM_DP_UOP-1:0][$clog2(`VSTART_WIDTH)-1:0] vlmax_shift;

+    logic [`NUM_DP_UOP-1:0][`VL_WIDTH-1:0]             vlmax;

+    logic [`NUM_DP_UOP-1:0][$clog2(`VSTART_WIDTH)-1:0] vlmax_shift;

 

-    logic         [`NUM_DP_UOP-1:0][`ROB_DEPTH_WIDTH-1:0]      rob_address;

+    logic [`NUM_DP_UOP-1:0][`ROB_DEPTH_WIDTH-1:0]      rob_address;

 

 // ---code start------------------------------------------------------

     genvar i;

@@ -246,7 +246,7 @@
         .rd_data_vrf2dp (rd_data_vrf2dp),

         .v0_mask_vrf2dp (v0_mask_vrf2dp)

       );

-      

+

       for (i=0;i<`NUM_DP_UOP;i++) begin: gen_bypass_data

         rvv_backend_dispatch_bypass 

         #(

@@ -297,7 +297,7 @@
 // determine the type for each byte in uop's vector operands 

     generate

         for (i=0; i<`NUM_DP_UOP; i++) begin : gen_opr_bype_type

-            assign uop_info[i].uop_index  = (uop_uop2dp[i].uop_exe_unit==LSU)&(uop_uop2dp[i].uop_funct6.lsu_funct6.lsu_is_seg==IS_SEGMENT)?

+            assign uop_info[i].uop_index  = (uop_uop2dp[i].uop_exe_unit==LSU)&(uop_uop2dp[i].uop_funct6.lsu_funct6.lsu_is_seg==IS_SEGMENT)? 

                                             {1'b0,uop_uop2dp[i].seg_field_index} : uop_uop2dp[i].uop_index;

             assign uop_info[i].uop_exe_unit = uop_uop2dp[i].uop_exe_unit;

             assign uop_info[i].vd_eew     = uop_uop2dp[i].vd_eew;

@@ -356,32 +356,32 @@
 

           // PMTRDT RS

 `ifdef TB_SUPPORT

-            assign rs_dp2pmtrdt[i].uop_pc        = uop_uop2dp[i].uop_pc; 

+            assign rs_dp2pmtrdt[i].uop_pc          = uop_uop2dp[i].uop_pc; 

 `endif

-            assign rs_dp2pmtrdt[i].rob_entry     = rob_address[i]; 

-            assign rs_dp2pmtrdt[i].uop_exe_unit  = uop_uop2dp[i].uop_exe_unit; 

-            assign rs_dp2pmtrdt[i].uop_funct6    = uop_uop2dp[i].uop_funct6;

-            assign rs_dp2pmtrdt[i].uop_funct3    = uop_uop2dp[i].uop_funct3;

-            assign rs_dp2pmtrdt[i].vstart        = uop_uop2dp[i].vector_csr.vstart;

-            assign rs_dp2pmtrdt[i].vl            = uop_uop2dp[i].vs_evl;

-            assign rs_dp2pmtrdt[i].vlmax         = vlmax[i];

-            assign rs_dp2pmtrdt[i].vm            = uop_uop2dp[i].vm;

-            assign rs_dp2pmtrdt[i].v0_data       = uop_operand[i].v0;

-            assign rs_dp2pmtrdt[i].v0_data_valid = uop_uop2dp[i].v0_valid;

-            assign rs_dp2pmtrdt[i].vs1_data      = uop_operand[i].vs1;

-            assign rs_dp2pmtrdt[i].vs1_eew       = uop_uop2dp[i].vs1_eew;

-            assign rs_dp2pmtrdt[i].vs1_data_valid= uop_uop2dp[i].vs1_index_valid;

-            assign rs_dp2pmtrdt[i].vs2_data      = uop_operand[i].vs2;

-            assign rs_dp2pmtrdt[i].vs2_eew       = uop_uop2dp[i].vs2_eew;

-            assign rs_dp2pmtrdt[i].vs2_type      = uop_operand_byte_type[i].vs2;

-            assign rs_dp2pmtrdt[i].vs2_data_valid= uop_uop2dp[i].vs2_valid;

-            assign rs_dp2pmtrdt[i].vs3_data      = uop_operand[i].vd;

-            assign rs_dp2pmtrdt[i].vs3_data_valid= uop_uop2dp[i].vs3_valid;

-            assign rs_dp2pmtrdt[i].rs1_data      = uop_uop2dp[i].rs1_data;

-            assign rs_dp2pmtrdt[i].rs1_data_valid= uop_uop2dp[i].rs1_data_valid;

+            assign rs_dp2pmtrdt[i].rob_entry       = rob_address[i]; 

+            assign rs_dp2pmtrdt[i].uop_exe_unit    = uop_uop2dp[i].uop_exe_unit; 

+            assign rs_dp2pmtrdt[i].uop_funct6      = uop_uop2dp[i].uop_funct6;

+            assign rs_dp2pmtrdt[i].uop_funct3      = uop_uop2dp[i].uop_funct3;

+            assign rs_dp2pmtrdt[i].vstart          = uop_uop2dp[i].vector_csr.vstart;

+            assign rs_dp2pmtrdt[i].vl              = uop_uop2dp[i].vs_evl;

+            assign rs_dp2pmtrdt[i].vlmax           = vlmax[i];

+            assign rs_dp2pmtrdt[i].vm              = uop_uop2dp[i].vm;

+            assign rs_dp2pmtrdt[i].v0_data         = uop_operand[i].v0;

+            assign rs_dp2pmtrdt[i].v0_data_valid   = uop_uop2dp[i].v0_valid;

+            assign rs_dp2pmtrdt[i].vs1_data        = uop_operand[i].vs1;

+            assign rs_dp2pmtrdt[i].vs1_eew         = uop_uop2dp[i].vs1_eew;

+            assign rs_dp2pmtrdt[i].vs1_data_valid  = uop_uop2dp[i].vs1_index_valid;

+            assign rs_dp2pmtrdt[i].vs2_data        = uop_operand[i].vs2;

+            assign rs_dp2pmtrdt[i].vs2_eew         = uop_uop2dp[i].vs2_eew;

+            assign rs_dp2pmtrdt[i].vs2_type        = uop_operand_byte_type[i].vs2;

+            assign rs_dp2pmtrdt[i].vs2_data_valid  = uop_uop2dp[i].vs2_valid;

+            assign rs_dp2pmtrdt[i].vs3_data        = uop_operand[i].vd;

+            assign rs_dp2pmtrdt[i].vs3_data_valid  = uop_uop2dp[i].vs3_valid;

+            assign rs_dp2pmtrdt[i].rs1_data        = uop_uop2dp[i].rs1_data;

+            assign rs_dp2pmtrdt[i].rs1_data_valid  = uop_uop2dp[i].rs1_data_valid;

             assign rs_dp2pmtrdt[i].first_uop_valid = uop_uop2dp[i].first_uop_valid;

             assign rs_dp2pmtrdt[i].last_uop_valid  = uop_uop2dp[i].last_uop_valid;

-            assign rs_dp2pmtrdt[i].uop_index     = uop_uop2dp[i].uop_index;

+            assign rs_dp2pmtrdt[i].uop_index       = uop_uop2dp[i].uop_index;

             

           // MUL/MAC RS

 `ifdef TB_SUPPORT

@@ -419,34 +419,34 @@
 

           // LSU RS

 `ifdef TB_SUPPORT

-            assign rs_dp2lsu[i].uop_pc        = uop_uop2dp[i].uop_pc; 

+            assign rs_dp2lsu[i].uop_pc              = uop_uop2dp[i].uop_pc; 

 `endif

-            assign rs_dp2lsu[i].vidx_valid    = uop_uop2dp[i].vs2_valid;

-            assign rs_dp2lsu[i].vidx_addr     = uop_uop2dp[i].vs2_index;

-            assign rs_dp2lsu[i].vidx_data     = uop_operand[i].vs2;

+            assign rs_dp2lsu[i].vidx_valid          = uop_uop2dp[i].vs2_valid;

+            assign rs_dp2lsu[i].vidx_addr           = uop_uop2dp[i].vs2_index;

+            assign rs_dp2lsu[i].vidx_data           = uop_operand[i].vs2;

             assign rs_dp2lsu[i].vregfile_read_valid = uop_uop2dp[i].vs3_valid;

             assign rs_dp2lsu[i].vregfile_read_addr  = uop_uop2dp[i].vd_index;

             assign rs_dp2lsu[i].vregfile_read_data  = uop_operand[i].vd;

-            assign rs_dp2lsu[i].v0_valid      = uop_uop2dp[i].v0_valid;

-            assign rs_dp2lsu[i].v0_data       = uop_operand_byte_type[i].v0_strobe;

+            assign rs_dp2lsu[i].v0_valid            = uop_uop2dp[i].v0_valid;

+            assign rs_dp2lsu[i].v0_data             = uop_operand_byte_type[i].v0_strobe;

 

           // LSU MAP INFO

 `ifdef TB_SUPPORT

-            assign mapinfo_dp2lsu[i].uop_pc     = uop_uop2dp[i].uop_pc; 

+            assign mapinfo_dp2lsu[i].uop_pc              = uop_uop2dp[i].uop_pc; 

 `endif

-            assign mapinfo_dp2lsu[i].valid      = mapinfo_valid_dp2lsu[i];

-            assign mapinfo_dp2lsu[i].rob_entry  = rob_address[i];

-            assign mapinfo_dp2lsu[i].lsu_class  = uop_uop2dp[i].uop_funct6.lsu_funct6.lsu_is_store;

+            assign mapinfo_dp2lsu[i].valid               = mapinfo_valid_dp2lsu[i];

+            assign mapinfo_dp2lsu[i].rob_entry           = rob_address[i];

+            assign mapinfo_dp2lsu[i].lsu_class           = uop_uop2dp[i].uop_funct6.lsu_funct6.lsu_is_store;

             assign mapinfo_dp2lsu[i].vregfile_write_addr = uop_uop2dp[i].vd_index;

 

           // ROB

 `ifdef TB_SUPPORT

-            assign uop_dp2rob[i].uop_pc       = uop_uop2dp[i].uop_pc; 

+            assign uop_dp2rob[i].uop_pc         = uop_uop2dp[i].uop_pc; 

 `endif

-            assign uop_dp2rob[i].w_index      = uop_uop2dp[i].rd_index_valid ? uop_uop2dp[i].rd_index : uop_uop2dp[i].vd_index;

-            assign uop_dp2rob[i].w_type       = uop_uop2dp[i].rd_index_valid ? XRF : VRF;

-            assign uop_dp2rob[i].byte_type    = uop_operand_byte_type[i].vd;

-            assign uop_dp2rob[i].vector_csr   = uop_uop2dp[i].vector_csr;

+            assign uop_dp2rob[i].w_index        = uop_uop2dp[i].rd_index_valid ? uop_uop2dp[i].rd_index : uop_uop2dp[i].vd_index;

+            assign uop_dp2rob[i].w_type         = uop_uop2dp[i].rd_index_valid ? XRF : VRF;

+            assign uop_dp2rob[i].byte_type      = uop_operand_byte_type[i].vd;

+            assign uop_dp2rob[i].vector_csr     = uop_uop2dp[i].vector_csr;

             assign uop_dp2rob[i].last_uop_valid = uop_uop2dp[i].last_uop_valid;

         end

     endgenerate

diff --git a/hdl/verilog/rvv/design/rvv_backend_dispatch_bypass.sv b/hdl/verilog/rvv/design/rvv_backend_dispatch_bypass.sv
index 8ca8d6f..aab81d6 100644
--- a/hdl/verilog/rvv/design/rvv_backend_dispatch_bypass.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_dispatch_bypass.sv
@@ -73,8 +73,7 @@
                     vs1_sel[0][j]: uop_operand.vs1[8*j+:8] = agnostic[0][j] ? 8'hFF : rob_byp[0].w_data[8*j+:8];
                     default:       uop_operand.vs1[8*j+:8] = vrf_byp.vs1[8*j+:8];
                 endcase
-            end
-            always_comb begin
+
                 priority case (1'b1)
                     vs2_sel[7][j]: uop_operand.vs2[8*j+:8] = agnostic[7][j] ? 8'hFF : rob_byp[7].w_data[8*j+:8];
                     vs2_sel[6][j]: uop_operand.vs2[8*j+:8] = agnostic[6][j] ? 8'hFF : rob_byp[6].w_data[8*j+:8];
@@ -86,8 +85,7 @@
                     vs2_sel[0][j]: uop_operand.vs2[8*j+:8] = agnostic[0][j] ? 8'hFF : rob_byp[0].w_data[8*j+:8];
                     default:       uop_operand.vs2[8*j+:8] = vrf_byp.vs2[8*j+:8];
                 endcase
-            end
-            always_comb begin
+
                 priority case (1'b1)
                     vd_sel[7][j]:  uop_operand.vd[8*j+:8]  = agnostic[7][j] ? 8'hFF : rob_byp[7].w_data[8*j+:8];
                     vd_sel[6][j]:  uop_operand.vd[8*j+:8]  = agnostic[6][j] ? 8'hFF : rob_byp[6].w_data[8*j+:8];
@@ -99,8 +97,7 @@
                     vd_sel[0][j]:  uop_operand.vd[8*j+:8]  = agnostic[0][j] ? 8'hFF : rob_byp[0].w_data[8*j+:8];
                     default:       uop_operand.vd[8*j+:8]  = vrf_byp.vd[8*j+:8];
                 endcase
-            end
-            always_comb begin
+
                 priority case (1'b1)
                     v0_sel[7][j]:  uop_operand.v0[8*j+:8]  = agnostic[7][j] ? 8'hFF : rob_byp[7].w_data[8*j+:8];
                     v0_sel[6][j]:  uop_operand.v0[8*j+:8]  = agnostic[6][j] ? 8'hFF : rob_byp[6].w_data[8*j+:8];
diff --git a/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv b/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv
index 8937387..770243a 100644
--- a/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv
@@ -38,6 +38,11 @@
     logic  [`VSTART_WIDTH-1:0]          uop_vd_end;
     logic  [`VLENB-1:0][`VL_WIDTH-1:0]  vd_ele_index; // element index
     logic  [`VLENB-1:0]                 vd_enable, vd_enable_tmp;
+    
+    // result
+    BYTE_TYPE_t                         vs2;
+    BYTE_TYPE_t                         vd;
+    logic [`VLENB-1:0]                  v0_strobe;
 
 // ---code start------------------------------------------------------
     // find eew_max and shift amount
@@ -116,13 +121,13 @@
             assign vs2_ele_index[i] = uop_vs2_start + (i >> vs2_eew_shift);
             always_comb begin
                 if (uop_info.ignore_vta&uop_info.ignore_vma)
-                    operand_byte_type.vs2[i] = BODY_ACTIVE;       
+                    vs2[i] = BODY_ACTIVE;       // ignore_vta and ignore_vma both set: checked first, so it wins over the cases below
                 else if (vs2_ele_index[i] >= uop_info.vl) 
-                    operand_byte_type.vs2[i] = TAIL; 
+                    vs2[i] = TAIL; // element index is at or past vl
                 else if (vs2_ele_index[i] < {1'b0, uop_info.vstart}) 
-                    operand_byte_type.vs2[i] = NOT_CHANGE; // prestart
+                    vs2[i] = NOT_CHANGE; // prestart: element index is below vstart
                 else begin 
-                    operand_byte_type.vs2[i] = (vs2_enable[i] || uop_info.ignore_vma) ? BODY_ACTIVE
+                    vs2[i] = (vs2_enable[i] || uop_info.ignore_vma) ? BODY_ACTIVE // remaining elements: active when enabled or ignore_vma is set
                                                                                       : BODY_INACTIVE;
                 end
             end
@@ -193,29 +198,29 @@
             assign vd_ele_index[0] = uop_v0_start;
 
             always_comb begin
-              operand_byte_type.v0_strobe[0] = 'b0;
+              v0_strobe[0] = 'b0;
 
               case (uop_info.uop_exe_unit)
                 RDT:begin
                   case(uop_info.vd_eew)
-                    EEW32:operand_byte_type.vd[0]   = BODY_ACTIVE;
-                    EEW16:operand_byte_type.vd[0]   = BODY_ACTIVE;
-                    default:operand_byte_type.vd[0] = BODY_ACTIVE;
+                    EEW32:vd[0]   = BODY_ACTIVE;
+                    EEW16:vd[0]   = BODY_ACTIVE;
+                    default:vd[0] = BODY_ACTIVE;
                   endcase
                 end
                 default:begin
                   if (uop_info.ignore_vta&uop_info.ignore_vma)
-                      operand_byte_type.vd[0] = BODY_ACTIVE;       
+                      vd[0] = BODY_ACTIVE;       
                   else if (vd_ele_index[0] >= uop_info.vl) 
-                      operand_byte_type.vd[0] = TAIL;       
+                      vd[0] = TAIL;       
                   else if ((vd_ele_index[0] < {1'b0, uop_info.vstart})&(uop_info.vstart>=uop_vd_start)) 
-                      operand_byte_type.vd[0] = NOT_CHANGE;     // prestart
+                      vd[0] = NOT_CHANGE;     // prestart
                   else if (vd_ele_index[0] < {1'b0, uop_vd_start}) // &(uop_info.vstart<uop_vd_start)
-                      operand_byte_type.vd[0] = NOT_CHANGE;     // prestart
+                      vd[0] = NOT_CHANGE;     // prestart
                   else begin 
-                      operand_byte_type.vd[0] = (vd_enable[0] || uop_info.ignore_vma) ? BODY_ACTIVE
+                      vd[0] = (vd_enable[0] || uop_info.ignore_vma) ? BODY_ACTIVE
                                                                                       : BODY_INACTIVE;
-                      operand_byte_type.v0_strobe[0] = vd_enable[0] || uop_info.ignore_vma;
+                      v0_strobe[0] = vd_enable[0] || uop_info.ignore_vma;
                   end
                 end
               endcase
@@ -226,31 +231,31 @@
             assign vd_ele_index[i] = uop_v0_start + (i >> vd_eew_shift);
 
             always_comb begin
-              operand_byte_type.v0_strobe[i] = 'b0;
+              v0_strobe[i] = 'b0;
 
               case (uop_info.uop_exe_unit)
                 RDT:begin
                   case(uop_info.vd_eew)
-                    EEW32:operand_byte_type.vd[i] = i<4 ? BODY_ACTIVE : TAIL;
-                    EEW16:operand_byte_type.vd[i] = i<2 ? BODY_ACTIVE : TAIL;
-                    default:operand_byte_type.vd[i] = i<1 ? BODY_ACTIVE : TAIL;
+                    EEW32:vd[i] = i<4 ? BODY_ACTIVE : TAIL;
+                    EEW16:vd[i] = i<2 ? BODY_ACTIVE : TAIL;
+                    default:vd[i] = i<1 ? BODY_ACTIVE : TAIL;
                   endcase
                 end
                 default:begin
                   if (uop_info.ignore_vta&uop_info.ignore_vma)
-                      operand_byte_type.vd[i] = BODY_ACTIVE;       
+                      vd[i] = BODY_ACTIVE;       
                   else if (vd_ele_index[i] >= uop_info.vl) 
-                      operand_byte_type.vd[i] = TAIL;       
+                      vd[i] = TAIL;       
                   else if ((vd_ele_index[i] < {1'b0, uop_info.vstart})&(uop_info.vstart>=uop_vd_start)) 
-                      operand_byte_type.vd[i] = NOT_CHANGE;     // prestart
+                      vd[i] = NOT_CHANGE;     // prestart
                   else if (vd_ele_index[i] < {1'b0, uop_vd_start}) // &(uop_info.vstart<uop_vd_start)
-                      operand_byte_type.vd[i] = NOT_CHANGE;     // prestart
+                      vd[i] = NOT_CHANGE;     // prestart
                   else if (vd_ele_index[i] > {1'b0, uop_vd_end}) 
-                      operand_byte_type.vd[i] = BODY_INACTIVE;
+                      vd[i] = BODY_INACTIVE;
                   else begin 
-                      operand_byte_type.vd[i] = (vd_enable[i] || uop_info.ignore_vma) ? BODY_ACTIVE
+                      vd[i] = (vd_enable[i] || uop_info.ignore_vma) ? BODY_ACTIVE
                                                                                       : BODY_INACTIVE;
-                      operand_byte_type.v0_strobe[i] = vd_enable[i] || uop_info.ignore_vma;
+                      v0_strobe[i] = vd_enable[i] || uop_info.ignore_vma;
                   end
                 end
               endcase
@@ -259,4 +264,8 @@
         end
     endgenerate
 
+    assign operand_byte_type.vs2       = vs2;
+    assign operand_byte_type.vd        = vd;
+    assign operand_byte_type.v0_strobe = v0_strobe;
+
 endmodule
diff --git a/hdl/verilog/rvv/design/rvv_backend_lsu_remap.sv b/hdl/verilog/rvv/design/rvv_backend_lsu_remap.sv
index 6f1321e..ff0a87f 100644
--- a/hdl/verilog/rvv/design/rvv_backend_lsu_remap.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_lsu_remap.sv
@@ -81,13 +81,13 @@
   generate
     for(i=0;i<`NUM_LSU;i++) begin: GET_RESULT
       `ifdef TB_SUPPORT
-        assign result_lsu2rob[i].uop_pc = mapinfo[i].uop_pc;
+        assign result_lsu2rob[i].uop_pc    = mapinfo[i].uop_pc;  // only compiled when TB_SUPPORT is defined
       `endif
         assign result_lsu2rob[i].rob_entry = mapinfo[i].rob_entry;
-        assign result_lsu2rob[i].w_data = lsu_res[i].uop_lsu2rvv.vregfile_write_data;
-        assign result_lsu2rob[i].w_valid = (mapinfo[i].lsu_class==IS_LOAD)&
-                                            lsu_res[i].uop_lsu2rvv.vregfile_write_valid&
-                                           (lsu_res[i].uop_lsu2rvv.vregfile_write_addr==mapinfo[i].vregfile_write_addr);
+        assign result_lsu2rob[i].w_data    = lsu_res[i].uop_lsu2rvv.vregfile_write_data;  // write data passed through unchanged
+        assign result_lsu2rob[i].w_valid   = (mapinfo[i].lsu_class==IS_LOAD)&  // w_valid is the AND of three terms: lsu_class is IS_LOAD,
+                                             lsu_res[i].uop_lsu2rvv.vregfile_write_valid&  // the LSU result flags a write as valid,
+                                             (lsu_res[i].uop_lsu2rvv.vregfile_write_addr==mapinfo[i].vregfile_write_addr);  // and its write address equals the one held in mapinfo
         assign result_lsu2rob[i].vsaturate = 'b0;
     end
   endgenerate
@@ -98,7 +98,7 @@
 
     for (int j=0;j<`NUM_LSU;j++) begin
       if (lsu_res[j].trap_valid&lsu_res_valid[j]&mapinfo_valid[j]) begin
-        trap_valid_rmp2rob = 'b1;
+        trap_valid_rmp2rob     = 'b1;
         trap_rob_entry_rmp2rob = mapinfo[j].rob_entry;
       end
     end
diff --git a/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv b/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv
index 50664d5..862e469 100644
--- a/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv
@@ -2602,23 +2602,29 @@
       default: pmtrdt_res_valid = rdt_ctrl_q.last_uop_valid;
     endcase
   end
-`ifdef TB_SUPPORT
+
   always_comb begin
+    `ifdef TB_SUPPORT
+    // uop_pc
     case (uop_type_q)
       PERMUTATION: pmtrdt_res.uop_pc = rdt_ctrl_q.compress ? compress_ctrl_ex1.uop_pc : pmt_ctrl_q.uop_pc;
-      default: pmtrdt_res.uop_pc = rdt_ctrl_q.uop_pc; 
+      default:     pmtrdt_res.uop_pc = rdt_ctrl_q.uop_pc; 
     endcase
-  end
-`endif
-  always_comb begin
+    `endif
+    
+    // rob_entry
     case (uop_type_q)
       PERMUTATION:pmtrdt_res.rob_entry = rdt_ctrl_q.compress ? compress_ctrl_ex1.rob_entry : pmt_ctrl_q.rob_entry;
-      default:pmtrdt_res.rob_entry = rdt_ctrl_q.rob_entry;
+      default:    pmtrdt_res.rob_entry = rdt_ctrl_q.rob_entry;
     endcase
-  end
-  assign pmtrdt_res.w_valid = 1'b1;
-  assign pmtrdt_res.vsaturate = '0;
-  always_comb begin
+
+    // write valid
+    pmtrdt_res.w_valid = 1'b1;
+
+    // saturate
+    pmtrdt_res.vsaturate = '0;
+
+    // data
     case (uop_type_q)
       PERMUTATION: pmtrdt_res.w_data = rdt_ctrl_q.compress ? pmtrdt_res_compress : pmtrdt_res_pmt;
       REDUCTION:   pmtrdt_res.w_data = pmtrdt_res_red;