1. Implement the vmandn mask instruction in the ALU. 2. Add the ignore_vta_vma signal and other new fields to ALU_RS_t and ROB_t.

Change-Id: Id02b98f827f882661d29a59ce246b14a2219233c
diff --git a/hdl/verilog/rvv/design/rvv.svh b/hdl/verilog/rvv/design/rvv.svh
index 261c3c6..3317e87 100755
--- a/hdl/verilog/rvv/design/rvv.svh
+++ b/hdl/verilog/rvv/design/rvv.svh
@@ -1,8 +1,8 @@
 `include "rvv_define.svh"
 
-/*
-IF stage, RVS to Command Queue
-*/
+//
+// IF stage, RVS to Command Queue
+//
 typedef struct packed {
     logic   [`VTYPE_VILL-1:0]       vill,       // 0:not illegal, 1:illegal
     logic   [`VTYPE_VMA-1:0]        vma,        // 0:inactive element undisturbed, 1:inactive element agnostic
@@ -27,12 +27,12 @@
     logic   [`PC_WIDTH-1:0]         insts_pc,
     logic   [`INST_WIDTH-1:0]       insts, 	
     VECTOR_CSR_t                    vector_csr,
-    logic   [`XLEN-1:0] 	        rs1_data
+    logic   [`XLEN-1:0] 	          rs1_data
 } INST_t; 
 
-/*
-ID stage, Uops Queue to Dispatch unit
-*/
+//
+// ID stage, Uops Queue to Dispatch unit
+//
 // It is used to distinguish which execute units that VVV/VVX/VX uop is dispatch to, based on inst_encoding[6:0]
 typedef enum logic [2:0] {
     ALU,
@@ -57,129 +57,129 @@
 
 // when EXE_UNIT_e is not LSU, it identifys what instruction, vadd or vmacc or ..? based on inst_encoding[31:26]
 typedef enum logic [5:0] {
-    vadd            =   6'b000_000,
-    vsub            =   6'b000_010,
-    vrsub           =   6'b000_011,
-    vminu           =   6'b000_100,
-    vmin            =   6'b000_101,
-    vmaxu           =   6'b000_110,
-    vmaxu           =   6'b000_111,
-    vand            =   6'b001_001,
-    vor             =   6'b001_010,
-    vxor            =   6'b001_011,
-    vrgather        =   6'b001_100,
-    vslideup        =   6'b001_110,
-    vrgatherei16    =   6'b001_110,
-    vslidedown      =   6'b001_111,
-    vadc            =   6'b010_000,
-    vmadc           =   6'b010_001,
-    vsbc            =   6'b010_010,
-    vmsbc           =   6'b010_011,
-    vmerge_vmv      =   6'b010_111,     // it could be vmerge or vmv, based on vm field
-    vmseq           =   6'b011_000,
-    vmsne           =   6'b011_001,
-    vmsltu          =   6'b011_010,
-    vmslt           =   6'b011_011,
-    vmsleu          =   6'b011_100,
-    vmsle           =   6'b011_101,
-    vmsgtu          =   6'b011_110,
-    vmsgt           =   6'b011_111,
-    vsaddu          =   6'b100_000,
-    vsadd           =   6'b100_001,
-    vssubu          =   6'b100_010,
-    vssub           =   6'b100_011,
-    vsll            =   6'b100_101,
-    vsmul_vmvnrr    =   6'b100_111,     // it could be vsmul or vmv<nr>r, based on vm field
-    vsrl            =   6'b101_000,
-    vsra            =   6'b101_001,
-    vssrl           =   6'b101_010,
-    vssra           =   6'b101_011,
-    vnsrl           =   6'b101_100,
-    vnsra           =   6'b101_101,
-    vnclipu         =   6'b101_110,
-    vnclip          =   6'b101_111,
-    vwredsumu       =   6'b110_000,
-    vwredsum        =   6'b110_001   
+    VADD            =   6'b000_000,
+    VSUB            =   6'b000_010,
+    VRSUB           =   6'b000_011,
+    VMINU           =   6'b000_100,
+    VMIN            =   6'b000_101,
+    VMAXU           =   6'b000_110,
+    VMAX            =   6'b000_111,     // was a second VMAXU: enum names must be unique
+    VAND            =   6'b001_001,
+    VOR             =   6'b001_010,
+    VXOR            =   6'b001_011,
+    VRGATHER        =   6'b001_100,
+    // vslideup and vrgatherei16 share this funct6; an enum cannot give one value two names, so they are merged
+    VSLIDEUP_RGATHEREI16 = 6'b001_110,  // it could be vslideup(OPIVX/OPIVI) or vrgatherei16(OPIVV), based on uop_opcode
+    VSLIDEDOWN      =   6'b001_111,
+    VADC            =   6'b010_000,
+    VMADC           =   6'b010_001,
+    VSBC            =   6'b010_010,
+    VMSBC           =   6'b010_011,
+    VMERGE_VMV      =   6'b010_111,     // it could be vmerge or vmv, based on vm field
+    VMSEQ           =   6'b011_000,
+    VMSNE           =   6'b011_001,
+    VMSLTU          =   6'b011_010,
+    VMSLT           =   6'b011_011,
+    VMSLEU          =   6'b011_100,
+    VMSLE           =   6'b011_101,
+    VMSGTU          =   6'b011_110,
+    VMSGT           =   6'b011_111,
+    VSADDU          =   6'b100_000,
+    VSADD           =   6'b100_001,
+    VSSUBU          =   6'b100_010,
+    VSSUB           =   6'b100_011,
+    VSLL            =   6'b100_101,
+    VSMUL_VMVNRR    =   6'b100_111,     // it could be vsmul or vmv<nr>r, based on vm field
+    VSRL            =   6'b101_000,
+    VSRA            =   6'b101_001,
+    VSSRL           =   6'b101_010,
+    VSSRA           =   6'b101_011,
+    VNSRL           =   6'b101_100,
+    VNSRA           =   6'b101_101,
+    VNCLIPU         =   6'b101_110,
+    VNCLIP          =   6'b101_111,
+    VWREDSUMU       =   6'b110_000,
+    VWREDSUM        =   6'b110_001
 } OPI_TYPE_e;
 
 typedef enum logic [5:0] {
-    vredsum         =   6'b000_000,
-    vredand         =   6'b000_001,
-    vredor          =   6'b000_010,
-    vredxor         =   6'b000_011,
-    vredminu        =   6'b000_100,
-    vredmin         =   6'b000_101,
-    vredmaxu        =   6'b000_110,
-    vredmax         =   6'b000_111,
-    vaaddu          =   6'b001_000,
-    vaadd           =   6'b001_001,
-    vasubu          =   6'b001_010,
-    vasub           =   6'b001_011,
-    vslide1up       =   6'b001_110,
-    vslide1down     =   6'b001_111,
-    vwxunary0       =   6'b010_000,     // it could be vcpop.m, vfirst.m and vmv. They can be distinguished by vs1 field(inst_encoding[19:15]).
-    vxunary0        =   6'b010_010,     // it could be vzext.vf2, vzext.vf4, vsext.vf2, vsext.vf4. They can be distinguished by vs1 field(inst_encoding[19:15]).
-    vmunary0        =   6'b010_100,     // it could be vmsbf, vmsof, vmsif, viota, vid. They can be distinguished by vs1 field(inst_encoding[19:15]).
-    vcompress       =   6'b010_111,
-    vmandn          =   6'b011_000,
-    vmand           =   6'b011_001,
-    vmor            =   6'b011_010,
-    vmxor           =   6'b011_011,
-    vmorn           =   6'b011_100,
-    vmnand          =   6'b011_101,
-    vmnor           =   6'b011_110,
-    vmxnor          =   6'b011_111,
-    vdivu           =   6'b100_000,
-    vdiv            =   6'b100_001,
-    vremu           =   6'b100_010,
-    vrem            =   6'b100_011,
-    vmulhu          =   6'b100_100,
-    vmul            =   6'b100_101,
-    vmulhsu         =   6'b100_110,
-    vmulh           =   6'b100_111,
-    vmadd           =   6'b101_001,
-    vnmsub          =   6'b101_011,
-    vmacc           =   6'b101_101,
-    vnmsac          =   6'b101_111,
-    vwaddu          =   6'b110_000,
-    vwadd           =   6'b110_001,
-    vwsubu          =   6'b110_010,
-    vwsub           =   6'b110_011,
-    vwaddu          =   6'b110_100,
-    vwadd           =   6'b110_101,
-    vwsubu          =   6'b110_110,
-    vwsub           =   6'b110_111,
-    vwmulu          =   6'b111_000,
-    vwmulsu         =   6'b111_010,
-    vwmul           =   6'b111_011,
-    vwmaccu         =   6'b111_100,
-    vwmacc          =   6'b111_101,
-    vwmaccus        =   6'b111_110,
-    vwmaccsu        =   6'b111_111      
+    VREDSUM         =   6'b000_000,
+    VREDAND         =   6'b000_001,
+    VREDOR          =   6'b000_010,
+    VREDXOR         =   6'b000_011,
+    VREDMINU        =   6'b000_100,
+    VREDMIN         =   6'b000_101,
+    VREDMAXU        =   6'b000_110,
+    VREDMAX         =   6'b000_111,
+    VAADDU          =   6'b001_000,
+    VAADD           =   6'b001_001,
+    VASUBU          =   6'b001_010,
+    VASUB           =   6'b001_011,
+    VSLIDE1UP       =   6'b001_110,
+    VSLIDE1DOWN     =   6'b001_111,
+    VWXUNARY0       =   6'b010_000,     // it could be vcpop.m, vfirst.m and vmv. They can be distinguished by vs1 field(inst_encoding[19:15]).
+    VXUNARY0        =   6'b010_010,     // it could be vzext.vf2, vzext.vf4, vsext.vf2, vsext.vf4. They can be distinguished by vs1 field(inst_encoding[19:15]).
+    VMUNARY0        =   6'b010_100,     // it could be vmsbf, vmsof, vmsif, viota, vid. They can be distinguished by vs1 field(inst_encoding[19:15]).
+    VCOMPRESS       =   6'b010_111,
+    VMANDN          =   6'b011_000,
+    VMAND           =   6'b011_001,
+    VMOR            =   6'b011_010,
+    VMXOR           =   6'b011_011,
+    VMORN           =   6'b011_100,
+    VMNAND          =   6'b011_101,
+    VMNOR           =   6'b011_110,
+    VMXNOR          =   6'b011_111,
+    VDIVU           =   6'b100_000,
+    VDIV            =   6'b100_001,
+    VREMU           =   6'b100_010,
+    VREM            =   6'b100_011,
+    VMULHU          =   6'b100_100,
+    VMUL            =   6'b100_101,
+    VMULHSU         =   6'b100_110,
+    VMULH           =   6'b100_111,
+    VMADD           =   6'b101_001,
+    VNMSUB          =   6'b101_011,
+    VMACC           =   6'b101_101,
+    VNMSAC          =   6'b101_111,
+    VWADDU          =   6'b110_000,
+    VWADD           =   6'b110_001,
+    VWSUBU          =   6'b110_010,
+    VWSUB           =   6'b110_011,
+    VWADDU_W        =   6'b110_100,     // vwaddu.w (was a second VWADDU: enum names must be unique)
+    VWADD_W         =   6'b110_101,     // vwadd.w  (was a second VWADD)
+    VWSUBU_W        =   6'b110_110,     // vwsubu.w (was a second VWSUBU)
+    VWSUB_W         =   6'b110_111,     // vwsub.w  (was a second VWSUB)
+    VWMULU          =   6'b111_000,
+    VWMULSU         =   6'b111_010,
+    VWMUL           =   6'b111_011,
+    VWMACCU         =   6'b111_100,
+    VWMACC          =   6'b111_101,
+    VWMACCUS        =   6'b111_110,
+    VWMACCSU        =   6'b111_111
 } OPM_TYPE_e;
 
 // when OPM_TYPE_e=vwxunary0, the uop could be vcpop.m, vfirst.m and vmv. They can be distinguished by vs1 field(inst_encoding[19:15]).
 typedef enum logic [4:0] {
-    vmv_x_s         =   5'b00000,
-    vcpop           =   5'b10000,
-    vfirst          =   5'b10001
+    VMV_X_S         =   5'b00000,
+    VCPOP           =   5'b10000,
+    VFIRST          =   5'b10001
 } OPM_VWXUNARY0_e;
 
 // when OPM_TYPE_e=vxunary0, the uop could be vzext.vf2, vzext.vf4, vsext.vf2, vsext.vf4. They can be distinguished by vs1 field(inst_encoding[19:15]).
 typedef enum logic [4:0] {
-    vzext_vf4       =   5'b00100,
-    vsext_vf4       =   5'b00101,
-    vzext_vf2       =   5'b00110,
-    vsext_vf2       =   5'b00111
+    VZEXT_VF4       =   5'b00100,
+    VSEXT_VF4       =   5'b00101,
+    VZEXT_VF2       =   5'b00110,
+    VSEXT_VF2       =   5'b00111
 } OPM_VXUNARY0_e;
 
 // when OPM_TYPE_e=vmxunary0, the uop could be vmsbf, vmsof, vmsif, viota, vid. They can be distinguished by vs1 field(inst_encoding[19:15]).
 typedef enum logic [4:0] {
-    vmsbf           =   5'b00001,
-    vmsof           =   5'b00010,
-    vmsif           =   5'b00011,
-    viota           =   5'b10000,
-    vid             =   5'b10001
+    VMSBF           =   5'b00001,
+    VMSOF           =   5'b00010,
+    VMSIF           =   5'b00011,
+    VIOTA           =   5'b10000,
+    VID             =   5'b10001
 } OPM_VMXUNARY0_e;
 
 // when EXE_UNIT_e is LSU, it identifys what LSU instruction, unit-stride load or indexed store or ..? based on inst_encoding[31:26]
@@ -237,6 +237,7 @@
 
 // Effective Element Width
 typedef enum logic [1:0] {
+    EEW1,
     EEW8, 
     EEW16,
     EEW32
@@ -263,16 +264,16 @@
     logic                               vs2_valid,
     logic   [`REGFILE_INDEX_WIDTH-1:0]  rd_index, 	        // Original 32bit instruction encoding: insts[11:7].
     logic                               rd_index_valid, 
-    logic   [`XLEN-1:0] 	            rs1_data,           // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend or zero-extend(shift instructions...) to XLEN-bit. 
-    logic        	                    rs1_data_valid,                                
+    logic   [`XLEN-1:0] 	              rs1_data,           // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend or zero-extend(shift instructions...) to XLEN-bit. 
+    logic        	                      rs1_data_valid,                                
             
     logic   [`UOP_INDEX_WIDTH-1:0]      uop_index,          // used for calculate v0_start in DP stage
     logic                               last_uop_valid      // one instruction may be split to many uops, this signal is used to specify the last uop in those uops of one instruction.
 } UOP_QUEUE_t;    
 
-/*
-DP stage, 
-*/
+//
+// DP stage, 
+//
 // specify whether the current byte belongs to 'prestart' or 'body-inactive' or 'body-active' or 'tail'
 typedef enum logic [1:0] {
     NOT_CHANGE,         // the byte is not changed, which may belong to 'prestart' or superfluous element in widening/narrowing uop
@@ -285,17 +286,28 @@
 typedef ELE_TYPE_e [`VLENB-1:0]         ELE_TYPE_t;
 
 // ALU reservation station struct
+typedef union packed {
+    logic [`VLEN-1:0]   v0_data;    // view used when the third operand is v0 (vmadc.v?m/vmsbc.v?m)
+    logic [`VLEN-1:0]   vd_data;    // view used when the third operand is vd (mask uops: vmandn,vmand,...)
+} VS3_u;
+
 typedef struct packed {
     logic   [`ROB_DEPTH_WIDTH-1:0]      rob_entry,
-    FUNCT6_e                            uop_funct,  
+    FUNCT_e                             uop_funct,  
     EXE_OPCODE_e                        uop_opcode,
-    logic                               vm,                 // Identify vmadc.v?m and vmadc.v? in the same uop_funct(6'b010000).
-                                                            // Identify vmsbc.v?m and vmsbc.v? in the same uop_funct(6'b010011).    
-    logic   [`VCSR_VXRM-1:0]            vxrm,               // rounding mode and saturate mode
-    
-    logic   [`VLENB-1:0]                v0_data,            // when the uop is vmadc.v?m or vmsbc.v?m, it will use v0 as the third vector operand
-    VS1_u                               vs1,                // when vs1_data_valid=0, vs1 field is valid and used to decode some OPMVV uops
-    logic   [`VLEN-1:0]                 vs1_data,           // when vs1_data_valid=1, vs1_data is valid as a vector operand
+    logic   [`VSTART_WIDTH-1:0]         vstart,
+    // vm field can be used to identify vmadc.v?m/vmadc.v? uop in the same uop_funct(6'b010001).
+    // vm field can be used to identify vmsbc.v?m/vmsbc.v? uop in the same uop_funct(6'b010011).   
+    logic                               vm,               
+    // rounding mode 
+    logic   [`VCSR_VXRM-1:0]            vxrm,              
+    // when the uop is vmadc.v?m/vmsbc.v?m, the uop will use v0_data as the third vector operand.
+    // when the uop is mask uop(vmandn,vmand,...), the uop will use vd_data as the third vector operand.
+    VS3_u                               vs3_data,           
+    // when vs1_data_valid=0, vs1 field is valid and used to decode some OPMVV uops
+    // when vs1_data_valid=1, vs1_data is valid as a vector operand
+    VS1_u                               vs1,                
+    logic   [`VLEN-1:0]                 vs1_data,           
     EEW_e                               vs1_eew,
     logic                               vs1_data_valid, 
     ELE_TYPE_t                          vs1_type, 
@@ -303,17 +315,18 @@
     EEW_e                               vs2_eew,
     logic                               vs2_data_valid,  
     ELE_TYPE_t                          vs2_type, 
-    logic   [`XLEN-1:0] 	            rs1_data,           // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit. 
-    logic        	                    rs1_data_valid                                   
+    // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit. 
+    logic   [`XLEN-1:0] 	              rs1_data,        
+    logic        	                      rs1_data_valid                                   
 } ALU_RS_t;    
 
 // DIV reservation station struct
 typedef struct packed {
     logic   [`ROB_DEPTH_WIDTH-1:0]      rob_entry,
-    FUNCT6_e                            uop_funct,  
+    FUNCT_e                             uop_funct,  
     EXE_OPCODE_e                        uop_opcode,
-    
-    logic   [`VLEN-1:0]                 vs1_data,           // when vs1_data_valid=1, vs1_data is valid as a vector operand
+    // when vs1_data_valid=1, vs1_data is valid as a vector operand
+    logic   [`VLEN-1:0]                 vs1_data,           
     EEW_e                               vs1_eew,
     logic                               vs1_data_valid, 
     ELE_TYPE_t                          vs1_type, 
@@ -321,16 +334,17 @@
     EEW_e                               vs2_eew,
     logic                               vs2_data_valid,  
     ELE_TYPE_t                          vs2_type, 
-    logic   [`XLEN-1:0] 	            rs1_data,           // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit. 
-    logic        	                    rs1_data_valid                                   
+    // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit. 
+    logic   [`XLEN-1:0] 	              rs1_data,     
+    logic        	                      rs1_data_valid                                   
 } DIV_RS_t; 
 
 // MUL and MAC reservation station struct
 typedef struct packed {   
     logic   [`ROB_DEPTH_WIDTH-1:0]      rob_entry,
-    FUNCT6_e                            uop_func,
+    FUNCT_e                             uop_func,
     EXE_OPCODE_e                        uop_opcode,
-    logic   [`VCSR_VXRM-1:0]            vxrm,               // rounding mode and saturate mode
+    logic   [`VCSR_VXRM-1:0]            vxrm,               // rounding mode 
  
     logic   [`VLEN-1:0]                 vs1_data,           
     EEW_e                               vs1_eew,
@@ -343,29 +357,34 @@
     logic   [`VLEN-1:0]                 vs3_data,	        
     EEW_e                               vs3_eew,
     logic                               vs3_data_valid, 
-    ELE_TYPE_t                          vs3_type, 
-    logic   [`XLEN-1:0] 	            rs1_data,           // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit. 
-    logic        	                    rs1_data_valid   
+    ELE_TYPE_t                          vs3_type,
+    // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit. 
+    logic   [`XLEN-1:0] 	              rs1_data,          
+    logic          	                    rs1_data_valid   
 } MUL_RS_t;    
 
 // PMT and RDT reservation station struct
 typedef struct packed {   
     logic   [`ROB_DEPTH_WIDTH-1:0]      rob_entry,
-    FUNCT6_e                            uop_func,
+    FUNCT_e                             uop_func,
     EXE_OPCODE_e                        uop_opcode,
-
-    logic                               vm,                 // Identify vmerge and vmv in the same uop_funct(6'b010111).
-    VS1_u                               vs1,                // when vs1_data_valid=0, vs1 field is valid and used to decode some OPMVV uops
-    logic   [`VLEN-1:0]                 vs1_data,           // when vs1_data_valid=1, vs1_data is valid as a vector operand
+    // Identify vmerge and vmv in the same uop_funct(6'b010111).
+    logic                               vm,               
+    // when vs1_data_valid=0, vs1 field is valid and used to decode some OPMVV uops
+    // when vs1_data_valid=1, vs1_data is valid as a vector operand
+    VS1_u                               vs1,               
+    logic   [`VLEN-1:0]                 vs1_data,          
     EEW_e                               vs1_eew,
     logic                               vs1_data_valid, 
     ELE_TYPE_t                          vs1_type, 
     logic   [`VLEN-1:0]                 vs2_data,	        
     EEW_e                               vs2_eew,
     logic                               vs2_data_valid, 
-    ELE_TYPE_t                          vs2_type, 
-    logic   [`XLEN-1:0] 	            rs1_data,           // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit. 
-    logic        	                    rs1_data_valid   
+    ELE_TYPE_t                          vs2_type,
+    // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit. 
+    logic   [`XLEN-1:0] 	              rs1_data,         
+    logic        	                      rs1_data_valid,
+    logic                               last_uop_valid     
 } PMT_RDT_RS_t;    
 
 // LSU reservation station struct
@@ -384,9 +403,19 @@
     ELE_TYPE_t                          vs3_type, 
 } LSU_RS_t;    
 
-/*
-EX stage, 
-*/
+//
+// EX stage, 
+//
+// send ALU's result to ROB
+typedef struct packed {
+    logic   [`ROB_DEPTH_WIDTH-1:0]      rob_entry;          // ROB entry of the uop, passed through from ALU_RS_t.rob_entry
+    logic   [`VLEN-1:0]                 w_data;             // when w_type=XRF, w_data[`XLEN-1:0] will store the scalar result
+    W_DATA_TYPE_t                       w_type;
+    logic                               w_valid;
+    logic   [`VCSR_VXSAT-1:0]           vxsat;
+    logic                               ignore_vta_vma;     // rvv_alu_unit drives this to 1 for VMANDN results
+} ALU2ROB_t;
+
 // send uop to LSU
 typedef struct packed {   
     // RVV send to uop_pc to help LSU match the vld/vst uop
@@ -394,14 +423,14 @@
     // When LSU submit the result to RVV, LSU need to attend uop_id to help RVV retire the uop in ROB  
     logic   [`ROB_DEPTH_WIDTH-1:0]      uop_id,     
     // Vector regfile index interface for indexed vld/vst
-	logic 								vidx_valid,
-	logic	[`REGFILE_INDEX_WIDTH-1:0]	vidx_addr,
-  	logic	[`VLEN-1:0]					vidx_data,              // vs2
+	  logic 							              	vidx_valid,
+  	logic	[`REGFILE_INDEX_WIDTH-1:0]	  vidx_addr,
+  	logic	[`VLEN-1:0]				          	vidx_data,              // vs2
     ELE_TYPE_t                          vs2_type,               // mask for vs2
     // Vector regfile read interface for vst
-	logic 								vregfile_read_valid,
-  	logic	[`REGFILE_INDEX_WIDTH-1:0]	vregfile_read_addr,
-  	logic	[`VLEN-1:0] 				vregfile_read_data,		// vs3     
+  	logic 								              vregfile_read_valid,
+  	logic	[`REGFILE_INDEX_WIDTH-1:0]  	vregfile_read_addr,
+  	logic	[`VLEN-1:0] 				          vregfile_read_data,	  	// vs3     
     ELE_TYPE_t                          vs3_type                // mask for vs3
 } UOP_LSU_RVV2RVS_t;  
 
@@ -411,11 +440,13 @@
     logic   [`ROB_DEPTH_WIDTH-1:0]      uop_id,   
     // LSU uop type
     // When LSU complete the vstore uop, it need to tell RVV done signal and attend uop_id to help RVV retire the uops
-    LSU_IS_STORE_e                      uop_type,               // when load, it means the uop is vld. It enables vregfile_write_addr and vregfile_write_data, and submit the vector data to ROB
-                                                                // when store, it means this store uop is done in LSU, ROB can retire this uop.
+    // when load, it means the uop is vld. It enables vregfile_write_addr and vregfile_write_data, and submit the vector data to ROB
+    // when store, it means this store uop is done in LSU, ROB can retire this uop.
+    LSU_IS_STORE_e                      uop_type,               
+                                                                
 	// Vector regfile write interface for vld
-  	logic	[`REGFILE_INDEX_WIDTH-1:0] 	vregfile_write_addr,
-  	logic	[`VLEN-1:0] 				vregfile_write_data, 	// vd   
+  	logic	[`REGFILE_INDEX_WIDTH-1:0] 	  vregfile_write_addr,
+  	logic	[`VLEN-1:0] 			          	vregfile_write_data,  	// vd   
     ELE_TYPE_t                          vs1_type                // mask for vd
 } UOP_LSU_RVS2RVV_t;  
 
@@ -431,14 +462,14 @@
     logic   [`VLEN-1:0]                 w_data,             // when w_type=XRF, w_data[`XLEN-1:0] will store the scalar result
     W_DATA_TYPE_t                       w_type,
     logic                               w_valid,                    
-    ELE_TYPE_t                          ele_type, 
+    ELE_TYPE_t                          vd_type, 
     VECTOR_CSR_t                        vector_csr,
-    logic                               last_uop_valid     
+    logic                               ignore_vta_vma
 } ROB_t;  
 
-/*
-WB stage, bypass and write back to VRF/XRF, trap handler
-*/
+//
+// WB stage, bypass and write back to VRF/XRF, trap handler
+//
 // write back to XRF
 typedef struct packed {
     logic   [`REGFILE_INDEX_WIDTH-1:0]  w_index, 
diff --git a/hdl/verilog/rvv/design/rvv_alu_unit.sv b/hdl/verilog/rvv/design/rvv_alu_unit.sv
new file mode 100644
index 0000000..014a995
--- /dev/null
+++ b/hdl/verilog/rvv/design/rvv_alu_unit.sv
@@ -0,0 +1,257 @@
+/*
+description: 
+1. It gets uops from the ALU reservation station and executes them.
+
+feature list:
+1. Every ALU uop is executed and submitted to the ROB in 1 cycle.
+2. Reuse arithmetic logic as much as possible.
+3. Low-power design.
+*/
+
+`include "rvv.svh"
+
+// rvv_alu_unit: executes the uop presented on alu_uop and drives the result for the ROB
+module rvv_alu_unit
+(
+//
+// interface signals
+//
+    // global signals
+    // clk and rstn are not referenced by any logic in this module: every result
+    // is produced combinationally by assign/always_comb statements
+    input   logic                   clk,
+    input   logic                   rstn,
+
+    // ALU RS handshake signals
+    // alu_uop_valid qualifies alu_uop: the uop is decoded only while it is 1
+    input   logic                   alu_uop_valid,
+    // uop to execute, taken from the ALU reservation station
+    input   ALU_RS_t                alu_uop,
+
+    // ALU send result signals to ROB
+    // result_alu2rob_valid qualifies result_alu2rob
+    output  logic                   result_alu2rob_valid,
+    // fields: rob_entry, w_data, w_type, w_valid, vxsat, ignore_vta_vma
+    output  ALU2ROB_t               result_alu2rob
+);
+
+//
+// internal signals
+//
+    // ALU_RS_t struct signals (local copies of the alu_uop fields)
+    logic   [`ROB_DEPTH_WIDTH-1:0]  rob_entry;
+    FUNCT_u                         uop_funct;          // NOTE(review): ALU_RS_t.uop_funct is FUNCT_e in rvv.svh - confirm intended type
+    EXE_OPCODE_e                    uop_opcode;
+    logic   [`VSTART_WIDTH-1:0]     vstart;
+    logic                           vm;       
+    logic   [`VCSR_VXRM-1:0]        vxrm;              
+    logic   [`VLENB-1:0]            v0_data;            // NOTE(review): VS3_u.v0_data is `VLEN bits wide, so its assign truncates unless `VLENB==`VLEN - confirm intended
+    logic   [`VLEN-1:0]             vd_data;           
+    VS1_u                           vs1;                // same type as ALU_RS_t.vs1; was assigned below without being declared
+    logic   [`VLEN-1:0]             vs1_data;           
+    EEW_e                           vs1_eew;
+    logic                           vs1_data_valid; 
+    ELE_TYPE_t                      vs1_type; 
+    logic   [`VLEN-1:0]             vs2_data;	        
+    EEW_e                           vs2_eew;
+    logic                           vs2_data_valid;  
+    ELE_TYPE_t                      vs2_type; 
+    logic   [`XLEN-1:0] 	          rs1_data;        
+    logic        	                  rs1_data_valid;
+
+    // execute: operands selected for, and result produced by, the mask-logic function unit
+    logic   [`VLEN-1:0]             src2_vdata_mask_logic;
+    logic   [`VLEN-1:0]             src1_vdata_mask_logic;
+    logic                           result_valid_mask_logic;
+    logic   [`VLEN-1:0]             result_vdata_mask_logic;
+
+    // ALU2ROB_t struct signals
+    logic   [`VLEN-1:0]             w_data;             // when w_type=XRF, w_data[`XLEN-1:0] will store the scalar result
+    W_DATA_TYPE_t                   w_type;
+    logic                           w_valid; 
+    logic   [`VCSR_VXSAT-1:0]       vxsat;     
+    logic                           ignore_vta_vma;
+
+    integer                         i;                  // bit index of the for loop that builds w_data
+//
+// execute uop
+//
+    // split ALU_RS_t struct: copy each field of alu_uop into the local signal of the same name
+    assign  rob_entry       = alu_uop.rob_entry;
+    assign  uop_funct       = alu_uop.uop_funct;
+    assign  uop_opcode      = alu_uop.uop_opcode;
+    assign  vstart          = alu_uop.vstart;
+    assign  vm              = alu_uop.vm;
+    assign  vxrm            = alu_uop.vxrm;
+    assign  v0_data         = alu_uop.vs3_data.v0_data;    // v0_data and vd_data are two members of the same packed union vs3_data
+    assign  vd_data         = alu_uop.vs3_data.vd_data;    // so both read the same underlying bits
+    assign  vs1             = alu_uop.vs1;
+    assign  vs1_data        = alu_uop.vs1_data;
+    assign  vs1_eew         = alu_uop.vs1_eew;
+    assign  vs1_data_valid  = alu_uop.vs1_data_valid;
+    assign  vs1_type        = alu_uop.vs1_type;
+    assign  vs2_data        = alu_uop.vs2_data;
+    assign  vs2_eew         = alu_uop.vs2_eew;
+    assign  vs2_data_valid  = alu_uop.vs2_data_valid;
+    assign  vs2_type        = alu_uop.vs2_type;
+    assign  rs1_data        = alu_uop.rs1_data;
+    assign  rs1_data_valid  = alu_uop.rs1_data_valid;
+    
+    // prepare source data to calculate
+    always_comb begin
+      // defaults: keep every output of this block driven when no uop is decoded
+      src2_vdata_mask_logic     = 'b0;
+      src1_vdata_mask_logic     = 'b0;
+      result_valid_mask_logic   = 'b0;
+
+      // prepare source data; the leading 1'b1 in each label means nothing is decoded unless alu_uop_valid=1
+      case({alu_uop_valid,uop_opcode}) 
+
+        {1'b1,OPIVV},
+        {1'b1,OPIVX},
+        {1'b1,OPIVI}: begin
+          case(uop_funct.opi_funct)
+            // no OPI uop is implemented yet
+            default: begin
+              `ifdef ASSERT_ON
+              // ("unsupported uop_funct.opi_funct. uop_opcode=%s, uop_funct=%s, rob_entry=%d.\n",uop_opcode,uop_funct.opi_funct,rob_entry);
+              `endif
+            end
+          endcase
+        end
+
+        {1'b1,OPMVV}, 
+        {1'b1,OPMVX}: begin
+          case(uop_funct.opm_funct)
+            // vmandn: both vector sources must be valid and vm must be 1, otherwise no result is produced
+            VMANDN: begin
+              if(vs1_data_valid && vs2_data_valid && (vm==1'b1)) begin
+                src2_vdata_mask_logic     = vs2_data;
+                src1_vdata_mask_logic     = vs1_data;
+                result_valid_mask_logic   = 1'b1;
+              end else begin
+                src2_vdata_mask_logic     = 'b0;
+                src1_vdata_mask_logic     = 'b0;
+                result_valid_mask_logic   = 'b0;
+                `ifdef ASSERT_ON
+                // assertion("%s uop: rob_entry=%d, vs1_data_valid(should be 1)=%d, vs2_data_valid(should be 1)=%d, vm(should be 1)=%d.\n",uop_funct.opm_funct,rob_entry,vs1_data_valid,vs2_data_valid,vm);
+                `endif
+              end
+            end
+
+            default: begin
+            `ifdef ASSERT_ON
+            // ("unsupported uop_funct.opm_funct. uop_opcode=%s, uop_funct=%s, rob_entry=%d.\n",uop_opcode,uop_funct.opm_funct,rob_entry);
+            `endif
+            end
+          endcase
+        end
+
+        default: begin
+          `ifdef ASSERT_ON
+          // when alu_uop_valid=1, ("unsupported uop_opcode. uop_opcode=%s, rob_entry=%d.\n",uop_opcode,rob_entry);
+          `endif
+        end
+      endcase
+    end
+    
+    // calculate the result
+    always_comb begin
+      // default result when no uop is decoded
+      result_vdata_mask_logic   = 'b0; 
+
+      // calculate result data
+      case({alu_uop_valid,uop_opcode}) 
+
+        {1'b1,OPIVV},
+        {1'b1,OPIVX},
+        {1'b1,OPIVI}: begin
+          case(uop_funct.opi_funct)
+            default: ;  // no OPI uop implemented yet; a case statement needs at least one item
+          endcase
+        end
+
+        {1'b1,OPMVV}, 
+        {1'b1,OPMVX}: begin
+          case(uop_funct.opm_funct)
+            // operands are the ones selected by the prepare-source block
+            VMANDN: begin
+              result_vdata_mask_logic   = f_vmandn(src2_vdata_mask_logic,src1_vdata_mask_logic);  
+            end
+            default: ;  // keep the default result
+          endcase
+        end
+        default: ;      // keep the default result
+      endcase
+    end
+
+//
+// submit result to ROB
+//
+    // assign ALU2ROB_t struct signals: pack the local result signals into the result_alu2rob output, field by field
+    assign  result_alu2rob.rob_entry      = rob_entry;
+    assign  result_alu2rob.w_data         = w_data;
+    assign  result_alu2rob.w_type         = w_type;
+    assign  result_alu2rob.w_valid        = w_valid;
+    assign  result_alu2rob.vxsat          = vxsat;
+    assign  result_alu2rob.ignore_vta_vma = ignore_vta_vma;
+
+    // combine the signals to result_alu2rob struct and submit
+    always_comb begin
+    // initial: nothing is submitted unless a supported uop is decoded below
+      result_alu2rob_valid  = 'b0;
+      w_data                = 'b0;
+      w_type                = 'b0;
+      w_valid               = 'b0;
+      vxsat                 = 'b0;
+      ignore_vta_vma        = 'b0;
+    // submit
+      case({alu_uop_valid,uop_opcode}) 
+
+        {1'b1,OPIVV},
+        {1'b1,OPIVX},
+        {1'b1,OPIVI}: begin
+          case(uop_funct.opi_funct)
+            default: ;  // no OPI uop implemented yet; a case statement needs at least one item
+          endcase
+        end
+
+        {1'b1,OPMVV}, 
+        {1'b1,OPMVX}: begin
+          case(uop_funct.opm_funct)
+            // bits below vstart keep the old vd value, the remaining bits take the new result
+            VMANDN: begin
+              for (i=0;i<`VLEN;i=i+1) 
+              begin
+                if (i<vstart)
+                  w_data[i]         = vd_data[i];
+                else
+                  w_data[i]         = result_vdata_mask_logic[i];
+              end
+              result_alu2rob_valid  = result_valid_mask_logic;
+              w_type                = VRF;
+              w_valid               = 1'b1;
+              vxsat                 = 1'b0;
+              ignore_vta_vma        = 1'b1;
+            end
+            default: ;  // keep the initial values
+          endcase
+        end
+        default: ;      // keep the initial values
+      endcase
+    end
+
+//
+// function unit
+//
+  // OPMVV-vmandn function unit: bitwise AND of vs2_data with the complement of vs1_data
+  function logic [`VLEN-1:0] f_vmandn(
+    input logic [`VLEN-1:0] vs2_data,
+    input logic [`VLEN-1:0] vs1_data
+  );
+    return (~vs1_data) & vs2_data;
+  endfunction
+
+
+
+endmodule