1. Complete a mask instruction(vmandn) in alu. 2. Add ignore_vta_vma signal and some signals into ALU_RS_t and ROB_t
Change-Id: Id02b98f827f882661d29a59ce246b14a2219233c
diff --git a/hdl/verilog/rvv/design/rvv.svh b/hdl/verilog/rvv/design/rvv.svh
index 261c3c6..3317e87 100755
--- a/hdl/verilog/rvv/design/rvv.svh
+++ b/hdl/verilog/rvv/design/rvv.svh
@@ -1,8 +1,8 @@
`include "rvv_define.svh"
-/*
-IF stage, RVS to Command Queue
-*/
+//
+// IF stage, RVS to Command Queue
+//
typedef struct packed {
logic [`VTYPE_VILL-1:0] vill, // 0:not illegal, 1:illegal
logic [`VTYPE_VMA-1:0] vma, // 0:inactive element undisturbed, 1:inactive element agnostic
@@ -27,12 +27,12 @@
logic [`PC_WIDTH-1:0] insts_pc,
logic [`INST_WIDTH-1:0] insts,
VECTOR_CSR_t vector_csr,
- logic [`XLEN-1:0] rs1_data
+ logic [`XLEN-1:0] rs1_data
} INST_t;
-/*
-ID stage, Uops Queue to Dispatch unit
-*/
+//
+// ID stage, Uops Queue to Dispatch unit
+//
// It is used to distinguish which execute units that VVV/VVX/VX uop is dispatch to, based on inst_encoding[6:0]
typedef enum logic [2:0] {
ALU,
@@ -57,129 +57,129 @@
// when EXE_UNIT_e is not LSU, it identifys what instruction, vadd or vmacc or ..? based on inst_encoding[31:26]
typedef enum logic [5:0] {
- vadd = 6'b000_000,
- vsub = 6'b000_010,
- vrsub = 6'b000_011,
- vminu = 6'b000_100,
- vmin = 6'b000_101,
- vmaxu = 6'b000_110,
- vmaxu = 6'b000_111,
- vand = 6'b001_001,
- vor = 6'b001_010,
- vxor = 6'b001_011,
- vrgather = 6'b001_100,
- vslideup = 6'b001_110,
- vrgatherei16 = 6'b001_110,
- vslidedown = 6'b001_111,
- vadc = 6'b010_000,
- vmadc = 6'b010_001,
- vsbc = 6'b010_010,
- vmsbc = 6'b010_011,
- vmerge_vmv = 6'b010_111, // it could be vmerge or vmv, based on vm field
- vmseq = 6'b011_000,
- vmsne = 6'b011_001,
- vmsltu = 6'b011_010,
- vmslt = 6'b011_011,
- vmsleu = 6'b011_100,
- vmsle = 6'b011_101,
- vmsgtu = 6'b011_110,
- vmsgt = 6'b011_111,
- vsaddu = 6'b100_000,
- vsadd = 6'b100_001,
- vssubu = 6'b100_010,
- vssub = 6'b100_011,
- vsll = 6'b100_101,
- vsmul_vmvnrr = 6'b100_111, // it could be vsmul or vmv<nr>r, based on vm field
- vsrl = 6'b101_000,
- vsra = 6'b101_001,
- vssrl = 6'b101_010,
- vssra = 6'b101_011,
- vnsrl = 6'b101_100,
- vnsra = 6'b101_101,
- vnclipu = 6'b101_110,
- vnclip = 6'b101_111,
- vwredsumu = 6'b110_000,
- vwredsum = 6'b110_001
+  VADD            =   6'b000_000,
+  VSUB            =   6'b000_010,
+  VRSUB           =   6'b000_011,
+  VMINU           =   6'b000_100,
+  VMIN            =   6'b000_101,
+  VMAXU           =   6'b000_110,
+  VMAX            =   6'b000_111,   // signed max; was a second VMAXU, which is a duplicate identifier
+  VAND            =   6'b001_001,
+  VOR             =   6'b001_010,
+  VXOR            =   6'b001_011,
+  VRGATHER        =   6'b001_100,
+  VSLIDEUP        =   6'b001_110,   // NOTE(review): shares 6'b001_110 with VRGATHEREI16; enum values must be unique
+  VRGATHEREI16    =   6'b001_110,   // NOTE(review): probably needs merging with VSLIDEUP (like VMERGE_VMV) - confirm
+  VSLIDEDOWN      =   6'b001_111,
+  VADC            =   6'b010_000,
+  VMADC           =   6'b010_001,
+  VSBC            =   6'b010_010,
+  VMSBC           =   6'b010_011,
+  VMERGE_VMV      =   6'b010_111,   // it could be vmerge or vmv, based on vm field
+  VMSEQ           =   6'b011_000,
+  VMSNE           =   6'b011_001,
+  VMSLTU          =   6'b011_010,
+  VMSLT           =   6'b011_011,
+  VMSLEU          =   6'b011_100,
+  VMSLE           =   6'b011_101,
+  VMSGTU          =   6'b011_110,
+  VMSGT           =   6'b011_111,
+  VSADDU          =   6'b100_000,
+  VSADD           =   6'b100_001,
+  VSSUBU          =   6'b100_010,
+  VSSUB           =   6'b100_011,
+  VSLL            =   6'b100_101,
+  VSMUL_VMVNRR    =   6'b100_111,   // it could be vsmul or vmv<nr>r, based on vm field
+  VSRL            =   6'b101_000,
+  VSRA            =   6'b101_001,
+  VSSRL           =   6'b101_010,
+  VSSRA           =   6'b101_011,
+  VNSRL           =   6'b101_100,
+  VNSRA           =   6'b101_101,
+  VNCLIPU         =   6'b101_110,
+  VNCLIP          =   6'b101_111,
+  VWREDSUMU       =   6'b110_000,
+  VWREDSUM        =   6'b110_001
} OPI_TYPE_e;
typedef enum logic [5:0] {
- vredsum = 6'b000_000,
- vredand = 6'b000_001,
- vredor = 6'b000_010,
- vredxor = 6'b000_011,
- vredminu = 6'b000_100,
- vredmin = 6'b000_101,
- vredmaxu = 6'b000_110,
- vredmax = 6'b000_111,
- vaaddu = 6'b001_000,
- vaadd = 6'b001_001,
- vasubu = 6'b001_010,
- vasub = 6'b001_011,
- vslide1up = 6'b001_110,
- vslide1down = 6'b001_111,
- vwxunary0 = 6'b010_000, // it could be vcpop.m, vfirst.m and vmv. They can be distinguished by vs1 field(inst_encoding[19:15]).
- vxunary0 = 6'b010_010, // it could be vzext.vf2, vzext.vf4, vsext.vf2, vsext.vf4. They can be distinguished by vs1 field(inst_encoding[19:15]).
- vmunary0 = 6'b010_100, // it could be vmsbf, vmsof, vmsif, viota, vid. They can be distinguished by vs1 field(inst_encoding[19:15]).
- vcompress = 6'b010_111,
- vmandn = 6'b011_000,
- vmand = 6'b011_001,
- vmor = 6'b011_010,
- vmxor = 6'b011_011,
- vmorn = 6'b011_100,
- vmnand = 6'b011_101,
- vmnor = 6'b011_110,
- vmxnor = 6'b011_111,
- vdivu = 6'b100_000,
- vdiv = 6'b100_001,
- vremu = 6'b100_010,
- vrem = 6'b100_011,
- vmulhu = 6'b100_100,
- vmul = 6'b100_101,
- vmulhsu = 6'b100_110,
- vmulh = 6'b100_111,
- vmadd = 6'b101_001,
- vnmsub = 6'b101_011,
- vmacc = 6'b101_101,
- vnmsac = 6'b101_111,
- vwaddu = 6'b110_000,
- vwadd = 6'b110_001,
- vwsubu = 6'b110_010,
- vwsub = 6'b110_011,
- vwaddu = 6'b110_100,
- vwadd = 6'b110_101,
- vwsubu = 6'b110_110,
- vwsub = 6'b110_111,
- vwmulu = 6'b111_000,
- vwmulsu = 6'b111_010,
- vwmul = 6'b111_011,
- vwmaccu = 6'b111_100,
- vwmacc = 6'b111_101,
- vwmaccus = 6'b111_110,
- vwmaccsu = 6'b111_111
+  VREDSUM         =   6'b000_000,
+  VREDAND         =   6'b000_001,
+  VREDOR          =   6'b000_010,
+  VREDXOR         =   6'b000_011,
+  VREDMINU        =   6'b000_100,
+  VREDMIN         =   6'b000_101,
+  VREDMAXU        =   6'b000_110,
+  VREDMAX         =   6'b000_111,
+  VAADDU          =   6'b001_000,
+  VAADD           =   6'b001_001,
+  VASUBU          =   6'b001_010,
+  VASUB           =   6'b001_011,
+  VSLIDE1UP       =   6'b001_110,
+  VSLIDE1DOWN     =   6'b001_111,
+  VWXUNARY0       =   6'b010_000,   // it could be vcpop.m, vfirst.m and vmv. They can be distinguished by vs1 field(inst_encoding[19:15]).
+  VXUNARY0        =   6'b010_010,   // it could be vzext.vf2, vzext.vf4, vsext.vf2, vsext.vf4. They can be distinguished by vs1 field(inst_encoding[19:15]).
+  VMUNARY0        =   6'b010_100,   // it could be vmsbf, vmsof, vmsif, viota, vid. They can be distinguished by vs1 field(inst_encoding[19:15]).
+  VCOMPRESS       =   6'b010_111,
+  VMANDN          =   6'b011_000,
+  VMAND           =   6'b011_001,
+  VMOR            =   6'b011_010,
+  VMXOR           =   6'b011_011,
+  VMORN           =   6'b011_100,
+  VMNAND          =   6'b011_101,
+  VMNOR           =   6'b011_110,
+  VMXNOR          =   6'b011_111,
+  VDIVU           =   6'b100_000,
+  VDIV            =   6'b100_001,
+  VREMU           =   6'b100_010,
+  VREM            =   6'b100_011,
+  VMULHU          =   6'b100_100,
+  VMUL            =   6'b100_101,
+  VMULHSU         =   6'b100_110,
+  VMULH           =   6'b100_111,
+  VMADD           =   6'b101_001,
+  VNMSUB          =   6'b101_011,
+  VMACC           =   6'b101_101,
+  VNMSAC          =   6'b101_111,
+  VWADDU          =   6'b110_000,
+  VWADD           =   6'b110_001,
+  VWSUBU          =   6'b110_010,
+  VWSUB           =   6'b110_011,
+  VWADDU_W        =   6'b110_100,   // vwaddu.w form; was a second VWADDU, which is a duplicate identifier
+  VWADD_W         =   6'b110_101,   // vwadd.w form; was a second VWADD
+  VWSUBU_W        =   6'b110_110,   // vwsubu.w form; was a second VWSUBU
+  VWSUB_W         =   6'b110_111,   // vwsub.w form; was a second VWSUB
+  VWMULU          =   6'b111_000,
+  VWMULSU         =   6'b111_010,
+  VWMUL           =   6'b111_011,
+  VWMACCU         =   6'b111_100,
+  VWMACC          =   6'b111_101,
+  VWMACCUS        =   6'b111_110,
+  VWMACCSU        =   6'b111_111
} OPM_TYPE_e;
// when OPM_TYPE_e=vwxunary0, the uop could be vcpop.m, vfirst.m and vmv. They can be distinguished by vs1 field(inst_encoding[19:15]).
typedef enum logic [4:0] {
- vmv_x_s = 5'b00000,
- vcpop = 5'b10000,
- vfirst = 5'b10001
+ VMV_X_S = 5'b00000, // vmv (scalar move) variant of VWXUNARY0
+ VCPOP = 5'b10000, // vcpop.m
+ VFIRST = 5'b10001 // vfirst.m
} OPM_VWXUNARY0_e;
// when OPM_TYPE_e=vxunary0, the uop could be vzext.vf2, vzext.vf4, vsext.vf2, vsext.vf4. They can be distinguished by vs1 field(inst_encoding[19:15]).
typedef enum logic [4:0] {
- vzext_vf4 = 5'b00100,
- vsext_vf4 = 5'b00101,
- vzext_vf2 = 5'b00110,
- vsext_vf2 = 5'b00111
+ VZEXT_VF4 = 5'b00100, // vzext.vf4
+ VSEXT_VF4 = 5'b00101, // vsext.vf4
+ VZEXT_VF2 = 5'b00110, // vzext.vf2
+ VSEXT_VF2 = 5'b00111 // vsext.vf2
} OPM_VXUNARY0_e;
// when OPM_TYPE_e=vmxunary0, the uop could be vmsbf, vmsof, vmsif, viota, vid. They can be distinguished by vs1 field(inst_encoding[19:15]).
typedef enum logic [4:0] {
- vmsbf = 5'b00001,
- vmsof = 5'b00010,
- vmsif = 5'b00011,
- viota = 5'b10000,
- vid = 5'b10001
+ VMSBF = 5'b00001, // vmsbf
+ VMSOF = 5'b00010, // vmsof
+ VMSIF = 5'b00011, // vmsif
+ VIOTA = 5'b10000, // viota
+ VID = 5'b10001 // vid
} OPM_VMXUNARY0_e;
// when EXE_UNIT_e is LSU, it identifys what LSU instruction, unit-stride load or indexed store or ..? based on inst_encoding[31:26]
@@ -237,6 +237,7 @@
// Effective Element Width
typedef enum logic [1:0] {
+ EEW1,
EEW8,
EEW16,
EEW32
@@ -263,16 +264,16 @@
logic vs2_valid,
logic [`REGFILE_INDEX_WIDTH-1:0] rd_index, // Original 32bit instruction encoding: insts[11:7].
logic rd_index_valid,
- logic [`XLEN-1:0] rs1_data, // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend or zero-extend(shift instructions...) to XLEN-bit.
- logic rs1_data_valid,
+ logic [`XLEN-1:0] rs1_data, // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend or zero-extend(shift instructions...) to XLEN-bit.
+ logic rs1_data_valid,
logic [`UOP_INDEX_WIDTH-1:0] uop_index, // used for calculate v0_start in DP stage
logic last_uop_valid // one instruction may be split to many uops, this signal is used to specify the last uop in those uops of one instruction.
} UOP_QUEUE_t;
-/*
-DP stage,
-*/
+//
+// DP stage,
+//
// specify whether the current byte belongs to 'prestart' or 'body-inactive' or 'body-active' or 'tail'
typedef enum logic [1:0] {
NOT_CHANGE, // the byte is not changed, which may belong to 'prestart' or superfluous element in widening/narrowing uop
@@ -285,17 +286,28 @@
typedef ELE_TYPE_e [`VLENB-1:0] ELE_TYPE_t;
// ALU reservation station struct
+typedef union packed {    // third vector operand of an ALU uop; both views overlay the same VLEN bits
+  logic [`VLEN-1:0]   v0_data;    // view used by vmadc.v?m/vmsbc.v?m (members end with ';', not ',')
+  logic [`VLEN-1:0]   vd_data;    // view used by mask uops (vmandn, vmand, ...)
+} VS3_u;
+
typedef struct packed {
logic [`ROB_DEPTH_WIDTH-1:0] rob_entry,
- FUNCT6_e uop_funct,
+ FUNCT_e uop_funct,
EXE_OPCODE_e uop_opcode,
- logic vm, // Identify vmadc.v?m and vmadc.v? in the same uop_funct(6'b010000).
- // Identify vmsbc.v?m and vmsbc.v? in the same uop_funct(6'b010011).
- logic [`VCSR_VXRM-1:0] vxrm, // rounding mode and saturate mode
-
- logic [`VLENB-1:0] v0_data, // when the uop is vmadc.v?m or vmsbc.v?m, it will use v0 as the third vector operand
- VS1_u vs1, // when vs1_data_valid=0, vs1 field is valid and used to decode some OPMVV uops
- logic [`VLEN-1:0] vs1_data, // when vs1_data_valid=1, vs1_data is valid as a vector operand
+ logic [`VSTART_WIDTH-1:0] vstart,
+ // vm field can be used to identify vmadc.v?m/vmadc.v? uop in the same uop_funct(6'b010001).
+ // vm field can be used to identify vmsbc.v?m/vmsbc.v? uop in the same uop_funct(6'b010011).
+ logic vm,
+ // rounding mode
+ logic [`VCSR_VXRM-1:0] vxrm,
+ // when the uop is vmadc.v?m/vmsbc.v?m, the uop will use v0_data as the third vector operand.
+ // when the uop is mask uop(vmandn,vmand,...), the uop will use vd_data as the third vector operand.
+ VS3_u vs3_data,
+ // when vs1_data_valid=0, vs1 field is valid and used to decode some OPMVV uops
+ // when vs1_data_valid=1, vs1_data is valid as a vector operand
+ VS1_u vs1,
+ logic [`VLEN-1:0] vs1_data,
EEW_e vs1_eew,
logic vs1_data_valid,
ELE_TYPE_t vs1_type,
@@ -303,17 +315,18 @@
EEW_e vs2_eew,
logic vs2_data_valid,
ELE_TYPE_t vs2_type,
- logic [`XLEN-1:0] rs1_data, // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit.
- logic rs1_data_valid
+ // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit.
+ logic [`XLEN-1:0] rs1_data,
+ logic rs1_data_valid
} ALU_RS_t;
// DIV reservation station struct
typedef struct packed {
logic [`ROB_DEPTH_WIDTH-1:0] rob_entry,
- FUNCT6_e uop_funct,
+ FUNCT_e uop_funct,
EXE_OPCODE_e uop_opcode,
-
- logic [`VLEN-1:0] vs1_data, // when vs1_data_valid=1, vs1_data is valid as a vector operand
+ // when vs1_data_valid=1, vs1_data is valid as a vector operand
+ logic [`VLEN-1:0] vs1_data,
EEW_e vs1_eew,
logic vs1_data_valid,
ELE_TYPE_t vs1_type,
@@ -321,16 +334,17 @@
EEW_e vs2_eew,
logic vs2_data_valid,
ELE_TYPE_t vs2_type,
- logic [`XLEN-1:0] rs1_data, // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit.
- logic rs1_data_valid
+ // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit.
+ logic [`XLEN-1:0] rs1_data,
+ logic rs1_data_valid
} DIV_RS_t;
// MUL and MAC reservation station struct
typedef struct packed {
logic [`ROB_DEPTH_WIDTH-1:0] rob_entry,
- FUNCT6_e uop_func,
+ FUNCT_e uop_func,
EXE_OPCODE_e uop_opcode,
- logic [`VCSR_VXRM-1:0] vxrm, // rounding mode and saturate mode
+ logic [`VCSR_VXRM-1:0] vxrm, // rounding mode
logic [`VLEN-1:0] vs1_data,
EEW_e vs1_eew,
@@ -343,29 +357,34 @@
logic [`VLEN-1:0] vs3_data,
EEW_e vs3_eew,
logic vs3_data_valid,
- ELE_TYPE_t vs3_type,
- logic [`XLEN-1:0] rs1_data, // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit.
- logic rs1_data_valid
+ ELE_TYPE_t vs3_type,
+ // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit.
+ logic [`XLEN-1:0] rs1_data,
+ logic rs1_data_valid
} MUL_RS_t;
// PMT and RDT reservation station struct
typedef struct packed {
logic [`ROB_DEPTH_WIDTH-1:0] rob_entry,
- FUNCT6_e uop_func,
+ FUNCT_e uop_func,
EXE_OPCODE_e uop_opcode,
-
- logic vm, // Identify vmerge and vmv in the same uop_funct(6'b010111).
- VS1_u vs1, // when vs1_data_valid=0, vs1 field is valid and used to decode some OPMVV uops
- logic [`VLEN-1:0] vs1_data, // when vs1_data_valid=1, vs1_data is valid as a vector operand
+ // Identify vmerge and vmv in the same uop_funct(6'b010111).
+ logic vm,
+ // when vs1_data_valid=0, vs1 field is valid and used to decode some OPMVV uops
+ // when vs1_data_valid=1, vs1_data is valid as a vector operand
+ VS1_u vs1,
+ logic [`VLEN-1:0] vs1_data,
EEW_e vs1_eew,
logic vs1_data_valid,
ELE_TYPE_t vs1_type,
logic [`VLEN-1:0] vs2_data,
EEW_e vs2_eew,
logic vs2_data_valid,
- ELE_TYPE_t vs2_type,
- logic [`XLEN-1:0] rs1_data, // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit.
- logic rs1_data_valid
+ ELE_TYPE_t vs2_type,
+ // rs1_data could be from X[rs1] and imm(insts[19:15]). If it is imm, the 5-bit imm(insts[19:15]) will be sign-extend to XLEN-bit.
+ logic [`XLEN-1:0] rs1_data,
+ logic rs1_data_valid,
+ logic last_uop_valid
} PMT_RDT_RS_t;
// LSU reservation station struct
@@ -384,9 +403,19 @@
ELE_TYPE_t vs3_type,
} LSU_RS_t;
-/*
-EX stage,
-*/
+//
+// EX stage,
+//
+// send ALU's result to ROB (struct members are terminated with ';' as SystemVerilog requires)
+typedef struct packed {
+  logic [`ROB_DEPTH_WIDTH-1:0]  rob_entry;        // ROB entry the result belongs to
+  logic [`VLEN-1:0]             w_data;           // when w_type=XRF, w_data[`XLEN-1:0] will store the scalar result
+  W_DATA_TYPE_t                 w_type;           // destination kind of w_data (e.g. VRF or XRF)
+  logic                         w_valid;          // w_data/w_type carry a result to be written
+  logic [`VCSR_VXSAT-1:0]       vxsat;            // saturation flag produced by the uop
+  logic                         ignore_vta_vma;   // presumably tells ROB/WB to skip tail/mask-agnostic handling - confirm
+} ALU2ROB_t;
+
// send uop to LSU
typedef struct packed {
// RVV send to uop_pc to help LSU match the vld/vst uop
@@ -394,14 +423,14 @@
// When LSU submit the result to RVV, LSU need to attend uop_id to help RVV retire the uop in ROB
logic [`ROB_DEPTH_WIDTH-1:0] uop_id,
// Vector regfile index interface for indexed vld/vst
- logic vidx_valid,
- logic [`REGFILE_INDEX_WIDTH-1:0] vidx_addr,
- logic [`VLEN-1:0] vidx_data, // vs2
+ logic vidx_valid,
+ logic [`REGFILE_INDEX_WIDTH-1:0] vidx_addr,
+ logic [`VLEN-1:0] vidx_data, // vs2
ELE_TYPE_t vs2_type, // mask for vs2
// Vector regfile read interface for vst
- logic vregfile_read_valid,
- logic [`REGFILE_INDEX_WIDTH-1:0] vregfile_read_addr,
- logic [`VLEN-1:0] vregfile_read_data, // vs3
+ logic vregfile_read_valid,
+ logic [`REGFILE_INDEX_WIDTH-1:0] vregfile_read_addr,
+ logic [`VLEN-1:0] vregfile_read_data, // vs3
ELE_TYPE_t vs3_type // mask for vs3
} UOP_LSU_RVV2RVS_t;
@@ -411,11 +440,13 @@
logic [`ROB_DEPTH_WIDTH-1:0] uop_id,
// LSU uop type
// When LSU complete the vstore uop, it need to tell RVV done signal and attend uop_id to help RVV retire the uops
- LSU_IS_STORE_e uop_type, // when load, it means the uop is vld. It enables vregfile_write_addr and vregfile_write_data, and submit the vector data to ROB
- // when store, it means this store uop is done in LSU, ROB can retire this uop.
+ // when load, it means the uop is vld. It enables vregfile_write_addr and vregfile_write_data, and submit the vector data to ROB
+ // when store, it means this store uop is done in LSU, ROB can retire this uop.
+ LSU_IS_STORE_e uop_type,
+
// Vector regfile write interface for vld
- logic [`REGFILE_INDEX_WIDTH-1:0] vregfile_write_addr,
- logic [`VLEN-1:0] vregfile_write_data, // vd
+ logic [`REGFILE_INDEX_WIDTH-1:0] vregfile_write_addr,
+ logic [`VLEN-1:0] vregfile_write_data, // vd
ELE_TYPE_t vs1_type // mask for vd
} UOP_LSU_RVS2RVV_t;
@@ -431,14 +462,14 @@
logic [`VLEN-1:0] w_data, // when w_type=XRF, w_data[`XLEN-1:0] will store the scalar result
W_DATA_TYPE_t w_type,
logic w_valid,
- ELE_TYPE_t ele_type,
+ ELE_TYPE_t vd_type,
VECTOR_CSR_t vector_csr,
- logic last_uop_valid
+ logic ignore_vta_vma
} ROB_t;
-/*
-WB stage, bypass and write back to VRF/XRF, trap handler
-*/
+//
+// WB stage, bypass and write back to VRF/XRF, trap handler
+//
// write back to XRF
typedef struct packed {
logic [`REGFILE_INDEX_WIDTH-1:0] w_index,
diff --git a/hdl/verilog/rvv/design/rvv_alu_unit.sv b/hdl/verilog/rvv/design/rvv_alu_unit.sv
new file mode 100644
index 0000000..014a995
--- /dev/null
+++ b/hdl/verilog/rvv/design/rvv_alu_unit.sv
@@ -0,0 +1,257 @@
+/*
+description:
+1. It gets uops from the ALU reservation station and executes them (only VMANDN is implemented so far).
+
+feature list:
+1. Every ALU uop is executed and submitted to the ROB in 1 cycle.
+2. Reuse arithmetic logic as much as possible.
+3. Low-power design.
+*/
+
+`include "rvv.svh"
+
+module rvv_alu_unit
+(
+  clk,
+  rstn,
+
+  alu_uop_valid,
+  alu_uop,
+  result_alu2rob_valid,
+  result_alu2rob
+);
+//
+// interface signals
+//
+  // global signals (not referenced by the logic below, which is purely combinational)
+  input   logic           clk;
+  input   logic           rstn;
+
+  // ALU RS handshake signals
+  input   logic           alu_uop_valid;          // alu_uop carries a uop to execute
+  input   ALU_RS_t        alu_uop;                // uop and operands from the ALU reservation station
+
+  // ALU send result signals to ROB
+  output  logic           result_alu2rob_valid;   // result_alu2rob carries a result
+  output  ALU2ROB_t       result_alu2rob;
+
+//
+// internal signals
+//
+  // ALU_RS_t struct signals
+  logic   [`ROB_DEPTH_WIDTH-1:0]  rob_entry;
+  FUNCT_u                         uop_funct;      // NOTE(review): ALU_RS_t in rvv.svh declares this field as FUNCT_e - confirm which typedef is intended
+  EXE_OPCODE_e                    uop_opcode;
+  logic   [`VSTART_WIDTH-1:0]     vstart;
+  logic                           vm;
+  logic   [`VCSR_VXRM-1:0]        vxrm;
+  logic   [`VLEN-1:0]             v0_data;        // same width as VS3_u.v0_data, so the assign below does not truncate
+  logic   [`VLEN-1:0]             vd_data;
+  VS1_u                           vs1;            // declared explicitly; it was only assigned, i.e. an implicit net
+  logic   [`VLEN-1:0]             vs1_data;
+  EEW_e                           vs1_eew;
+  logic                           vs1_data_valid;
+  ELE_TYPE_t                      vs1_type;
+  logic   [`VLEN-1:0]             vs2_data;
+  EEW_e                           vs2_eew;
+  logic                           vs2_data_valid;
+  ELE_TYPE_t                      vs2_type;
+  logic   [`XLEN-1:0]             rs1_data;
+  logic                           rs1_data_valid;
+
+  // execute
+  logic   [`VLEN-1:0]             src2_vdata_mask_logic;    // vs2 operand of the mask-logical function unit
+  logic   [`VLEN-1:0]             src1_vdata_mask_logic;    // vs1 operand of the mask-logical function unit
+  logic                           result_valid_mask_logic;  // operands were valid, so the result may be submitted
+  logic   [`VLEN-1:0]             result_vdata_mask_logic;  // raw result before vstart is applied
+
+  // ALU2ROB_t struct signals
+  logic   [`VLEN-1:0]             w_data;             // when w_type=XRF, w_data[`XLEN-1:0] will store the scalar result
+  W_DATA_TYPE_t                   w_type;
+  logic                           w_valid;
+  logic   [`VCSR_VXSAT-1:0]       vxsat;
+  logic                           ignore_vta_vma;
+
+  integer                         i;                  // bit index of the vstart merge loop in the submit block
+//
+// execute uop
+//
+  // split ALU_RS_t struct
+  assign  rob_entry       = alu_uop.rob_entry;
+  assign  uop_funct       = alu_uop.uop_funct;
+  assign  uop_opcode      = alu_uop.uop_opcode;
+  assign  vstart          = alu_uop.vstart;
+  assign  vm              = alu_uop.vm;
+  assign  vxrm            = alu_uop.vxrm;
+  assign  v0_data         = alu_uop.vs3_data.v0_data;
+  assign  vd_data         = alu_uop.vs3_data.vd_data;
+  assign  vs1             = alu_uop.vs1;
+  assign  vs1_data        = alu_uop.vs1_data;
+  assign  vs1_eew         = alu_uop.vs1_eew;
+  assign  vs1_data_valid  = alu_uop.vs1_data_valid;
+  assign  vs1_type        = alu_uop.vs1_type;
+  assign  vs2_data        = alu_uop.vs2_data;
+  assign  vs2_eew         = alu_uop.vs2_eew;
+  assign  vs2_data_valid  = alu_uop.vs2_data_valid;
+  assign  vs2_type        = alu_uop.vs2_type;
+  assign  rs1_data        = alu_uop.rs1_data;
+  assign  rs1_data_valid  = alu_uop.rs1_data_valid;
+
+  // prepare source data to calculate
+  always_comb begin
+    // initialize the data so every path drives every signal (no latches)
+    src2_vdata_mask_logic   = 'b0;
+    src1_vdata_mask_logic   = 'b0;
+    result_valid_mask_logic = 'b0;
+
+    // prepare source data; nothing matches when alu_uop_valid=0
+    case({alu_uop_valid,uop_opcode})
+
+      {1'b1,OPIVV},
+      {1'b1,OPIVX},
+      {1'b1,OPIVI}: begin
+        case(uop_funct.opi_funct)
+
+          default: begin
+            `ifdef ASSERT_ON
+            // ("unsupported uop_funct.opi_funct. uop_opcode=%s, uop_funct=%s, rob_entry=%d.\n",uop_opcode,uop_funct.opi_funct,rob_entry);
+            `endif
+          end
+        endcase
+      end
+
+      {1'b1,OPMVV},
+      {1'b1,OPMVX}: begin
+        case(uop_funct.opm_funct)
+
+          VMANDN: begin
+            if((vs1_data_valid&vs2_data_valid)&(vm==1'b1)) begin
+              src2_vdata_mask_logic   = vs2_data;
+              src1_vdata_mask_logic   = vs1_data;   // was misspelled src1_vdata_vmask_logic
+              result_valid_mask_logic = 1'b1;       // was misspelled result_valid_vmask_logic
+            end else begin
+              src2_vdata_mask_logic   = 'b0;
+              src1_vdata_mask_logic   = 'b0;
+              result_valid_mask_logic = 'b0;
+              `ifdef ASSERT_ON
+              // assertion("%s uop: rob_entry=%d, vs1_data_valid(should be 1)=%d, vs2_data_valid(should be 1)=%d, vm(should be 1)=%d.\n",uop_funct.opm_funct,rob_entry,vs1_data_valid,vs2_data_valid,vm);
+              `endif
+            end
+          end
+
+          default: begin
+            `ifdef ASSERT_ON
+            // ("unsupported uop_funct.opm_funct. uop_opcode=%s, uop_funct=%s, rob_entry=%d.\n",uop_opcode,uop_funct.opm_funct,rob_entry);
+            `endif
+          end
+        endcase
+      end
+
+      default: begin
+        `ifdef ASSERT_ON
+        // when alu_uop_valid=1, ("unsupported uop_opcode. uop_opcode=%s, rob_entry=%d.\n",uop_opcode,rob_entry);
+        `endif
+      end
+    endcase
+  end
+
+  // calculate the result
+  always_comb begin
+    // initialize the data
+    result_vdata_mask_logic = 'b0;
+
+    // calculate result data
+    case({alu_uop_valid,uop_opcode})
+
+      {1'b1,OPIVV},
+      {1'b1,OPIVX},
+      {1'b1,OPIVI}: begin
+        case(uop_funct.opi_funct)
+          default: ;    // no OPI uop is implemented yet; a case statement needs at least one item
+        endcase
+      end
+
+      {1'b1,OPMVV},
+      {1'b1,OPMVX}: begin
+        case(uop_funct.opm_funct)
+
+          VMANDN: begin
+            result_vdata_mask_logic = f_vmandn(src2_vdata_mask_logic,src1_vdata_mask_logic);  // was misspelled src1_vdata_maska_logic
+          end
+          default: ;    // other OPM uops keep the zero default
+        endcase
+      end
+
+    endcase
+  end
+
+//
+// submit result to ROB
+//
+  // assign ALU2ROB_t struct signals
+  assign  result_alu2rob.rob_entry      = rob_entry;
+  assign  result_alu2rob.w_data         = w_data;
+  assign  result_alu2rob.w_type         = w_type;
+  assign  result_alu2rob.w_valid        = w_valid;
+  assign  result_alu2rob.vxsat          = vxsat;
+  assign  result_alu2rob.ignore_vta_vma = ignore_vta_vma;
+
+  // combine the signals to result_alu2rob struct and submit
+  always_comb begin
+    // initialize
+    result_alu2rob_valid = 'b0;
+    w_data               = 'b0;
+    w_type               = 'b0;   // was misspelled w_tpye. NOTE(review): needs a cast or a named value if W_DATA_TYPE_t is an enum
+    w_valid              = 'b0;
+    vxsat                = 'b0;
+    ignore_vta_vma       = 'b0;
+    // submit
+    case({alu_uop_valid,uop_opcode})
+
+      {1'b1,OPIVV},
+      {1'b1,OPIVX},
+      {1'b1,OPIVI}: begin
+        case(uop_funct.opi_funct)
+          default: ;    // no OPI uop is implemented yet; a case statement needs at least one item
+        endcase
+      end
+
+      {1'b1,OPMVV},
+      {1'b1,OPMVX}: begin
+        case(uop_funct.opm_funct)
+
+          VMANDN: begin
+            for (i=0;i<`VLEN;i=i+1)   // bits below vstart keep the old destination value
+            begin
+              if (i<vstart)
+                w_data[i] = vd_data[i];
+              else
+                w_data[i] = result_vdata_mask_logic[i];
+            end
+            result_alu2rob_valid = result_valid_mask_logic;
+            w_type               = VRF;
+            w_valid              = 1'b1;
+            vxsat                = 1'b0;
+            ignore_vta_vma       = 1'b1;
+          end
+          default: ;    // other OPM uops submit nothing
+        endcase
+      end
+
+    endcase
+  end
+
+//
+// function unit
+//
+  // OPMVV-vmandn function unit: returns vs2_data AND (NOT vs1_data), bit-wise over all VLEN bits
+  function [`VLEN-1:0] f_vmandn;
+    input logic [`VLEN-1:0] vs2_data;
+    input logic [`VLEN-1:0] vs1_data;
+
+    f_vmandn = vs2_data & (~vs1_data);
+  endfunction
+
+
+
+endmodule