| |
| `ifndef HDL_VERILOG_RVV_DESIGN_RVV_SVH |
| `include "rvv_backend.svh" |
| `endif |
| `ifndef ALU_DEFINE_SVH |
| `include "rvv_backend_alu.svh" |
| `endif |
| |
| // Add/sub execution unit of the RVV backend ALU. |
| // It receives one uop from the ALU reservation station, decodes |
| // uop_funct3 / uop_funct6 and drives result_valid and result towards the ROB. |
| // Everything visible in this part of the file is combinational (always_comb / assign); |
| // the module has no clock or reset port. |
| module rvv_backend_alu_unit_addsub |
| ( |
| alu_uop_valid, |
| alu_uop, |
| result_valid, |
| result |
| ); |
| // |
| // interface signals |
| // |
| // ALU RS handshake signals |
| // alu_uop_valid : qualifies alu_uop; result_valid can only be set while it is high |
| // alu_uop       : uop payload (ALU_RS_t), unpacked field by field further below |
| input logic alu_uop_valid; |
| input ALU_RS_t alu_uop; |
| |
| // ALU send result signals to ROB |
| // result_valid : set when the uop encoding is handled here and its operands are valid |
| // result       : PU2ROB_t payload (NOTE(review): driven outside the part of the file shown here) |
| output logic result_valid; |
| output PU2ROB_t result; |
| |
| // |
| // internal signals |
| // |
| // ALU_RS_t struct signals |
| // One local copy per ALU_RS_t field; each is driven by a continuous assign from alu_uop. |
| logic [`ROB_DEPTH_WIDTH-1:0] rob_entry; |
| FUNCT6_u uop_funct6; |
| logic [`FUNCT3_WIDTH-1:0] uop_funct3; |
| logic [`VSTART_WIDTH-1:0] vstart; |
| logic [`VL_WIDTH-1:0] vl; |
| logic vm; |
| RVVXRM vxrm; |
| logic [`VLEN-1:0] v0_data; |
| logic v0_data_valid; |
| logic [`VLEN-1:0] vd_data; |
| logic vd_data_valid; |
| logic [`VLEN-1:0] vs1_data; |
| logic vs1_data_valid; |
| logic [`VLEN-1:0] vs2_data; |
| logic vs2_data_valid; |
| EEW_e vs2_eew; |
| logic [`XLEN-1:0] rs1_data; |
| logic rs1_data_valid; |
| logic [`UOP_INDEX_WIDTH-1:0] uop_index; |
| |
| // execute |
| // add and sub instructions |
| // v0_data_in_use : slice of v0_data picked by uop_index and vs2_eew (source of the carry-in bits) |
| logic [`VLENB-1:0] v0_data_in_use; |
| // src2_data / src1_data : byte-sliced operands fed to the byte adders |
| logic [`VLENB-1:0][`BYTE_WIDTH-1:0] src2_data; |
| logic [`VLENB-1:0][`BYTE_WIDTH-1:0] src1_data; |
| // product8/16/32 : adder/subtractor results viewed at 8, 16 and 32 bit element width |
| // (named "product" but produced by f_full_addsub8 and the f_half_addsub* chain) |
| logic [`VLENB-1:0][`BYTE_WIDTH-1:0] product8; |
| logic [`VLEN/`HWORD_WIDTH-1:0][`HWORD_WIDTH-1:0] product16; |
| logic [`VLEN/`WORD_WIDTH-1:0][`WORD_WIDTH-1:0] product32; |
| // roundN_src : result shifted right by one bit with an extra top bit (averaging ops only) |
| logic [`VLENB-1:0][`BYTE_WIDTH-1:0] round8_src; |
| logic [`VLEN/`HWORD_WIDTH-1:0][`HWORD_WIDTH-1:0] round16_src; |
| logic [`VLEN/`WORD_WIDTH-1:0][`WORD_WIDTH-1:0] round32_src; |
| // roundN : roundN_src after the vxrm dependent increment |
| logic [`VLENB-1:0][`BYTE_WIDTH-1:0] round8; |
| logic [`VLEN/`HWORD_WIDTH-1:0][`HWORD_WIDTH-1:0] round16; |
| logic [`VLEN/`WORD_WIDTH-1:0][`WORD_WIDTH-1:0] round32; |
| // cin : carry-in per byte lane, non-zero only for VADC/VSBC |
| logic [`VLENB-1:0] cin; |
| // cout8/16/32 : carry-out bit accompanying product8/16/32 |
| logic [`VLENB-1:0] cout8; |
| logic [`VLEN/`HWORD_WIDTH-1:0] cout16; |
| logic [`VLEN/`WORD_WIDTH-1:0] cout32; |
| // per byte lane overflow indications, written by the OVERFLOW generate block |
| logic [`VLENB-1:0] addu_upoverflow; |
| logic [`VLENB-1:0] add_upoverflow; |
| logic [`VLENB-1:0] add_underoverflow; |
| logic [`VLENB-1:0] subu_underoverflow; |
| logic [`VLENB-1:0] sub_upoverflow; |
| logic [`VLENB-1:0] sub_underoverflow; |
| // NOTE(review): result_minmax* and result_data are driven outside the part of the file shown here |
| logic [`VLENB-1:0][`BYTE_WIDTH-1:0] result_minmax8; |
| logic [`VLEN/`HWORD_WIDTH-1:0][`HWORD_WIDTH-1:0] result_minmax16; |
| logic [`VLEN/`WORD_WIDTH-1:0][`WORD_WIDTH-1:0] result_minmax32; |
| logic [`VLEN-1:0] result_data; // regular data for EEW_vd = 8b,16b,32b |
| // opcode : ADDSUB_VADD or ADDSUB_VSUB, passed to the f_*_addsub* helper functions |
| ADDSUB_e opcode; |
| |
| // for-loop |
| // genvar shared by all generate-for loops in this module |
| genvar j; |
| |
| // |
| // prepare source data to calculate |
| // |
| // split ALU_RS_t struct |
| // instruction identification and control fields |
| assign uop_funct3     = alu_uop.uop_funct3; |
| assign uop_funct6     = alu_uop.uop_funct6; |
| assign uop_index      = alu_uop.uop_index; |
| assign rob_entry      = alu_uop.rob_entry; |
| assign vm             = alu_uop.vm; |
| assign vxrm           = alu_uop.vxrm; |
| assign vstart         = alu_uop.vstart; |
| assign vl             = alu_uop.vl; |
| |
| // vector operands, each with its own valid flag |
| assign vs2_data       = alu_uop.vs2_data; |
| assign vs2_data_valid = alu_uop.vs2_data_valid; |
| assign vs2_eew        = alu_uop.vs2_eew; |
| assign vs1_data       = alu_uop.vs1_data; |
| assign vs1_data_valid = alu_uop.vs1_data_valid; |
| assign vd_data        = alu_uop.vd_data; |
| assign vd_data_valid  = alu_uop.vd_data_valid; |
| assign v0_data        = alu_uop.v0_data; |
| assign v0_data_valid  = alu_uop.v0_data_valid; |
| |
| // scalar / immediate operand |
| assign rs1_data       = alu_uop.rs1_data; |
| assign rs1_data_valid = alu_uop.rs1_data_valid; |
| |
| // |
| // generate result_valid |
| // |
| // result_valid is set only for encodings decoded below, and only when the operands they read are valid |
| always_comb begin |
|     // Shared readiness terms. Each one is a plain AND/OR of 1-bit inputs, so |
|     // combining them below gives the same value as writing the full product inline. |
|     logic vv_ready;        // uop valid, vs2 and vs1 available |
|     logic vx_ready;        // uop valid, vs2 and rs1 available |
|     logic carry_ready;     // vm is 0 and v0 is available (needed by VADC/VSBC) |
|     logic eew_is_8_or_16;  // vs2_eew accepted by the widening forms without _W |
|     logic eew_is_16_or_32; // vs2_eew accepted by the widening forms with _W |
| |
|     vv_ready        = alu_uop_valid&vs2_data_valid&vs1_data_valid; |
|     vx_ready        = alu_uop_valid&vs2_data_valid&rs1_data_valid; |
|     carry_ready     = (vm==1'b0)&v0_data_valid; |
|     eew_is_8_or_16  = (vs2_eew==EEW8)|(vs2_eew==EEW16); |
|     eew_is_16_or_32 = (vs2_eew==EEW16)|(vs2_eew==EEW32); |
| |
|     // any encoding that is not listed below leaves result_valid low |
|     result_valid = 'b0; |
| |
|     case(uop_funct3) |
|         // vector-vector integer forms |
|         OPIVV: begin |
|             case(uop_funct6.ari_funct6) |
|                 VADD, VSUB, VSADD, VSSUB, VSADDU, VSSUBU, |
|                 VMINU, VMIN, VMAXU, VMAX: result_valid = vv_ready; |
|                 VADC, VSBC:               result_valid = vv_ready&carry_ready; |
|             endcase |
|         end |
| |
|         // vector-scalar integer forms (VRSUB exists here but not in OPIVV) |
|         OPIVX: begin |
|             case(uop_funct6.ari_funct6) |
|                 VADD, VSUB, VRSUB, VSADD, VSSUB, VSADDU, VSSUBU, |
|                 VMINU, VMIN, VMAXU, VMAX: result_valid = vx_ready; |
|                 VADC, VSBC:               result_valid = vx_ready&carry_ready; |
|             endcase |
|         end |
| |
|         // vector-immediate forms; the immediate is delivered through rs1_data |
|         OPIVI: begin |
|             case(uop_funct6.ari_funct6) |
|                 VADD, VRSUB, VSADD, VSADDU: result_valid = vx_ready; |
|                 VADC:                       result_valid = vx_ready&carry_ready; |
|             endcase |
|         end |
| |
|         // widening and averaging forms, vector-vector |
|         OPMVV: begin |
|             case(uop_funct6.ari_funct6) |
|                 VWADDU, VWADD, VWSUBU, VWSUB:         result_valid = vv_ready&eew_is_8_or_16; |
|                 VWADDU_W, VWADD_W, VWSUBU_W, VWSUB_W: result_valid = vv_ready&eew_is_16_or_32; |
|                 VAADDU, VAADD, VASUBU, VASUB:         result_valid = vv_ready; |
|             endcase |
|         end |
| |
|         // widening and averaging forms, vector-scalar |
|         OPMVX: begin |
|             case(uop_funct6.ari_funct6) |
|                 VWADDU, VWADD, VWSUBU, VWSUB:         result_valid = vx_ready&eew_is_8_or_16; |
|                 VWADDU_W, VWADD_W, VWSUBU_W, VWSUB_W: result_valid = vx_ready&eew_is_16_or_32; |
|                 VAADDU, VAADD, VASUBU, VASUB:         result_valid = vx_ready; |
|             endcase |
|         end |
|     endcase |
| end |
| |
| // prepare source data |
| always_comb begin |
|     // Operand steering for the byte adders. |
|     // Every candidate operand view is built first; the funct3/funct6/vs2_eew |
|     // decode at the end only selects which view drives src2_data / src1_data. |
|     // Naming of the views: z = upper part filled with zeros, s = upper part filled |
|     // with copies of the element's top bit; 16/32 = destination element width in bits. |
|     logic [`VLEN/2-1:0]                 vs2_half;   // half of vs2_data chosen by uop_index[0] |
|     logic [`VLEN/2-1:0]                 vs1_half;   // half of vs1_data chosen by uop_index[0] |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] rs1_splat;  // rs1_data repeated at element width vs2_eew |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] vs2_z16; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] vs2_s16; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] vs2_z32; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] vs2_s32; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] vs1_z16; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] vs1_s16; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] vs1_z32; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] vs1_s32; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] rs1_z16; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] rs1_s16; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] rs1_z32; |
|     logic [`VLENB-1:0][`BYTE_WIDTH-1:0] rs1_s32; |
| |
|     // defaults: both operands are zero unless the decode below selects something |
|     src2_data = 'b0; |
|     src1_data = 'b0; |
| |
|     rs1_splat = 'b0; |
|     vs2_z16   = 'b0; |
|     vs2_s16   = 'b0; |
|     vs2_z32   = 'b0; |
|     vs2_s32   = 'b0; |
|     vs1_z16   = 'b0; |
|     vs1_s16   = 'b0; |
|     vs1_z32   = 'b0; |
|     vs1_s32   = 'b0; |
|     rs1_z16   = 'b0; |
|     rs1_s16   = 'b0; |
|     rs1_z32   = 'b0; |
|     rs1_s32   = 'b0; |
| |
|     // widening uops read only half of a source register: lower half when uop_index[0] is 0, upper half otherwise |
|     if(uop_index[0]==1'b0) begin |
|         vs2_half = vs2_data[0 +: `VLEN/2]; |
|         vs1_half = vs1_data[0 +: `VLEN/2]; |
|     end |
|     else begin |
|         vs2_half = vs2_data[`VLEN/2 +: `VLEN/2]; |
|         vs1_half = vs1_data[`VLEN/2 +: `VLEN/2]; |
|     end |
| |
|     // build the views one 4-byte group at a time |
|     for(int w=0;w<`VLEN/`WORD_WIDTH;w=w+1) begin |
|         // scalar repeated per element; stays zero for any other vs2_eew |
|         for(int k=0;k<4;k=k+1) begin |
|             case(vs2_eew) |
|                 EEW8:  rs1_splat[4*w+k] = rs1_data[0 +: `BYTE_WIDTH]; |
|                 EEW16: rs1_splat[4*w+k] = rs1_data[(k%2)*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|                 EEW32: rs1_splat[4*w+k] = rs1_data[k*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             endcase |
|         end |
| |
|         for(int k=0;k<2;k=k+1) begin |
|             // one source byte -> two destination bytes (odd destination bytes of the z views keep their zero default) |
|             vs2_z16[4*w+2*k]   = vs2_half[(2*w+k)*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             vs2_s16[4*w+2*k]   = vs2_half[(2*w+k)*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             vs2_s16[4*w+2*k+1] = {`BYTE_WIDTH{vs2_half[(2*w+k+1)*`BYTE_WIDTH-1]}}; |
| |
|             vs1_z16[4*w+2*k]   = vs1_half[(2*w+k)*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             vs1_s16[4*w+2*k]   = vs1_half[(2*w+k)*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             vs1_s16[4*w+2*k+1] = {`BYTE_WIDTH{vs1_half[(2*w+k+1)*`BYTE_WIDTH-1]}}; |
| |
|             rs1_z16[4*w+2*k]   = rs1_data[0 +: `BYTE_WIDTH]; |
|             rs1_s16[4*w+2*k]   = rs1_data[0 +: `BYTE_WIDTH]; |
|             rs1_s16[4*w+2*k+1] = {`BYTE_WIDTH{rs1_data[`BYTE_WIDTH-1]}}; |
| |
|             // two source bytes -> four destination bytes (upper two bytes of the z views keep their zero default) |
|             vs2_z32[4*w+k]     = vs2_half[(2*w+k)*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             vs2_s32[4*w+k]     = vs2_half[(2*w+k)*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             vs2_s32[4*w+2+k]   = {`BYTE_WIDTH{vs2_half[(2*w+2)*`BYTE_WIDTH-1]}}; |
| |
|             vs1_z32[4*w+k]     = vs1_half[(2*w+k)*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             vs1_s32[4*w+k]     = vs1_half[(2*w+k)*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             vs1_s32[4*w+2+k]   = {`BYTE_WIDTH{vs1_half[(2*w+2)*`BYTE_WIDTH-1]}}; |
| |
|             rs1_z32[4*w+k]     = rs1_data[k*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             rs1_s32[4*w+k]     = rs1_data[k*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|             rs1_s32[4*w+2+k]   = {`BYTE_WIDTH{rs1_data[2*`BYTE_WIDTH-1]}}; |
|         end |
|     end |
| |
|     // operand selection |
|     case(uop_funct3) |
|         OPIVV: begin |
|             case(uop_funct6.ari_funct6) |
|                 VADD, VSUB, VADC, VSBC, |
|                 VSADDU, VSADD, VSSUBU, VSSUB, |
|                 VMINU, VMIN, VMAXU, VMAX: begin |
|                     src2_data = vs2_data; |
|                     src1_data = vs1_data; |
|                 end |
|             endcase |
|         end |
| |
|         OPIVX: begin |
|             case(uop_funct6.ari_funct6) |
|                 VADD, VSUB, VADC, VSBC, |
|                 VSADDU, VSADD, VSSUBU, VSSUB, |
|                 VMINU, VMIN, VMAXU, VMAX: begin |
|                     src2_data = vs2_data; |
|                     src1_data = rs1_splat; |
|                 end |
| |
|                 // reverse subtract: the scalar takes the src2 slot and vs2 the src1 slot |
|                 VRSUB: begin |
|                     src2_data = rs1_splat; |
|                     src1_data = vs2_data; |
|                 end |
|             endcase |
|         end |
| |
|         OPIVI: begin |
|             case(uop_funct6.ari_funct6) |
|                 VADD, VADC, VSADDU, VSADD: begin |
|                     src2_data = vs2_data; |
|                     src1_data = rs1_splat; |
|                 end |
| |
|                 // reverse subtract: the immediate takes the src2 slot and vs2 the src1 slot |
|                 VRSUB: begin |
|                     src2_data = rs1_splat; |
|                     src1_data = vs2_data; |
|                 end |
|             endcase |
|         end |
| |
|         OPMVV: begin |
|             case(uop_funct6.ari_funct6) |
|                 // both sources are widened with zero fill |
|                 VWADDU, VWSUBU: begin |
|                     case(vs2_eew) |
|                         EEW8: begin |
|                             src2_data = vs2_z16; |
|                             src1_data = vs1_z16; |
|                         end |
|                         EEW16: begin |
|                             src2_data = vs2_z32; |
|                             src1_data = vs1_z32; |
|                         end |
|                     endcase |
|                 end |
| |
|                 // both sources are widened with top-bit fill |
|                 VWADD, VWSUB: begin |
|                     case(vs2_eew) |
|                         EEW8: begin |
|                             src2_data = vs2_s16; |
|                             src1_data = vs1_s16; |
|                         end |
|                         EEW16: begin |
|                             src2_data = vs2_s32; |
|                             src1_data = vs1_s32; |
|                         end |
|                     endcase |
|                 end |
| |
|                 // vs2 is used as is, only vs1 is widened (zero fill) |
|                 VWADDU_W, VWSUBU_W: begin |
|                     src2_data = vs2_data; |
| |
|                     case(vs2_eew) |
|                         EEW16: src1_data = vs1_z16; |
|                         EEW32: src1_data = vs1_z32; |
|                     endcase |
|                 end |
| |
|                 // vs2 is used as is, only vs1 is widened (top-bit fill) |
|                 VWADD_W, VWSUB_W: begin |
|                     src2_data = vs2_data; |
| |
|                     case(vs2_eew) |
|                         EEW16: src1_data = vs1_s16; |
|                         EEW32: src1_data = vs1_s32; |
|                     endcase |
|                 end |
| |
|                 VAADDU, VASUBU, VAADD, VASUB: begin |
|                     src2_data = vs2_data; |
|                     src1_data = vs1_data; |
|                 end |
|             endcase |
|         end |
| |
|         OPMVX: begin |
|             case(uop_funct6.ari_funct6) |
|                 // scalar and vs2 are both widened with zero fill |
|                 VWADDU, VWSUBU: begin |
|                     case(vs2_eew) |
|                         EEW8: begin |
|                             src1_data = rs1_z16; |
|                             src2_data = vs2_z16; |
|                         end |
|                         EEW16: begin |
|                             src1_data = rs1_z32; |
|                             src2_data = vs2_z32; |
|                         end |
|                     endcase |
|                 end |
| |
|                 // scalar and vs2 are both widened with top-bit fill |
|                 VWADD, VWSUB: begin |
|                     case(vs2_eew) |
|                         EEW8: begin |
|                             src1_data = rs1_s16; |
|                             src2_data = vs2_s16; |
|                         end |
|                         EEW16: begin |
|                             src1_data = rs1_s32; |
|                             src2_data = vs2_s32; |
|                         end |
|                     endcase |
|                 end |
| |
|                 // vs2 is used as is, only the scalar is widened (zero fill) |
|                 VWADDU_W, VWSUBU_W: begin |
|                     src2_data = vs2_data; |
| |
|                     case(vs2_eew) |
|                         EEW16: src1_data = rs1_z16; |
|                         EEW32: src1_data = rs1_z32; |
|                     endcase |
|                 end |
| |
|                 // vs2 is used as is, only the scalar is widened (top-bit fill) |
|                 VWADD_W, VWSUB_W: begin |
|                     src2_data = vs2_data; |
| |
|                     case(vs2_eew) |
|                         EEW16: src1_data = rs1_s16; |
|                         EEW32: src1_data = rs1_s32; |
|                     endcase |
|                 end |
| |
|                 VAADDU, VASUBU, VAADD, VASUB: begin |
|                     src2_data = vs2_data; |
|                     src1_data = rs1_splat; |
|                 end |
|             endcase |
|         end |
|     endcase |
| end |
| |
| // prepare cin |
| always_comb begin |
|     // Pick the v0 bits that belong to this uop. The slice starts at uop_index |
|     // followed by log2(slice length) zero bits; the slice length is VLENB, |
|     // VLENB/2 or VLENB/4 bits for EEW8, EEW16 or EEW32. Bits above the slice, and |
|     // the whole vector for any other vs2_eew, stay zero. |
|     v0_data_in_use = 'b0; |
| |
|     case(vs2_eew) |
|         EEW8:  v0_data_in_use                = v0_data[{uop_index,{($clog2(`VLENB)){1'b0}}} +: `VLENB]; |
|         EEW16: v0_data_in_use[0 +: `VLENB/2] = v0_data[{uop_index,{($clog2(`VLENB/2)){1'b0}}} +: `VLENB/2]; |
|         EEW32: v0_data_in_use[0 +: `VLENB/4] = v0_data[{uop_index,{($clog2(`VLENB/4)){1'b0}}} +: `VLENB/4]; |
|     endcase |
| end |
| |
| generate |
|     for (j=0;j<`VLEN/`WORD_WIDTH;j=j+1) begin: GET_CIN |
|         // Carry-in for the four byte lanes of 32-bit group j. |
|         // Only VADC/VSBC take a carry-in. It is injected on the lowest byte of |
|         // each element; the other lanes of that element get 0. |
|         always_comb begin |
|             cin[4*j +: 4] = 'b0; |
| |
|             case(uop_funct3) |
|                 OPIVV, OPIVX, OPIVI: begin |
|                     case(uop_funct6.ari_funct6) |
|                         VADC, VSBC: begin |
|                             case(vs2_eew) |
|                                 // one v0 bit per byte lane |
|                                 EEW8:  cin[4*j +: 4] = v0_data_in_use[4*j +: 4]; |
|                                 // two elements per group: lanes 0 and 2 |
|                                 EEW16: cin[4*j +: 4] = {1'b0, v0_data_in_use[2*j+1], 1'b0, v0_data_in_use[2*j]}; |
|                                 // one element per group: lane 0 |
|                                 EEW32: cin[4*j +: 4] = {3'b0, v0_data_in_use[j]}; |
|                             endcase |
|                         end |
|                     endcase |
|                 end |
|             endcase |
|         end |
|     end |
| endgenerate |
| |
| // get opcode for f_addsub |
| always_comb begin |
|     // Choose the operation handed to the f_*_addsub* helpers. |
|     // Addition is the default; min/max are listed with the subtract group. |
|     opcode = ADDSUB_VADD; |
| |
|     case(uop_funct3) |
|         OPIVV, OPIVX, OPIVI: begin |
|             case(uop_funct6.ari_funct6) |
|                 VSUB, VRSUB, VSBC, VSSUBU, VSSUB, |
|                 VMINU, VMIN, VMAXU, VMAX:  opcode = ADDSUB_VSUB; |
|                 VADD, VADC, VSADDU, VSADD: opcode = ADDSUB_VADD; |
|             endcase |
|         end |
| |
|         OPMVV, OPMVX: begin |
|             case(uop_funct6.ari_funct6) |
|                 VWSUBU, VWSUB, VWSUBU_W, VWSUB_W, |
|                 VASUBU, VASUB:                    opcode = ADDSUB_VSUB; |
|                 VWADDU, VWADD, VWADDU_W, VWADD_W, |
|                 VAADDU, VAADD:                    opcode = ADDSUB_VADD; |
|             endcase |
|         end |
|     endcase |
| end |
| |
| // |
| // calculate the result |
| // |
| // for add and sub instructions |
| // Stage 1: one f_full_addsub8 call per byte lane; the returned vector is |
| // split into a carry-out bit (cout8) and a byte result (product8). |
| generate |
| for (j=0;j<`VLENB;j=j+1) begin: EXE_VADDSUB_PROD8 |
| assign {cout8[j],product8[j]} = f_full_addsub8(opcode, src2_data[j], src1_data[j], cin[j]); |
| end |
| endgenerate |
| |
| // Stage 2: 16-bit view. The lower byte result is reused unchanged; the upper |
| // byte result (with its carry-out) and the lower byte's carry-out go through f_half_addsub8. |
| // NOTE(review): f_half_addsub8 is defined elsewhere; presumably it folds the lower carry/borrow into the upper byte - confirm |
| generate |
| for (j=0;j<`VLEN/`HWORD_WIDTH;j=j+1) begin: EXE_VADDSUB_PROD16 |
| assign {cout16[j],product16[j]} = {f_half_addsub8(opcode, {cout8[2*j+1],product8[2*j+1]}, cout8[2*j]), product8[2*j]}; |
| end |
| endgenerate |
| |
| // Stage 3: 32-bit view, built from pairs of 16-bit results the same way, using f_half_addsub16. |
| generate |
| for (j=0;j<`VLEN/`WORD_WIDTH;j=j+1) begin: EXE_VADDSUB_PROD32 |
| assign {cout32[j],product32[j]} = {f_half_addsub16(opcode, {cout16[2*j+1],product16[2*j+1]}, cout16[2*j]), product16[2*j]}; |
| end |
| endgenerate |
| |
| // rounding result |
| always_comb begin |
|     // Averaging add/sub: shift the result right by one bit and apply the vxrm increment. |
|     // Values are computed for 8, 16 and 32 bit element width in parallel. |
|     // round*_src = {top bit, result[msb:1]} |
|     //   unsigned ops: top bit = carry-out |
|     //   signed ops  : top bit = carry-out XOR top bit of src2 XOR top bit of src1 |
|     // round*     = round*_src, plus one when the vxrm condition on result[1:0] holds: |
|     //   RNU: result[0]    RNE: result[0] & result[1]    RDN: never    ROD: !result[1] & result[0] |
|     // Any other funct6 or vxrm leaves everything at zero. |
|     round8_src  = 'b0; |
|     round16_src = 'b0; |
|     round32_src = 'b0; |
|     round8      = 'b0; |
|     round16     = 'b0; |
|     round32     = 'b0; |
| |
|     case(uop_funct6.ari_funct6) |
|         VAADDU, |
|         VASUBU: begin |
|             // 8-bit elements |
|             for(int e=0;e<`VLENB;e=e+1) begin |
|                 case(vxrm) |
|                     RNU, RNE, RDN, ROD: round8_src[e] = {cout8[e],product8[e][`BYTE_WIDTH-1:1]}; |
|                 endcase |
| |
|                 case(vxrm) |
|                     RNU: round8[e] = product8[e][0] ? round8_src[e]+1'b1 : round8_src[e]; |
|                     RNE: round8[e] = product8[e][0]&product8[e][1] ? round8_src[e]+1'b1 : round8_src[e]; |
|                     RDN: round8[e] = round8_src[e]; |
|                     ROD: round8[e] = (!product8[e][1])&product8[e][0] ? round8_src[e]+1'b1 : round8_src[e]; |
|                 endcase |
|             end |
| |
|             // 16-bit elements |
|             for(int e=0;e<`VLEN/`HWORD_WIDTH;e=e+1) begin |
|                 case(vxrm) |
|                     RNU, RNE, RDN, ROD: round16_src[e] = {cout16[e],product16[e][`HWORD_WIDTH-1:1]}; |
|                 endcase |
| |
|                 case(vxrm) |
|                     RNU: round16[e] = product16[e][0] ? round16_src[e]+1'b1 : round16_src[e]; |
|                     RNE: round16[e] = product16[e][0]&product16[e][1] ? round16_src[e]+1'b1 : round16_src[e]; |
|                     RDN: round16[e] = round16_src[e]; |
|                     ROD: round16[e] = (!product16[e][1])&product16[e][0] ? round16_src[e]+1'b1 : round16_src[e]; |
|                 endcase |
|             end |
| |
|             // 32-bit elements (the increment goes through f_src_plus1) |
|             for(int e=0;e<`VLEN/`WORD_WIDTH;e=e+1) begin |
|                 case(vxrm) |
|                     RNU, RNE, RDN, ROD: round32_src[e] = {cout32[e],product32[e][`WORD_WIDTH-1:1]}; |
|                 endcase |
| |
|                 case(vxrm) |
|                     RNU: round32[e] = product32[e][0] ? f_src_plus1(round32_src[e]) : round32_src[e]; |
|                     RNE: round32[e] = product32[e][0]&product32[e][1] ? f_src_plus1(round32_src[e]) : round32_src[e]; |
|                     RDN: round32[e] = round32_src[e]; |
|                     ROD: round32[e] = (!product32[e][1])&product32[e][0] ? f_src_plus1(round32_src[e]) : round32_src[e]; |
|                 endcase |
|             end |
|         end |
| |
|         VAADD, |
|         VASUB: begin |
|             // 8-bit elements |
|             for(int e=0;e<`VLENB;e=e+1) begin |
|                 case(vxrm) |
|                     RNU, RNE, RDN, ROD: round8_src[e] = {src2_data[e][`BYTE_WIDTH-1]^src1_data[e][`BYTE_WIDTH-1]^cout8[e],product8[e][`BYTE_WIDTH-1:1]}; |
|                 endcase |
| |
|                 case(vxrm) |
|                     RNU: round8[e] = product8[e][0] ? round8_src[e]+1'b1 : round8_src[e]; |
|                     RNE: round8[e] = product8[e][0]&product8[e][1] ? round8_src[e]+1'b1 : round8_src[e]; |
|                     RDN: round8[e] = round8_src[e]; |
|                     ROD: round8[e] = (!product8[e][1])&product8[e][0] ? round8_src[e]+1'b1 : round8_src[e]; |
|                 endcase |
|             end |
| |
|             // 16-bit elements: the operand top bit sits in byte 2*e+1 |
|             for(int e=0;e<`VLEN/`HWORD_WIDTH;e=e+1) begin |
|                 case(vxrm) |
|                     RNU, RNE, RDN, ROD: round16_src[e] = {src2_data[2*e+1][`BYTE_WIDTH-1]^src1_data[2*e+1][`BYTE_WIDTH-1]^cout16[e],product16[e][`HWORD_WIDTH-1:1]}; |
|                 endcase |
| |
|                 case(vxrm) |
|                     RNU: round16[e] = product16[e][0] ? round16_src[e]+1'b1 : round16_src[e]; |
|                     RNE: round16[e] = product16[e][0]&product16[e][1] ? round16_src[e]+1'b1 : round16_src[e]; |
|                     RDN: round16[e] = round16_src[e]; |
|                     ROD: round16[e] = (!product16[e][1])&product16[e][0] ? round16_src[e]+1'b1 : round16_src[e]; |
|                 endcase |
|             end |
| |
|             // 32-bit elements: the operand top bit sits in byte 4*e+3 |
|             for(int e=0;e<`VLEN/`WORD_WIDTH;e=e+1) begin |
|                 case(vxrm) |
|                     RNU, RNE, RDN, ROD: round32_src[e] = {src2_data[4*e+3][`BYTE_WIDTH-1]^src1_data[4*e+3][`BYTE_WIDTH-1]^cout32[e],product32[e][`WORD_WIDTH-1:1]}; |
|                 endcase |
| |
|                 case(vxrm) |
|                     RNU: round32[e] = product32[e][0] ? f_src_plus1(round32_src[e]) : round32_src[e]; |
|                     RNE: round32[e] = product32[e][0]&product32[e][1] ? f_src_plus1(round32_src[e]) : round32_src[e]; |
|                     RDN: round32[e] = round32_src[e]; |
|                     ROD: round32[e] = (!product32[e][1])&product32[e][0] ? f_src_plus1(round32_src[e]) : round32_src[e]; |
|                 endcase |
|             end |
|         end |
|     endcase |
| end |
| |
| // overflow check |
| // For every word slice j this produces six 4-bit flag groups, one bit per byte position of the slice. |
| // A flag of a multi-byte element is reported on the position of its most significant byte. |
| generate |
|   for (j=0;j<`VLEN/`WORD_WIDTH;j++) begin: OVERFLOW |
|     always_comb begin |
|       // flag_lanes : byte positions that may carry a flag for the current vs2_eew |
|       // carry_out  : cout8/cout16/cout32 bits placed on those positions |
|       // res_msb    : MSB of product8/product16/product32 placed on those positions |
|       // s2_msb     : MSB of each src2_data byte of this slice |
|       // s1_msb     : MSB of each src1_data byte of this slice |
|       logic [3:0] flag_lanes; |
|       logic [3:0] carry_out; |
|       logic [3:0] res_msb; |
|       logic [3:0] s2_msb; |
|       logic [3:0] s1_msb; |
| |
|       s2_msb = {src2_data[4*j+3][`BYTE_WIDTH-1], |
|                 src2_data[4*j+2][`BYTE_WIDTH-1], |
|                 src2_data[4*j+1][`BYTE_WIDTH-1], |
|                 src2_data[4*j  ][`BYTE_WIDTH-1]}; |
|       s1_msb = {src1_data[4*j+3][`BYTE_WIDTH-1], |
|                 src1_data[4*j+2][`BYTE_WIDTH-1], |
|                 src1_data[4*j+1][`BYTE_WIDTH-1], |
|                 src1_data[4*j  ][`BYTE_WIDTH-1]}; |
| |
|       // an unmatched vs2_eew leaves every flag of the slice at zero |
|       flag_lanes = 4'b0000; |
|       carry_out  = 4'b0000; |
|       res_msb    = 4'b0000; |
| |
|       case(vs2_eew) |
|         EEW8: begin |
|           flag_lanes = 4'b1111; |
|           carry_out  = {cout8[4*j+3],cout8[4*j+2],cout8[4*j+1],cout8[4*j]}; |
|           res_msb    = {product8[4*j+3][`BYTE_WIDTH-1], |
|                         product8[4*j+2][`BYTE_WIDTH-1], |
|                         product8[4*j+1][`BYTE_WIDTH-1], |
|                         product8[4*j  ][`BYTE_WIDTH-1]}; |
|         end |
|         EEW16: begin |
|           flag_lanes = 4'b1010; |
|           carry_out  = {cout16[2*j+1],1'b0,cout16[2*j],1'b0}; |
|           res_msb    = {product16[2*j+1][`HWORD_WIDTH-1],1'b0,product16[2*j][`HWORD_WIDTH-1],1'b0}; |
|         end |
|         EEW32: begin |
|           flag_lanes = 4'b1000; |
|           carry_out  = {cout32[j],3'b0}; |
|           res_msb    = {product32[j][`WORD_WIDTH-1],3'b0}; |
|         end |
|       endcase |
| |
|       // unsigned flags are the adder carry/borrow outputs |
|       addu_upoverflow[   4*j +: 4] = carry_out; |
|       subu_underoverflow[4*j +: 4] = carry_out; |
| |
|       // signed add: both operands share a sign and the result MSB differs from it |
|       add_upoverflow[    4*j +: 4] = flag_lanes &  res_msb & ~s2_msb & ~s1_msb; |
|       add_underoverflow[ 4*j +: 4] = flag_lanes & ~res_msb &  s2_msb &  s1_msb; |
| |
|       // signed sub: operand signs differ and the result MSB differs from the src2 sign |
|       sub_upoverflow[    4*j +: 4] = flag_lanes &  res_msb & ~s2_msb &  s1_msb; |
|       sub_underoverflow[ 4*j +: 4] = flag_lanes & ~res_msb &  s2_msb & ~s1_msb; |
|     end |
|   end |
| endgenerate |
| |
| // assign to result_data: one always_comb per WORD_WIDTH-bit slice j of result_data; the slice value is selected by uop_funct3, uop_funct6.ari_funct6 and vs2_eew |
| generate |
| for (j=0;j<`VLEN/`WORD_WIDTH;j++) begin: GET_RESULT_DATA |
| always_comb begin |
| // default values: the result slice and the min/max scratch entries belonging to slice j stay zero unless a case item below overwrites them |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = 'b0; |
| result_minmax8[4*j+3] = 'b0; |
| result_minmax8[4*j+2] = 'b0; |
| result_minmax8[4*j+1] = 'b0; |
| result_minmax8[4*j] = 'b0; |
| result_minmax16[2*j+1] = 'b0; |
| result_minmax16[2*j] = 'b0; |
| result_minmax32[j] = 'b0; |
| |
| // calculate result data: VADD..VSBC and the VW* ops pack product8/16/32; VSADDU/VSADD/VSSUBU/VSSUB replace a lane by a limit constant when its overflow flag is set; VMIN*/VMAX* pick src2_data or src1_data per lane; VAADD*/VASUB* pack round8/16/32 |
| case(uop_funct3) |
| OPIVV, |
| OPIVX, |
| OPIVI: begin |
| case(uop_funct6.ari_funct6) |
| VADD, |
| VSUB, |
| VRSUB, |
| VADC, |
| VSBC: begin |
| case(vs2_eew) |
| EEW8: begin |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {product8[4*j+3],product8[4*j+2],product8[4*j+1],product8[4*j]}; |
| end |
| EEW16: begin |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {product16[2*j+1],product16[2*j]}; |
| end |
| EEW32: begin |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = product32[j]; |
| end |
| endcase |
| end |
| |
| VSADDU: begin |
| case(vs2_eew) |
| EEW8: begin |
| if(addu_upoverflow[4*j]) |
| result_data[j*`WORD_WIDTH +: `BYTE_WIDTH] = 'hff; |
| else |
| result_data[j*`WORD_WIDTH +: `BYTE_WIDTH] = product8[4*j]; |
| |
| if(addu_upoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH+1*`BYTE_WIDTH +: `BYTE_WIDTH] = 'hff; |
| else |
| result_data[j*`WORD_WIDTH+1*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+1]; |
| |
| if(addu_upoverflow[4*j+2]) |
| result_data[j*`WORD_WIDTH+2*`BYTE_WIDTH +: `BYTE_WIDTH] = 'hff; |
| else |
| result_data[j*`WORD_WIDTH+2*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+2]; |
| |
| if(addu_upoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+3*`BYTE_WIDTH +: `BYTE_WIDTH] = 'hff; |
| else |
| result_data[j*`WORD_WIDTH+3*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+3]; |
| end |
| EEW16: begin |
| if(addu_upoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH +: `HWORD_WIDTH] = 'hffff; |
| else |
| result_data[j*`WORD_WIDTH +: `HWORD_WIDTH] = product16[2*j]; |
| |
| if(addu_upoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+1*`HWORD_WIDTH +: `HWORD_WIDTH] = 'hffff; |
| else |
| result_data[j*`WORD_WIDTH+1*`HWORD_WIDTH +: `HWORD_WIDTH] = product16[2*j+1]; |
| end |
| EEW32: begin |
| if(addu_upoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = 'hffff_ffff; |
| else |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = product32[j]; |
| end |
| endcase |
| end |
| |
| VSADD: begin |
| case(vs2_eew) |
| EEW8: begin |
| if (add_upoverflow[4*j]) |
| result_data[j*`WORD_WIDTH +: `BYTE_WIDTH] = 'h7f; |
| else if (add_underoverflow[4*j]) |
| result_data[j*`WORD_WIDTH +: `BYTE_WIDTH] = 'h80; |
| else |
| result_data[j*`WORD_WIDTH +: `BYTE_WIDTH] = product8[4*j]; |
| |
| if (add_upoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH+1*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h7f; |
| else if (add_underoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH+1*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h80; |
| else |
| result_data[j*`WORD_WIDTH+1*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+1]; |
| |
| if (add_upoverflow[4*j+2]) |
| result_data[j*`WORD_WIDTH+2*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h7f; |
| else if (add_underoverflow[4*j+2]) |
| result_data[j*`WORD_WIDTH+2*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h80; |
| else |
| result_data[j*`WORD_WIDTH+2*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+2]; |
| |
| if (add_upoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+3*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h7f; |
| else if (add_underoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+3*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h80; |
| else |
| result_data[j*`WORD_WIDTH+3*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+3]; |
| end |
| EEW16: begin |
| if (add_upoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH +: `HWORD_WIDTH] = 'h7fff; |
| else if (add_underoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH +: `HWORD_WIDTH] = 'h8000; |
| else |
| result_data[j*`WORD_WIDTH +: `HWORD_WIDTH] = product16[2*j]; |
| |
| if (add_upoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+1*`HWORD_WIDTH +: `HWORD_WIDTH] = 'h7fff; |
| else if (add_underoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+1*`HWORD_WIDTH +: `HWORD_WIDTH] = 'h8000; |
| else |
| result_data[j*`WORD_WIDTH+1*`HWORD_WIDTH +: `HWORD_WIDTH] = product16[2*j+1]; |
| end |
| EEW32: begin |
| if (add_upoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = 'h7fff_ffff; |
| else if (add_underoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = 'h8000_0000; |
| else |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = product32[j]; |
| end |
| endcase |
| end |
| |
| VSSUBU: begin |
| case(vs2_eew) |
| EEW8: begin |
| if(subu_underoverflow[4*j]) |
| result_data[j*`WORD_WIDTH +: `BYTE_WIDTH] = 'd0; |
| else |
| result_data[j*`WORD_WIDTH +: `BYTE_WIDTH] = product8[4*j]; |
| |
| if(subu_underoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH+1*`BYTE_WIDTH +: `BYTE_WIDTH] = 'd0; |
| else |
| result_data[j*`WORD_WIDTH+1*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+1]; |
| |
| if(subu_underoverflow[4*j+2]) |
| result_data[j*`WORD_WIDTH+2*`BYTE_WIDTH +: `BYTE_WIDTH] = 'd0; |
| else |
| result_data[j*`WORD_WIDTH+2*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+2]; |
| |
| if(subu_underoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+3*`BYTE_WIDTH +: `BYTE_WIDTH] = 'd0; |
| else |
| result_data[j*`WORD_WIDTH+3*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+3]; |
| end |
| EEW16: begin |
| if(subu_underoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH +: `HWORD_WIDTH] = 'd0; |
| else |
| result_data[j*`WORD_WIDTH +: `HWORD_WIDTH] = product16[2*j]; |
| |
| if(subu_underoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+1*`HWORD_WIDTH +: `HWORD_WIDTH] = 'd0; |
| else |
| result_data[j*`WORD_WIDTH+1*`HWORD_WIDTH +: `HWORD_WIDTH] = product16[2*j+1]; |
| end |
| EEW32: begin |
| if(subu_underoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = 'd0; |
| else |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = product32[j]; |
| end |
| endcase |
| end |
| |
| VSSUB: begin |
| case(vs2_eew) |
| EEW8: begin |
| if (sub_upoverflow[4*j]) |
| result_data[j*`WORD_WIDTH +: `BYTE_WIDTH] = 'h7f; |
| else if (sub_underoverflow[4*j]) |
| result_data[j*`WORD_WIDTH +: `BYTE_WIDTH] = 'h80; |
| else |
| result_data[j*`WORD_WIDTH +: `BYTE_WIDTH] = product8[4*j]; |
| |
| if (sub_upoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH+1*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h7f; |
| else if (sub_underoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH+1*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h80; |
| else |
| result_data[j*`WORD_WIDTH+1*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+1]; |
| |
| if (sub_upoverflow[4*j+2]) |
| result_data[j*`WORD_WIDTH+2*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h7f; |
| else if (sub_underoverflow[4*j+2]) |
| result_data[j*`WORD_WIDTH+2*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h80; |
| else |
| result_data[j*`WORD_WIDTH+2*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+2]; |
| |
| if (sub_upoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+3*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h7f; |
| else if (sub_underoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+3*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h80; |
| else |
| result_data[j*`WORD_WIDTH+3*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*j+3]; |
| end |
| EEW16: begin |
| if (sub_upoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH +: `HWORD_WIDTH] = 'h7fff; |
| else if (sub_underoverflow[4*j+1]) |
| result_data[j*`WORD_WIDTH +: `HWORD_WIDTH] = 'h8000; |
| else |
| result_data[j*`WORD_WIDTH +: `HWORD_WIDTH] = product16[2*j]; |
| |
| if (sub_upoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+1*`HWORD_WIDTH +: `HWORD_WIDTH] = 'h7fff; |
| else if (sub_underoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH+1*`HWORD_WIDTH +: `HWORD_WIDTH] = 'h8000; |
| else |
| result_data[j*`WORD_WIDTH+1*`HWORD_WIDTH +: `HWORD_WIDTH] = product16[2*j+1]; |
| end |
| EEW32: begin |
| if (sub_upoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = 'h7fff_ffff; |
| else if (sub_underoverflow[4*j+3]) |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = 'h8000_0000; |
| else |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = product32[j]; |
| end |
| endcase |
| end |
| |
| VMINU: begin |
| case(vs2_eew) |
| EEW8: begin |
| result_minmax8[4*j+3] = cout8[4*j+3] ? src2_data[4*j+3] : src1_data[4*j+3]; |
| result_minmax8[4*j+2] = cout8[4*j+2] ? src2_data[4*j+2] : src1_data[4*j+2]; |
| result_minmax8[4*j+1] = cout8[4*j+1] ? src2_data[4*j+1] : src1_data[4*j+1]; |
| result_minmax8[4*j ] = cout8[4*j ] ? src2_data[4*j ] : src1_data[4*j ]; |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {result_minmax8[4*j+3], |
| result_minmax8[4*j+2], |
| result_minmax8[4*j+1], |
| result_minmax8[4*j]}; |
| end |
| EEW16: begin |
| result_minmax16[2*j+1] = cout16[2*j+1] ? {src2_data[4*j+3],src2_data[4*j+2]} : {src1_data[4*j+3],src1_data[4*j+2]}; |
| result_minmax16[2*j ] = cout16[2*j ] ? {src2_data[4*j+1],src2_data[4*j ]} : {src1_data[4*j+1],src1_data[4*j ]}; |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {result_minmax16[2*j+1], |
| result_minmax16[2*j]}; |
| end |
| EEW32: begin |
| result_minmax32[j] = cout32[j] ? {src2_data[4*j+3],src2_data[4*j+2],src2_data[4*j+1],src2_data[4*j]}: |
| {src1_data[4*j+3],src1_data[4*j+2],src1_data[4*j+1],src1_data[4*j]}; |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = result_minmax32[j]; |
| end |
| endcase |
| end |
| |
| VMIN: begin |
| case(vs2_eew) |
| EEW8: begin |
| case({src2_data[4*j][`BYTE_WIDTH-1],src1_data[4*j][`BYTE_WIDTH-1]}) |
| 2'b10 : result_minmax8[4*j] = src2_data[4*j]; |
| 2'b01 : result_minmax8[4*j] = src1_data[4*j]; |
| default: result_minmax8[4*j] = product8[4*j][`BYTE_WIDTH-1] ? src2_data[4*j] : src1_data[4*j]; |
| endcase |
| |
| case({src2_data[4*j+1][`BYTE_WIDTH-1],src1_data[4*j+1][`BYTE_WIDTH-1]}) |
| 2'b10 : result_minmax8[4*j+1] = src2_data[4*j+1]; |
| 2'b01 : result_minmax8[4*j+1] = src1_data[4*j+1]; |
| default: result_minmax8[4*j+1] = product8[4*j+1][`BYTE_WIDTH-1] ? src2_data[4*j+1] : src1_data[4*j+1]; |
| endcase |
| |
| case({src2_data[4*j+2][`BYTE_WIDTH-1],src1_data[4*j+2][`BYTE_WIDTH-1]}) |
| 2'b10 : result_minmax8[4*j+2] = src2_data[4*j+2]; |
| 2'b01 : result_minmax8[4*j+2] = src1_data[4*j+2]; |
| default: result_minmax8[4*j+2] = product8[4*j+2][`BYTE_WIDTH-1] ? src2_data[4*j+2] : src1_data[4*j+2]; |
| endcase |
| |
| case({src2_data[4*j+3][`BYTE_WIDTH-1],src1_data[4*j+3][`BYTE_WIDTH-1]}) |
| 2'b10 : result_minmax8[4*j+3] = src2_data[4*j+3]; |
| 2'b01 : result_minmax8[4*j+3] = src1_data[4*j+3]; |
| default: result_minmax8[4*j+3] = product8[4*j+3][`BYTE_WIDTH-1] ? src2_data[4*j+3] : src1_data[4*j+3]; |
| endcase |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {result_minmax8[4*j+3], |
| result_minmax8[4*j+2], |
| result_minmax8[4*j+1], |
| result_minmax8[4*j]}; |
| end |
| EEW16: begin |
| case({src2_data[4*j+1][`BYTE_WIDTH-1],src1_data[4*j+1][`BYTE_WIDTH-1]}) |
| 2'b10 : result_minmax16[2*j] = {src2_data[4*j+1],src2_data[4*j]}; |
| 2'b01 : result_minmax16[2*j] = {src1_data[4*j+1],src1_data[4*j]}; |
| default: result_minmax16[2*j] = product16[2*j][`HWORD_WIDTH-1] ? {src2_data[4*j+1],src2_data[4*j]} : {src1_data[4*j+1],src1_data[4*j]}; |
| endcase |
| |
| case({src2_data[4*j+3][`BYTE_WIDTH-1],src1_data[4*j+3][`BYTE_WIDTH-1]}) |
| 2'b10 : result_minmax16[2*j+1] = {src2_data[4*j+3],src2_data[4*j+2]}; |
| 2'b01 : result_minmax16[2*j+1] = {src1_data[4*j+3],src1_data[4*j+2]}; |
| default: result_minmax16[2*j+1] = product16[2*j+1][`HWORD_WIDTH-1] ? {src2_data[4*j+3],src2_data[4*j+2]} : {src1_data[4*j+3],src1_data[4*j+2]}; |
| endcase |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {result_minmax16[2*j+1], |
| result_minmax16[2*j]}; |
| end |
| EEW32: begin |
| case({src2_data[4*j+3][`BYTE_WIDTH-1],src1_data[4*j+3][`BYTE_WIDTH-1]}) |
| 2'b10 : result_minmax32[j] = {src2_data[4*j+3],src2_data[4*j+2],src2_data[4*j+1],src2_data[4*j]}; |
| 2'b01 : result_minmax32[j] = {src1_data[4*j+3],src1_data[4*j+2],src1_data[4*j+1],src1_data[4*j]}; |
| default: result_minmax32[j] = product32[j][`WORD_WIDTH-1] ? |
| {src2_data[4*j+3],src2_data[4*j+2],src2_data[4*j+1],src2_data[4*j]}: |
| {src1_data[4*j+3],src1_data[4*j+2],src1_data[4*j+1],src1_data[4*j]}; |
| endcase |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = result_minmax32[j]; |
| end |
| endcase |
| end |
| |
| VMAXU: begin |
| case(vs2_eew) |
| EEW8: begin |
| result_minmax8[4*j+3] = cout8[4*j+3] ? src1_data[4*j+3] : src2_data[4*j+3]; |
| result_minmax8[4*j+2] = cout8[4*j+2] ? src1_data[4*j+2] : src2_data[4*j+2]; |
| result_minmax8[4*j+1] = cout8[4*j+1] ? src1_data[4*j+1] : src2_data[4*j+1]; |
| result_minmax8[4*j ] = cout8[4*j ] ? src1_data[4*j ] : src2_data[4*j ]; |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {result_minmax8[4*j+3], |
| result_minmax8[4*j+2], |
| result_minmax8[4*j+1], |
| result_minmax8[4*j]}; |
| end |
| EEW16: begin |
| result_minmax16[2*j+1] = cout16[2*j+1] ? {src1_data[4*j+3],src1_data[4*j+2]} : {src2_data[4*j+3],src2_data[4*j+2]}; |
| result_minmax16[2*j ] = cout16[2*j ] ? {src1_data[4*j+1],src1_data[4*j ]} : {src2_data[4*j+1],src2_data[4*j ]}; |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {result_minmax16[2*j+1], |
| result_minmax16[2*j]}; |
| end |
| EEW32: begin |
| result_minmax32[j] = cout32[j] ? {src1_data[4*j+3],src1_data[4*j+2],src1_data[4*j+1],src1_data[4*j]}: |
| {src2_data[4*j+3],src2_data[4*j+2],src2_data[4*j+1],src2_data[4*j]}; |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = result_minmax32[j]; |
| end |
| endcase |
| end |
| |
| VMAX: begin |
| case(vs2_eew) |
| EEW8: begin |
| case({src2_data[4*j][`BYTE_WIDTH-1],src1_data[4*j][`BYTE_WIDTH-1]}) |
| 2'b01 : result_minmax8[4*j] = src2_data[4*j]; |
| 2'b10 : result_minmax8[4*j] = src1_data[4*j]; |
| default: result_minmax8[4*j] = product8[4*j][`BYTE_WIDTH-1] ? src1_data[4*j] : src2_data[4*j]; |
| endcase |
| |
| case({src2_data[4*j+1][`BYTE_WIDTH-1],src1_data[4*j+1][`BYTE_WIDTH-1]}) |
| 2'b01 : result_minmax8[4*j+1] = src2_data[4*j+1]; |
| 2'b10 : result_minmax8[4*j+1] = src1_data[4*j+1]; |
| default: result_minmax8[4*j+1] = product8[4*j+1][`BYTE_WIDTH-1] ? src1_data[4*j+1] : src2_data[4*j+1]; |
| endcase |
| |
| case({src2_data[4*j+2][`BYTE_WIDTH-1],src1_data[4*j+2][`BYTE_WIDTH-1]}) |
| 2'b01 : result_minmax8[4*j+2] = src2_data[4*j+2]; |
| 2'b10 : result_minmax8[4*j+2] = src1_data[4*j+2]; |
| default: result_minmax8[4*j+2] = product8[4*j+2][`BYTE_WIDTH-1] ? src1_data[4*j+2] : src2_data[4*j+2]; |
| endcase |
| |
| case({src2_data[4*j+3][`BYTE_WIDTH-1],src1_data[4*j+3][`BYTE_WIDTH-1]}) |
| 2'b01 : result_minmax8[4*j+3] = src2_data[4*j+3]; |
| 2'b10 : result_minmax8[4*j+3] = src1_data[4*j+3]; |
| default: result_minmax8[4*j+3] = product8[4*j+3][`BYTE_WIDTH-1] ? src1_data[4*j+3] : src2_data[4*j+3]; |
| endcase |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {result_minmax8[4*j+3], |
| result_minmax8[4*j+2], |
| result_minmax8[4*j+1], |
| result_minmax8[4*j]}; |
| end |
| EEW16: begin |
| case({src2_data[4*j+1][`BYTE_WIDTH-1],src1_data[4*j+1][`BYTE_WIDTH-1]}) |
| 2'b01 : result_minmax16[2*j] = {src2_data[4*j+1],src2_data[4*j]}; |
| 2'b10 : result_minmax16[2*j] = {src1_data[4*j+1],src1_data[4*j]}; |
| default: result_minmax16[2*j] = product16[2*j][`HWORD_WIDTH-1] ? {src1_data[4*j+1],src1_data[4*j]} : {src2_data[4*j+1],src2_data[4*j]}; |
| endcase |
| |
| case({src2_data[4*j+3][`BYTE_WIDTH-1],src1_data[4*j+3][`BYTE_WIDTH-1]}) |
| 2'b01 : result_minmax16[2*j+1] = {src2_data[4*j+3],src2_data[4*j+2]}; |
| 2'b10 : result_minmax16[2*j+1] = {src1_data[4*j+3],src1_data[4*j+2]}; |
| default: result_minmax16[2*j+1] = product16[2*j+1][`HWORD_WIDTH-1] ? {src1_data[4*j+3],src1_data[4*j+2]} : {src2_data[4*j+3],src2_data[4*j+2]}; |
| endcase |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {result_minmax16[2*j+1], |
| result_minmax16[2*j]}; |
| end |
| EEW32: begin |
| case({src2_data[4*j+3][`BYTE_WIDTH-1],src1_data[4*j+3][`BYTE_WIDTH-1]}) |
| 2'b01 : result_minmax32[j] = {src2_data[4*j+3],src2_data[4*j+2],src2_data[4*j+1],src2_data[4*j]}; |
| 2'b10 : result_minmax32[j] = {src1_data[4*j+3],src1_data[4*j+2],src1_data[4*j+1],src1_data[4*j]}; |
| default: result_minmax32[j] = product32[j][`WORD_WIDTH-1] ? |
| {src1_data[4*j+3],src1_data[4*j+2],src1_data[4*j+1],src1_data[4*j]}: |
| {src2_data[4*j+3],src2_data[4*j+2],src2_data[4*j+1],src2_data[4*j]}; |
| endcase |
| |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = result_minmax32[j]; |
| end |
| endcase |
| end |
| |
| |
| endcase |
| end |
| |
| OPMVV, |
| OPMVX: begin |
| case(uop_funct6.ari_funct6) |
| VWADDU, |
| VWSUBU, |
| VWADD, |
| VWSUB: begin |
| case(vs2_eew) |
| EEW8: begin |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {product16[2*j+1], product16[2*j]}; |
| end |
| EEW16: begin |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = product32[j]; |
| end |
| endcase |
| end |
| |
| VWADDU_W, |
| VWSUBU_W, |
| VWADD_W, |
| VWSUB_W: begin |
| case(vs2_eew) |
| EEW16: begin |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {product16[2*j+1], product16[2*j]}; |
| end |
| EEW32: begin |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = product32[j]; |
| end |
| endcase |
| end |
| |
| VAADDU, |
| VAADD, |
| VASUBU, |
| VASUB: begin |
| case(vs2_eew) |
| EEW8: begin |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {round8[4*j+3], round8[4*j+2], round8[4*j+1], round8[4*j]}; |
| end |
| EEW16: begin |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = {round16[2*j+1], round16[2*j]}; |
| end |
| EEW32: begin |
| result_data[j*`WORD_WIDTH +: `WORD_WIDTH] = round32[j]; |
| end |
| endcase |
| end |
| endcase |
| end |
| endcase |
| end |
| end |
| endgenerate |
| |
| // |
| // drive the PU2ROB_t result record |
| // |
| always_comb begin |
|   // fields that are forwarded for every uop |
| `ifdef TB_SUPPORT |
|   result.uop_pc    = alu_uop.uop_pc; |
| `endif |
|   result.rob_entry = rob_entry; |
|   result.w_data    = result_data; |
|   result.w_valid   = result_valid; |
| |
|   // vsaturate stays zero unless one of the four saturating opcodes is selected |
|   result.vsaturate = 'b0; |
| |
|   if ((uop_funct3==OPIVV) || (uop_funct3==OPIVX) || (uop_funct3==OPIVI)) begin |
|     case(uop_funct6.ari_funct6) |
|       VSADDU: result.vsaturate = addu_upoverflow; |
|       VSADD:  result.vsaturate = add_upoverflow|add_underoverflow; |
|       VSSUBU: result.vsaturate = subu_underoverflow; |
|       VSSUB:  result.vsaturate = sub_upoverflow|sub_underoverflow; |
|     endcase |
|   end |
| end |
| |
| // |
| // function unit |
| // |
| // add and sub function |
| function [`BYTE_WIDTH:0] f_full_addsub8( |
|   input ADDSUB_e                opcode, |
|   input logic [`BYTE_WIDTH-1:0] src_x, |
|   input logic [`BYTE_WIDTH-1:0] src_y, |
|   input logic                   src_cin |
| ); |
|   // Byte adder/subtractor with carry/borrow input. |
|   //   opcode==ADDSUB_VADD : src_x + src_y + src_cin |
|   //   any other opcode    : src_x - src_y - src_cin |
|   // The operation is evaluated on BYTE_WIDTH+1 bits; the return value is |
|   // {cout, result}, where cout is the extra top bit of that evaluation. |
|   logic [`BYTE_WIDTH:0] wide_x; |
|   logic [`BYTE_WIDTH:0] wide_y; |
|   logic [`BYTE_WIDTH:0] wide_cin; |
| |
|   // zero-extend every operand by one bit so the carry/borrow is kept |
|   wide_x   = {1'b0, src_x}; |
|   wide_y   = {1'b0, src_y}; |
|   wide_cin = {{`BYTE_WIDTH{1'b0}}, src_cin}; |
| |
|   if (opcode==ADDSUB_VADD) |
|     return wide_x + wide_y + wide_cin; |
| |
|   return wide_x - wide_y - wide_cin; |
| |
| endfunction |
| |
| function [`BYTE_WIDTH:0] f_half_addsub8( |
|   input ADDSUB_e              opcode, |
|   input logic [`BYTE_WIDTH:0] src_x, |
|   input logic                 src_cin |
| ); |
|   // Increment/decrement of a (BYTE_WIDTH+1)-bit operand by a single bit. |
|   //   opcode==ADDSUB_VADD : src_x + src_cin |
|   //   any other opcode    : src_x - src_cin |
|   // src_x is one bit wider than a byte, so the operation is evaluated on |
|   // BYTE_WIDTH+1 bits and all of those bits are returned as {cout, result}. |
|   logic [`BYTE_WIDTH:0] wide_cin; |
| |
|   wide_cin = {{`BYTE_WIDTH{1'b0}}, src_cin}; |
| |
|   if (opcode==ADDSUB_VADD) |
|     return src_x + wide_cin; |
| |
|   return src_x - wide_cin; |
| |
| endfunction |
| |
| function [`HWORD_WIDTH:0] f_half_addsub16( |
|   input ADDSUB_e               opcode, |
|   input logic [`HWORD_WIDTH:0] src_x, |
|   input logic                  src_cin |
| ); |
|   // Increment/decrement of a (HWORD_WIDTH+1)-bit operand by a single bit. |
|   //   opcode==ADDSUB_VADD : src_x + src_cin |
|   //   any other opcode    : src_x - src_cin |
|   // src_x is one bit wider than a half word, so the operation is evaluated on |
|   // HWORD_WIDTH+1 bits and all of those bits are returned as {cout, result}. |
|   logic [`HWORD_WIDTH:0] wide_cin; |
| |
|   wide_cin = {{`HWORD_WIDTH{1'b0}}, src_cin}; |
| |
|   if (opcode==ADDSUB_VADD) |
|     return src_x + wide_cin; |
| |
|   return src_x - wide_cin; |
| |
| endfunction |
| |
| function [`WORD_WIDTH-1:0] f_src_plus1( |
|   input logic [`WORD_WIDTH-1:0] src_x |
| ); |
|   // Returns src_x + 1, truncated to WORD_WIDTH bits. |
|   // Both halves are incremented independently; the incremented upper half |
|   // is used only when the lower half increment produces a carry. |
|   logic [`HWORD_WIDTH-1:0] upper; |
|   logic [`HWORD_WIDTH-1:0] upper_inc; |
|   logic [`HWORD_WIDTH:0]   lower_inc;   // extra top bit holds the carry |
| |
|   upper     = src_x[`WORD_WIDTH-1:`HWORD_WIDTH]; |
|   upper_inc = upper + 1'b1; |
|   lower_inc = {1'b0, src_x[`HWORD_WIDTH-1:0]} + 1'b1; |
| |
|   if (lower_inc[`HWORD_WIDTH]) |
|     upper = upper_inc; |
| |
|   return {upper, lower_inc[`HWORD_WIDTH-1:0]}; |
| |
| endfunction |
| |
| endmodule |