| |
| `include "rvv_backend.svh" |
| `include "rvv_backend_sva.svh" |
| |
| module rvv_backend_alu_unit_shift |
| ( |
| alu_uop_valid, |
| alu_uop, |
| result_valid, |
| result |
| ); |
| // |
| // interface signals |
| // |
| // ALU RS handshake signals |
| // alu_uop_valid qualifies alu_uop; result_valid is only raised while it is 1. |
| input logic alu_uop_valid; |
| // uop to execute; its fields are unpacked into the internal signals below. |
| input ALU_RS_t alu_uop; |
| |
| // ALU send result signals to ROB |
| // result_valid is generated combinationally from the uop decode and operand-valid flags. |
| output logic result_valid; |
| output PU2ROB_t result; |
| |
| // |
| // internal signals |
| // |
| // ALU_RS_t struct signals |
| // (one local copy per field of alu_uop that this unit reads) |
| logic [`ROB_DEPTH_WIDTH-1:0] rob_entry; |
| FUNCT6_u uop_funct6; |
| logic [`FUNCT3_WIDTH-1:0] uop_funct3; |
| RVVXRM vxrm; |
| logic [`VLEN-1:0] vs1_data; |
| logic vs1_data_valid; |
| logic [`VLEN-1:0] vs2_data; |
| logic vs2_data_valid; |
| EEW_e vs2_eew; |
| logic [`XLEN-1:0] rs1_data; |
| logic rs1_data_valid; |
| logic [`UOP_INDEX_WIDTH-1:0] uop_index; |
| |
| // execute |
| // shift, rounding and clip (saturation) intermediate signals |
| // shifter operands: the datapath has VLENB/2 8-bit lanes, VLEN/HWORD_WIDTH/2 16-bit lanes |
| // and VLEN/WORD_WIDTH 32-bit lanes; narrower elements are widened onto the wider lanes. |
| logic [`VLENB/2-1:0][`BYTE_WIDTH-1:0] src2_data8; |
| logic [`VLEN/`HWORD_WIDTH/2-1:0][`HWORD_WIDTH-1:0] src2_data16; |
| logic [`VLEN/`WORD_WIDTH-1:0][`WORD_WIDTH-1:0] src2_data32; |
| // per-lane shift amounts, log2(lane width) bits each |
| logic [`VLENB/2-1:0][$clog2(`BYTE_WIDTH)-1:0] shift_amount8; |
| logic [`VLEN/`HWORD_WIDTH/2-1:0][$clog2(`HWORD_WIDTH)-1:0] shift_amount16; |
| logic [`VLEN/`WORD_WIDTH-1:0][$clog2(`WORD_WIDTH)-1:0] shift_amount32; |
| // raw per-lane outputs of f_shift8/16/32 (first half of the returned pair) |
| logic [`VLENB/2-1:0][`BYTE_WIDTH-1:0] product8_tmp; |
| logic [`VLEN/`HWORD_WIDTH/2-1:0][`HWORD_WIDTH-1:0] product16_tmp; |
| logic [`VLEN/`WORD_WIDTH-1:0][`WORD_WIDTH-1:0] product32_tmp; |
| // shift results regrouped per element for each EEW view of the full vector |
| logic [`VLENB-1:0][`BYTE_WIDTH-1:0] product8; |
| logic [`VLEN/`HWORD_WIDTH-1:0][`HWORD_WIDTH-1:0] product16; |
| logic [`VLEN/`WORD_WIDTH-1:0][`WORD_WIDTH-1:0] product32; |
| // raw per-lane outputs of f_shift8/16/32 (second half of the returned pair) |
| logic [`VLENB/2-1:0][`BYTE_WIDTH-1:0] round_bits8_tmp; |
| logic [`VLEN/`HWORD_WIDTH/2-1:0][`HWORD_WIDTH-1:0] round_bits16_tmp; |
| logic [`VLEN/`WORD_WIDTH-1:0][`WORD_WIDTH-1:0] round_bits32_tmp; |
| // round bits regrouped per element for each EEW view |
| logic [`VLENB-1:0][`BYTE_WIDTH-1:0] round_bits8; |
| logic [`VLEN/`HWORD_WIDTH-1:0][`HWORD_WIDTH-1:0] round_bits16; |
| logic [`VLEN/`WORD_WIDTH-1:0][`WORD_WIDTH-1:0] round_bits32; |
| // per-element rounding increment selected by vxrm |
| logic [`VLENB-1:0] round_increment8; |
| logic [`VLEN/`HWORD_WIDTH-1:0] round_increment16; |
| logic [`VLEN/`WORD_WIDTH-1:0] round_increment32; |
| // per-element rounded results (product + round increment) |
| logic [`VLENB-1:0][`BYTE_WIDTH-1:0] round8; |
| logic [`VLEN/`HWORD_WIDTH-1:0][`HWORD_WIDTH-1:0] round16; |
| logic [`VLEN/`WORD_WIDTH-1:0][`WORD_WIDTH-1:0] round32; |
| // extra top bit returned by f_half_add16/32, used by the clip overflow check |
| logic [`VLEN/`HWORD_WIDTH-1:0] cout16; |
| logic [`VLEN/`WORD_WIDTH-1:0] cout32; |
| // per-byte saturation flags for vnclipu/vnclip: clip to max (up) or to min (under) |
| logic [`VLENB-1:0] upoverflow; |
| logic [`VLENB-1:0] underoverflow; |
| logic [`VLEN-1:0] result_data; |
| // shifter operation decoded from uop_funct6 |
| SHIFT_e opcode; |
| |
| // PU2ROB_t struct signals |
| logic [`VCSR_VXSAT_WIDTH-1:0] vxsat; |
| |
| // for-loop |
| genvar j; |
| |
| // |
| // prepare source data to calculate |
| // |
| // Unpack the ALU_RS_t uop into the individual fields used by this unit. |
| always_comb begin |
|   // bookkeeping and decode fields |
|   rob_entry      = alu_uop.rob_entry; |
|   uop_index      = alu_uop.uop_index; |
|   uop_funct6     = alu_uop.uop_funct6; |
|   uop_funct3     = alu_uop.uop_funct3; |
|   vxrm           = alu_uop.vxrm; |
| |
|   // vector source 2 (data to be shifted) |
|   vs2_data       = alu_uop.vs2_data; |
|   vs2_data_valid = alu_uop.vs2_data_valid; |
|   vs2_eew        = alu_uop.vs2_eew; |
| |
|   // shift amount sources: vector vs1 or scalar rs1 |
|   vs1_data       = alu_uop.vs1_data; |
|   vs1_data_valid = alu_uop.vs1_data_valid; |
|   rs1_data       = alu_uop.rs1_data; |
|   rs1_data_valid = alu_uop.rs1_data_valid; |
| end |
| |
| // |
| // prepare source data |
| // |
| // prepare valid signal |
| // result_valid is raised for a valid uop whose funct6/funct3 select one of the shift or |
| // clip operations of this unit and whose required operands are flagged valid. |
| // Narrowing operations additionally require vs2_eew to be EEW16 or EEW32. |
| always_comb begin |
|   // default: nothing to report |
|   result_valid = 'b0; |
| |
|   if (alu_uop_valid) begin |
|     case(uop_funct6.ari_funct6) |
|       // single-width shifts |
|       VSLL, VSRL, VSRA, VSSRL, VSSRA: begin |
|         case(uop_funct3) |
|           // shift amount from vs1 |
|           OPIVV: begin |
|             if (vs2_data_valid && vs1_data_valid) |
|               result_valid = 'b1; |
| |
|             `ifdef ASSERT_ON |
|             assert #0 (result_valid==1'b1) |
|             else $error("result_valid(%d) should be 1.\n",result_valid); |
|             `endif |
|           end |
| |
|           // shift amount from rs1 |
|           OPIVX, OPIVI: begin |
|             if (vs2_data_valid && rs1_data_valid) |
|               result_valid = 'b1; |
| |
|             `ifdef ASSERT_ON |
|             assert #0 (result_valid==1'b1) |
|             else $error("result_valid(%d) should be 1.\n",result_valid); |
|             `endif |
|           end |
|         endcase |
|       end |
| |
|       // narrowing shifts and clips: source elements must be 16 or 32 bits wide |
|       VNSRL, VNSRA, VNCLIPU, VNCLIP: begin |
|         case(uop_funct3) |
|           OPIVV: begin |
|             if (vs2_data_valid && vs1_data_valid && ((vs2_eew==EEW16) || (vs2_eew==EEW32))) |
|               result_valid = 'b1; |
| |
|             `ifdef ASSERT_ON |
|             assert #0 (result_valid==1'b1) |
|             else $error("result_valid(%d) should be 1.\n",result_valid); |
|             `endif |
|           end |
| |
|           OPIVX, OPIVI: begin |
|             if (vs2_data_valid && rs1_data_valid && ((vs2_eew==EEW16) || (vs2_eew==EEW32))) |
|               result_valid = 'b1; |
| |
|             `ifdef ASSERT_ON |
|             assert #0 (result_valid==1'b1) |
|             else $error("result_valid(%d) should be 1.\n",result_valid); |
|             `endif |
|           end |
|         endcase |
|       end |
|     endcase |
|   end |
| end |
| |
| // prepare source data |
| // Distributes the vs2 elements over the 8/16/32-bit shifter lanes and picks the shift |
| // amount of every lane: |
| //  - EEW8 : bytes [0, VLENB/2) use the 8-bit lanes, bytes [VLENB/2, VLENB*3/4) the 16-bit |
| //           lanes and bytes [VLENB*3/4, VLENB) the 32-bit lanes. |
| //  - EEW16: the lower half of the halfwords uses the 16-bit lanes, the upper half the |
| //           32-bit lanes. |
| //  - EEW32: every word uses its own 32-bit lane. |
| // Elements placed on a wider lane are zero-filled, or sign-filled for the arithmetic |
| // opcodes (VSRA, VSSRA, VNSRA, VNCLIP). |
| // The shift amount is the low log2(element width) bits of rs1_data (OPIVX/OPIVI) or of |
| // the matching vs1 element (OPIVV). For narrowing opcodes the vs1 elements are half as |
| // wide as the vs2 elements and uop_index[0] selects the lower or upper half of vs1_data. |
| always_comb begin |
|   // decoded properties of the current uop |
|   logic fmt_known;     // uop_funct3 is OPIVV, OPIVX or OPIVI |
|   logic op_known;      // uop_funct6 is one of the opcodes handled below |
|   logic amt_from_vs1;  // shift amount is taken from vs1_data instead of rs1_data |
|   logic sign_fill;     // widen elements with their sign bit instead of zeros |
|   logic narrowing;     // narrowing opcode (EEW8 sources are not supported) |
| |
|   // initial the data |
|   src2_data8     = 'b0; |
|   src2_data16    = 'b0; |
|   src2_data32    = 'b0; |
|   shift_amount8  = 'b0; |
|   shift_amount16 = 'b0; |
|   shift_amount32 = 'b0; |
|   fmt_known      = 1'b0; |
|   op_known       = 1'b0; |
|   amt_from_vs1   = 1'b0; |
|   sign_fill      = 1'b0; |
|   narrowing      = 1'b0; |
| |
|   // operand format |
|   case(uop_funct3) |
|     OPIVV: begin |
|       fmt_known    = 1'b1; |
|       amt_from_vs1 = 1'b1; |
|     end |
|     OPIVX, |
|     OPIVI: begin |
|       fmt_known    = 1'b1; |
|     end |
|   endcase |
| |
|   // operation class |
|   case(uop_funct6.ari_funct6) |
|     VSLL, |
|     VSRL, |
|     VSSRL: begin |
|       op_known  = 1'b1; |
|     end |
|     VSRA, |
|     VSSRA: begin |
|       op_known  = 1'b1; |
|       sign_fill = 1'b1; |
|     end |
|     VNSRL, |
|     VNCLIPU: begin |
|       op_known  = 1'b1; |
|       narrowing = 1'b1; |
|     end |
|     VNSRA, |
|     VNCLIP: begin |
|       op_known  = 1'b1; |
|       sign_fill = 1'b1; |
|       narrowing = 1'b1; |
|     end |
|   endcase |
| |
|   if (fmt_known && op_known) begin |
|     case(vs2_eew) |
|       EEW8: begin |
|         // narrowing opcodes have no EEW8 source: everything stays zero |
|         if (!narrowing) begin |
|           // bytes on the 8-bit lanes |
|           for(int b=0;b<`VLENB/2;b++) begin |
|             src2_data8[b] = vs2_data[b*`BYTE_WIDTH +: `BYTE_WIDTH]; |
| |
|             if (amt_from_vs1) |
|               shift_amount8[b] = vs1_data[b*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]; |
|             else |
|               shift_amount8[b] = rs1_data[0 +: $clog2(`BYTE_WIDTH)]; |
|           end |
| |
|           // bytes widened onto the 16-bit lanes |
|           for(int b=`VLENB/2;b<`VLENB*3/4;b++) begin |
|             src2_data16[b-`VLENB/2] = {{8{sign_fill & vs2_data[(b+1)*`BYTE_WIDTH-1]}},vs2_data[b*`BYTE_WIDTH +: `BYTE_WIDTH]}; |
| |
|             if (amt_from_vs1) |
|               shift_amount16[b-`VLENB/2] = vs1_data[b*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]; |
|             else |
|               shift_amount16[b-`VLENB/2] = rs1_data[0 +: $clog2(`BYTE_WIDTH)]; |
|           end |
| |
|           // bytes widened onto the 32-bit lanes |
|           for(int b=`VLENB*3/4;b<`VLENB;b++) begin |
|             src2_data32[b-`VLENB*3/4] = {{24{sign_fill & vs2_data[(b+1)*`BYTE_WIDTH-1]}},vs2_data[b*`BYTE_WIDTH +: `BYTE_WIDTH]}; |
| |
|             if (amt_from_vs1) |
|               shift_amount32[b-`VLENB*3/4] = vs1_data[b*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]; |
|             else |
|               shift_amount32[b-`VLENB*3/4] = rs1_data[0 +: $clog2(`BYTE_WIDTH)]; |
|           end |
|         end |
|       end |
| |
|       EEW16: begin |
|         // halfwords on the 16-bit lanes |
|         for(int h=0;h<`VLEN/`HWORD_WIDTH/2;h++) begin |
|           src2_data16[h] = vs2_data[h*`HWORD_WIDTH +: `HWORD_WIDTH]; |
| |
|           if (!amt_from_vs1) |
|             shift_amount16[h] = rs1_data[0 +: $clog2(`HWORD_WIDTH)]; |
|           else if (!narrowing) |
|             shift_amount16[h] = vs1_data[h*`HWORD_WIDTH +: $clog2(`HWORD_WIDTH)]; |
|           else if (uop_index[0]==1'b0) |
|             shift_amount16[h] = vs1_data[h*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]; |
|           else |
|             shift_amount16[h] = vs1_data[`VLEN/2+h*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]; |
|         end |
| |
|         // halfwords widened onto the 32-bit lanes |
|         for(int h=`VLEN/`HWORD_WIDTH/2;h<`VLEN/`HWORD_WIDTH;h++) begin |
|           src2_data32[h-`VLEN/`HWORD_WIDTH/2] = {{16{sign_fill & vs2_data[(h+1)*`HWORD_WIDTH-1]}},vs2_data[h*`HWORD_WIDTH +: `HWORD_WIDTH]}; |
| |
|           if (!amt_from_vs1) |
|             shift_amount32[h-`VLEN/`HWORD_WIDTH/2] = rs1_data[0 +: $clog2(`HWORD_WIDTH)]; |
|           else if (!narrowing) |
|             shift_amount32[h-`VLEN/`HWORD_WIDTH/2] = vs1_data[h*`HWORD_WIDTH +: $clog2(`HWORD_WIDTH)]; |
|           else if (uop_index[0]==1'b0) |
|             shift_amount32[h-`VLEN/`HWORD_WIDTH/2] = vs1_data[h*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]; |
|           else |
|             shift_amount32[h-`VLEN/`HWORD_WIDTH/2] = vs1_data[`VLEN/2+h*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]; |
|         end |
|       end |
| |
|       EEW32: begin |
|         // words on the 32-bit lanes |
|         for(int w=0;w<`VLEN/`WORD_WIDTH;w++) begin |
|           src2_data32[w] = vs2_data[w*`WORD_WIDTH +: `WORD_WIDTH]; |
| |
|           if (!amt_from_vs1) |
|             shift_amount32[w] = rs1_data[0 +: $clog2(`WORD_WIDTH)]; |
|           else if (!narrowing) |
|             shift_amount32[w] = vs1_data[w*`WORD_WIDTH +: $clog2(`WORD_WIDTH)]; |
|           else if (uop_index[0]==1'b0) |
|             shift_amount32[w] = vs1_data[w*`HWORD_WIDTH +: $clog2(`WORD_WIDTH)]; |
|           else |
|             shift_amount32[w] = vs1_data[`VLEN/2+w*`HWORD_WIDTH +: $clog2(`WORD_WIDTH)]; |
|         end |
|       end |
|     endcase |
|   end |
| end |
| |
| // get opcode for the f_shift8/16/32 shifter lanes |
| // Logical right shift covers VSRL, VNSRL, VSSRL and VNCLIPU; arithmetic right shift |
| // covers VSRA, VNSRA, VSSRA and VNCLIP; everything else (including VSLL) selects |
| // SHIFT_SLL. |
| always_comb begin |
|   case(uop_funct3) |
|     OPIVV, |
|     OPIVX, |
|     OPIVI: begin |
|       case(uop_funct6.ari_funct6) |
|         VSRL, |
|         VNSRL, |
|         VSSRL, |
|         VNCLIPU: opcode = SHIFT_SRL; |
| |
|         VSRA, |
|         VNSRA, |
|         VSSRA, |
|         VNCLIP:  opcode = SHIFT_SRA; |
| |
|         // VSLL and any opcode not handled by this unit |
|         default: opcode = SHIFT_SLL; |
|       endcase |
|     end |
| |
|     // other operand formats keep the initial value |
|     default: opcode = SHIFT_SLL; |
|   endcase |
| end |
| |
| // |
| // calculate the result |
| // |
| // shift instructions |
| // One shifter function call per lane; each call returns the shifted value followed by |
| // the round bits of that lane. |
| generate |
|   // 8-bit lanes |
|   for (j=0;j<`VLENB/2;j++) begin: EXE_PROD8 |
|     always_comb begin |
|       {product8_tmp[j], round_bits8_tmp[j]} = f_shift8(opcode, src2_data8[j], shift_amount8[j]); |
|     end |
|   end |
| |
|   // 16-bit lanes |
|   for (j=0;j<`VLEN/`HWORD_WIDTH/2;j++) begin: EXE_PROD16 |
|     always_comb begin |
|       {product16_tmp[j], round_bits16_tmp[j]} = f_shift16(opcode, src2_data16[j], shift_amount16[j]); |
|     end |
|   end |
| |
|   // 32-bit lanes |
|   for (j=0;j<`VLEN/`WORD_WIDTH;j++) begin: EXE_PROD32 |
|     always_comb begin |
|       {product32_tmp[j], round_bits32_tmp[j]} = f_shift32(opcode, src2_data32[j], shift_amount32[j]); |
|     end |
|   end |
| endgenerate |
| |
| // Regroup the lane outputs into one element array per EEW view. |
| // Elements that were computed on a wider lane take the low bits of that lane's product |
| // and the top bits of that lane's round bits. |
| always_comb begin |
|   product8     = 'b0; |
|   round_bits8  = 'b0; |
|   product16    = 'b0; |
|   round_bits16 = 'b0; |
|   product32    = 'b0; |
|   round_bits32 = 'b0; |
| |
|   // byte view: 8-bit lanes first ... |
|   for(int k=0;k<`VLENB/2;k++) begin |
|     product8[k]    = product8_tmp[k]; |
|     round_bits8[k] = round_bits8_tmp[k]; |
|   end |
|   // ... then the 16-bit lanes ... |
|   for(int k=0;k<`VLENB*3/4-`VLENB/2;k++) begin |
|     product8[`VLENB/2+k]    = product16_tmp[k][0 +: `BYTE_WIDTH]; |
|     round_bits8[`VLENB/2+k] = round_bits16_tmp[k][`BYTE_WIDTH +: `BYTE_WIDTH]; |
|   end |
|   // ... then the 32-bit lanes |
|   for(int k=0;k<`VLENB-`VLENB*3/4;k++) begin |
|     product8[`VLENB*3/4+k]    = product32_tmp[k][0 +: `BYTE_WIDTH]; |
|     round_bits8[`VLENB*3/4+k] = round_bits32_tmp[k][3*`BYTE_WIDTH +: `BYTE_WIDTH]; |
|   end |
| |
|   // halfword view: 16-bit lanes first, then the 32-bit lanes |
|   for(int k=0;k<`VLEN/`HWORD_WIDTH/2;k++) begin |
|     product16[k]    = product16_tmp[k]; |
|     round_bits16[k] = round_bits16_tmp[k]; |
|   end |
|   for(int k=0;k<`VLEN/`HWORD_WIDTH-`VLEN/`HWORD_WIDTH/2;k++) begin |
|     product16[`VLEN/`HWORD_WIDTH/2+k]    = product32_tmp[k][0 +: `HWORD_WIDTH]; |
|     round_bits16[`VLEN/`HWORD_WIDTH/2+k] = round_bits32_tmp[k][`HWORD_WIDTH +: `HWORD_WIDTH]; |
|   end |
| |
|   // word view: one 32-bit lane per element |
|   for(int k=0;k<`VLEN/`WORD_WIDTH;k++) begin |
|     product32[k]    = product32_tmp[k]; |
|     round_bits32[k] = round_bits32_tmp[k]; |
|   end |
| end |
| |
| // round increment |
| // Per element, selected by vxrm from the round bits and the LSB of the shifted value: |
| //   RNU: MSB of the round bits |
| //   RNE: MSB of the round bits, and (any lower round bit set, or LSB of the product set) |
| //   RDN: never |
| //   ROD: LSB of the product clear, and any round bit set |
| generate |
|   for (j=0;j<`VLENB;j++) begin: INCREMENT8 |
|     always_comb begin |
|       case(vxrm) |
|         RNU:     round_increment8[j] = round_bits8[j][`BYTE_WIDTH-1]; |
|         RNE:     round_increment8[j] = round_bits8[j][`BYTE_WIDTH-1] & |
|                                        ((|round_bits8[j][`BYTE_WIDTH-2:0]) | product8[j][0]); |
|         RDN:     round_increment8[j] = 'b0; |
|         ROD:     round_increment8[j] = (~product8[j][0]) & (|round_bits8[j]); |
|         default: round_increment8[j] = 'b0; |
|       endcase |
|     end |
|   end |
| |
|   for (j=0;j<`VLEN/`HWORD_WIDTH;j++) begin: INCREMENT16 |
|     always_comb begin |
|       case(vxrm) |
|         RNU:     round_increment16[j] = round_bits16[j][`HWORD_WIDTH-1]; |
|         RNE:     round_increment16[j] = round_bits16[j][`HWORD_WIDTH-1] & |
|                                         ((|round_bits16[j][`HWORD_WIDTH-2:0]) | product16[j][0]); |
|         RDN:     round_increment16[j] = 'b0; |
|         ROD:     round_increment16[j] = (~product16[j][0]) & (|round_bits16[j]); |
|         default: round_increment16[j] = 'b0; |
|       endcase |
|     end |
|   end |
| |
|   for (j=0;j<`VLEN/`WORD_WIDTH;j++) begin: INCREMENT32 |
|     always_comb begin |
|       case(vxrm) |
|         RNU:     round_increment32[j] = round_bits32[j][`WORD_WIDTH-1]; |
|         RNE:     round_increment32[j] = round_bits32[j][`WORD_WIDTH-1] & |
|                                         ((|round_bits32[j][`WORD_WIDTH-2:0]) | product32[j][0]); |
|         RDN:     round_increment32[j] = 'b0; |
|         ROD:     round_increment32[j] = (~product32[j][0]) & (|round_bits32[j]); |
|         default: round_increment32[j] = 'b0; |
|       endcase |
|     end |
|   end |
| endgenerate |
| |
| // rounding result |
| // Adds the round increment to every shifted element. The element is extended by one |
| // bit before the addition: zero for SHIFT_SRL, its own MSB for SHIFT_SRA. For 16/32-bit |
| // elements the extra result bit is kept in cout16/cout32. Other opcodes produce zeros. |
| generate |
|   for (j=0;j<`VLENB;j++) begin: ROUND8 |
|     always_comb begin |
|       case(opcode) |
|         SHIFT_SRL: round8[j] = f_half_add8({1'b0, product8[j]}, round_increment8[j]); |
|         SHIFT_SRA: round8[j] = f_half_add8({product8[j][`BYTE_WIDTH-1], product8[j]}, round_increment8[j]); |
|         default:   round8[j] = 'b0; |
|       endcase |
|     end |
|   end |
| |
|   for (j=0;j<`VLEN/`HWORD_WIDTH;j++) begin: ROUND16 |
|     always_comb begin |
|       case(opcode) |
|         SHIFT_SRL: {cout16[j], round16[j]} = f_half_add16({1'b0, product16[j]}, round_increment16[j]); |
|         SHIFT_SRA: {cout16[j], round16[j]} = f_half_add16({product16[j][`HWORD_WIDTH-1], product16[j]}, round_increment16[j]); |
|         default: begin |
|           cout16[j]  = 'b0; |
|           round16[j] = 'b0; |
|         end |
|       endcase |
|     end |
|   end |
| |
|   for (j=0;j<`VLEN/`WORD_WIDTH;j++) begin: ROUND32 |
|     always_comb begin |
|       case(opcode) |
|         SHIFT_SRL: {cout32[j], round32[j]} = f_half_add32({1'b0, product32[j]}, round_increment32[j]); |
|         SHIFT_SRA: {cout32[j], round32[j]} = f_half_add32({product32[j][`WORD_WIDTH-1], product32[j]}, round_increment32[j]); |
|         default: begin |
|           cout32[j]  = 'b0; |
|           round32[j] = 'b0; |
|         end |
|       endcase |
|     end |
|   end |
| endgenerate |
| |
| // overflow check for vnclipu and vnclip |
| // Each generate iteration j produces four flag bits for the lower half of the result |
| // (upoverflow/underoverflow[4*j +: 4]) and four for the upper half |
| // ([4*(j+VLEN/WORD_WIDTH/2) +: 4]). uop_index[0] selects which half receives the flags |
| // of this uop; the other half is cleared. |
| //  - EEW16 source: one flag per rounded halfword round16[4*j+k], k = 0..3. |
| //  - EEW32 source: one flag per rounded word round32[2*j+k], k = 0..1, stored at bit |
| //    2*k+1 of the 4-bit group (bit 2*k is always 0). |
| // Unsigned clip (SHIFT_SRL): the value overflows when any bit above the destination |
| // width, including the adder's extra top bit, is set. |
| // Signed clip (SHIFT_SRA): the value fits only when every bit from the destination sign |
| // bit upwards equals the sign, so the compared slice starts at the destination sign bit |
| // (bit BYTE_WIDTH-1 for EEW16 sources, bit HWORD_WIDTH-1 for EEW32 sources). Starting |
| // one bit higher would let e.g. +128..+255 and -256..-129 through unsaturated. |
| generate |
|   for (j=0;j<`VLEN/`WORD_WIDTH/2;j++) begin: GET_OVERFLOW |
|     always_comb begin |
|       // flags of the four result bytes handled by this iteration |
|       logic [3:0] clip_to_max; |
|       logic [3:0] clip_to_min; |
| |
|       // initial |
|       clip_to_max = 'b0; |
|       clip_to_min = 'b0; |
|       upoverflow[   4*j +: 4] = 'b0; |
|       underoverflow[4*j +: 4] = 'b0; |
|       upoverflow[   4*(j+`VLEN/`WORD_WIDTH/2) +: 4] = 'b0; |
|       underoverflow[4*(j+`VLEN/`WORD_WIDTH/2) +: 4] = 'b0; |
| |
|       case(vs2_eew) |
|         EEW16: begin |
|           for(int k=0;k<4;k++) begin |
|             case(opcode) |
|               SHIFT_SRL: begin |
|                 // unsigned overflow check for vnclipu |
|                 clip_to_max[k] = (|{cout16[4*j+k], round16[4*j+k][`BYTE_WIDTH +: `BYTE_WIDTH]}); |
|               end |
|               SHIFT_SRA: begin |
|                 // signed overflow check for vnclip: bits [HWORD_WIDTH:BYTE_WIDTH-1] must all equal the sign |
|                 clip_to_max[k] = (|{cout16[4*j+k], round16[4*j+k][`BYTE_WIDTH-1 +: `BYTE_WIDTH+1]}) & |
|                                  (round16[4*j+k][`HWORD_WIDTH-1]==1'b0); |
|                 clip_to_min[k] = (~&{cout16[4*j+k], round16[4*j+k][`BYTE_WIDTH-1 +: `BYTE_WIDTH+1]}) & |
|                                  (round16[4*j+k][`HWORD_WIDTH-1]==1'b1); |
|               end |
|             endcase |
|           end |
|         end |
| |
|         EEW32: begin |
|           for(int k=0;k<2;k++) begin |
|             case(opcode) |
|               SHIFT_SRL: begin |
|                 // unsigned overflow check for vnclipu |
|                 clip_to_max[2*k+1] = (|{cout32[2*j+k], round32[2*j+k][`HWORD_WIDTH +: `HWORD_WIDTH]}); |
|               end |
|               SHIFT_SRA: begin |
|                 // signed overflow check for vnclip: bits [WORD_WIDTH:HWORD_WIDTH-1] must all equal the sign |
|                 clip_to_max[2*k+1] = (|{cout32[2*j+k], round32[2*j+k][`HWORD_WIDTH-1 +: `HWORD_WIDTH+1]}) & |
|                                      (round32[2*j+k][`WORD_WIDTH-1]==1'b0); |
|                 clip_to_min[2*k+1] = (~&{cout32[2*j+k], round32[2*j+k][`HWORD_WIDTH-1 +: `HWORD_WIDTH+1]}) & |
|                                      (round32[2*j+k][`WORD_WIDTH-1]==1'b1); |
|               end |
|             endcase |
|           end |
|         end |
|       endcase |
| |
|       // place the flags in the half of the result written by this uop |
|       if(uop_index[0]==1'b0) begin |
|         upoverflow[   4*j +: 4] = clip_to_max; |
|         underoverflow[4*j +: 4] = clip_to_min; |
|       end |
|       else begin |
|         upoverflow[   4*(j+`VLEN/`WORD_WIDTH/2) +: 4] = clip_to_max; |
|         underoverflow[4*(j+`VLEN/`WORD_WIDTH/2) +: 4] = clip_to_min; |
|       end |
|     end |
|   end |
| endgenerate |
| |
| // assign to result_data |
| // Packs the per-element results back into a VLEN-wide vector: |
| //  - VSLL/VSRL/VSRA  : shifted elements (product8/16/32). |
| //  - VSSRL/VSSRA     : rounded elements (round8/16/32). |
| //  - VNSRL/VNSRA     : low half of every shifted element, written to the lower or upper |
| //                      half of result_data as selected by uop_index[0]. |
| //  - VNCLIPU/VNCLIP  : low half of every rounded element, replaced by the saturation |
| //                      constant when the matching upoverflow/underoverflow flag is set. |
| //                      Both halves of result_data are written; the upper half re-uses |
| //                      the same rounded elements as the lower half. |
| // Anything else leaves result_data at zero. |
| always_comb begin |
|   // initial the data |
|   result_data = 'b0; |
| |
|   for(int i=0;i<`VLEN/`WORD_WIDTH;i++) begin |
|     // calculate result data |
|     case(uop_funct3) |
|       OPIVV, |
|       OPIVX, |
|       OPIVI: begin |
|         case(uop_funct6.ari_funct6) |
|           VSLL, |
|           VSRL, |
|           VSRA: begin |
|             case(vs2_eew) |
|               EEW8: begin |
|                 for(int k=0;k<4;k++) |
|                   result_data[(4*i+k)*`BYTE_WIDTH +: `BYTE_WIDTH] = product8[4*i+k]; |
|               end |
|               EEW16: begin |
|                 for(int k=0;k<2;k++) |
|                   result_data[(2*i+k)*`HWORD_WIDTH +: `HWORD_WIDTH] = product16[2*i+k]; |
|               end |
|               EEW32: begin |
|                 result_data[i*`WORD_WIDTH +: `WORD_WIDTH] = product32[i]; |
|               end |
|             endcase |
|           end |
| |
|           VNSRL, |
|           VNSRA: begin |
|             case(vs2_eew) |
|               EEW16: begin |
|                 if (uop_index[0]==1'b0) |
|                   result_data[i*`HWORD_WIDTH +: `HWORD_WIDTH] = {product16[2*i+1][`BYTE_WIDTH-1:0],product16[2*i][`BYTE_WIDTH-1:0]}; |
|                 else |
|                   result_data[`VLEN/2+i*`HWORD_WIDTH +: `HWORD_WIDTH] = {product16[2*i+1][`BYTE_WIDTH-1:0],product16[2*i][`BYTE_WIDTH-1:0]}; |
|               end |
|               EEW32: begin |
|                 if (uop_index[0]==1'b0) |
|                   result_data[i*`HWORD_WIDTH +: `HWORD_WIDTH] = product32[i][`HWORD_WIDTH-1:0]; |
|                 else |
|                   result_data[`VLEN/2+i*`HWORD_WIDTH +: `HWORD_WIDTH] = product32[i][`HWORD_WIDTH-1:0]; |
|               end |
|             endcase |
|           end |
| |
|           VSSRL, |
|           VSSRA: begin |
|             case(vs2_eew) |
|               EEW8: begin |
|                 for(int k=0;k<4;k++) |
|                   result_data[(4*i+k)*`BYTE_WIDTH +: `BYTE_WIDTH] = round8[4*i+k]; |
|               end |
|               EEW16: begin |
|                 for(int k=0;k<2;k++) |
|                   result_data[(2*i+k)*`HWORD_WIDTH +: `HWORD_WIDTH] = round16[2*i+k]; |
|               end |
|               EEW32: begin |
|                 result_data[i*`WORD_WIDTH +: `WORD_WIDTH] = round32[i]; |
|               end |
|             endcase |
|           end |
| |
|           VNCLIPU: begin |
|             case(vs2_eew) |
|               EEW16: begin |
|                 // four result bytes per iteration, one flag bit per byte |
|                 for(int k=0;k<4;k++) begin |
|                   if (upoverflow[4*i+k]) |
|                     result_data[(4*i+k)*`BYTE_WIDTH +: `BYTE_WIDTH] = 'hff; |
|                   else if (i<`VLEN/`WORD_WIDTH/2) |
|                     result_data[(4*i+k)*`BYTE_WIDTH +: `BYTE_WIDTH] = round16[4*i+k][`BYTE_WIDTH-1 : 0]; |
|                   else |
|                     result_data[(4*i+k)*`BYTE_WIDTH +: `BYTE_WIDTH] = round16[4*(i-`VLEN/`WORD_WIDTH/2)+k][`BYTE_WIDTH-1 : 0]; |
|                 end |
|               end |
|               EEW32: begin |
|                 // two result halfwords per iteration, flags at the odd bit positions |
|                 for(int k=0;k<2;k++) begin |
|                   if (upoverflow[4*i+2*k+1]) |
|                     result_data[(2*i+k)*`HWORD_WIDTH +: `HWORD_WIDTH] = 'hffff; |
|                   else if (i<`VLEN/`WORD_WIDTH/2) |
|                     result_data[(2*i+k)*`HWORD_WIDTH +: `HWORD_WIDTH] = round32[2*i+k][`HWORD_WIDTH-1 : 0]; |
|                   else |
|                     result_data[(2*i+k)*`HWORD_WIDTH +: `HWORD_WIDTH] = round32[2*(i-`VLEN/`WORD_WIDTH/2)+k][`HWORD_WIDTH-1 : 0]; |
|                 end |
|               end |
|             endcase |
|           end |
| |
|           VNCLIP: begin |
|             case(vs2_eew) |
|               EEW16: begin |
|                 // four result bytes per iteration, one flag bit per byte |
|                 for(int k=0;k<4;k++) begin |
|                   if (upoverflow[4*i+k]) |
|                     result_data[(4*i+k)*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h7f; |
|                   else if (underoverflow[4*i+k]) |
|                     result_data[(4*i+k)*`BYTE_WIDTH +: `BYTE_WIDTH] = 'h80; |
|                   else if (i<`VLEN/`WORD_WIDTH/2) |
|                     result_data[(4*i+k)*`BYTE_WIDTH +: `BYTE_WIDTH] = round16[4*i+k][`BYTE_WIDTH-1 : 0]; |
|                   else |
|                     result_data[(4*i+k)*`BYTE_WIDTH +: `BYTE_WIDTH] = round16[4*(i-`VLEN/`WORD_WIDTH/2)+k][`BYTE_WIDTH-1 : 0]; |
|                 end |
|               end |
|               EEW32: begin |
|                 // two result halfwords per iteration, flags at the odd bit positions |
|                 for(int k=0;k<2;k++) begin |
|                   if (upoverflow[4*i+2*k+1]) |
|                     result_data[(2*i+k)*`HWORD_WIDTH +: `HWORD_WIDTH] = 'h7fff; |
|                   else if (underoverflow[4*i+2*k+1]) |
|                     result_data[(2*i+k)*`HWORD_WIDTH +: `HWORD_WIDTH] = 'h8000; |
|                   else if (i<`VLEN/`WORD_WIDTH/2) |
|                     result_data[(2*i+k)*`HWORD_WIDTH +: `HWORD_WIDTH] = round32[2*i+k][`HWORD_WIDTH-1 : 0]; |
|                   else |
|                     result_data[(2*i+k)*`HWORD_WIDTH +: `HWORD_WIDTH] = round32[2*(i-`VLEN/`WORD_WIDTH/2)+k][`HWORD_WIDTH-1 : 0]; |
|                 end |
|               end |
|             endcase |
|           end |
|         endcase |
|       end |
|     endcase |
|   end |
| end |
| |
| // |
| // drive the result bundle that is sent to ROB |
| // |
| // write-back payload and its qualifier |
| assign result.w_valid = result_valid; |
| assign result.w_data = result_data; |
| |
| // ROB entry this result belongs to |
| assign result.rob_entry = rob_entry; |
| |
| // uop_pc is only driven when TB_SUPPORT is defined |
| `ifdef TB_SUPPORT |
| assign result.uop_pc = alu_uop.uop_pc; |
| `endif |
| |
| // saturation flags reported to ROB |
| // Flags stay cleared unless a valid OPIVV/OPIVX/OPIVI uop carries |
| // VNCLIPU (flags = upoverflow) or VNCLIP (flags = upoverflow OR underoverflow). |
| always_comb begin |
|   // default: nothing saturated |
|   result.vsaturate = 'b0; |
| |
|   if (alu_uop_valid) begin |
|     case(uop_funct3) |
|       OPIVV, |
|       OPIVX, |
|       OPIVI: begin |
|         case(uop_funct6.ari_funct6) |
|           VNCLIPU: result.vsaturate = upoverflow; |
|           VNCLIP:  result.vsaturate = upoverflow|underoverflow; |
|         endcase |
|       end |
|     endcase |
|   end |
| end |
| |
| // |
| // function unit |
| // |
| // shifter function |
| // f_shift8: shift one BYTE_WIDTH-bit element by 'amount' and keep the bits that fall out. |
| // |
| // The operand is placed in the upper half of a double-width word before shifting, |
| // so the returned 2*BYTE_WIDTH bits are laid out as: |
| //   upper half : the shifted element |
| //   lower half : bits shifted out to the right (always zero for SHIFT_SLL) |
| // SHIFT_SRL fills with zeros, SHIFT_SRA replicates operand's MSB. |
| // Any opcode other than SHIFT_SLL/SHIFT_SRL/SHIFT_SRA returns 0. |
| // |
| // Declared automatic so arguments and locals get per-call storage instead of |
| // a single static copy shared by every call site in the module. |
| function automatic [2*`BYTE_WIDTH-1:0] f_shift8; |
|   input SHIFT_e opcode; |
|   input logic [`BYTE_WIDTH-1:0] operand; |
|   input logic [$clog2(`BYTE_WIDTH)-1:0] amount; |
| |
|   logic guard_bit; |
|   logic signed [2*`BYTE_WIDTH:0] shift_in; |
|   logic signed [2*`BYTE_WIDTH:0] shift_out; |
| |
|   // extra bit above the operand: sign bit for SRA, 0 for the logical shifts |
|   guard_bit = (opcode==SHIFT_SRA) ? operand[`BYTE_WIDTH-1] : 1'b0; |
|   shift_in = {guard_bit,operand,{`BYTE_WIDTH{1'b0}}}; |
| |
|   case(opcode) |
|     SHIFT_SLL: |
|       shift_out = shift_in<<amount; |
|     SHIFT_SRL, |
|     SHIFT_SRA: |
|       // shift_in is signed, so >>> replicates guard_bit (zero fill when it is 0) |
|       shift_out = shift_in>>>amount; |
|     default: |
|       shift_out = 'b0; |
|   endcase |
| |
|   // drop the guard bit |
|   return shift_out[2*`BYTE_WIDTH-1:0]; |
| endfunction |
| |
| // f_shift16: shift one HWORD_WIDTH-bit element by 'amount' and keep the bits that fall out. |
| // |
| // The operand is placed in the upper half of a double-width word before shifting, |
| // so the returned 2*HWORD_WIDTH bits are laid out as: |
| //   upper half : the shifted element |
| //   lower half : bits shifted out to the right (always zero for SHIFT_SLL) |
| // SHIFT_SRL fills with zeros, SHIFT_SRA replicates operand's MSB. |
| // Any opcode other than SHIFT_SLL/SHIFT_SRL/SHIFT_SRA returns 0. |
| // |
| // Declared automatic so arguments and locals get per-call storage instead of |
| // a single static copy shared by every call site in the module. |
| function automatic [2*`HWORD_WIDTH-1:0] f_shift16; |
|   input SHIFT_e opcode; |
|   input logic [`HWORD_WIDTH-1:0] operand; |
|   input logic [$clog2(`HWORD_WIDTH)-1:0] amount; |
| |
|   logic guard_bit; |
|   logic signed [2*`HWORD_WIDTH:0] shift_in; |
|   logic signed [2*`HWORD_WIDTH:0] shift_out; |
| |
|   // extra bit above the operand: sign bit for SRA, 0 for the logical shifts |
|   guard_bit = (opcode==SHIFT_SRA) ? operand[`HWORD_WIDTH-1] : 1'b0; |
|   shift_in = {guard_bit,operand,{`HWORD_WIDTH{1'b0}}}; |
| |
|   case(opcode) |
|     SHIFT_SLL: |
|       shift_out = shift_in<<amount; |
|     SHIFT_SRL, |
|     SHIFT_SRA: |
|       // shift_in is signed, so >>> replicates guard_bit (zero fill when it is 0) |
|       shift_out = shift_in>>>amount; |
|     default: |
|       shift_out = 'b0; |
|   endcase |
| |
|   // drop the guard bit |
|   return shift_out[2*`HWORD_WIDTH-1:0]; |
| endfunction |
| |
| // f_shift32: shift one WORD_WIDTH-bit element by 'amount' and keep the bits that fall out. |
| // |
| // The operand is placed in the upper half of a double-width word before shifting, |
| // so the returned 2*WORD_WIDTH bits are laid out as: |
| //   upper half : the shifted element |
| //   lower half : bits shifted out to the right (always zero for SHIFT_SLL) |
| // SHIFT_SRL fills with zeros, SHIFT_SRA replicates operand's MSB. |
| // Any opcode other than SHIFT_SLL/SHIFT_SRL/SHIFT_SRA returns 0. |
| // |
| // Declared automatic so arguments and locals get per-call storage instead of |
| // a single static copy shared by every call site in the module. |
| function automatic [2*`WORD_WIDTH-1:0] f_shift32; |
|   input SHIFT_e opcode; |
|   input logic [`WORD_WIDTH-1:0] operand; |
|   input logic [$clog2(`WORD_WIDTH)-1:0] amount; |
| |
|   logic guard_bit; |
|   logic signed [2*`WORD_WIDTH:0] shift_in; |
|   logic signed [2*`WORD_WIDTH:0] shift_out; |
| |
|   // extra bit above the operand: sign bit for SRA, 0 for the logical shifts |
|   guard_bit = (opcode==SHIFT_SRA) ? operand[`WORD_WIDTH-1] : 1'b0; |
|   shift_in = {guard_bit,operand,{`WORD_WIDTH{1'b0}}}; |
| |
|   case(opcode) |
|     SHIFT_SLL: |
|       shift_out = shift_in<<amount; |
|     SHIFT_SRL, |
|     SHIFT_SRA: |
|       // shift_in is signed, so >>> replicates guard_bit (zero fill when it is 0) |
|       shift_out = shift_in>>>amount; |
|     default: |
|       shift_out = 'b0; |
|   endcase |
| |
|   // drop the guard bit |
|   return shift_out[2*`WORD_WIDTH-1:0]; |
| endfunction |
| |
| // f_half_add8: incrementer, computes src_x + cin. |
| // |
| // src_x is BYTE_WIDTH+1 bits wide but only the low BYTE_WIDTH bits of the sum |
| // are returned; bit BYTE_WIDTH of the sum is discarded. |
| // NOTE(review): f_half_add16/f_half_add32 return the full width+1 sum instead - |
| // confirm the narrower return here is intended by the callers. |
| // |
| // Declared automatic so arguments and the local get per-call storage instead of |
| // a single static copy shared by every call site in the module. |
| function automatic [`BYTE_WIDTH-1:0] f_half_add8; |
|   // x + cin |
|   input logic [`BYTE_WIDTH:0] src_x; |
|   input logic cin; |
| |
|   logic [`BYTE_WIDTH:0] sum; |
| |
|   sum = cin ? src_x + 1'b1 : src_x; |
| |
|   return sum[`BYTE_WIDTH-1:0]; |
| endfunction |
| |
| // f_half_add16: incrementer, returns src_x + 1 when cin is set and src_x otherwise. |
| // Operand and result are both HWORD_WIDTH+1 bits wide. |
| function [`HWORD_WIDTH:0] f_half_add16 ( |
|   input logic [`HWORD_WIDTH:0] src_x, |
|   input logic cin |
| ); |
|   return cin ? (src_x + 1'b1) : src_x; |
| endfunction |
| |
| // f_half_add32: incrementer, returns src_x + 1 when cin is set and src_x otherwise. |
| // Operand and result are both WORD_WIDTH+1 bits wide. |
| function [`WORD_WIDTH:0] f_half_add32 ( |
|   input logic [`WORD_WIDTH:0] src_x, |
|   input logic cin |
| ); |
|   return cin ? (src_x + 1'b1) : src_x; |
| endfunction |
| |
| endmodule |