Clean lint warnings: W416b, W362, W486, ... Change-Id: Ie5350b57c334c856fee8815411ea03960649bfa5
diff --git a/hdl/verilog/rvv/common/multi_fifo.sv b/hdl/verilog/rvv/common/multi_fifo.sv index a8be9ed..16deccf 100644 --- a/hdl/verilog/rvv/common/multi_fifo.sv +++ b/hdl/verilog/rvv/common/multi_fifo.sv
@@ -121,7 +121,7 @@ endgenerate // dataout always_comb begin - pop_count = pop[0]; + pop_count = {(DEPTH_BITS)'(0), pop[0]}; for (int j=1; j<N; j++) pop_count = pop_count + pop[j]; end @@ -146,7 +146,9 @@ always_ff @(posedge clk) begin if ((i<remain_count)&(|pop)) dataout[i] <= mem[current_rptr_mem[i]]; - else if ((push_seq[current_rptr_psh[i]]&(current_rptr_psh[i]<M))&((|pop)|(|push_seq))) + else if ((push_seq[current_rptr_psh[i]]&(current_rptr_psh[i]<(DEPTH_BITS)'(M)))& + ((|pop)|(|push_seq)) + ) dataout[i] <= datain_seq[current_rptr_psh[i]]; end end @@ -155,7 +157,9 @@ always_ff @(posedge clk) begin if ((i<remain_count)&(|pop)) dataout[i] <= mem[current_rptr_mem[i]]; - else if ((push[current_rptr_psh[i]]&(current_rptr_psh[i]<M))&((|pop)|(|push))) + else if ((push[current_rptr_psh[i]]&(current_rptr_psh[i]<(DEPTH_BITS)'(M)))& + ((|pop)|(|push)) + ) dataout[i] <= datain[current_rptr_psh[i]]; end end @@ -170,7 +174,7 @@ // datain always_comb begin - push_count = push[0]; + push_count = {(DEPTH_BITS)'(0), push[0]}; for (int j=1; j<M; j++) push_count = push_count + push[j]; end
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv index 2d6aa0f..cc35a7e 100644 --- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv +++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv
@@ -1674,9 +1674,9 @@ logic cout; if (opcode==ADDSUB_VADD) - {cout,result} = src_x + src_y + src_cin; + {cout,result} = (`BYTE_WIDTH+1)'(src_x) + (`BYTE_WIDTH+1)'(src_y) + (`BYTE_WIDTH+1)'(src_cin); else //(opcode==ADDSUB_VSUB) - {cout,result} = src_x - src_y - src_cin; + {cout,result} = (`BYTE_WIDTH+1)'(src_x) - (`BYTE_WIDTH+1)'(src_y) - (`BYTE_WIDTH+1)'(src_cin); return {cout,result}; @@ -1726,7 +1726,7 @@ logic [`HWORD_WIDTH:0] res_lo; res_hi = src_x[`WORD_WIDTH-1:`HWORD_WIDTH] + 1'b1; - res_lo = src_x[`HWORD_WIDTH-1:0] + 1'b1; + res_lo = (`HWORD_WIDTH+1)'(src_x[`HWORD_WIDTH-1:0]) + 1'b1; if (res_lo[`HWORD_WIDTH]) return {res_hi,res_lo[`HWORD_WIDTH-1:0]};
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv index 4251810..dbc75bc 100644 --- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv +++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv
@@ -32,8 +32,8 @@ generate if(`VLEN==128) begin for(j=0;j<64;j++) begin: GET_VIOTA128 - assign result_data_viota[j] = alu_uop.data_viota_per64[0][j]; - assign result_data_viota[j+64] = {1'b0,alu_uop.data_viota_per64[1][j]} + {1'b0,alu_uop.data_viota_per64[0][63]}; + assign result_data_viota[j] = ($clog2(`VLEN)+1)'(alu_uop.data_viota_per64[0][j]); + assign result_data_viota[j+64] = ($clog2(`VLEN)+1)'({1'b0,alu_uop.data_viota_per64[1][j]} + {1'b0,alu_uop.data_viota_per64[0][63]}); end end endgenerate @@ -53,7 +53,7 @@ endgenerate // vcpop - assign result_data_vcpop = result_data_viota[`VLEN-1]; + assign result_data_vcpop = (`XLEN)'(result_data_viota[`VLEN-1]); // // submit result to ROB @@ -72,7 +72,7 @@ // calculate result data case(alu_uop.alu_sub_opcode) OP_VCPOP: begin - result.w_data = result_data_vcpop; + result.w_data = (`VLEN)'(result_data_vcpop); result.vsaturate = 'b0; end OP_VIOTA: begin @@ -81,17 +81,17 @@ case(alu_uop.vd_eew) EEW8: begin for(int i=0; i<`VLENB;i++) begin - result.w_data[i*`BYTE_WIDTH +: `BYTE_WIDTH] = result_data_viota8[i]; + result.w_data[i*`BYTE_WIDTH +: `BYTE_WIDTH] = (`BYTE_WIDTH)'(result_data_viota8[i]); end end EEW16: begin for(int i=0; i<`VLEN/`HWORD_WIDTH;i++) begin - result.w_data[i*`HWORD_WIDTH +: `HWORD_WIDTH] = result_data_viota16[i]; + result.w_data[i*`HWORD_WIDTH +: `HWORD_WIDTH] = (`HWORD_WIDTH)'(result_data_viota16[i]); end end EEW32: begin for(int i=0; i<`VLEN/`WORD_WIDTH;i++) begin - result.w_data[i*`WORD_WIDTH +: `WORD_WIDTH] = result_data_viota32[i]; + result.w_data[i*`WORD_WIDTH +: `WORD_WIDTH] = (`WORD_WIDTH)'(result_data_viota32[i]); end end endcase
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv index a6d88fb..256d6e3 100644 --- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv +++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv
@@ -191,11 +191,11 @@ result_valid = alu_uop_valid&(vs1_data_valid==1'b0)&vs2_data_valid&((vm==1'b1)||((vm==1'b0)&v0_data_valid)); alu_sub_opcode = OP_VIOTA; // it can get the viota result in one cycle whose element index in vd belongs to 0-31. - // Otherwise, it will get the result in next cycle. + // Otherwise, it will get the result in the next cycle. case(vd_eew) - EEW8 : result_2cycle = uop_index >= 32/(`VLEN/8); - EEW16 : result_2cycle = uop_index >= 32/(`VLEN/16); - default: result_2cycle = uop_index >= 32/(`VLEN/32); //EEW32 + EEW8 : result_2cycle = uop_index >= (`UOP_INDEX_WIDTH)'(32/(`VLEN/8)); + EEW16 : result_2cycle = uop_index >= (`UOP_INDEX_WIDTH)'(32/(`VLEN/16)); + default: result_2cycle = uop_index >= (`UOP_INDEX_WIDTH)'(32/(`VLEN/32)); //EEW32 endcase end VID: begin @@ -330,7 +330,7 @@ else begin for(int i=0;i<`VLEN;i++) begin if (result_data_vmsof[i]==1'b1) - result_data_vfirst = i; // one-hot to 8421BCD. get the index of first 1 + result_data_vfirst = (`VLEN)'(i); // one-hot to 8421BCD. 
get the index of first 1 end end end @@ -347,24 +347,24 @@ end for(j=0; j<`VLENB;j++) begin: GET_VIOTA8 - if ($clog2(32/`VLENB)<=3) // There may be up to 8 uops, so RHS in if-condition is $clog2(8)=3 - assign result_data_viota8[j] = data_viota_per32[0][{alu_uop.uop_index[$clog2(32/`VLENB)-1:0],j[$clog2(`VLENB)-1:0]}]; + if ((3)'($clog2(32/`VLENB)) <= 3'd3) // There may be up to 8 uops, so RHS in if-condition is $clog2(8)=3 + assign result_data_viota8[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[$clog2(32/`VLENB)-1:0],j[$clog2(`VLENB)-1:0]}]); else - assign result_data_viota8[j] = data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLENB)-1:0]}]; + assign result_data_viota8[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLENB)-1:0]}]); end for(j=0; j<`VLEN/`HWORD_WIDTH;j++) begin: GET_VIOTA16 - if ($clog2(32/(`VLEN/`HWORD_WIDTH))<=3) - assign result_data_viota16[j] = data_viota_per32[0][{alu_uop.uop_index[$clog2(32/(`VLEN/`HWORD_WIDTH))-1:0],j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]}]; + if ((3)'($clog2(32/(`VLEN/`HWORD_WIDTH))) <= 3'd3) + assign result_data_viota16[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[$clog2(32/(`VLEN/`HWORD_WIDTH))-1:0],j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]}]); else - assign result_data_viota16[j] = data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]}]; + assign result_data_viota16[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]}]); end for(j=0; j<`VLEN/`WORD_WIDTH;j++) begin: GET_VIOTA32 - if ($clog2(32/(`VLEN/`WORD_WIDTH))<=3) - assign result_data_viota32[j] = data_viota_per32[0][{alu_uop.uop_index[$clog2(32/(`VLEN/`WORD_WIDTH))-1:0],j[$clog2(`VLEN/`WORD_WIDTH)-1:0]}]; + if ((3)'($clog2(32/(`VLEN/`WORD_WIDTH))) <= 3'd3) + assign result_data_viota32[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[$clog2(32/(`VLEN/`WORD_WIDTH))-1:0],j[$clog2(`VLEN/`WORD_WIDTH)-1:0]}]); else - assign 
result_data_viota32[j] = data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLEN/`WORD_WIDTH)-1:0]}]; + assign result_data_viota32[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLEN/`WORD_WIDTH)-1:0]}]); end for(j=0;j<`VLEN/64;j++) begin: GET_VIOTA_PER64_J @@ -378,19 +378,19 @@ // vid generate for(j=0;j<`VLENB;j++) begin: GET_VID8 - assign result_data_vid8[j*`BYTE_WIDTH +: `BYTE_WIDTH] = {uop_index, j[$clog2(`VLENB)-1:0]}; + assign result_data_vid8[j*`BYTE_WIDTH +: `BYTE_WIDTH] = (`BYTE_WIDTH)'({uop_index, j[$clog2(`VLENB)-1:0]}); end endgenerate generate for(j=0;j<`VLEN/`HWORD_WIDTH;j++) begin: GET_VID16 - assign result_data_vid16[j*`HWORD_WIDTH +: `HWORD_WIDTH] = {uop_index, j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]}; + assign result_data_vid16[j*`HWORD_WIDTH +: `HWORD_WIDTH] = (`HWORD_WIDTH)'({uop_index, j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]}); end endgenerate generate for(j=0;j<`VLEN/`WORD_WIDTH;j++) begin: GET_VID32 - assign result_data_vid32[j*`WORD_WIDTH +: `WORD_WIDTH] = {uop_index, j[$clog2(`VLEN/`WORD_WIDTH)-1:0]}; + assign result_data_vid32[j*`WORD_WIDTH +: `WORD_WIDTH] = (`WORD_WIDTH)'({uop_index, j[$clog2(`VLEN/`WORD_WIDTH)-1:0]}); end endgenerate @@ -464,17 +464,17 @@ case(vd_eew) EEW8: begin for(int i=0; i<`VLENB;i++) begin - result_data[i*`BYTE_WIDTH +: `BYTE_WIDTH] = result_data_viota8[i]; + result_data[i*`BYTE_WIDTH +: `BYTE_WIDTH] = (`BYTE_WIDTH)'(result_data_viota8[i]); end end EEW16: begin for(int i=0; i<`VLEN/`HWORD_WIDTH;i++) begin - result_data[i*`HWORD_WIDTH +: `HWORD_WIDTH] = result_data_viota16[i]; + result_data[i*`HWORD_WIDTH +: `HWORD_WIDTH] = (`HWORD_WIDTH)'(result_data_viota16[i]); end end EEW32: begin for(int i=0; i<`VLEN/`WORD_WIDTH;i++) begin - result_data[i*`WORD_WIDTH +: `WORD_WIDTH] = result_data_viota32[i]; + result_data[i*`WORD_WIDTH +: `WORD_WIDTH] = (`WORD_WIDTH)'(result_data_viota32[i]); end end endcase @@ -502,7 +502,7 @@ // // submit result to ROB // - assign vstart_onehot = 1'b1<<vstart; + assign 
vstart_onehot = (`VLEN)'('b1)<<vstart; assign vstart_onehot_sub1 = vstart_onehot - 1'b1; always_comb begin
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask_viota.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask_viota.sv index 30a9876..3d3b5cd 100644 --- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask_viota.sv +++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask_viota.sv
@@ -143,10 +143,11 @@ generate for(j=0;j<7;j++) begin: GET_VIOTA32_27_0 - assign result_viota32[j] = result_viota7[0][j]; - assign result_viota32[j+7] = result_viota7[1][j]+result_viota7[0][6]; - assign result_viota32[j+14] = sum_20to14[j]+{carry_20to14[j],1'b0}; - assign result_viota32[j+21] = sum_27to21[j]+{({1'b0,carry_27to21[j]}+{1'b0,cout_27to21[j]}),1'b0}; + assign result_viota32[j] = ($clog2(32)+1)'(result_viota7[0][j]); + assign result_viota32[j+7] = ($clog2(32)+1)'(result_viota7[1][j])+($clog2(32)+1)'(result_viota7[0][6]); + assign result_viota32[j+14] = ($clog2(32)+1)'(sum_20to14[j])+($clog2(32)+1)'({carry_20to14[j],1'b0}); + assign result_viota32[j+21] = ($clog2(32)+1)'(sum_27to21[j])+($clog2(32)+1)'({({1'b0,carry_27to21[j]})+($clog2(32)+1)'({1'b0,cout_27to21[j]}),1'b0}); + compressor_3_2 #( @@ -179,7 +180,9 @@ end for(j=0;j<4;j++) begin: GET_VIOTA32_31_28 - assign result_viota32[j+28] = sum_31to28[j]+{({1'b0,carry_31to28[j]}+{1'b0,cout_31to28[j]}),1'b0}; + assign result_viota32[j+28] = ($clog2(32)+1)'(sum_31to28[j])+ + ($clog2(32)+1)'({({1'b0,carry_31to28[j]})+ + ($clog2(32)+1)'({1'b0,cout_31to28[j]}),1'b0}); compressor_4_2 #(
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv index a9fd507..b9d5498 100644 --- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv +++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv
@@ -371,36 +371,36 @@ VZEXT_VF2: begin case(vs2_eew) EEW8: begin - result_data_extend[(2*j )*`HWORD_WIDTH +: `HWORD_WIDTH] = src2_data[(2*j )*`BYTE_WIDTH +: `BYTE_WIDTH]; - result_data_extend[(2*j+1)*`HWORD_WIDTH +: `HWORD_WIDTH] = src2_data[(2*j+1)*`BYTE_WIDTH +: `BYTE_WIDTH]; + result_data_extend[(2*j )*`HWORD_WIDTH +: `HWORD_WIDTH] = {8'b0, src2_data[(2*j )*`BYTE_WIDTH +: `BYTE_WIDTH]}; + result_data_extend[(2*j+1)*`HWORD_WIDTH +: `HWORD_WIDTH] = {8'b0, src2_data[(2*j+1)*`BYTE_WIDTH +: `BYTE_WIDTH]}; end EEW16: begin - result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = src2_data[j*`HWORD_WIDTH +: `HWORD_WIDTH]; + result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = {16'b0, src2_data[j*`HWORD_WIDTH +: `HWORD_WIDTH]}; end endcase end VSEXT_VF2: begin case(vs2_eew) EEW8: begin - result_data_extend[(2*j )*`HWORD_WIDTH +: `HWORD_WIDTH] = $signed(src2_data[(2*j )*`BYTE_WIDTH +: `BYTE_WIDTH]); - result_data_extend[(2*j+1)*`HWORD_WIDTH +: `HWORD_WIDTH] = $signed(src2_data[(2*j+1)*`BYTE_WIDTH +: `BYTE_WIDTH]); + result_data_extend[(2*j )*`HWORD_WIDTH +: `HWORD_WIDTH] = {{8{src2_data[(2*j+1)*`BYTE_WIDTH-1]}}, src2_data[(2*j )*`BYTE_WIDTH +: `BYTE_WIDTH]}; + result_data_extend[(2*j+1)*`HWORD_WIDTH +: `HWORD_WIDTH] = {{8{src2_data[(2*j+2)*`BYTE_WIDTH-1]}}, src2_data[(2*j+1)*`BYTE_WIDTH +: `BYTE_WIDTH]}; end EEW16: begin - result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = $signed(src2_data[j*`HWORD_WIDTH +: `HWORD_WIDTH]); + result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = {{16{src2_data[(j+1)*`HWORD_WIDTH-1]}},src2_data[j*`HWORD_WIDTH +: `HWORD_WIDTH]}; end endcase end VZEXT_VF4: begin case(vs2_eew) EEW8: begin - result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = src2_data[j*`BYTE_WIDTH +: `BYTE_WIDTH]; + result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = {24'b0, src2_data[j*`BYTE_WIDTH +: `BYTE_WIDTH]}; end endcase end VSEXT_VF4: begin case(vs2_eew) EEW8: begin - result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = $signed(src2_data[j*`BYTE_WIDTH +: `BYTE_WIDTH]); + 
result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = {{24{src2_data[(j+1)*`BYTE_WIDTH-1]}}, src2_data[j*`BYTE_WIDTH +: `BYTE_WIDTH]}; end endcase end
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv index ed55d3e..b84b564 100755 --- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv +++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv
@@ -168,11 +168,11 @@ end for(int i=`VLENB/2;i<`VLENB*3/4;i=i+1) begin src2_data16[ i-`VLENB/2] = {8'b0,vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]}; - shift_amount16[i-`VLENB/2] = vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]; + shift_amount16[i-`VLENB/2] = {1'b0,vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]}; end for(int i=`VLENB*3/4;i<`VLENB;i=i+1) begin src2_data32[ i-`VLENB*3/4] = {24'b0,vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]}; - shift_amount32[i-`VLENB*3/4] = vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]; + shift_amount32[i-`VLENB*3/4] = {2'b0, vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]}; end end EEW16: begin @@ -182,7 +182,7 @@ end for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {16'b0,vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]}; - shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[i*`HWORD_WIDTH +: $clog2(`HWORD_WIDTH)]; + shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, vs1_data[i*`HWORD_WIDTH +: $clog2(`HWORD_WIDTH)]}; end end EEW32: begin @@ -204,11 +204,11 @@ end for(int i=`VLENB/2;i<`VLENB*3/4;i=i+1) begin src2_data16[ i-`VLENB/2] = {{8{vs2_data[(i+1)*`BYTE_WIDTH-1]}},vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]}; - shift_amount16[i-`VLENB/2] = vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]; + shift_amount16[i-`VLENB/2] = {1'b0,vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]}; end for(int i=`VLENB*3/4;i<`VLENB;i=i+1) begin src2_data32[ i-`VLENB*3/4] = {{24{vs2_data[(i+1)*`BYTE_WIDTH-1]}},vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]}; - shift_amount32[i-`VLENB*3/4] = vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]; + shift_amount32[i-`VLENB*3/4] = {2'b0,vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]}; end end EEW16: begin @@ -218,7 +218,7 @@ end for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {{16{vs2_data[(i+1)*`HWORD_WIDTH-1]}},vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]}; - shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[i*`HWORD_WIDTH +: 
$clog2(`HWORD_WIDTH)]; + shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0,vs1_data[i*`HWORD_WIDTH +: $clog2(`HWORD_WIDTH)]}; end end EEW32: begin @@ -244,9 +244,9 @@ for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {16'b0,vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]}; if (uop_index[0]==1'b0) - shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]; + shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, vs1_data[i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]}; else - shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[`VLEN/2+i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]; + shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, vs1_data[`VLEN/2+i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]}; end end EEW32: begin @@ -275,9 +275,9 @@ for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin src2_data32[i-`VLEN/`HWORD_WIDTH/2] = {{16{vs2_data[(i+1)*`HWORD_WIDTH-1]}},vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]}; if (uop_index[0]==1'b0) - shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]; + shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, vs1_data[i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]}; else - shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[`VLEN/2+i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]; + shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, vs1_data[`VLEN/2+i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]}; end end EEW32: begin @@ -308,11 +308,11 @@ end for(int i=`VLENB/2;i<`VLENB*3/4;i=i+1) begin src2_data16[ i-`VLENB/2] = {8'b0,vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]}; - shift_amount16[i-`VLENB/2] = rs1_data[0 +: $clog2(`BYTE_WIDTH)]; + shift_amount16[i-`VLENB/2] = {1'b0,rs1_data[0 +: $clog2(`BYTE_WIDTH)]}; end for(int i=`VLENB*3/4;i<`VLENB;i=i+1) begin src2_data32[ i-`VLENB*3/4] = {24'b0,vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]}; - shift_amount32[i-`VLENB*3/4] = rs1_data[0 +: $clog2(`BYTE_WIDTH)]; + shift_amount32[i-`VLENB*3/4] = {2'b0, rs1_data[0 +: $clog2(`BYTE_WIDTH)]}; end end EEW16: begin 
@@ -322,7 +322,7 @@ end for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {16'b0,vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]}; - shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = rs1_data[0 +: $clog2(`HWORD_WIDTH)]; + shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, rs1_data[0 +: $clog2(`HWORD_WIDTH)]}; end end EEW32: begin @@ -344,11 +344,11 @@ end for(int i=`VLENB/2;i<`VLENB*3/4;i=i+1) begin src2_data16[ i-`VLENB/2] = {{8{vs2_data[(i+1)*`BYTE_WIDTH-1]}},vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]}; - shift_amount16[i-`VLENB/2] = rs1_data[0 +: $clog2(`BYTE_WIDTH)]; + shift_amount16[i-`VLENB/2] = {1'b0,rs1_data[0 +: $clog2(`BYTE_WIDTH)]}; end for(int i=`VLENB*3/4;i<`VLENB;i=i+1) begin src2_data32[ i-`VLENB*3/4] = {{24{vs2_data[(i+1)*`BYTE_WIDTH-1]}},vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]}; - shift_amount32[i-`VLENB*3/4] = rs1_data[0 +: $clog2(`BYTE_WIDTH)]; + shift_amount32[i-`VLENB*3/4] = {2'b0,rs1_data[0 +: $clog2(`BYTE_WIDTH)]}; end end EEW16: begin @@ -358,7 +358,7 @@ end for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {{16{vs2_data[(i+1)*`HWORD_WIDTH-1]}},vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]}; - shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = rs1_data[0 +: $clog2(`HWORD_WIDTH)]; + shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0,rs1_data[0 +: $clog2(`HWORD_WIDTH)]}; end end EEW32: begin @@ -380,7 +380,7 @@ end for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {16'b0,vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]}; - shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = rs1_data[0 +: $clog2(`HWORD_WIDTH)]; + shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, rs1_data[0 +: $clog2(`HWORD_WIDTH)]}; end end EEW32: begin @@ -402,7 +402,7 @@ end for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {{16{vs2_data[(i+1)*`HWORD_WIDTH-1]}},vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]}; - 
shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = rs1_data[0 +: $clog2(`HWORD_WIDTH)]; + shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0,rs1_data[0 +: $clog2(`HWORD_WIDTH)]}; end end EEW32: begin
diff --git a/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv b/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv index ee86f5f..61b3e2c 100644 --- a/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv +++ b/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
@@ -3616,17 +3616,14 @@ uop_index_max = 'b0; case(emul_max) - EMUL1: begin - uop_index_max = 'b0; - end EMUL2: begin - uop_index_max = 'd1; + uop_index_max = (`UOP_INDEX_WIDTH)'('d1); end EMUL4: begin - uop_index_max = 'd3; + uop_index_max = (`UOP_INDEX_WIDTH)'('d3); end EMUL8: begin - uop_index_max = 'd7; + uop_index_max = (`UOP_INDEX_WIDTH)'('d7); end endcase end
diff --git a/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv b/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv index e4af58f..a940fe2 100644 --- a/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv +++ b/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
@@ -3049,7 +3049,7 @@ // calculate the uop_index used in decoding uops generate for(j=0;j<`NUM_DE_UOP;j++) begin: GET_UOP_INDEX - assign uop_index_current[j] = j[`UOP_INDEX_WIDTH-1:0]+uop_index_base; + assign uop_index_current[j] = (`UOP_INDEX_WIDTH+1)'(j[`UOP_INDEX_WIDTH-1:0]+uop_index_base); end endgenerate @@ -3061,29 +3061,26 @@ uop_index_max = 'b0; case(emul_max) - EMUL1: begin - uop_index_max = 'd0; - end EMUL2: begin - uop_index_max = 'd1; + uop_index_max = (`UOP_INDEX_WIDTH)'('d1); end EMUL3: begin - uop_index_max = 'd2; + uop_index_max = (`UOP_INDEX_WIDTH)'('d2); end EMUL4: begin - uop_index_max = 'd3; + uop_index_max = (`UOP_INDEX_WIDTH)'('d3); end EMUL5: begin - uop_index_max = 'd4; + uop_index_max = (`UOP_INDEX_WIDTH)'('d4); end EMUL6: begin - uop_index_max = 'd5; + uop_index_max = (`UOP_INDEX_WIDTH)'('d5); end EMUL7: begin - uop_index_max = 'd6; + uop_index_max = (`UOP_INDEX_WIDTH)'('d6); end EMUL8: begin - uop_index_max = 'd7; + uop_index_max = (`UOP_INDEX_WIDTH)'('d7); end endcase end
diff --git a/hdl/verilog/rvv/design/rvv_backend_dispatch.sv b/hdl/verilog/rvv/design/rvv_backend_dispatch.sv index ed76dc5..e7ef053 100755 --- a/hdl/verilog/rvv/design/rvv_backend_dispatch.sv +++ b/hdl/verilog/rvv/design/rvv_backend_dispatch.sv
@@ -150,7 +150,7 @@ generate for (i=0; i<`NUM_DP_UOP; i++) begin : gen_vlmax assign vlmax_shift[i] ={1'b0, uop_uop2dp[i].vector_csr.lmul[1:0]} + $clog2(`VLENB) - uop_uop2dp[i].vector_csr.sew - {uop_uop2dp[i].vector_csr.lmul[2],2'b00}; - assign vlmax[i] = 'h1 << vlmax_shift[i]; + assign vlmax[i] = (`VL_WIDTH)'(1) << vlmax_shift[i]; end endgenerate
diff --git a/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv b/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv index 770243a..2db3a49 100644 --- a/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv +++ b/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv
@@ -86,7 +86,7 @@ always_comb begin case (uop_info.uop_exe_unit) RDT:begin - uop_vs2_start = uop_info.uop_index << (VLENB_WIDTH - vs2_eew_shift); + uop_vs2_start = (`VSTART_WIDTH)'(uop_info.uop_index) << (VLENB_WIDTH - vs2_eew_shift); end default:begin case({eew_max,uop_info.vs2_eew}) @@ -94,16 +94,16 @@ {EEW16,EEW16}, {EEW8,EEW8}: begin // regular and narrowing instruction - uop_vs2_start = uop_info.uop_index << (VLENB_WIDTH - vs2_eew_shift); + uop_vs2_start = (`VSTART_WIDTH)'(uop_info.uop_index) << (VLENB_WIDTH - vs2_eew_shift); end {EEW32,EEW16}, {EEW16,EEW8}: begin // widening instruction: EEW_vd:EEW_vs = 2:1 - uop_vs2_start = uop_info.uop_index[`UOP_INDEX_WIDTH-1:1] << (VLENB_WIDTH - vs2_eew_shift); + uop_vs2_start = (`VSTART_WIDTH)'(uop_info.uop_index[`UOP_INDEX_WIDTH-1:1]) << (VLENB_WIDTH - vs2_eew_shift); end {EEW32,EEW8}: begin // widening instruction: EEW_vd:EEW_vs = 4:1 - uop_vs2_start = uop_info.uop_index[`UOP_INDEX_WIDTH-1:2] << (VLENB_WIDTH - vs2_eew_shift); + uop_vs2_start = (`VSTART_WIDTH)'(uop_info.uop_index[`UOP_INDEX_WIDTH-1:2]) << (VLENB_WIDTH - vs2_eew_shift); end default: begin uop_vs2_start = 'b0; @@ -118,7 +118,7 @@ for (i=0; i<`VLENB; i++) begin : gen_vs2_byte_type // ele_index = uop_index * (VLEN/vs2_eew) + BYTE_INDEX[MSB:vs2_eew] assign vs2_enable[i] = uop_info.vm ? 1'b1 : vs2_enable_tmp[i >> vs2_eew_shift]; - assign vs2_ele_index[i] = uop_vs2_start + (i >> vs2_eew_shift); + assign vs2_ele_index[i] = (`VL_WIDTH)'(uop_vs2_start) + (i >> vs2_eew_shift); always_comb begin if (uop_info.ignore_vta&uop_info.ignore_vma) vs2[i] = BODY_ACTIVE; @@ -127,8 +127,7 @@ else if (vs2_ele_index[i] < {1'b0, uop_info.vstart}) vs2[i] = NOT_CHANGE; // prestart else begin - vs2[i] = (vs2_enable[i] || uop_info.ignore_vma) ? BODY_ACTIVE - : BODY_INACTIVE; + vs2[i] = (vs2_enable[i] || uop_info.ignore_vma) ? 
BODY_ACTIVE : BODY_INACTIVE; end end end @@ -150,21 +149,21 @@ {EEW32,EEW32}, {EEW16,EEW16}, {EEW8,EEW8}: begin - uop_v0_start = uop_info.uop_index << (VLENB_WIDTH - vd_eew_shift); + uop_v0_start = (`VSTART_WIDTH)'(uop_info.uop_index) << (VLENB_WIDTH - vd_eew_shift); uop_vd_start = uop_v0_start; uop_vd_end = uop_vd_start + (`VLENB >> eew_max_shift) - 1'b1; end {EEW32,EEW16}, {EEW16,EEW8}: begin // narrowing instruction: EEW_vd:EEW_vs = 1:2 - uop_v0_start = uop_info.uop_index[`UOP_INDEX_WIDTH-1:1] << (VLENB_WIDTH - vd_eew_shift); + uop_v0_start = (`VSTART_WIDTH)'(uop_info.uop_index[`UOP_INDEX_WIDTH-1:1]) << (VLENB_WIDTH - vd_eew_shift); uop_vd_start = uop_info.uop_index[0] ? uop_v0_start + (`VLENB >> eew_max_shift): uop_v0_start; uop_vd_end = uop_vd_start + (`VLENB >> eew_max_shift) - 1'b1 ; end {EEW32,EEW8}: begin // narrowing instruction: EEW_vd:EEW_vs = 1:4 - uop_v0_start = uop_info.uop_index[`UOP_INDEX_WIDTH-1:2] << VLENB_WIDTH; + uop_v0_start = (`VSTART_WIDTH)'(uop_info.uop_index[`UOP_INDEX_WIDTH-1:2]) << VLENB_WIDTH; case(uop_info.uop_index[1:0]) 2'd3: begin uop_vd_start = uop_v0_start + `VLENB*3/4; @@ -195,7 +194,7 @@ if (i==0) begin // ele_index = uop_index * (VLEN/vd_eew) + BYTE_INDEX[MSB:vd_eew] assign vd_enable[0] = uop_info.vm ? 1'b1 : vd_enable_tmp[0]; - assign vd_ele_index[0] = uop_v0_start; + assign vd_ele_index[0] = (`VL_WIDTH)'(uop_v0_start); always_comb begin v0_strobe[0] = 'b0; @@ -228,7 +227,7 @@ end else begin // ele_index = uop_index * (VLEN/vd_eew) + BYTE_INDEX[MSB:vd_eew] assign vd_enable[i] = uop_info.vm ? 1'b1 : vd_enable_tmp[i >> vd_eew_shift]; - assign vd_ele_index[i] = uop_v0_start + (i >> vd_eew_shift); + assign vd_ele_index[i] = (`VL_WIDTH)'(uop_v0_start) + (i >> vd_eew_shift); always_comb begin v0_strobe[i] = 'b0;
diff --git a/hdl/verilog/rvv/design/rvv_backend_div_unit.sv b/hdl/verilog/rvv/design/rvv_backend_div_unit.sv index 77a66bd..d60697e 100755 --- a/hdl/verilog/rvv/design/rvv_backend_div_unit.sv +++ b/hdl/verilog/rvv/design/rvv_backend_div_unit.sv
@@ -319,7 +319,7 @@ for(j=0;j<`VLENB/2;j++) begin: DIVIDER8 rvv_backend_div_unit_divider #( - .DIV_WIDTH (`BYTE_WIDTH) + .DIV_WIDTH (8'd`BYTE_WIDTH) ) divider_8bit ( @@ -342,7 +342,7 @@ for(j=0;j<`VLEN/`HWORD_WIDTH/2;j++) begin: DIVIDER16 rvv_backend_div_unit_divider #( - .DIV_WIDTH (`HWORD_WIDTH) + .DIV_WIDTH (8'd`HWORD_WIDTH) ) divider_16bit ( @@ -365,7 +365,7 @@ for(j=0;j<`VLEN/`WORD_WIDTH;j++) begin: DIVIDER32 rvv_backend_div_unit_divider #( - .DIV_WIDTH (`WORD_WIDTH) + .DIV_WIDTH (8'd`WORD_WIDTH) ) divider_32bit (
diff --git a/hdl/verilog/rvv/design/rvv_backend_div_unit_divider.sv b/hdl/verilog/rvv/design/rvv_backend_div_unit_divider.sv index a25a32a..336feba 100755 --- a/hdl/verilog/rvv/design/rvv_backend_div_unit_divider.sv +++ b/hdl/verilog/rvv/design/rvv_backend_div_unit_divider.sv
@@ -20,16 +20,13 @@ result_quotient, result_remainder, result_valid, -`ifdef TB_SUPPORT - res_reuse_valid_p1, -`endif result_ready, trap_flush_rvv ); // // parameter // - parameter DIV_WIDTH = `WORD_WIDTH; + parameter logic[7:0] DIV_WIDTH = `WORD_WIDTH; // // interface signals @@ -50,9 +47,6 @@ output logic [DIV_WIDTH-1:0] result_quotient; output logic [DIV_WIDTH-1:0] result_remainder; output logic result_valid; -`ifdef TB_SUPPORT - output logic res_reuse_valid_p1; -`endif input logic result_ready; // trap-flush @@ -202,19 +196,6 @@ .q (r_sgn_q) ); -`ifdef TB_SUPPORT - always_ff @(posedge clk, negedge rst_n) begin - if(rst_n=='b0) - res_reuse_valid_p1 = 'b0; - else if(next_state==DIV_IDLE) - res_reuse_valid_p1 = 'b0; - else if((state==DIV_IDLE)&div_valid) - res_reuse_valid_p1 = res_reuse_valid_p0; - else - res_reuse_valid_p1 = res_reuse_valid_p1; - end -`endif - // // FSM // @@ -266,17 +247,17 @@ endcase end - // computational logic in every state + // count leading zero generate - if (DIV_WIDTH==`WORD_WIDTH) begin + if (DIV_WIDTH== 'd`WORD_WIDTH) begin assign clzb = f_clzb32(dividend_d); assign count_shift = 'd33 - clzb; end - else if (DIV_WIDTH==`HWORD_WIDTH) begin + else if (DIV_WIDTH== 'd`HWORD_WIDTH) begin assign clzb = f_clzb16(dividend_d); assign count_shift = 'd17 - clzb; end - else if (DIV_WIDTH==`BYTE_WIDTH) begin + else if (DIV_WIDTH== 'd`BYTE_WIDTH) begin assign clzb = f_clzb8(dividend_d); assign count_shift = 'd9 - clzb; end
diff --git a/hdl/verilog/rvv/design/rvv_backend_mac_unit.sv b/hdl/verilog/rvv/design/rvv_backend_mac_unit.sv index d356171..6ae4f74 100644 --- a/hdl/verilog/rvv/design/rvv_backend_mac_unit.sv +++ b/hdl/verilog/rvv/design/rvv_backend_mac_unit.sv
@@ -726,12 +726,12 @@ mac_rslt_full_eew8_d1[i*4+j][7+:8] + {7'b0,vsmul_round_incr_eew8_d1[i*4+j]};//right shift 7bit then +"1" vsmul_sat_eew8_d1[i*4+j] = mac_rslt_full_eew8_d1[i*4+j][15:14] == 2'b01; //Below are for vmac related instructions - vmac_mul_add_eew8_no_widen_d1[i*4+j] = mac_addsrc_d1[8*(i*4+j) +: 8] + mac_rslt_eew8_no_widen_d1[8*(i*4+j) +: 8];//9bit - vmac_mul_sub_eew8_no_widen_d1[i*4+j] = mac_addsrc_d1[8*(i*4+j) +: 8] - mac_rslt_eew8_no_widen_d1[8*(i*4+j) +: 8]; + vmac_mul_add_eew8_no_widen_d1[i*4+j] = {1'b0,mac_addsrc_d1[8*(i*4+j) +: 8]} + {1'b0,mac_rslt_eew8_no_widen_d1[8*(i*4+j) +: 8]};//9bit + vmac_mul_sub_eew8_no_widen_d1[i*4+j] = {1'b0,mac_addsrc_d1[8*(i*4+j) +: 8]} - {1'b0,mac_rslt_eew8_no_widen_d1[8*(i*4+j) +: 8]}; vmac_rslt_eew8_no_widen_d1[8*(i*4+j) +:8] = mac_mul_reverse_d1 ? vmac_mul_sub_eew8_no_widen_d1[i*4+j][7:0] : vmac_mul_add_eew8_no_widen_d1[i*4+j][7:0]; - vmac_mul_add_eew8_widen_d1[i*4+j] = mac_addsrc_widen_d1[16*(i*4+j) +: 16] + mac_rslt_eew8_widen_d1[16*(i*4+j) +: 16];//17bit - vmac_mul_sub_eew8_widen_d1[i*4+j] = mac_addsrc_widen_d1[16*(i*4+j) +: 16] - mac_rslt_eew8_widen_d1[16*(i*4+j) +: 16]; + vmac_mul_add_eew8_widen_d1[i*4+j] = {1'b0,mac_addsrc_widen_d1[16*(i*4+j) +: 16]} + {1'b0,mac_rslt_eew8_widen_d1[16*(i*4+j) +: 16]};//17bit + vmac_mul_sub_eew8_widen_d1[i*4+j] = {1'b0,mac_addsrc_widen_d1[16*(i*4+j) +: 16]} - {1'b0,mac_rslt_eew8_widen_d1[16*(i*4+j) +: 16]}; vmac_rslt_eew8_widen_d1[16*(i*4+j) +: 16] = mac_mul_reverse_d1 ? 
vmac_mul_sub_eew8_widen_d1[i*4+j][15:0] : vmac_mul_add_eew8_widen_d1[i*4+j][15:0]; end @@ -765,12 +765,12 @@ mac_rslt_full_eew16_d1[i*2+j][15+:16] + {15'b0,vsmul_round_incr_eew16_d1[i*2+j]};//right shift 15bit then +"1" vsmul_sat_eew16_d1[i*2+j] = mac_rslt_full_eew16_d1[i*2+j][31:30] == 2'b01; //Below are for vmac related instructions - vmac_mul_add_eew16_no_widen_d1[i*2+j] = mac_addsrc_d1[16*(i*2+j) +: 16] + mac_rslt_eew16_no_widen_d1[16*(i*2+j) +: 16];//17bit - vmac_mul_sub_eew16_no_widen_d1[i*2+j] = mac_addsrc_d1[16*(i*2+j) +: 16] - mac_rslt_eew16_no_widen_d1[16*(i*2+j) +: 16]; + vmac_mul_add_eew16_no_widen_d1[i*2+j] = {1'b0,mac_addsrc_d1[16*(i*2+j) +: 16]} + {1'b0,mac_rslt_eew16_no_widen_d1[16*(i*2+j) +: 16]};//17bit + vmac_mul_sub_eew16_no_widen_d1[i*2+j] = {1'b0,mac_addsrc_d1[16*(i*2+j) +: 16]} - {1'b0,mac_rslt_eew16_no_widen_d1[16*(i*2+j) +: 16]}; vmac_rslt_eew16_no_widen_d1[16*(i*2+j) +:16] = mac_mul_reverse_d1 ? vmac_mul_sub_eew16_no_widen_d1[i*2+j][15:0] : vmac_mul_add_eew16_no_widen_d1[i*2+j][15:0]; - vmac_mul_add_eew16_widen_d1[i*2+j] = mac_addsrc_widen_d1[32*(i*2+j) +: 32] + mac_rslt_eew16_widen_d1[32*(i*2+j) +: 32];//33bit - vmac_mul_sub_eew16_widen_d1[i*2+j] = mac_addsrc_widen_d1[32*(i*2+j) +: 32] - mac_rslt_eew16_widen_d1[32*(i*2+j) +: 32]; + vmac_mul_add_eew16_widen_d1[i*2+j] = {1'b0,mac_addsrc_widen_d1[32*(i*2+j) +: 32]} + {1'b0,mac_rslt_eew16_widen_d1[32*(i*2+j) +: 32]};//33bit + vmac_mul_sub_eew16_widen_d1[i*2+j] = {1'b0,mac_addsrc_widen_d1[32*(i*2+j) +: 32]} - {1'b0,mac_rslt_eew16_widen_d1[32*(i*2+j) +: 32]}; vmac_rslt_eew16_widen_d1[32*(i*2+j) +: 32] = mac_mul_reverse_d1 ? 
vmac_mul_sub_eew16_widen_d1[i*2+j][31:0] : vmac_mul_add_eew16_widen_d1[i*2+j][31:0]; end @@ -822,12 +822,12 @@ mac_rslt_full_eew32_d1[i][31+:32] + {31'b0,vsmul_round_incr_eew32_d1[i]};//right shift 31bit then +"1" vsmul_sat_eew32_d1[i] = mac_rslt_full_eew32_d1[i][63:62] == 2'b01; //Below are for vmac related instructions - vmac_mul_add_eew32_no_widen_d1[i] = mac_addsrc_d1[32*i +: 32] + mac_rslt_eew32_no_widen_d1[32*i +: 32];//33bit - vmac_mul_sub_eew32_no_widen_d1[i] = mac_addsrc_d1[32*i +: 32] - mac_rslt_eew32_no_widen_d1[32*i +: 32]; + vmac_mul_add_eew32_no_widen_d1[i] = {1'b0,mac_addsrc_d1[32*i +: 32]} + {1'b0,mac_rslt_eew32_no_widen_d1[32*i +: 32]};//33bit + vmac_mul_sub_eew32_no_widen_d1[i] = {1'b0,mac_addsrc_d1[32*i +: 32]} - {1'b0,mac_rslt_eew32_no_widen_d1[32*i +: 32]}; vmac_rslt_eew32_no_widen_d1[32*i +:32] = mac_mul_reverse_d1 ? vmac_mul_sub_eew32_no_widen_d1[i][31:0] : vmac_mul_add_eew32_no_widen_d1[i][31:0]; - vmac_mul_add_eew32_widen_d1[i] = mac_addsrc_widen_d1[64*i +: 64] + mac_rslt_eew32_widen_d1[64*i +: 64];//65bit - vmac_mul_sub_eew32_widen_d1[i] = mac_addsrc_widen_d1[64*i +: 64] - mac_rslt_eew32_widen_d1[64*i +: 64]; + vmac_mul_add_eew32_widen_d1[i] = {1'b0,mac_addsrc_widen_d1[64*i +: 64]} + {1'b0,mac_rslt_eew32_widen_d1[64*i +: 64]};//65bit + vmac_mul_sub_eew32_widen_d1[i] = {1'b0,mac_addsrc_widen_d1[64*i +: 64]} - {1'b0,mac_rslt_eew32_widen_d1[64*i +: 64]}; vmac_rslt_eew32_widen_d1[64*i +: 64] = mac_mul_reverse_d1 ? vmac_mul_sub_eew32_widen_d1[i][63:0] : vmac_mul_add_eew32_widen_d1[i][63:0]; end
diff --git a/hdl/verilog/rvv/design/rvv_backend_mul_unit_mul8.sv b/hdl/verilog/rvv/design/rvv_backend_mul_unit_mul8.sv index c291c85..360cfce 100644 --- a/hdl/verilog/rvv/design/rvv_backend_mul_unit_mul8.sv +++ b/hdl/verilog/rvv/design/rvv_backend_mul_unit_mul8.sv
@@ -24,7 +24,7 @@ wire [8:0] in0_int = {in0_is_signed&in0[7],in0}; wire [8:0] in1_int = {in1_is_signed&in1[7],in1}; -wire [17:0] out_int = $signed(in0_int)*$signed(in1_int); +wire [17:0] out_int = {{9{in0_int[8]}},in0_int} * {{9{in1_int[8]}},in1_int}; assign out = out_int[0+:16];
diff --git a/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv b/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv index 862e469..09417e4 100644 --- a/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv +++ b/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv
@@ -138,6 +138,7 @@ logic pmt_go, pmt_go_q; // start to execute pmt inst when all uop(s) are in RS logic [`UOP_INDEX_WIDTH-1:0] pmt_uop_done_cnt_d, pmt_uop_done_cnt_q; logic [`VLENB-1:0][`XLEN+1:0] offset; + logic [`VLENB-1:0][`XLEN+1:0] slide_down_offset; logic [`VLENB-1:0] sel_scalar; BYTE_TYPE_t vd_type; logic [`VLMAX_MAX-1:0][7:0] pmt_vs2_data, pmt_vs3_data; @@ -2244,12 +2245,12 @@ // cmp_res_d/cmp_res_q always_comb begin case (pmtrdt_uop.vs2_eew) - EEW32: cmp_res_en = {'0, 1'b1} << cmp_res_en_offset; - EEW16: cmp_res_en = {'0, 2'b11} << cmp_res_en_offset; - default: cmp_res_en = {'0, 4'b1111} << cmp_res_en_offset; + EEW32: cmp_res_en = (2*`VLENB)'('b1) << cmp_res_en_offset; + EEW16: cmp_res_en = (2*`VLENB)'('b11) << cmp_res_en_offset; + default: cmp_res_en = (2*`VLENB)'('b1111) << cmp_res_en_offset; endcase end - assign cmp_res_d = {'0, cmp_res} << cmp_res_offset; + assign cmp_res_d = (`VLEN)'(cmp_res) << cmp_res_offset; for (i=0; i<(2*`VLENB); i++) begin edff #(.T(logic[`VLEN/32-1:0])) cmp_res_reg (.q(cmp_res_q[`VLEN/32*i+:`VLEN/32]), .d(cmp_res_d[`VLEN/32*i+:`VLEN/32]), .e(cmp_res_en[i] & pmtrdt_uop_valid & pmtrdt_uop_ready), .clk(clk), .rst_n(rst_n)); end @@ -2302,29 +2303,29 @@ SLIDE_UP:begin if (pmtrdt_uop.uop_funct3 == OPMVX) case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew - EEW32:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-4; - EEW16:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-2; - default:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-1; + EEW32:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-4); + EEW16:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-2); + default:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-1); endcase else case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew - EEW32:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i - (4*pmtrdt_uop.rs1_data); - EEW16:offset[i] = 
uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i - (2*pmtrdt_uop.rs1_data); - default:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+ i - pmtrdt_uop.rs1_data; + EEW32:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i - (4*pmtrdt_uop.rs1_data)); + EEW16:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i - (2*pmtrdt_uop.rs1_data)); + default:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+ i - pmtrdt_uop.rs1_data); endcase end SLIDE_DOWN:begin if (pmtrdt_uop.uop_funct3 == OPMVX) case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew - EEW32:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+4; - EEW16:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+2; - default:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+1; + EEW32:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+4); + EEW16:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+2); + default:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+1); endcase else case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew - EEW32:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i + (4*pmtrdt_uop.rs1_data); - EEW16:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i + (2*pmtrdt_uop.rs1_data); - default:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i + pmtrdt_uop.rs1_data; + EEW32:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i + (4*pmtrdt_uop.rs1_data)); + EEW16:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i + (2*pmtrdt_uop.rs1_data)); + default:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i + pmtrdt_uop.rs1_data); endcase end GATHER:begin @@ -2332,27 +2333,27 @@ OPIVX, OPIVI:begin case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew - EEW32:offset[i] = i%4 + {pmtrdt_uop.rs1_data,2'b0}; - 
EEW16:offset[i] = i%2 + {pmtrdt_uop.rs1_data,1'b0}; - default:offset[i] = pmtrdt_uop.rs1_data; + EEW32:offset[i] = (`XLEN+2)'(i%4 + {pmtrdt_uop.rs1_data,2'b0}); + EEW16:offset[i] = (`XLEN+2)'(i%2 + {pmtrdt_uop.rs1_data,1'b0}); + default:offset[i] = (`XLEN+2)'(pmtrdt_uop.rs1_data); endcase end default:begin case (pmtrdt_uop.vs1_eew) - EEW32: offset[i] = i%4 + (4*{{(`XLEN-32){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/4+i/4)/(`VLENB/4)].vs1_data[32*((i/4)%(`VLENB/4))+:32]}); + EEW32: offset[i] = (`XLEN+2)'(i%4 + (4*{{(`XLEN-32){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/4+i/4)/(`VLENB/4)].vs1_data[32*((i/4)%(`VLENB/4))+:32]})); EEW16: begin case (pmtrdt_uop.vs2_eew) // vrgatherei16 - EEW32:offset[i] = i%4 + (4*{{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/4+i/4)/(`VLENB/4)].vs1_data[16*((pmt_uop_done_cnt_q*`VLENB/4+i/4)%(`VLENB/2))+:16]}); - EEW16:offset[i] = i%2 + (2*{{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/2+i/2)/(`VLENB/2)].vs1_data[16*((i/2)%(`VLENB/2))+:16]}); - default:offset[i] = {{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB+i)/(`VLENB)].vs1_data[16*(i%(`VLENB/2))+:16]}; + EEW32:offset[i] = (`XLEN+2)'(i%4 + (4*{{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/4+i/4)/(`VLENB/4)].vs1_data[16*((pmt_uop_done_cnt_q*`VLENB/4+i/4)%(`VLENB/2))+:16]})); + EEW16:offset[i] = (`XLEN+2)'(i%2 + (2*{{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/2+i/2)/(`VLENB/2)].vs1_data[16*((i/2)%(`VLENB/2))+:16]})); + default:offset[i] = (`XLEN+2)'({{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB+i)/(`VLENB)].vs1_data[16*(i%(`VLENB/2))+:16]}); endcase end - default: offset[i] = {{(`XLEN-8){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB+i)/(`VLENB)].vs1_data[8*(i%(`VLENB))+:8]}; + default: offset[i] = (`XLEN+2)'({{(`XLEN-8){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB+i)/(`VLENB)].vs1_data[8*(i%(`VLENB))+:8]}); endcase end endcase end - default: offset[i] = i; + default: offset[i] = (`XLEN+2)'(i); endcase end end @@ -2366,18 
+2367,21 @@ SLIDE_UP:begin if (pmt_uop_done_cnt_q == 0) case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew - EEW32:sel_scalar = 'hF; - EEW16:sel_scalar = 'h3; - default:sel_scalar = 'h1; + EEW32:sel_scalar = (`VLENB)'('hF); + EEW16:sel_scalar = (`VLENB)'('h3); + default:sel_scalar = (`VLENB)'('h1); endcase else sel_scalar = '0; end SLIDE_DOWN:begin case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew - EEW32:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*(`VLENB/4) >= rdt_ctrl.vl ? 'hF << ((rdt_ctrl.vl-1)%(`VLENB/4))*4 : '0; - EEW16:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*(`VLENB/2) >= rdt_ctrl.vl ? 'h3 << ((rdt_ctrl.vl-1)%(`VLENB/2))*2 : '0; - default:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*`VLENB >= rdt_ctrl.vl ? 'h1 << ((rdt_ctrl.vl-1)%(`VLENB))*1 : '0; + EEW32:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*(`VLENB/4) >= rdt_ctrl.vl ? + (`VLENB)'('hF) << ((rdt_ctrl.vl-1)%(`VLENB/4))*4 : '0; + EEW16:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*(`VLENB/2) >= rdt_ctrl.vl ? + (`VLENB)'('h3) << ((rdt_ctrl.vl-1)%(`VLENB/2))*2 : '0; + default:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*`VLENB >= rdt_ctrl.vl ? + (`VLENB)'('h1) << ((rdt_ctrl.vl-1)%(`VLENB))*1 : '0; endcase end default:sel_scalar = '0; @@ -2415,34 +2419,29 @@ assign pmt_res_en = pmt_go; for (i=0; i<`VLENB; i++) begin always_comb begin + slide_down_offset[i] = offset[i]-(pmtrdt_uop.uop_index*`VLENB); if (sel_scalar[i]) pmt_res_d[i] = pmt_rs1_data[8*(i%4)+:8]; else case (pmt_ctrl.pmt_opr) SLIDE_UP:begin case (pmtrdt_uop.vs2_eew) // permutation instruction - // TODO(derekjchow): Fix me - // EEW32: pmt_res_d[i] = offset[i] >= 4*pmtrdt_uop.vlmax ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i]]; - // EEW16: pmt_res_d[i] = offset[i] >= 2*pmtrdt_uop.vlmax ? 
pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i]]; - // default: pmt_res_d[i] = offset[i] >= pmtrdt_uop.vlmax ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i]]; - default: pmt_res_d[i] = 0; + EEW32: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(4*pmtrdt_uop.vlmax) ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i][7:0]]; + EEW16: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(2*pmtrdt_uop.vlmax) ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i][7:0]]; + default: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(pmtrdt_uop.vlmax) ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i][7:0]]; endcase end SLIDE_DOWN:begin case (pmtrdt_uop.vs2_eew) - // TODO(derekjchow): Fix me - // EEW32: pmt_res_d[i] = offset[i] >= 4*pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]-(pmtrdt_uop.uop_index*`VLENB)]; - // EEW16: pmt_res_d[i] = offset[i] >= 2*pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]-(pmtrdt_uop.uop_index*`VLENB)]; - // default: pmt_res_d[i] = offset[i] >= pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]-(pmtrdt_uop.uop_index*`VLENB)]; - default: pmt_res_d[i] = 0; + EEW32: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(4*pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[slide_down_offset[i][7:0]]; + EEW16: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(2*pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[slide_down_offset[i][7:0]]; + default: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[slide_down_offset[i][7:0]]; endcase end default: begin case (pmtrdt_uop.vs2_eew) - // TODO(derekjchow): Fix me - // EEW32: pmt_res_d[i] = offset[i] >= 4*pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]]; - // EEW16: pmt_res_d[i] = offset[i] >= 2*pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]]; - // default: pmt_res_d[i] = offset[i] >= pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]]; - default: pmt_res_d[i] = 0; + EEW32: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(4*pmtrdt_uop.vlmax) ? 
'0 : pmt_vs2_data[offset[i][7:0]]; + EEW16: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(2*pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[offset[i][7:0]]; + default: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[offset[i][7:0]]; endcase end endcase @@ -2519,14 +2518,14 @@ // compress_res is driven by compress_value and compress_cnt. always_comb begin - if (pmtrdt_uop.first_uop_valid) compress_res_d = {'0, compress_value}; + if (pmtrdt_uop.first_uop_valid) compress_res_d = (2*`VLENB*8)'(compress_value); else compress_res_d = f_circular_shift(compress_value, compress_cnt_q); end // compress_res_en always_comb begin if (compress_ctrl_push) - if (pmtrdt_uop.first_uop_valid) compress_res_en = {'0, f_pack_1s(compress_enable)}; + if (pmtrdt_uop.first_uop_valid) compress_res_en = (2*`VLENB)'(f_pack_1s(compress_enable)); else compress_res_en = f_circular_en(compress_enable,compress_cnt_q); else compress_res_en = '0; @@ -2681,7 +2680,7 @@ for (i=0; i<`VLENB; i++) results[i] = '1; for (i=0; i<`VLENB; i++) begin if (enables[i]) begin - results[j] = i; + results[j] = (VLENB_WIDTH+1)'(i); j++; end end @@ -2699,7 +2698,7 @@ logic [1:0][`VLEN-1:0] result; begin value_tmp = value; - {buf2,buf1,buf0} = value_tmp << (shift*8); + {buf2,buf1,buf0} = (3*`VLEN)'(value_tmp) << (shift*8); result = shift[VLENB_WIDTH] ? {buf1, buf2} : {buf1,buf0}; f_circular_shift = result; end @@ -2733,7 +2732,7 @@ logic [1:0][`VLENB-1:0] result; begin value_pack_1s = f_pack_1s(value); - {en2,en1,en0} = value_pack_1s << shift; + {en2,en1,en0} = (3*`VLENB)'(value_pack_1s) << shift; result = shift[VLENB_WIDTH] ? {en1, en2} : {en1, en0}; f_circular_en = result; end