Clean lint warnings: W416b, W362, W486, ...
Change-Id: Ie5350b57c334c856fee8815411ea03960649bfa5
diff --git a/hdl/verilog/rvv/common/multi_fifo.sv b/hdl/verilog/rvv/common/multi_fifo.sv
index a8be9ed..16deccf 100644
--- a/hdl/verilog/rvv/common/multi_fifo.sv
+++ b/hdl/verilog/rvv/common/multi_fifo.sv
@@ -121,7 +121,7 @@
endgenerate
// dataout
always_comb begin
- pop_count = pop[0];
+ pop_count = {(DEPTH_BITS)'(0), pop[0]};
for (int j=1; j<N; j++) pop_count = pop_count + pop[j];
end
@@ -146,7 +146,9 @@
always_ff @(posedge clk) begin
if ((i<remain_count)&(|pop))
dataout[i] <= mem[current_rptr_mem[i]];
- else if ((push_seq[current_rptr_psh[i]]&(current_rptr_psh[i]<M))&((|pop)|(|push_seq)))
+ else if ((push_seq[current_rptr_psh[i]]&(current_rptr_psh[i]<(DEPTH_BITS)'(M)))&
+ ((|pop)|(|push_seq))
+ )
dataout[i] <= datain_seq[current_rptr_psh[i]];
end
end
@@ -155,7 +157,9 @@
always_ff @(posedge clk) begin
if ((i<remain_count)&(|pop))
dataout[i] <= mem[current_rptr_mem[i]];
- else if ((push[current_rptr_psh[i]]&(current_rptr_psh[i]<M))&((|pop)|(|push)))
+ else if ((push[current_rptr_psh[i]]&(current_rptr_psh[i]<(DEPTH_BITS)'(M)))&
+ ((|pop)|(|push))
+ )
dataout[i] <= datain[current_rptr_psh[i]];
end
end
@@ -170,7 +174,7 @@
// datain
always_comb begin
- push_count = push[0];
+ push_count = {(DEPTH_BITS)'(0), push[0]};
for (int j=1; j<M; j++) push_count = push_count + push[j];
end
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv
index 2d6aa0f..cc35a7e 100644
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_addsub.sv
@@ -1674,9 +1674,9 @@
logic cout;
if (opcode==ADDSUB_VADD)
- {cout,result} = src_x + src_y + src_cin;
+ {cout,result} = (`BYTE_WIDTH+1)'(src_x) + (`BYTE_WIDTH+1)'(src_y) + (`BYTE_WIDTH+1)'(src_cin);
else //(opcode==ADDSUB_VSUB)
- {cout,result} = src_x - src_y - src_cin;
+ {cout,result} = (`BYTE_WIDTH+1)'(src_x) - (`BYTE_WIDTH+1)'(src_y) - (`BYTE_WIDTH+1)'(src_cin);
return {cout,result};
@@ -1726,7 +1726,7 @@
logic [`HWORD_WIDTH:0] res_lo;
res_hi = src_x[`WORD_WIDTH-1:`HWORD_WIDTH] + 1'b1;
- res_lo = src_x[`HWORD_WIDTH-1:0] + 1'b1;
+ res_lo = (`HWORD_WIDTH+1)'(src_x[`HWORD_WIDTH-1:0]) + 1'b1;
if (res_lo[`HWORD_WIDTH])
return {res_hi,res_lo[`HWORD_WIDTH-1:0]};
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv
index 4251810..dbc75bc 100644
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_execution_p1.sv
@@ -32,8 +32,8 @@
generate
if(`VLEN==128) begin
for(j=0;j<64;j++) begin: GET_VIOTA128
- assign result_data_viota[j] = alu_uop.data_viota_per64[0][j];
- assign result_data_viota[j+64] = {1'b0,alu_uop.data_viota_per64[1][j]} + {1'b0,alu_uop.data_viota_per64[0][63]};
+ assign result_data_viota[j] = ($clog2(`VLEN)+1)'(alu_uop.data_viota_per64[0][j]);
+ assign result_data_viota[j+64] = ($clog2(`VLEN)+1)'({1'b0,alu_uop.data_viota_per64[1][j]} + {1'b0,alu_uop.data_viota_per64[0][63]});
end
end
endgenerate
@@ -53,7 +53,7 @@
endgenerate
// vcpop
- assign result_data_vcpop = result_data_viota[`VLEN-1];
+ assign result_data_vcpop = (`XLEN)'(result_data_viota[`VLEN-1]);
//
// submit result to ROB
@@ -72,7 +72,7 @@
// calculate result data
case(alu_uop.alu_sub_opcode)
OP_VCPOP: begin
- result.w_data = result_data_vcpop;
+ result.w_data = (`VLEN)'(result_data_vcpop);
result.vsaturate = 'b0;
end
OP_VIOTA: begin
@@ -81,17 +81,17 @@
case(alu_uop.vd_eew)
EEW8: begin
for(int i=0; i<`VLENB;i++) begin
- result.w_data[i*`BYTE_WIDTH +: `BYTE_WIDTH] = result_data_viota8[i];
+ result.w_data[i*`BYTE_WIDTH +: `BYTE_WIDTH] = (`BYTE_WIDTH)'(result_data_viota8[i]);
end
end
EEW16: begin
for(int i=0; i<`VLEN/`HWORD_WIDTH;i++) begin
- result.w_data[i*`HWORD_WIDTH +: `HWORD_WIDTH] = result_data_viota16[i];
+ result.w_data[i*`HWORD_WIDTH +: `HWORD_WIDTH] = (`HWORD_WIDTH)'(result_data_viota16[i]);
end
end
EEW32: begin
for(int i=0; i<`VLEN/`WORD_WIDTH;i++) begin
- result.w_data[i*`WORD_WIDTH +: `WORD_WIDTH] = result_data_viota32[i];
+ result.w_data[i*`WORD_WIDTH +: `WORD_WIDTH] = (`WORD_WIDTH)'(result_data_viota32[i]);
end
end
endcase
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv
index a6d88fb..256d6e3 100644
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask.sv
@@ -191,11 +191,11 @@
result_valid = alu_uop_valid&(vs1_data_valid==1'b0)&vs2_data_valid&((vm==1'b1)||((vm==1'b0)&v0_data_valid));
alu_sub_opcode = OP_VIOTA;
// it can get the viota result in one cycle whose element index in vd belongs to 0-31.
- // Otherwise, it will get the result in next cycle.
+ // Otherwise, it will get the result in the next cycle.
case(vd_eew)
- EEW8 : result_2cycle = uop_index >= 32/(`VLEN/8);
- EEW16 : result_2cycle = uop_index >= 32/(`VLEN/16);
- default: result_2cycle = uop_index >= 32/(`VLEN/32); //EEW32
+ EEW8 : result_2cycle = uop_index >= (`UOP_INDEX_WIDTH)'(32/(`VLEN/8));
+ EEW16 : result_2cycle = uop_index >= (`UOP_INDEX_WIDTH)'(32/(`VLEN/16));
+ default: result_2cycle = uop_index >= (`UOP_INDEX_WIDTH)'(32/(`VLEN/32)); //EEW32
endcase
end
VID: begin
@@ -330,7 +330,7 @@
else begin
for(int i=0;i<`VLEN;i++) begin
if (result_data_vmsof[i]==1'b1)
- result_data_vfirst = i; // one-hot to 8421BCD. get the index of first 1
+ result_data_vfirst = (`VLEN)'(i); // one-hot to binary index: record the position i of the set bit in result_data_vmsof
end
end
end
@@ -347,24 +347,24 @@
end
for(j=0; j<`VLENB;j++) begin: GET_VIOTA8
- if ($clog2(32/`VLENB)<=3) // There may be up to 8 uops, so RHS in if-condition is $clog2(8)=3
- assign result_data_viota8[j] = data_viota_per32[0][{alu_uop.uop_index[$clog2(32/`VLENB)-1:0],j[$clog2(`VLENB)-1:0]}];
+ if ((3)'($clog2(32/`VLENB)) <= 3'd3) // There may be up to 8 uops, so RHS in if-condition is $clog2(8)=3
+ assign result_data_viota8[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[$clog2(32/`VLENB)-1:0],j[$clog2(`VLENB)-1:0]}]);
else
- assign result_data_viota8[j] = data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLENB)-1:0]}];
+ assign result_data_viota8[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLENB)-1:0]}]);
end
for(j=0; j<`VLEN/`HWORD_WIDTH;j++) begin: GET_VIOTA16
- if ($clog2(32/(`VLEN/`HWORD_WIDTH))<=3)
- assign result_data_viota16[j] = data_viota_per32[0][{alu_uop.uop_index[$clog2(32/(`VLEN/`HWORD_WIDTH))-1:0],j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]}];
+ if ((3)'($clog2(32/(`VLEN/`HWORD_WIDTH))) <= 3'd3)
+ assign result_data_viota16[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[$clog2(32/(`VLEN/`HWORD_WIDTH))-1:0],j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]}]);
else
- assign result_data_viota16[j] = data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]}];
+ assign result_data_viota16[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]}]);
end
for(j=0; j<`VLEN/`WORD_WIDTH;j++) begin: GET_VIOTA32
- if ($clog2(32/(`VLEN/`WORD_WIDTH))<=3)
- assign result_data_viota32[j] = data_viota_per32[0][{alu_uop.uop_index[$clog2(32/(`VLEN/`WORD_WIDTH))-1:0],j[$clog2(`VLEN/`WORD_WIDTH)-1:0]}];
+ if ((3)'($clog2(32/(`VLEN/`WORD_WIDTH))) <= 3'd3)
+ assign result_data_viota32[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[$clog2(32/(`VLEN/`WORD_WIDTH))-1:0],j[$clog2(`VLEN/`WORD_WIDTH)-1:0]}]);
else
- assign result_data_viota32[j] = data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLEN/`WORD_WIDTH)-1:0]}];
+ assign result_data_viota32[j] = ($clog2(`VLEN)+1)'(data_viota_per32[0][{alu_uop.uop_index[2:0],j[$clog2(`VLEN/`WORD_WIDTH)-1:0]}]);
end
for(j=0;j<`VLEN/64;j++) begin: GET_VIOTA_PER64_J
@@ -378,19 +378,19 @@
// vid
generate
for(j=0;j<`VLENB;j++) begin: GET_VID8
- assign result_data_vid8[j*`BYTE_WIDTH +: `BYTE_WIDTH] = {uop_index, j[$clog2(`VLENB)-1:0]};
+ assign result_data_vid8[j*`BYTE_WIDTH +: `BYTE_WIDTH] = (`BYTE_WIDTH)'({uop_index, j[$clog2(`VLENB)-1:0]});
end
endgenerate
generate
for(j=0;j<`VLEN/`HWORD_WIDTH;j++) begin: GET_VID16
- assign result_data_vid16[j*`HWORD_WIDTH +: `HWORD_WIDTH] = {uop_index, j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]};
+ assign result_data_vid16[j*`HWORD_WIDTH +: `HWORD_WIDTH] = (`HWORD_WIDTH)'({uop_index, j[$clog2(`VLEN/`HWORD_WIDTH)-1:0]});
end
endgenerate
generate
for(j=0;j<`VLEN/`WORD_WIDTH;j++) begin: GET_VID32
- assign result_data_vid32[j*`WORD_WIDTH +: `WORD_WIDTH] = {uop_index, j[$clog2(`VLEN/`WORD_WIDTH)-1:0]};
+ assign result_data_vid32[j*`WORD_WIDTH +: `WORD_WIDTH] = (`WORD_WIDTH)'({uop_index, j[$clog2(`VLEN/`WORD_WIDTH)-1:0]});
end
endgenerate
@@ -464,17 +464,17 @@
case(vd_eew)
EEW8: begin
for(int i=0; i<`VLENB;i++) begin
- result_data[i*`BYTE_WIDTH +: `BYTE_WIDTH] = result_data_viota8[i];
+ result_data[i*`BYTE_WIDTH +: `BYTE_WIDTH] = (`BYTE_WIDTH)'(result_data_viota8[i]);
end
end
EEW16: begin
for(int i=0; i<`VLEN/`HWORD_WIDTH;i++) begin
- result_data[i*`HWORD_WIDTH +: `HWORD_WIDTH] = result_data_viota16[i];
+ result_data[i*`HWORD_WIDTH +: `HWORD_WIDTH] = (`HWORD_WIDTH)'(result_data_viota16[i]);
end
end
EEW32: begin
for(int i=0; i<`VLEN/`WORD_WIDTH;i++) begin
- result_data[i*`WORD_WIDTH +: `WORD_WIDTH] = result_data_viota32[i];
+ result_data[i*`WORD_WIDTH +: `WORD_WIDTH] = (`WORD_WIDTH)'(result_data_viota32[i]);
end
end
endcase
@@ -502,7 +502,7 @@
//
// submit result to ROB
//
- assign vstart_onehot = 1'b1<<vstart;
+ assign vstart_onehot = (`VLEN)'('b1)<<vstart;
assign vstart_onehot_sub1 = vstart_onehot - 1'b1;
always_comb begin
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask_viota.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask_viota.sv
index 30a9876..3d3b5cd 100644
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask_viota.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_mask_viota.sv
@@ -143,10 +143,11 @@
generate
for(j=0;j<7;j++) begin: GET_VIOTA32_27_0
- assign result_viota32[j] = result_viota7[0][j];
- assign result_viota32[j+7] = result_viota7[1][j]+result_viota7[0][6];
- assign result_viota32[j+14] = sum_20to14[j]+{carry_20to14[j],1'b0};
- assign result_viota32[j+21] = sum_27to21[j]+{({1'b0,carry_27to21[j]}+{1'b0,cout_27to21[j]}),1'b0};
+ assign result_viota32[j] = ($clog2(32)+1)'(result_viota7[0][j]);
+ assign result_viota32[j+7] = ($clog2(32)+1)'(result_viota7[1][j])+($clog2(32)+1)'(result_viota7[0][6]);
+ assign result_viota32[j+14] = ($clog2(32)+1)'(sum_20to14[j])+($clog2(32)+1)'({carry_20to14[j],1'b0});
+ assign result_viota32[j+21] = ($clog2(32)+1)'(sum_27to21[j])+($clog2(32)+1)'({({1'b0,carry_27to21[j]})+($clog2(32)+1)'({1'b0,cout_27to21[j]}),1'b0});
+
compressor_3_2
#(
@@ -179,7 +180,9 @@
end
for(j=0;j<4;j++) begin: GET_VIOTA32_31_28
- assign result_viota32[j+28] = sum_31to28[j]+{({1'b0,carry_31to28[j]}+{1'b0,cout_31to28[j]}),1'b0};
+ assign result_viota32[j+28] = ($clog2(32)+1)'(sum_31to28[j])+
+ ($clog2(32)+1)'({({1'b0,carry_31to28[j]})+
+ ($clog2(32)+1)'({1'b0,cout_31to28[j]}),1'b0});
compressor_4_2
#(
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv
index a9fd507..b9d5498 100644
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_other.sv
@@ -371,36 +371,36 @@
VZEXT_VF2: begin
case(vs2_eew)
EEW8: begin
- result_data_extend[(2*j )*`HWORD_WIDTH +: `HWORD_WIDTH] = src2_data[(2*j )*`BYTE_WIDTH +: `BYTE_WIDTH];
- result_data_extend[(2*j+1)*`HWORD_WIDTH +: `HWORD_WIDTH] = src2_data[(2*j+1)*`BYTE_WIDTH +: `BYTE_WIDTH];
+ result_data_extend[(2*j )*`HWORD_WIDTH +: `HWORD_WIDTH] = {8'b0, src2_data[(2*j )*`BYTE_WIDTH +: `BYTE_WIDTH]};
+ result_data_extend[(2*j+1)*`HWORD_WIDTH +: `HWORD_WIDTH] = {8'b0, src2_data[(2*j+1)*`BYTE_WIDTH +: `BYTE_WIDTH]};
end
EEW16: begin
- result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = src2_data[j*`HWORD_WIDTH +: `HWORD_WIDTH];
+ result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = {16'b0, src2_data[j*`HWORD_WIDTH +: `HWORD_WIDTH]};
end
endcase
end
VSEXT_VF2: begin
case(vs2_eew)
EEW8: begin
- result_data_extend[(2*j )*`HWORD_WIDTH +: `HWORD_WIDTH] = $signed(src2_data[(2*j )*`BYTE_WIDTH +: `BYTE_WIDTH]);
- result_data_extend[(2*j+1)*`HWORD_WIDTH +: `HWORD_WIDTH] = $signed(src2_data[(2*j+1)*`BYTE_WIDTH +: `BYTE_WIDTH]);
+ result_data_extend[(2*j )*`HWORD_WIDTH +: `HWORD_WIDTH] = {{8{src2_data[(2*j+1)*`BYTE_WIDTH-1]}}, src2_data[(2*j )*`BYTE_WIDTH +: `BYTE_WIDTH]};
+ result_data_extend[(2*j+1)*`HWORD_WIDTH +: `HWORD_WIDTH] = {{8{src2_data[(2*j+2)*`BYTE_WIDTH-1]}}, src2_data[(2*j+1)*`BYTE_WIDTH +: `BYTE_WIDTH]};
end
EEW16: begin
- result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = $signed(src2_data[j*`HWORD_WIDTH +: `HWORD_WIDTH]);
+ result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = {{16{src2_data[(j+1)*`HWORD_WIDTH-1]}},src2_data[j*`HWORD_WIDTH +: `HWORD_WIDTH]};
end
endcase
end
VZEXT_VF4: begin
case(vs2_eew)
EEW8: begin
- result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = src2_data[j*`BYTE_WIDTH +: `BYTE_WIDTH];
+ result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = {24'b0, src2_data[j*`BYTE_WIDTH +: `BYTE_WIDTH]};
end
endcase
end
VSEXT_VF4: begin
case(vs2_eew)
EEW8: begin
- result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = $signed(src2_data[j*`BYTE_WIDTH +: `BYTE_WIDTH]);
+ result_data_extend[j*`WORD_WIDTH +: `WORD_WIDTH] = {{24{src2_data[(j+1)*`BYTE_WIDTH-1]}}, src2_data[j*`BYTE_WIDTH +: `BYTE_WIDTH]};
end
endcase
end
diff --git a/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv b/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv
index ed55d3e..b84b564 100755
--- a/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_alu_unit_shift.sv
@@ -168,11 +168,11 @@
end
for(int i=`VLENB/2;i<`VLENB*3/4;i=i+1) begin
src2_data16[ i-`VLENB/2] = {8'b0,vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]};
- shift_amount16[i-`VLENB/2] = vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)];
+ shift_amount16[i-`VLENB/2] = {1'b0,vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]};
end
for(int i=`VLENB*3/4;i<`VLENB;i=i+1) begin
src2_data32[ i-`VLENB*3/4] = {24'b0,vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]};
- shift_amount32[i-`VLENB*3/4] = vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)];
+ shift_amount32[i-`VLENB*3/4] = {2'b0, vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]};
end
end
EEW16: begin
@@ -182,7 +182,7 @@
end
for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin
src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {16'b0,vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]};
- shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[i*`HWORD_WIDTH +: $clog2(`HWORD_WIDTH)];
+ shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, vs1_data[i*`HWORD_WIDTH +: $clog2(`HWORD_WIDTH)]};
end
end
EEW32: begin
@@ -204,11 +204,11 @@
end
for(int i=`VLENB/2;i<`VLENB*3/4;i=i+1) begin
src2_data16[ i-`VLENB/2] = {{8{vs2_data[(i+1)*`BYTE_WIDTH-1]}},vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]};
- shift_amount16[i-`VLENB/2] = vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)];
+ shift_amount16[i-`VLENB/2] = {1'b0,vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]};
end
for(int i=`VLENB*3/4;i<`VLENB;i=i+1) begin
src2_data32[ i-`VLENB*3/4] = {{24{vs2_data[(i+1)*`BYTE_WIDTH-1]}},vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]};
- shift_amount32[i-`VLENB*3/4] = vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)];
+ shift_amount32[i-`VLENB*3/4] = {2'b0,vs1_data[i*`BYTE_WIDTH +: $clog2(`BYTE_WIDTH)]};
end
end
EEW16: begin
@@ -218,7 +218,7 @@
end
for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin
src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {{16{vs2_data[(i+1)*`HWORD_WIDTH-1]}},vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]};
- shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[i*`HWORD_WIDTH +: $clog2(`HWORD_WIDTH)];
+ shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0,vs1_data[i*`HWORD_WIDTH +: $clog2(`HWORD_WIDTH)]};
end
end
EEW32: begin
@@ -244,9 +244,9 @@
for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin
src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {16'b0,vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]};
if (uop_index[0]==1'b0)
- shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)];
+ shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, vs1_data[i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]};
else
- shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[`VLEN/2+i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)];
+ shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, vs1_data[`VLEN/2+i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]};
end
end
EEW32: begin
@@ -275,9 +275,9 @@
for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin
src2_data32[i-`VLEN/`HWORD_WIDTH/2] = {{16{vs2_data[(i+1)*`HWORD_WIDTH-1]}},vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]};
if (uop_index[0]==1'b0)
- shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)];
+ shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, vs1_data[i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]};
else
- shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = vs1_data[`VLEN/2+i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)];
+ shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, vs1_data[`VLEN/2+i*`BYTE_WIDTH +: $clog2(`HWORD_WIDTH)]};
end
end
EEW32: begin
@@ -308,11 +308,11 @@
end
for(int i=`VLENB/2;i<`VLENB*3/4;i=i+1) begin
src2_data16[ i-`VLENB/2] = {8'b0,vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]};
- shift_amount16[i-`VLENB/2] = rs1_data[0 +: $clog2(`BYTE_WIDTH)];
+ shift_amount16[i-`VLENB/2] = {1'b0,rs1_data[0 +: $clog2(`BYTE_WIDTH)]};
end
for(int i=`VLENB*3/4;i<`VLENB;i=i+1) begin
src2_data32[ i-`VLENB*3/4] = {24'b0,vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]};
- shift_amount32[i-`VLENB*3/4] = rs1_data[0 +: $clog2(`BYTE_WIDTH)];
+ shift_amount32[i-`VLENB*3/4] = {2'b0, rs1_data[0 +: $clog2(`BYTE_WIDTH)]};
end
end
EEW16: begin
@@ -322,7 +322,7 @@
end
for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin
src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {16'b0,vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]};
- shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = rs1_data[0 +: $clog2(`HWORD_WIDTH)];
+ shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, rs1_data[0 +: $clog2(`HWORD_WIDTH)]};
end
end
EEW32: begin
@@ -344,11 +344,11 @@
end
for(int i=`VLENB/2;i<`VLENB*3/4;i=i+1) begin
src2_data16[ i-`VLENB/2] = {{8{vs2_data[(i+1)*`BYTE_WIDTH-1]}},vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]};
- shift_amount16[i-`VLENB/2] = rs1_data[0 +: $clog2(`BYTE_WIDTH)];
+ shift_amount16[i-`VLENB/2] = {1'b0,rs1_data[0 +: $clog2(`BYTE_WIDTH)]};
end
for(int i=`VLENB*3/4;i<`VLENB;i=i+1) begin
src2_data32[ i-`VLENB*3/4] = {{24{vs2_data[(i+1)*`BYTE_WIDTH-1]}},vs2_data[i*`BYTE_WIDTH +: `BYTE_WIDTH]};
- shift_amount32[i-`VLENB*3/4] = rs1_data[0 +: $clog2(`BYTE_WIDTH)];
+ shift_amount32[i-`VLENB*3/4] = {2'b0,rs1_data[0 +: $clog2(`BYTE_WIDTH)]};
end
end
EEW16: begin
@@ -358,7 +358,7 @@
end
for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin
src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {{16{vs2_data[(i+1)*`HWORD_WIDTH-1]}},vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]};
- shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = rs1_data[0 +: $clog2(`HWORD_WIDTH)];
+ shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0,rs1_data[0 +: $clog2(`HWORD_WIDTH)]};
end
end
EEW32: begin
@@ -380,7 +380,7 @@
end
for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin
src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {16'b0,vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]};
- shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = rs1_data[0 +: $clog2(`HWORD_WIDTH)];
+ shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0, rs1_data[0 +: $clog2(`HWORD_WIDTH)]};
end
end
EEW32: begin
@@ -402,7 +402,7 @@
end
for(int i=`VLEN/`HWORD_WIDTH/2;i<`VLEN/`HWORD_WIDTH;i=i+1) begin
src2_data32[ i-`VLEN/`HWORD_WIDTH/2] = {{16{vs2_data[(i+1)*`HWORD_WIDTH-1]}},vs2_data[i*`HWORD_WIDTH +: `HWORD_WIDTH]};
- shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = rs1_data[0 +: $clog2(`HWORD_WIDTH)];
+ shift_amount32[i-`VLEN/`HWORD_WIDTH/2] = {1'b0,rs1_data[0 +: $clog2(`HWORD_WIDTH)]};
end
end
EEW32: begin
diff --git a/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv b/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
index ee86f5f..61b3e2c 100644
--- a/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
@@ -3616,17 +3616,14 @@
uop_index_max = 'b0;
case(emul_max)
- EMUL1: begin
- uop_index_max = 'b0;
- end
EMUL2: begin
- uop_index_max = 'd1;
+ uop_index_max = (`UOP_INDEX_WIDTH)'('d1);
end
EMUL4: begin
- uop_index_max = 'd3;
+ uop_index_max = (`UOP_INDEX_WIDTH)'('d3);
end
EMUL8: begin
- uop_index_max = 'd7;
+ uop_index_max = (`UOP_INDEX_WIDTH)'('d7);
end
endcase
end
diff --git a/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv b/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
index e4af58f..a940fe2 100644
--- a/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
@@ -3049,7 +3049,7 @@
// calculate the uop_index used in decoding uops
generate
for(j=0;j<`NUM_DE_UOP;j++) begin: GET_UOP_INDEX
- assign uop_index_current[j] = j[`UOP_INDEX_WIDTH-1:0]+uop_index_base;
+ assign uop_index_current[j] = (`UOP_INDEX_WIDTH+1)'(j[`UOP_INDEX_WIDTH-1:0]) + (`UOP_INDEX_WIDTH+1)'(uop_index_base); // widen each operand before the add so the carry bit is kept no matter how a tool sizes a cast operand
end
endgenerate
@@ -3061,29 +3061,26 @@
uop_index_max = 'b0;
case(emul_max)
- EMUL1: begin
- uop_index_max = 'd0;
- end
EMUL2: begin
- uop_index_max = 'd1;
+ uop_index_max = (`UOP_INDEX_WIDTH)'('d1);
end
EMUL3: begin
- uop_index_max = 'd2;
+ uop_index_max = (`UOP_INDEX_WIDTH)'('d2);
end
EMUL4: begin
- uop_index_max = 'd3;
+ uop_index_max = (`UOP_INDEX_WIDTH)'('d3);
end
EMUL5: begin
- uop_index_max = 'd4;
+ uop_index_max = (`UOP_INDEX_WIDTH)'('d4);
end
EMUL6: begin
- uop_index_max = 'd5;
+ uop_index_max = (`UOP_INDEX_WIDTH)'('d5);
end
EMUL7: begin
- uop_index_max = 'd6;
+ uop_index_max = (`UOP_INDEX_WIDTH)'('d6);
end
EMUL8: begin
- uop_index_max = 'd7;
+ uop_index_max = (`UOP_INDEX_WIDTH)'('d7);
end
endcase
end
diff --git a/hdl/verilog/rvv/design/rvv_backend_dispatch.sv b/hdl/verilog/rvv/design/rvv_backend_dispatch.sv
index ed76dc5..e7ef053 100755
--- a/hdl/verilog/rvv/design/rvv_backend_dispatch.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_dispatch.sv
@@ -150,7 +150,7 @@
generate
for (i=0; i<`NUM_DP_UOP; i++) begin : gen_vlmax
assign vlmax_shift[i] ={1'b0, uop_uop2dp[i].vector_csr.lmul[1:0]} + $clog2(`VLENB) - uop_uop2dp[i].vector_csr.sew - {uop_uop2dp[i].vector_csr.lmul[2],2'b00};
- assign vlmax[i] = 'h1 << vlmax_shift[i];
+ assign vlmax[i] = (`VL_WIDTH)'(1) << vlmax_shift[i];
end
endgenerate
diff --git a/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv b/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv
index 770243a..2db3a49 100644
--- a/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_dispatch_opr_byte_type.sv
@@ -86,7 +86,7 @@
always_comb begin
case (uop_info.uop_exe_unit)
RDT:begin
- uop_vs2_start = uop_info.uop_index << (VLENB_WIDTH - vs2_eew_shift);
+ uop_vs2_start = (`VSTART_WIDTH)'(uop_info.uop_index) << (VLENB_WIDTH - vs2_eew_shift);
end
default:begin
case({eew_max,uop_info.vs2_eew})
@@ -94,16 +94,16 @@
{EEW16,EEW16},
{EEW8,EEW8}: begin
// regular and narrowing instruction
- uop_vs2_start = uop_info.uop_index << (VLENB_WIDTH - vs2_eew_shift);
+ uop_vs2_start = (`VSTART_WIDTH)'(uop_info.uop_index) << (VLENB_WIDTH - vs2_eew_shift);
end
{EEW32,EEW16},
{EEW16,EEW8}: begin
// widening instruction: EEW_vd:EEW_vs = 2:1
- uop_vs2_start = uop_info.uop_index[`UOP_INDEX_WIDTH-1:1] << (VLENB_WIDTH - vs2_eew_shift);
+ uop_vs2_start = (`VSTART_WIDTH)'(uop_info.uop_index[`UOP_INDEX_WIDTH-1:1]) << (VLENB_WIDTH - vs2_eew_shift);
end
{EEW32,EEW8}: begin
// widening instruction: EEW_vd:EEW_vs = 4:1
- uop_vs2_start = uop_info.uop_index[`UOP_INDEX_WIDTH-1:2] << (VLENB_WIDTH - vs2_eew_shift);
+ uop_vs2_start = (`VSTART_WIDTH)'(uop_info.uop_index[`UOP_INDEX_WIDTH-1:2]) << (VLENB_WIDTH - vs2_eew_shift);
end
default: begin
uop_vs2_start = 'b0;
@@ -118,7 +118,7 @@
for (i=0; i<`VLENB; i++) begin : gen_vs2_byte_type
// ele_index = uop_index * (VLEN/vs2_eew) + BYTE_INDEX[MSB:vs2_eew]
assign vs2_enable[i] = uop_info.vm ? 1'b1 : vs2_enable_tmp[i >> vs2_eew_shift];
- assign vs2_ele_index[i] = uop_vs2_start + (i >> vs2_eew_shift);
+ assign vs2_ele_index[i] = (`VL_WIDTH)'(uop_vs2_start) + (i >> vs2_eew_shift);
always_comb begin
if (uop_info.ignore_vta&uop_info.ignore_vma)
vs2[i] = BODY_ACTIVE;
@@ -127,8 +127,7 @@
else if (vs2_ele_index[i] < {1'b0, uop_info.vstart})
vs2[i] = NOT_CHANGE; // prestart
else begin
- vs2[i] = (vs2_enable[i] || uop_info.ignore_vma) ? BODY_ACTIVE
- : BODY_INACTIVE;
+ vs2[i] = (vs2_enable[i] || uop_info.ignore_vma) ? BODY_ACTIVE : BODY_INACTIVE;
end
end
end
@@ -150,21 +149,21 @@
{EEW32,EEW32},
{EEW16,EEW16},
{EEW8,EEW8}: begin
- uop_v0_start = uop_info.uop_index << (VLENB_WIDTH - vd_eew_shift);
+ uop_v0_start = (`VSTART_WIDTH)'(uop_info.uop_index) << (VLENB_WIDTH - vd_eew_shift);
uop_vd_start = uop_v0_start;
uop_vd_end = uop_vd_start + (`VLENB >> eew_max_shift) - 1'b1;
end
{EEW32,EEW16},
{EEW16,EEW8}: begin
// narrowing instruction: EEW_vd:EEW_vs = 1:2
- uop_v0_start = uop_info.uop_index[`UOP_INDEX_WIDTH-1:1] << (VLENB_WIDTH - vd_eew_shift);
+ uop_v0_start = (`VSTART_WIDTH)'(uop_info.uop_index[`UOP_INDEX_WIDTH-1:1]) << (VLENB_WIDTH - vd_eew_shift);
uop_vd_start = uop_info.uop_index[0] ? uop_v0_start + (`VLENB >> eew_max_shift):
uop_v0_start;
uop_vd_end = uop_vd_start + (`VLENB >> eew_max_shift) - 1'b1 ;
end
{EEW32,EEW8}: begin
// narrowing instruction: EEW_vd:EEW_vs = 1:4
- uop_v0_start = uop_info.uop_index[`UOP_INDEX_WIDTH-1:2] << VLENB_WIDTH;
+ uop_v0_start = (`VSTART_WIDTH)'(uop_info.uop_index[`UOP_INDEX_WIDTH-1:2]) << VLENB_WIDTH;
case(uop_info.uop_index[1:0])
2'd3: begin
uop_vd_start = uop_v0_start + `VLENB*3/4;
@@ -195,7 +194,7 @@
if (i==0) begin
// ele_index = uop_index * (VLEN/vd_eew) + BYTE_INDEX[MSB:vd_eew]
assign vd_enable[0] = uop_info.vm ? 1'b1 : vd_enable_tmp[0];
- assign vd_ele_index[0] = uop_v0_start;
+ assign vd_ele_index[0] = (`VL_WIDTH)'(uop_v0_start);
always_comb begin
v0_strobe[0] = 'b0;
@@ -228,7 +227,7 @@
end else begin
// ele_index = uop_index * (VLEN/vd_eew) + BYTE_INDEX[MSB:vd_eew]
assign vd_enable[i] = uop_info.vm ? 1'b1 : vd_enable_tmp[i >> vd_eew_shift];
- assign vd_ele_index[i] = uop_v0_start + (i >> vd_eew_shift);
+ assign vd_ele_index[i] = (`VL_WIDTH)'(uop_v0_start) + (i >> vd_eew_shift);
always_comb begin
v0_strobe[i] = 'b0;
diff --git a/hdl/verilog/rvv/design/rvv_backend_div_unit.sv b/hdl/verilog/rvv/design/rvv_backend_div_unit.sv
index 77a66bd..d60697e 100755
--- a/hdl/verilog/rvv/design/rvv_backend_div_unit.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_div_unit.sv
@@ -319,7 +319,7 @@
for(j=0;j<`VLENB/2;j++) begin: DIVIDER8
rvv_backend_div_unit_divider
#(
- .DIV_WIDTH (`BYTE_WIDTH)
+ .DIV_WIDTH (8'd`BYTE_WIDTH)
)
divider_8bit
(
@@ -342,7 +342,7 @@
for(j=0;j<`VLEN/`HWORD_WIDTH/2;j++) begin: DIVIDER16
rvv_backend_div_unit_divider
#(
- .DIV_WIDTH (`HWORD_WIDTH)
+ .DIV_WIDTH (8'd`HWORD_WIDTH)
)
divider_16bit
(
@@ -365,7 +365,7 @@
for(j=0;j<`VLEN/`WORD_WIDTH;j++) begin: DIVIDER32
rvv_backend_div_unit_divider
#(
- .DIV_WIDTH (`WORD_WIDTH)
+ .DIV_WIDTH (8'd`WORD_WIDTH)
)
divider_32bit
(
diff --git a/hdl/verilog/rvv/design/rvv_backend_div_unit_divider.sv b/hdl/verilog/rvv/design/rvv_backend_div_unit_divider.sv
index a25a32a..336feba 100755
--- a/hdl/verilog/rvv/design/rvv_backend_div_unit_divider.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_div_unit_divider.sv
@@ -20,16 +20,13 @@
result_quotient,
result_remainder,
result_valid,
-`ifdef TB_SUPPORT
- res_reuse_valid_p1,
-`endif
result_ready,
trap_flush_rvv
);
//
// parameter
//
- parameter DIV_WIDTH = `WORD_WIDTH;
+ parameter logic[7:0] DIV_WIDTH = `WORD_WIDTH;
//
// interface signals
@@ -50,9 +47,6 @@
output logic [DIV_WIDTH-1:0] result_quotient;
output logic [DIV_WIDTH-1:0] result_remainder;
output logic result_valid;
-`ifdef TB_SUPPORT
- output logic res_reuse_valid_p1;
-`endif
input logic result_ready;
// trap-flush
@@ -202,19 +196,6 @@
.q (r_sgn_q)
);
-`ifdef TB_SUPPORT
- always_ff @(posedge clk, negedge rst_n) begin
- if(rst_n=='b0)
- res_reuse_valid_p1 = 'b0;
- else if(next_state==DIV_IDLE)
- res_reuse_valid_p1 = 'b0;
- else if((state==DIV_IDLE)&div_valid)
- res_reuse_valid_p1 = res_reuse_valid_p0;
- else
- res_reuse_valid_p1 = res_reuse_valid_p1;
- end
-`endif
-
//
// FSM
//
@@ -266,17 +247,17 @@
endcase
end
- // computational logic in every state
+ // count the leading zeros of dividend_d (clzb) and derive count_shift from it
generate
- if (DIV_WIDTH==`WORD_WIDTH) begin
+ if (DIV_WIDTH== 'd`WORD_WIDTH) begin
assign clzb = f_clzb32(dividend_d);
assign count_shift = 'd33 - clzb;
end
- else if (DIV_WIDTH==`HWORD_WIDTH) begin
+ else if (DIV_WIDTH== 'd`HWORD_WIDTH) begin
assign clzb = f_clzb16(dividend_d);
assign count_shift = 'd17 - clzb;
end
- else if (DIV_WIDTH==`BYTE_WIDTH) begin
+ else if (DIV_WIDTH== 'd`BYTE_WIDTH) begin
assign clzb = f_clzb8(dividend_d);
assign count_shift = 'd9 - clzb;
end
diff --git a/hdl/verilog/rvv/design/rvv_backend_mac_unit.sv b/hdl/verilog/rvv/design/rvv_backend_mac_unit.sv
index d356171..6ae4f74 100644
--- a/hdl/verilog/rvv/design/rvv_backend_mac_unit.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_mac_unit.sv
@@ -726,12 +726,12 @@
mac_rslt_full_eew8_d1[i*4+j][7+:8] + {7'b0,vsmul_round_incr_eew8_d1[i*4+j]};//right shift 7bit then +"1"
vsmul_sat_eew8_d1[i*4+j] = mac_rslt_full_eew8_d1[i*4+j][15:14] == 2'b01;
//Below are for vmac related instructions
- vmac_mul_add_eew8_no_widen_d1[i*4+j] = mac_addsrc_d1[8*(i*4+j) +: 8] + mac_rslt_eew8_no_widen_d1[8*(i*4+j) +: 8];//9bit
- vmac_mul_sub_eew8_no_widen_d1[i*4+j] = mac_addsrc_d1[8*(i*4+j) +: 8] - mac_rslt_eew8_no_widen_d1[8*(i*4+j) +: 8];
+ vmac_mul_add_eew8_no_widen_d1[i*4+j] = {1'b0,mac_addsrc_d1[8*(i*4+j) +: 8]} + {1'b0,mac_rslt_eew8_no_widen_d1[8*(i*4+j) +: 8]};//9bit
+ vmac_mul_sub_eew8_no_widen_d1[i*4+j] = {1'b0,mac_addsrc_d1[8*(i*4+j) +: 8]} - {1'b0,mac_rslt_eew8_no_widen_d1[8*(i*4+j) +: 8]};
vmac_rslt_eew8_no_widen_d1[8*(i*4+j) +:8] = mac_mul_reverse_d1 ? vmac_mul_sub_eew8_no_widen_d1[i*4+j][7:0] :
vmac_mul_add_eew8_no_widen_d1[i*4+j][7:0];
- vmac_mul_add_eew8_widen_d1[i*4+j] = mac_addsrc_widen_d1[16*(i*4+j) +: 16] + mac_rslt_eew8_widen_d1[16*(i*4+j) +: 16];//17bit
- vmac_mul_sub_eew8_widen_d1[i*4+j] = mac_addsrc_widen_d1[16*(i*4+j) +: 16] - mac_rslt_eew8_widen_d1[16*(i*4+j) +: 16];
+ vmac_mul_add_eew8_widen_d1[i*4+j] = {1'b0,mac_addsrc_widen_d1[16*(i*4+j) +: 16]} + {1'b0,mac_rslt_eew8_widen_d1[16*(i*4+j) +: 16]};//17bit
+ vmac_mul_sub_eew8_widen_d1[i*4+j] = {1'b0,mac_addsrc_widen_d1[16*(i*4+j) +: 16]} - {1'b0,mac_rslt_eew8_widen_d1[16*(i*4+j) +: 16]};
vmac_rslt_eew8_widen_d1[16*(i*4+j) +: 16] = mac_mul_reverse_d1 ? vmac_mul_sub_eew8_widen_d1[i*4+j][15:0] :
vmac_mul_add_eew8_widen_d1[i*4+j][15:0];
end
@@ -765,12 +765,12 @@
mac_rslt_full_eew16_d1[i*2+j][15+:16] + {15'b0,vsmul_round_incr_eew16_d1[i*2+j]};//right shift 15bit then +"1"
vsmul_sat_eew16_d1[i*2+j] = mac_rslt_full_eew16_d1[i*2+j][31:30] == 2'b01;
//Below are for vmac related instructions
- vmac_mul_add_eew16_no_widen_d1[i*2+j] = mac_addsrc_d1[16*(i*2+j) +: 16] + mac_rslt_eew16_no_widen_d1[16*(i*2+j) +: 16];//17bit
- vmac_mul_sub_eew16_no_widen_d1[i*2+j] = mac_addsrc_d1[16*(i*2+j) +: 16] - mac_rslt_eew16_no_widen_d1[16*(i*2+j) +: 16];
+ vmac_mul_add_eew16_no_widen_d1[i*2+j] = {1'b0,mac_addsrc_d1[16*(i*2+j) +: 16]} + {1'b0,mac_rslt_eew16_no_widen_d1[16*(i*2+j) +: 16]};//17bit
+ vmac_mul_sub_eew16_no_widen_d1[i*2+j] = {1'b0,mac_addsrc_d1[16*(i*2+j) +: 16]} - {1'b0,mac_rslt_eew16_no_widen_d1[16*(i*2+j) +: 16]};
vmac_rslt_eew16_no_widen_d1[16*(i*2+j) +:16] = mac_mul_reverse_d1 ? vmac_mul_sub_eew16_no_widen_d1[i*2+j][15:0] :
vmac_mul_add_eew16_no_widen_d1[i*2+j][15:0];
- vmac_mul_add_eew16_widen_d1[i*2+j] = mac_addsrc_widen_d1[32*(i*2+j) +: 32] + mac_rslt_eew16_widen_d1[32*(i*2+j) +: 32];//33bit
- vmac_mul_sub_eew16_widen_d1[i*2+j] = mac_addsrc_widen_d1[32*(i*2+j) +: 32] - mac_rslt_eew16_widen_d1[32*(i*2+j) +: 32];
+ vmac_mul_add_eew16_widen_d1[i*2+j] = {1'b0,mac_addsrc_widen_d1[32*(i*2+j) +: 32]} + {1'b0,mac_rslt_eew16_widen_d1[32*(i*2+j) +: 32]};//33bit
+ vmac_mul_sub_eew16_widen_d1[i*2+j] = {1'b0,mac_addsrc_widen_d1[32*(i*2+j) +: 32]} - {1'b0,mac_rslt_eew16_widen_d1[32*(i*2+j) +: 32]};
vmac_rslt_eew16_widen_d1[32*(i*2+j) +: 32] = mac_mul_reverse_d1 ? vmac_mul_sub_eew16_widen_d1[i*2+j][31:0] :
vmac_mul_add_eew16_widen_d1[i*2+j][31:0];
end
@@ -822,12 +822,12 @@
mac_rslt_full_eew32_d1[i][31+:32] + {31'b0,vsmul_round_incr_eew32_d1[i]};//right shift 31bit then +"1"
vsmul_sat_eew32_d1[i] = mac_rslt_full_eew32_d1[i][63:62] == 2'b01;
//Below are for vmac related instructions
- vmac_mul_add_eew32_no_widen_d1[i] = mac_addsrc_d1[32*i +: 32] + mac_rslt_eew32_no_widen_d1[32*i +: 32];//33bit
- vmac_mul_sub_eew32_no_widen_d1[i] = mac_addsrc_d1[32*i +: 32] - mac_rslt_eew32_no_widen_d1[32*i +: 32];
+ vmac_mul_add_eew32_no_widen_d1[i] = {1'b0,mac_addsrc_d1[32*i +: 32]} + {1'b0,mac_rslt_eew32_no_widen_d1[32*i +: 32]};//33bit
+ vmac_mul_sub_eew32_no_widen_d1[i] = {1'b0,mac_addsrc_d1[32*i +: 32]} - {1'b0,mac_rslt_eew32_no_widen_d1[32*i +: 32]};
vmac_rslt_eew32_no_widen_d1[32*i +:32] = mac_mul_reverse_d1 ? vmac_mul_sub_eew32_no_widen_d1[i][31:0] :
vmac_mul_add_eew32_no_widen_d1[i][31:0];
- vmac_mul_add_eew32_widen_d1[i] = mac_addsrc_widen_d1[64*i +: 64] + mac_rslt_eew32_widen_d1[64*i +: 64];//65bit
- vmac_mul_sub_eew32_widen_d1[i] = mac_addsrc_widen_d1[64*i +: 64] - mac_rslt_eew32_widen_d1[64*i +: 64];
+ vmac_mul_add_eew32_widen_d1[i] = {1'b0,mac_addsrc_widen_d1[64*i +: 64]} + {1'b0,mac_rslt_eew32_widen_d1[64*i +: 64]};//65bit
+ vmac_mul_sub_eew32_widen_d1[i] = {1'b0,mac_addsrc_widen_d1[64*i +: 64]} - {1'b0,mac_rslt_eew32_widen_d1[64*i +: 64]};
vmac_rslt_eew32_widen_d1[64*i +: 64] = mac_mul_reverse_d1 ? vmac_mul_sub_eew32_widen_d1[i][63:0] :
vmac_mul_add_eew32_widen_d1[i][63:0];
end
diff --git a/hdl/verilog/rvv/design/rvv_backend_mul_unit_mul8.sv b/hdl/verilog/rvv/design/rvv_backend_mul_unit_mul8.sv
index c291c85..360cfce 100644
--- a/hdl/verilog/rvv/design/rvv_backend_mul_unit_mul8.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_mul_unit_mul8.sv
@@ -24,7 +24,7 @@
wire [8:0] in0_int = {in0_is_signed&in0[7],in0};
wire [8:0] in1_int = {in1_is_signed&in1[7],in1};
-wire [17:0] out_int = $signed(in0_int)*$signed(in1_int);
+wire [17:0] out_int = {{9{in0_int[8]}},in0_int} * {{9{in1_int[8]}},in1_int}; // both 9-bit operands are sign-extended by hand to 18 bits, so the low 18 bits of this product equal those of the signed 9x9 product
assign out = out_int[0+:16];
diff --git a/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv b/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv
index 862e469..09417e4 100644
--- a/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_pmtrdt_unit.sv
@@ -138,6 +138,7 @@
logic pmt_go, pmt_go_q; // start to execute pmt inst when all uop(s) are in RS
logic [`UOP_INDEX_WIDTH-1:0] pmt_uop_done_cnt_d, pmt_uop_done_cnt_q;
logic [`VLENB-1:0][`XLEN+1:0] offset;
+ logic [`VLENB-1:0][`XLEN+1:0] slide_down_offset;
logic [`VLENB-1:0] sel_scalar;
BYTE_TYPE_t vd_type;
logic [`VLMAX_MAX-1:0][7:0] pmt_vs2_data, pmt_vs3_data;
@@ -2244,12 +2245,12 @@
// cmp_res_d/cmp_res_q
always_comb begin
case (pmtrdt_uop.vs2_eew)
- EEW32: cmp_res_en = {'0, 1'b1} << cmp_res_en_offset;
- EEW16: cmp_res_en = {'0, 2'b11} << cmp_res_en_offset;
- default: cmp_res_en = {'0, 4'b1111} << cmp_res_en_offset;
+ EEW32: cmp_res_en = (2*`VLENB)'('b1) << cmp_res_en_offset;
+ EEW16: cmp_res_en = (2*`VLENB)'('b11) << cmp_res_en_offset;
+ default: cmp_res_en = (2*`VLENB)'('b1111) << cmp_res_en_offset;
endcase
end
- assign cmp_res_d = {'0, cmp_res} << cmp_res_offset;
+ assign cmp_res_d = (`VLEN)'(cmp_res) << cmp_res_offset;
for (i=0; i<(2*`VLENB); i++) begin
edff #(.T(logic[`VLEN/32-1:0])) cmp_res_reg (.q(cmp_res_q[`VLEN/32*i+:`VLEN/32]), .d(cmp_res_d[`VLEN/32*i+:`VLEN/32]), .e(cmp_res_en[i] & pmtrdt_uop_valid & pmtrdt_uop_ready), .clk(clk), .rst_n(rst_n));
end
@@ -2302,29 +2303,29 @@
SLIDE_UP:begin
if (pmtrdt_uop.uop_funct3 == OPMVX)
case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew
- EEW32:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-4;
- EEW16:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-2;
- default:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-1;
+ EEW32:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-4);
+ EEW16:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-2);
+ default:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i-1);
endcase
else
case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew
- EEW32:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i - (4*pmtrdt_uop.rs1_data);
- EEW16:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i - (2*pmtrdt_uop.rs1_data);
- default:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+ i - pmtrdt_uop.rs1_data;
+ EEW32:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i) - (`XLEN+2)'({pmtrdt_uop.rs1_data,2'b0}); // byte offset = base - rs1_data*4; NOTE(review): one size cast around the whole expression may be evaluated at the operands' own width (tool-dependent) and lose the upper bits/borrow, so each operand is widened before subtracting
+ EEW16:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i) - (`XLEN+2)'({pmtrdt_uop.rs1_data,1'b0}); // base - rs1_data*2, subtraction done at XLEN+2 bits
+ default:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i) - (`XLEN+2)'(pmtrdt_uop.rs1_data); // base - rs1_data; a borrow wraps at XLEN+2 bits, as in the un-cast original
endcase
end
SLIDE_DOWN:begin
if (pmtrdt_uop.uop_funct3 == OPMVX)
case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew
- EEW32:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+4;
- EEW16:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+2;
- default:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+1;
+ EEW32:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+4);
+ EEW16:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+2);
+ default:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB+i+1);
endcase
else
case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew
- EEW32:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i + (4*pmtrdt_uop.rs1_data);
- EEW16:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i + (2*pmtrdt_uop.rs1_data);
- default:offset[i] = uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i + pmtrdt_uop.rs1_data;
+ EEW32:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i) + (`XLEN+2)'({pmtrdt_uop.rs1_data,2'b0}); // byte offset = base + rs1_data*4; NOTE(review): one size cast around the whole expression may be evaluated at the operands' own width (tool-dependent) and lose the carry-out, so each operand is widened before adding
+ EEW16:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i) + (`XLEN+2)'({pmtrdt_uop.rs1_data,1'b0}); // base + rs1_data*2, addition done at XLEN+2 bits
+ default:offset[i] = (`XLEN+2)'(uop_data[pmt_uop_done_cnt_q].uop_index*`VLENB + i) + (`XLEN+2)'(pmtrdt_uop.rs1_data); // base + rs1_data; the carry is kept in the upper bits of offset
endcase
end
GATHER:begin
@@ -2332,27 +2333,27 @@
OPIVX,
OPIVI:begin
case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew
- EEW32:offset[i] = i%4 + {pmtrdt_uop.rs1_data,2'b0};
- EEW16:offset[i] = i%2 + {pmtrdt_uop.rs1_data,1'b0};
- default:offset[i] = pmtrdt_uop.rs1_data;
+ EEW32:offset[i] = (`XLEN+2)'(i%4 + {pmtrdt_uop.rs1_data,2'b0});
+ EEW16:offset[i] = (`XLEN+2)'(i%2 + {pmtrdt_uop.rs1_data,1'b0});
+ default:offset[i] = (`XLEN+2)'(pmtrdt_uop.rs1_data);
endcase
end
default:begin
case (pmtrdt_uop.vs1_eew)
- EEW32: offset[i] = i%4 + (4*{{(`XLEN-32){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/4+i/4)/(`VLENB/4)].vs1_data[32*((i/4)%(`VLENB/4))+:32]});
+ EEW32: offset[i] = (`XLEN+2)'(i%4) + (`XLEN+2)'({uop_data[(pmt_uop_done_cnt_q*`VLENB/4+i/4)/(`VLENB/4)].vs1_data[32*((i/4)%(`VLENB/4))+:32], 2'b0}); // byte offset = i%4 + index*4; the *4 is built by concatenation so bits [31:30] of the 32-bit index are kept however the tool sizes a cast operand
EEW16: begin
case (pmtrdt_uop.vs2_eew) // vrgatherei16
- EEW32:offset[i] = i%4 + (4*{{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/4+i/4)/(`VLENB/4)].vs1_data[16*((pmt_uop_done_cnt_q*`VLENB/4+i/4)%(`VLENB/2))+:16]});
- EEW16:offset[i] = i%2 + (2*{{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/2+i/2)/(`VLENB/2)].vs1_data[16*((i/2)%(`VLENB/2))+:16]});
- default:offset[i] = {{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB+i)/(`VLENB)].vs1_data[16*(i%(`VLENB/2))+:16]};
+ EEW32:offset[i] = (`XLEN+2)'(i%4 + (4*{{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/4+i/4)/(`VLENB/4)].vs1_data[16*((pmt_uop_done_cnt_q*`VLENB/4+i/4)%(`VLENB/2))+:16]}));
+ EEW16:offset[i] = (`XLEN+2)'(i%2 + (2*{{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB/2+i/2)/(`VLENB/2)].vs1_data[16*((i/2)%(`VLENB/2))+:16]}));
+ default:offset[i] = (`XLEN+2)'({{(`XLEN-16){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB+i)/(`VLENB)].vs1_data[16*(i%(`VLENB/2))+:16]});
endcase
end
- default: offset[i] = {{(`XLEN-8){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB+i)/(`VLENB)].vs1_data[8*(i%(`VLENB))+:8]};
+ default: offset[i] = (`XLEN+2)'({{(`XLEN-8){1'b0}}, uop_data[(pmt_uop_done_cnt_q*`VLENB+i)/(`VLENB)].vs1_data[8*(i%(`VLENB))+:8]});
endcase
end
endcase
end
- default: offset[i] = i;
+ default: offset[i] = (`XLEN+2)'(i);
endcase
end
end
@@ -2366,18 +2367,21 @@
SLIDE_UP:begin
if (pmt_uop_done_cnt_q == 0)
case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew
- EEW32:sel_scalar = 'hF;
- EEW16:sel_scalar = 'h3;
- default:sel_scalar = 'h1;
+ EEW32:sel_scalar = (`VLENB)'('hF);
+ EEW16:sel_scalar = (`VLENB)'('h3);
+ default:sel_scalar = (`VLENB)'('h1);
endcase
else
sel_scalar = '0;
end
SLIDE_DOWN:begin
case (pmtrdt_uop.vs2_eew) // Permutation instruction: vd_eew == vs2_eew
- EEW32:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*(`VLENB/4) >= rdt_ctrl.vl ? 'hF << ((rdt_ctrl.vl-1)%(`VLENB/4))*4 : '0;
- EEW16:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*(`VLENB/2) >= rdt_ctrl.vl ? 'h3 << ((rdt_ctrl.vl-1)%(`VLENB/2))*2 : '0;
- default:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*`VLENB >= rdt_ctrl.vl ? 'h1 << ((rdt_ctrl.vl-1)%(`VLENB))*1 : '0;
+ EEW32:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*(`VLENB/4) >= rdt_ctrl.vl ?
+ (`VLENB)'('hF) << ((rdt_ctrl.vl-1)%(`VLENB/4))*4 : '0;
+ EEW16:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*(`VLENB/2) >= rdt_ctrl.vl ?
+ (`VLENB)'('h3) << ((rdt_ctrl.vl-1)%(`VLENB/2))*2 : '0;
+ default:sel_scalar = (uop_data[pmt_uop_done_cnt_q].uop_index+1'b1)*`VLENB >= rdt_ctrl.vl ?
+ (`VLENB)'('h1) << ((rdt_ctrl.vl-1)%(`VLENB))*1 : '0;
endcase
end
default:sel_scalar = '0;
@@ -2415,34 +2419,29 @@
assign pmt_res_en = pmt_go;
for (i=0; i<`VLENB; i++) begin
always_comb begin
+ slide_down_offset[i] = offset[i]-(pmtrdt_uop.uop_index*`VLENB);
if (sel_scalar[i]) pmt_res_d[i] = pmt_rs1_data[8*(i%4)+:8];
else
case (pmt_ctrl.pmt_opr)
SLIDE_UP:begin
case (pmtrdt_uop.vs2_eew) // permutation instruction
- // TODO(derekjchow): Fix me
- // EEW32: pmt_res_d[i] = offset[i] >= 4*pmtrdt_uop.vlmax ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i]];
- // EEW16: pmt_res_d[i] = offset[i] >= 2*pmtrdt_uop.vlmax ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i]];
- // default: pmt_res_d[i] = offset[i] >= pmtrdt_uop.vlmax ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i]];
- default: pmt_res_d[i] = 0;
+ EEW32: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(4*pmtrdt_uop.vlmax) ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i][7:0]]; // offset at or beyond 4*vlmax bytes: take the pmt_vs3_data byte for this lane; otherwise take the pmt_vs2_data byte selected by offset. NOTE(review): the [7:0] index can address at most 256 entries of pmt_vs2_data - confirm VLMAX_MAX <= 256
+ EEW16: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(2*pmtrdt_uop.vlmax) ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i][7:0]]; // same selection with a 2*vlmax bound
+ default: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(pmtrdt_uop.vlmax) ? pmt_vs3_data[pmt_uop_done_cnt_q*`VLENB+i] : pmt_vs2_data[offset[i][7:0]]; // same selection with a vlmax bound
endcase
end
SLIDE_DOWN:begin
case (pmtrdt_uop.vs2_eew)
- // TODO(derekjchow): Fix me
- // EEW32: pmt_res_d[i] = offset[i] >= 4*pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]-(pmtrdt_uop.uop_index*`VLENB)];
- // EEW16: pmt_res_d[i] = offset[i] >= 2*pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]-(pmtrdt_uop.uop_index*`VLENB)];
- // default: pmt_res_d[i] = offset[i] >= pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]-(pmtrdt_uop.uop_index*`VLENB)];
- default: pmt_res_d[i] = 0;
+ EEW32: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(4*pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[slide_down_offset[i][7:0]];
+ EEW16: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(2*pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[slide_down_offset[i][7:0]];
+ default: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[slide_down_offset[i][7:0]];
endcase
end
default: begin
case (pmtrdt_uop.vs2_eew)
- // TODO(derekjchow): Fix me
- // EEW32: pmt_res_d[i] = offset[i] >= 4*pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]];
- // EEW16: pmt_res_d[i] = offset[i] >= 2*pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]];
- // default: pmt_res_d[i] = offset[i] >= pmtrdt_uop.vlmax ? '0 : pmt_vs2_data[offset[i]];
- default: pmt_res_d[i] = 0;
+ EEW32: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(4*pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[offset[i][7:0]];
+ EEW16: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(2*pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[offset[i][7:0]];
+ default: pmt_res_d[i] = offset[i] >= (`XLEN+2)'(pmtrdt_uop.vlmax) ? '0 : pmt_vs2_data[offset[i][7:0]];
endcase
end
endcase
@@ -2519,14 +2518,14 @@
// compress_res is driven by compress_value and compress_cnt.
always_comb begin
- if (pmtrdt_uop.first_uop_valid) compress_res_d = {'0, compress_value};
+ if (pmtrdt_uop.first_uop_valid) compress_res_d = (2*`VLENB*8)'(compress_value);
else compress_res_d = f_circular_shift(compress_value, compress_cnt_q);
end
// compress_res_en
always_comb begin
if (compress_ctrl_push)
- if (pmtrdt_uop.first_uop_valid) compress_res_en = {'0, f_pack_1s(compress_enable)};
+ if (pmtrdt_uop.first_uop_valid) compress_res_en = (2*`VLENB)'(f_pack_1s(compress_enable));
else compress_res_en = f_circular_en(compress_enable,compress_cnt_q);
else
compress_res_en = '0;
@@ -2681,7 +2680,7 @@
for (i=0; i<`VLENB; i++) results[i] = '1;
for (i=0; i<`VLENB; i++) begin
if (enables[i]) begin
- results[j] = i;
+ results[j] = (VLENB_WIDTH+1)'(i);
j++;
end
end
@@ -2699,7 +2698,7 @@
logic [1:0][`VLEN-1:0] result;
begin
value_tmp = value;
- {buf2,buf1,buf0} = value_tmp << (shift*8);
+ {buf2,buf1,buf0} = (3*`VLEN)'(value_tmp) << (shift*8);
result = shift[VLENB_WIDTH] ? {buf1, buf2} : {buf1,buf0};
f_circular_shift = result;
end
@@ -2733,7 +2732,7 @@
logic [1:0][`VLENB-1:0] result;
begin
value_pack_1s = f_pack_1s(value);
- {en2,en1,en0} = value_pack_1s << shift;
+ {en2,en1,en0} = (3*`VLENB)'(value_pack_1s) << shift;
result = shift[VLENB_WIDTH] ? {en1, en2} : {en1, en0};
f_circular_en = result;
end