Adjust vector register order in Decoder for segment load/store. Update rvv_backend_tb for lsu changes.
Change-Id: I60d55196d033b70f5e64baddf17ae2d40e96c574
diff --git a/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv b/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
index 61b3e2c..dd4febd 100644
--- a/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
@@ -102,15 +102,18 @@
logic [`NUM_DE_UOP-1:0] force_vta_agnostic;
logic [`NUM_DE_UOP-1:0] vm;
logic [`NUM_DE_UOP-1:0] v0_valid;
- logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vd_index;
+ logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vd_index;
+ logic [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0] vd_offset;
EEW_e [`NUM_DE_UOP-1:0] vd_eew;
logic [`NUM_DE_UOP-1:0] vd_valid;
logic [`NUM_DE_UOP-1:0] vs3_valid;
- logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs1;
+ logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs1;
+ logic [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0] vs1_offset;
EEW_e [`NUM_DE_UOP-1:0] vs1_eew;
logic [`NUM_DE_UOP-1:0] vs1_index_valid;
logic [`NUM_DE_UOP-1:0] vs1_opcode_valid;
- logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs2_index;
+ logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs2_index;
+ logic [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0] vs2_offset;
EEW_e [`NUM_DE_UOP-1:0] vs2_eew;
logic [`NUM_DE_UOP-1:0] vs2_valid;
logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] rd_index;
@@ -4182,14 +4185,12 @@
end
end
- // update vd_index, eew and valid
+ // update vd_offset and valid
always_comb begin
- // initial
- vd_index = 'b0;
- vd_eew = EEW_NONE;
- vd_valid = 'b0;
-
- for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD
+ vd_offset = 'b0;
+ vd_valid = 'b0;
+
+ for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD_OFFSET
case(1'b1)
valid_opi: begin
case(funct6_ari.ari_funct6)
@@ -4212,9 +4213,8 @@
OPIVV,
OPIVX,
OPIVI: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vd_valid[i] = 1'b1;
end
endcase
end
@@ -4230,9 +4230,8 @@
case(inst_funct3)
OPIVV,
OPIVX: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vd_valid[i] = 1'b1;
end
endcase
end
@@ -4242,9 +4241,8 @@
case(inst_funct3)
OPIVX,
OPIVI: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; // one vd register per uop, matching the removed inst_vd+uop_index
+ vd_valid[i] = 1'b1; // every uop writes vd, as before the refactor
end
endcase
end
@@ -4258,9 +4256,8 @@
OPIVV,
OPIVX,
OPIVI: begin
- vd_index[i] = inst_vd;
- vd_eew[i] = eew_vd;
- vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+ vd_offset[i] = 'b0;
+ vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
end
endcase
end
@@ -4271,9 +4268,8 @@
case(inst_funct3)
OPIVV,
OPIVX: begin
- vd_index[i] = inst_vd;
- vd_eew[i] = eew_vd;
- vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+ vd_offset[i] = 'b0;
+ vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
end
endcase
end
@@ -4283,9 +4279,8 @@
case(inst_funct3)
OPIVX,
OPIVI: begin
- vd_index[i] = inst_vd;
- vd_eew[i] = eew_vd;
- vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+ vd_offset[i] = 'b0;
+ vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
end
endcase
end
@@ -4298,9 +4293,8 @@
OPIVV,
OPIVX,
OPIVI: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
+ vd_valid[i] = 1'b1;
end
endcase
end
@@ -4308,9 +4302,8 @@
VWREDSUMU,
VWREDSUM: begin
if(inst_funct3==OPIVV) begin
- vd_index[i] = inst_vd;
- vd_eew[i] = eew_vd;
- vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+ vd_offset[i] = 'b0;
+ vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
end
end
@@ -4322,24 +4315,21 @@
{EMUL2,EMUL2},
{EMUL4,EMUL4},
{EMUL8,EMUL8}: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vd_valid[i] = 1'b1;
end
{EMUL2,EMUL1},
{EMUL4,EMUL2},
{EMUL8,EMUL4}: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
+ vd_valid[i] = 1'b1;
end
endcase
end
OPIVX,
OPIVI: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vd_valid[i] = 1'b1;
end
endcase
end
@@ -4382,9 +4372,8 @@
case(inst_funct3)
OPMVV,
OPMVX: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vd_valid[i] = 1'b1;
end
endcase
end
@@ -4393,9 +4382,8 @@
VCOMPRESS: begin
case(inst_funct3)
OPMVV: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vd_valid[i] = 1'b1;
end
endcase
end
@@ -4405,9 +4393,8 @@
VSLIDE1DOWN: begin
case(inst_funct3)
OPMVX: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vd_valid[i] = 1'b1;
end
endcase
end
@@ -4422,9 +4409,8 @@
VREDXOR: begin
case(inst_funct3)
OPMVV: begin
- vd_index[i] = inst_vd;
- vd_eew[i] = eew_vd;
- vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+ vd_offset[i] = 'b0;
+ vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
end
endcase
end
@@ -4439,9 +4425,8 @@
VMXNOR: begin
case(inst_funct3)
OPMVV: begin
- vd_index[i] = inst_vd;
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = 'b0;
+ vd_valid[i] = 1'b1;
end
endcase
end
@@ -4449,9 +4434,8 @@
VWXUNARY0: begin
case(inst_funct3)
OPMVX: begin
- vd_index[i] = inst_vd;
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = 'b0;
+ vd_valid[i] = 1'b1;
end
endcase
end
@@ -4463,15 +4447,13 @@
VMSBF,
VMSIF,
VMSOF: begin
- vd_index[i] = inst_vd;
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = 'b0;
+ vd_valid[i] = 1'b1;
end
VIOTA,
VID: begin
- vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vd_eew[i] = eew_vd;
- vd_valid[i] = 1'b1;
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vd_valid[i] = 1'b1;
end
endcase
end
@@ -4483,6 +4465,14 @@
end
end
+ // update vd_index and eew: vd_index = inst_vd + zero-extended vd_offset, vd_eew = eew_vd for every uop
+ always_comb begin
+ for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD
+ vd_index[i] = inst_vd + {2'b0, vd_offset[i]};
+ vd_eew[i] = eew_vd;
+ end
+ end
+
// some uop need vd as the vs3 vector operand
always_comb begin
// initial
@@ -4610,14 +4600,12 @@
end
end
- // update vs1
+ // update vs1_offset and valid
always_comb begin
- // initial
- vs1 = 'b0;
- vs1_eew = EEW_NONE;
+ vs1_offset = 'b0;
vs1_index_valid = 'b0;
- for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS1
+ for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS1_OFFSET
case(inst_funct3)
OPIVV: begin
case(funct6_ari.ari_funct6)
@@ -4652,25 +4640,22 @@
VSSRL,
VSSRA,
VRGATHER: begin
- vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = 1'b1;
+ vs1_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs1_index_valid[i] = 1'b1;
end
VNSRL,
VNSRA,
VNCLIPU,
VNCLIP: begin
- vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = 1'b1;
+ vs1_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
+ vs1_index_valid[i] = 1'b1;
end
VWREDSUMU,
VWREDSUM: begin
- vs1[i] = inst_vs1;
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+ vs1_offset[i] = 'b0;
+ vs1_index_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
end
VSLIDEUP_RGATHEREI16: begin
@@ -4679,16 +4664,14 @@
{EMUL2,EMUL2},
{EMUL4,EMUL4},
{EMUL8,EMUL8}: begin
- vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = 1'b1;
+ vs1_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs1_index_valid[i] = 1'b1;
end
{EMUL2,EMUL1},
{EMUL4,EMUL2},
{EMUL8,EMUL4}: begin
- vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = 1'b1;
+ vs1_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
+ vs1_index_valid[i] = 1'b1;
end
endcase
end
@@ -4711,17 +4694,15 @@
VWMACCU,
VWMACC,
VWMACCSU: begin
- vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = 1'b1;
+ vs1_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
+ vs1_index_valid[i] = 1'b1;
end
VXUNARY0,
VWXUNARY0,
VMUNARY0: begin
- vs1[i] = inst_vs1; // vs1 is regarded as opcode
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = 'b0;
+ vs1_offset[i] = 'b0; // vs1 is regarded as opcode
+ vs1_index_valid[i] = 'b0;
end
VMUL,
@@ -4740,9 +4721,8 @@
VAADD,
VASUBU,
VASUB: begin
- vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = 1'b1;
+ vs1_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs1_index_valid[i] = 1'b1;
end
// reduction
@@ -4754,9 +4734,8 @@
VREDAND,
VREDOR,
VREDXOR: begin
- vs1[i] = inst_vs1;
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
+ vs1_offset[i] = 'b0;
+ vs1_index_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max;
end
VMAND,
@@ -4767,16 +4746,14 @@
VMNOR,
VMORN,
VMXNOR: begin
- vs1[i] = inst_vs1;
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = 1'b1;
+ vs1_offset[i] = 'b0;
+ vs1_index_valid[i] = 1'b1;
end
VCOMPRESS: begin
if (uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_vstart) begin
- vs1[i] = inst_vs1;
- vs1_eew[i] = eew_vs1;
- vs1_index_valid[i] = 1'b1;
+ vs1_offset[i] = 'b0;
+ vs1_index_valid[i] = 1'b1;
end
end
endcase
@@ -4785,6 +4762,14 @@
end
end
+ // update vs1 (index or opcode) and eew
+ always_comb begin
+ for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS1
+ vs1[i] = inst_vs1 + {2'b0, vs1_offset[i]};
+ vs1_eew[i] = eew_vs1;
+ end
+ end
+
// some uop will use vs1 field as an opcode to decode
always_comb begin
// initial
@@ -4830,14 +4815,13 @@
end
end
- // update vs2 index, eew and valid
+ // update vs2 offset and valid
always_comb begin
// initial
- vs2_index = 'b0;
- vs2_eew = EEW_NONE;
+ vs2_offset = 'b0;
vs2_valid = 'b0;
- for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2
+ for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2_OFFSET
case(1'b1)
valid_opi: begin
// OPI*
@@ -4869,9 +4853,8 @@
OPIVV,
OPIVX,
OPIVI: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -4890,9 +4873,8 @@
case(inst_funct3)
OPIVV,
OPIVX: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -4904,9 +4886,8 @@
case(inst_funct3)
OPIVX,
OPIVI: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -4917,8 +4898,7 @@
OPIVX,
OPIVI: begin
if(inst_vm==1'b0) begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs2_eew[i] = eew_vs2;
+ vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
vs2_valid[i] = 1'b1;
end
end
@@ -4929,9 +4909,8 @@
VWREDSUM: begin
case(inst_funct3)
OPIVV: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -4944,24 +4923,21 @@
{EMUL2,EMUL2},
{EMUL4,EMUL4},
{EMUL8,EMUL8}: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs2_eew[i] = eew_vs2;
+ vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
vs2_valid[i] = 1'b1;
end
{EMUL2,EMUL1},
{EMUL4,EMUL2},
{EMUL8,EMUL4}: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
- vs2_eew[i] = eew_vs2;
+ vs2_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
vs2_valid[i] = 1'b1;
end
endcase
end
OPIVX,
OPIVI: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -4984,9 +4960,8 @@
case(inst_funct3)
OPMVV,
OPMVX: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -5014,9 +4989,8 @@
case(inst_funct3)
OPMVV,
OPMVX: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -5028,20 +5002,17 @@
{EMUL1,EMUL1},
{EMUL2,EMUL1},
{EMUL4,EMUL1}: begin
- vs2_index[i] = inst_vs2;
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = 'b0;
+ vs2_valid[i] = 1'b1;
end
{EMUL4,EMUL2},
{EMUL8,EMUL4}: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
+ vs2_valid[i] = 1'b1;
end
{EMUL8,EMUL2}: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:2];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = {2'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:2]};
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -5051,9 +5022,8 @@
VWMACCUS: begin
case(inst_funct3)
OPMVX: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -5070,9 +5040,8 @@
VCOMPRESS: begin
case(inst_funct3)
OPMVV: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -5087,9 +5056,8 @@
VMXNOR: begin
case(inst_funct3)
OPMVV: begin
- vs2_index[i] = inst_vs2;
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = 'b0;
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -5102,9 +5070,8 @@
VMSIF,
VMSOF,
VIOTA: begin
- vs2_index[i] = inst_vs2;
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = 'b0;
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -5115,9 +5082,8 @@
VSLIDE1DOWN: begin
case(inst_funct3)
OPMVX: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -5127,6 +5093,14 @@
end
end
+ // update vs2 index and eew
+ always_comb begin
+ for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2
+ vs2_index[i] = inst_vs2 + {2'b0, vs2_offset[i]};
+ vs2_eew[i] = eew_vs2;
+ end
+ end
+
// update rd_index and valid
always_comb begin
// initial
diff --git a/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv b/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
index a940fe2..c39222e 100644
--- a/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
+++ b/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
@@ -95,6 +95,7 @@
logic [`NUM_DE_UOP-1:0] vm;
logic [`NUM_DE_UOP-1:0] v0_valid;
logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vd_index;
+ logic [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0] vd_offset;
EEW_e [`NUM_DE_UOP-1:0] vd_eew;
logic [`NUM_DE_UOP-1:0] vd_valid;
logic [`NUM_DE_UOP-1:0] vs3_valid;
@@ -103,6 +104,7 @@
logic [`NUM_DE_UOP-1:0] vs1_index_valid;
logic [`NUM_DE_UOP-1:0] vs1_opcode_valid;
logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs2_index;
+ logic [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0] vs2_offset;
EEW_e [`NUM_DE_UOP-1:0] vs2_eew;
logic [`NUM_DE_UOP-1:0] vs2_valid;
logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] rd_index;
@@ -228,10 +230,11 @@
US_US,
US_FF: begin
case(inst_nf)
- // EMUL_vd = ceil( inst_funct3/csr_sew*csr_lmul )
- // emul_max_vd_vs2 = EMUL_vd
- // emul_vd_nf = EMUL_vd*NF
- // EMUL_max = NF*emul_max_vd_vs2
+ // emul_vd = ceil(inst_funct3/csr_sew*csr_lmul)
+ // emul_vs2: no emul_vs2 for unit-stride
+ // emul_max_vd_vs2 = max(emul_vd,emul_vs2) = emul_vd
+ // emul_vd_nf = NF*emul_vd
+ // emul_max = NF*emul_max_vd_vs2
NF1: begin
case({inst_funct3,csr_sew})
// 1:1
@@ -1028,8 +1031,11 @@
CS: begin
case(inst_nf)
- // EMUL_vd = ceil( inst_funct3/csr_sew*csr_lmul )
- // EMUL_max = NF*EMUL_vd
+ // emul_vd = ceil(inst_funct3/csr_sew*csr_lmul)
+ // emul_vs2: no emul_vs2 for constant-stride
+ // emul_max_vd_vs2 = max(emul_vd,emul_vs2) = emul_vd
+ // emul_vd_nf = NF*emul_vd
+ // emul_max = NF*emul_max_vd_vs2
NF1: begin
case({inst_funct3,csr_sew})
// 1:1
@@ -1782,10 +1788,11 @@
IU,
IO: begin
case(inst_nf)
- // EMUL_vd = ceil( csr_lmul )
- // EMUL_vs2 = ceil( inst_funct3/csr_sew*csr_lmul )
+ // emul_vd = ceil(csr_lmul)
+ // emul_vs2 = ceil(inst_funct3/csr_sew*csr_lmul)
// emul_max_vd_vs2 = max(EMUL_vd,EMUL_vs2)
- // EMUL_max = NF*emul_max_vd_vs2
+ // emul_vd_nf = NF*emul_vd
+ // emul_max = NF*emul_max_vd_vs2
NF1: begin
case({inst_funct3,csr_sew})
// 1:1
@@ -3233,63 +3240,194 @@
end
end
- // update vd_index and eew
+ // update vd_offset
always_comb begin
- for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD
+ for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD_OFFSET
// initial
- vd_index[i] = 'b0;
- vd_eew[i] = eew_vd;
+ vd_offset[i] = 'b0;
case(inst_funct6[2:0])
UNIT_STRIDE: begin
case(inst_umop)
US_REGULAR,
- US_FAULT_FIRST,
+ US_FAULT_FIRST: begin
+ case({inst_nf,emul_vd})
+ {NF2,EMUL4}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd4;
+ 3'd2 : vd_offset[i] = 3'd1;
+ 3'd3 : vd_offset[i] = 3'd5;
+ 3'd4 : vd_offset[i] = 3'd2;
+ 3'd5 : vd_offset[i] = 3'd6;
+ 3'd6 : vd_offset[i] = 3'd3;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ {NF2,EMUL2}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd2;
+ 3'd2 : vd_offset[i] = 3'd1;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ {NF3,EMUL2}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd2;
+ 3'd2 : vd_offset[i] = 3'd4;
+ 3'd3 : vd_offset[i] = 3'd1;
+ 3'd4 : vd_offset[i] = 3'd3;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ {NF4,EMUL2}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd2;
+ 3'd2 : vd_offset[i] = 3'd4;
+ 3'd3 : vd_offset[i] = 3'd6;
+ 3'd4 : vd_offset[i] = 3'd1;
+ 3'd5 : vd_offset[i] = 3'd3;
+ 3'd6 : vd_offset[i] = 3'd5;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ default:
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
US_WHOLE_REGISTER: begin
- vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
end
US_MASK: begin
- vd_index[i] = inst_vd;
+ vd_offset[i] = 'b0;
end
endcase
end
CONSTANT_STRIDE: begin
- vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ case({inst_nf,emul_vd})
+ {NF2,EMUL4}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd4;
+ 3'd2 : vd_offset[i] = 3'd1;
+ 3'd3 : vd_offset[i] = 3'd5;
+ 3'd4 : vd_offset[i] = 3'd2;
+ 3'd5 : vd_offset[i] = 3'd6;
+ 3'd6 : vd_offset[i] = 3'd3;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ {NF2,EMUL2}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd2;
+ 3'd2 : vd_offset[i] = 3'd1;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ {NF3,EMUL2}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd2;
+ 3'd2 : vd_offset[i] = 3'd4;
+ 3'd3 : vd_offset[i] = 3'd1;
+ 3'd4 : vd_offset[i] = 3'd3;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ {NF4,EMUL2}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd2;
+ 3'd2 : vd_offset[i] = 3'd4;
+ 3'd3 : vd_offset[i] = 3'd6;
+ 3'd4 : vd_offset[i] = 3'd1;
+ 3'd5 : vd_offset[i] = 3'd3;
+ 3'd6 : vd_offset[i] = 3'd5;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ default:
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
end
UNORDERED_INDEX,
ORDERED_INDEX: begin
- case({inst_funct3,csr_sew})
+ case({eew_vs2,eew_vd})
// EEW_vs2:EEW_vd=1:1
- {SEW_8,SEW8},
- {SEW_16,SEW16},
- {SEW_32,SEW32},
+ {EEW8,EEW8},
+ {EEW16,EEW16},
+ {EEW32,EEW32},
// 1:2
- {SEW_8,SEW16},
- {SEW_16,SEW32},
+ {EEW8,EEW16},
+ {EEW16,EEW32},
// 1:4
- {SEW_8,SEW32}: begin
- vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ {EEW8,EEW32}: begin
+ case({inst_nf,emul_vd})
+ {NF2,EMUL4}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd4;
+ 3'd2 : vd_offset[i] = 3'd1;
+ 3'd3 : vd_offset[i] = 3'd5;
+ 3'd4 : vd_offset[i] = 3'd2;
+ 3'd5 : vd_offset[i] = 3'd6;
+ 3'd6 : vd_offset[i] = 3'd3;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ {NF2,EMUL2}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd2;
+ 3'd2 : vd_offset[i] = 3'd1;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ {NF3,EMUL2}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd2;
+ 3'd2 : vd_offset[i] = 3'd4;
+ 3'd3 : vd_offset[i] = 3'd1;
+ 3'd4 : vd_offset[i] = 3'd3;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ {NF4,EMUL2}: begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0])
+ 3'd1 : vd_offset[i] = 3'd2;
+ 3'd2 : vd_offset[i] = 3'd4;
+ 3'd3 : vd_offset[i] = 3'd6;
+ 3'd4 : vd_offset[i] = 3'd1;
+ 3'd5 : vd_offset[i] = 3'd3;
+ 3'd6 : vd_offset[i] = 3'd5;
+ default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
+ end
+ default:
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
+ endcase
end
// 2:1
- {SEW_16,SEW8},
- {SEW_32,SEW16},
+ {EEW16,EEW8},
+ {EEW32,EEW16},
// 4:1
- {SEW_32,SEW8}: begin
+ {EEW32,EEW8}: begin
case({emul_vs2,emul_vd})
- {EMUL1,EMUL1}: begin
- vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
- end
+ {EMUL1,EMUL1}:
+ vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0];
{EMUL2,EMUL1},
- {EMUL4,EMUL2},
- {EMUL8,EMUL4}: begin
- vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:1];
+ {EMUL8,EMUL4}:
+ vd_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
+ {EMUL4,EMUL2}: begin
+ if (inst_nf==NF2) begin
+ case(uop_index_current[i][`UOP_INDEX_WIDTH-1:1])
+ 2'd1 : vd_offset[i] = 3'd2;
+ 2'd2 : vd_offset[i] = 3'd1;
+ default: vd_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
+ endcase
+ end
+ else
+ vd_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]};
end
{EMUL4,EMUL1},
- {EMUL8,EMUL2}: begin
- vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:2];
- end
+ {EMUL8,EMUL2}:
+ vd_offset[i] = {2'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:2]};
endcase
end
endcase
@@ -3298,6 +3436,14 @@
end
end
+ // update vd_index and eew
+ always_comb begin
+ for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD
+ vd_index[i] = inst_vd + {2'b0, vd_offset[i]};
+ vd_eew[i] = eew_vd;
+ end
+ end
+
// update vd_valid and vs3_valid
// some uop need vd as the vs3 vector operand
always_comb begin
@@ -3330,83 +3476,126 @@
end
end
- // update vs2 index, eew and valid
+ // update vs2 offset and valid
always_comb begin
// initial
- vs2_index = 'b0;
- vs2_eew = EEW_NONE;
- vs2_valid = 'b0;
+ vs2_offset = 'b0;
+ vs2_valid = 'b0;
- for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2
+ for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2_OFFSET
case(inst_funct6[2:0])
UNORDERED_INDEX,
ORDERED_INDEX: begin
- case({inst_funct3,csr_sew})
+ case({eew_vs2,eew_vd})
// EEW_vs2:EEW_vd=1:1
- {SEW_8,SEW8},
- {SEW_16,SEW16},
- {SEW_32,SEW32},
- // 2:1
- {SEW_16,SEW8},
- {SEW_32,SEW16},
- // 4:1
- {SEW_32,SEW8}: begin
+ {EEW8,EEW8},
+ {EEW16,EEW16},
+ {EEW32,EEW32}: begin
case(emul_vs2)
- EMUL1: begin
- vs2_index[i] = inst_vs2;
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
- end
EMUL2: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ case(inst_nf)
+ NF2: vs2_offset[i] = {2'b0, uop_index_current[i][1]};
+ NF3: vs2_offset[i] = (uop_index_current[i]>='d3) ? 3'd1 : 3'b0;
+ NF4: vs2_offset[i] = {2'b0, uop_index_current[i][2]};
+ default: vs2_offset[i] = {2'b0, uop_index_current[i][0]};
+ endcase
+ vs2_valid[i] = 1'b1;
end
EMUL4: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][1:0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = (inst_nf==NF2) ? {1'b0, uop_index_current[i][2:1]} : {1'b0, uop_index_current[i][1:0]};
+ vs2_valid[i] = 1'b1;
end
EMUL8: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][2:0];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = uop_index_current[i][2:0];
+ vs2_valid[i] = 1'b1;
+ end
+ default: begin //EMUL1
+ vs2_offset[i] = 'b0;
+ vs2_valid[i] = 1'b1;
+ end
+ endcase
+ end
+ // 2:1
+ {EEW16,EEW8},
+ {EEW32,EEW16}: begin
+ case(emul_vs2)
+ EMUL2: begin
+ case(inst_nf)
+ NF2: vs2_offset[i] = {1'b0, uop_index_current[i][2], uop_index_current[i][0]};
+ NF3,
+ NF4: vs2_offset[i] = {2'b0, uop_index_current[i][0]};
+ default: vs2_offset[i] = uop_index_current[i]; //NF1
+ endcase
+ vs2_valid[i] = 1'b1;
+ end
+ EMUL4: begin
+ vs2_offset[i] = (inst_nf==NF2) ? {1'b0, uop_index_current[i][2], uop_index_current[i][0]} : uop_index_current[i];
+ vs2_valid[i] = 1'b1;
+ end
+ EMUL8: begin
+ vs2_offset[i] = uop_index_current[i];
+ vs2_valid[i] = 1'b1;
+ end
+ default: begin //EMUL1
+ vs2_offset[i] = 'b0;
+ vs2_valid[i] = 1'b1;
+ end
+ endcase
+ end
+ // 4:1
+ {EEW32,EEW8}: begin
+ case(emul_vs2)
+ EMUL2: begin
+ case(inst_nf)
+ NF2: vs2_offset[i] = {1'b0, uop_index_current[i][2], uop_index_current[i][0]};
+ NF3,
+ NF4: vs2_offset[i] = {2'b0, uop_index_current[i][0]};
+ default: vs2_offset[i] = uop_index_current[i]; //NF1
+ endcase
+ vs2_valid[i] = 1'b1;
+ end
+ EMUL4: begin
+ vs2_offset[i] = (inst_nf==NF2) ? {1'b0, uop_index_current[i][1:0]} : uop_index_current[i];
+ vs2_valid[i] = 1'b1;
+ end
+ EMUL8: begin
+ vs2_offset[i] = uop_index_current[i];
+ vs2_valid[i] = 1'b1;
+ end
+ default: begin //EMUL1
+ vs2_offset[i] = 'b0;
+ vs2_valid[i] = 1'b1;
end
endcase
end
// 1:2
- {SEW_8,SEW16},
- {SEW_16,SEW32}: begin
+ {EEW8,EEW16},
+ {EEW16,EEW32}: begin
case(emul_vs2)
EMUL1: begin
- vs2_index[i] = inst_vs2;
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = 'b0;
+ vs2_valid[i] = 1'b1;
end
EMUL2: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][1];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = (inst_nf==NF2) ? {2'b0, uop_index_current[i][2]} : {2'b0, uop_index_current[i][1]};
+ vs2_valid[i] = 1'b1;
end
EMUL4: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][2:1];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = {1'b0, uop_index_current[i][2:1]};
+ vs2_valid[i] = 1'b1;
end
endcase
end
// 1:4
- {SEW_8,SEW32}: begin
+ {EEW8,EEW32}: begin
case(emul_vs2)
EMUL1: begin
- vs2_index[i] = inst_vs2;
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = 'b0;
+ vs2_valid[i] = 1'b1;
end
EMUL2: begin
- vs2_index[i] = inst_vs2+uop_index_current[i][2];
- vs2_eew[i] = eew_vs2;
- vs2_valid[i] = 1'b1;
+ vs2_offset[i] = {2'b0, uop_index_current[i][2]};
+ vs2_valid[i] = 1'b1;
end
endcase
end
@@ -3416,6 +3605,14 @@
end
end
+ // update vs2 index and eew
+ always_comb begin
+ for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2
+ vs2_index[i] = inst_vs2 + {2'b0, vs2_offset[i]};
+ vs2_eew[i] = eew_vs2;
+ end
+ end
+
// update rd_index and valid
always_comb begin
for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_RD
@@ -3450,24 +3647,39 @@
// update segment_index valid
always_comb begin
for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_SEG_INDEX
- // initial
- seg_field_index[i] = 'b0;
+ // default
+ if (inst_nf==NF2)
+ seg_field_index[i] = {1'b0,uop_index_current[i][2:1]};
+ else if (inst_nf==NF3)
+ seg_field_index[i] = (uop_index_current[i]>=4'd3) ? 'd1 : 'b0;
+ else if (inst_nf==NF4)
+ seg_field_index[i] = {2'b0,uop_index_current[i][2]};
+ else
+ seg_field_index[i] = 'b0;
- if(funct6_lsu.lsu_funct6.lsu_is_seg==IS_SEGMENT) begin
- case(inst_nf)
- NF2: begin
- case(emul_max_vd_vs2)
- EMUL2: seg_field_index[i] = {1'b0,uop_index_current[i][0]};
- EMUL4: seg_field_index[i] = uop_index_current[i][1:0];
- endcase
- end
- NF3,
- NF4: begin
- if (emul_max_vd_vs2==EMUL2)
- seg_field_index[i] = {1'b0,uop_index_current[i][0]};
- end
- endcase
- end
+ // EEW_vs2>EEW_vd for index load/store
+ case(inst_funct6[2:0])
+ UNORDERED_INDEX,
+ ORDERED_INDEX: begin
+ case({eew_vs2,eew_vd})
+ // 2:1
+ {EEW16,EEW8},
+ {EEW32,EEW16}: begin
+ case(emul_vs2)
+ EMUL2: seg_field_index[i] = {2'b0, uop_index_current[i][0]};
+ EMUL4: seg_field_index[i] = {1'b0, uop_index_current[i][2], uop_index_current[i][0]};
+ endcase
+ end
+ // 4:1
+ {EEW32,EEW8}: begin
+ case(emul_vs2)
+ EMUL2: seg_field_index[i] = {2'b0, uop_index_current[i][0]};
+ EMUL4: seg_field_index[i] = {1'b0, uop_index_current[i][1:0]};
+ endcase
+ end
+ endcase
+ end
+ endcase
end
end
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_driver.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_driver.sv
index 0e12dd4..0e50dfa 100644
--- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_driver.sv
+++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_driver.sv
@@ -8,6 +8,7 @@
class lsu_driver extends uvm_driver # (lsu_transaction);
+ parameter int MAX_SEG = 8;
typedef virtual lsu_interface v_if;
v_if lsu_if;
@@ -42,6 +43,8 @@
// receive & decode inst from rvs
extern function void write_lsu_inst(rvs_transaction inst_tr);
extern function int lsu_uop_decode(ref rvs_transaction inst_tr);
+ extern protected function void lsu_uop_gen_delay(ref lsu_transaction uop_tr);
+ extern protected function void lsu_uop_gen_trap(ref lsu_transaction uop_tr);
endclass: lsu_driver
@@ -153,17 +156,17 @@
// update address for indexed-stride from vidx_data
if(uop_tr.is_indexed == 1) begin
if(lsu_if.uop_lsu_rvv2lsu[i].vidx_valid !== 1) begin
- `uvm_fatal("LSU_DRV", "Uop is indexed but vidx_valid is not")
+ `uvm_error("LSU_DRV", "Uop is indexed but vidx_valid is not")
continue;
end else if(uop_tr.lsu_slot_addr_valid === 1) begin
`uvm_fatal("TB_ISSUE", "Decode error")
continue;
end else if(uop_tr.vidx_vreg_idx !== lsu_if.uop_lsu_rvv2lsu[i].vidx_addr) begin
- `uvm_fatal("LSU_DRV", $sformatf("vidx_addr mismatch: lsu=%0d, dut=%0d", uop_tr.vidx_vreg_idx, lsu_if.uop_lsu_rvv2lsu[i].vidx_addr))
+ `uvm_error("LSU_DRV", $sformatf("vidx_addr mismatch: lsu=%0d, dut=%0d", uop_tr.vidx_vreg_idx, lsu_if.uop_lsu_rvv2lsu[i].vidx_addr))
continue;
end else begin
`uvm_info("LSU_DRV", $sformatf("Got vreg[%0d]=0x%16x from dut.", lsu_if.uop_lsu_rvv2lsu[i].vidx_addr, lsu_if.uop_lsu_rvv2lsu[i].vidx_data), UVM_HIGH);
- for(int byte_idx=uop_tr.vidx_vreg_byte_start; byte_idx<=uop_tr.vidx_vreg_byte_end; byte_idx += uop_tr.vidx_vreg_eew/8) begin
+ for(int byte_idx=uop_tr.vidx_vreg_byte_head; byte_idx<uop_tr.vidx_vreg_byte_tail; byte_idx += uop_tr.vidx_vreg_eew/8) begin
case(uop_tr.vidx_vreg_eew)
// For indexed-stride, the stride from vrf should be zero-extended to `XLEN.
EEW8 : stride_temp = $unsigned(lsu_if.uop_lsu_rvv2lsu[i].vidx_data[byte_idx*8 +: 8 ]);
@@ -173,7 +176,7 @@
indexed_stride.push_back(stride_temp);
`uvm_info("LSU_DRV", $sformatf("byte[%0d]: push stride=0x%8x to indexed_stride(size: %0d).", byte_idx, stride_temp, indexed_stride.size()), UVM_HIGH)
end
- for(int byte_idx=uop_tr.data_vreg_byte_start; byte_idx<=uop_tr.data_vreg_byte_end; byte_idx++) begin
+ for(int byte_idx=uop_tr.data_vreg_byte_head; byte_idx<uop_tr.data_vreg_byte_tail; byte_idx++) begin
if(byte_idx % (uop_tr.data_vreg_eew/8) == 0) begin
stride_temp = indexed_stride.pop_front();
`uvm_info("LSU_DRV", $sformatf("byte[%0d]: pop stride=0x%8x from indexed_stride(size: %0d).", byte_idx, stride_temp, indexed_stride.size()), UVM_HIGH)
@@ -198,7 +201,7 @@
`uvm_fatal("TB_ISSUE", $sformatf("vregfile_read_addr mismatch: lsu=%0d, dut=%0d", uop_tr.data_vreg_idx, lsu_if.uop_lsu_rvv2lsu[i].vregfile_read_addr))
continue;
end else begin
- for(int byte_idx=uop_tr.data_vreg_byte_start; byte_idx<=uop_tr.data_vreg_byte_end; byte_idx++) begin
+ for(int byte_idx=uop_tr.data_vreg_byte_head; byte_idx<uop_tr.data_vreg_byte_tail; byte_idx++) begin
uop_tr.lsu_slot_data[byte_idx] = lsu_if.uop_lsu_rvv2lsu[i].vregfile_read_data[byte_idx*8 +: 8];
end
uop_tr.lsu_slot_data_valid = 1;
@@ -211,6 +214,7 @@
`uvm_fatal("LSU_DRV", "Uops need v0_data but v0_valid is 0")
continue;
end else begin
+ `uvm_info("LSU_DRV", $sformatf("uop_pc:0x%8x, v0_data=0x%016x", uop_tr.uop_pc, lsu_if.uop_lsu_rvv2lsu[i].v0_data), UVM_HIGH)
uop_tr.lsu_slot_strobe = lsu_if.uop_lsu_rvv2lsu[i].v0_data;
uop_tr.lsu_slot_addr_valid = 1;
end
@@ -286,8 +290,7 @@
`uvm_fatal("TB_ISSUE", "LSU decode err.")
break;
end else if(uops_tx_queue[uop_idx].uop_done == 0) begin
- // for(int byte_idx=0; byte_idx<`VLENB; byte_idx++) begin
- for(int byte_idx=uops_tx_queue[uop_idx].data_vreg_byte_start; byte_idx<=uops_tx_queue[uop_idx].data_vreg_byte_end; byte_idx++) begin
+ for(int byte_idx=uops_tx_queue[uop_idx].data_vreg_byte_head; byte_idx<uops_tx_queue[uop_idx].data_vreg_byte_tail; byte_idx++) begin
if(uops_tx_queue[uop_idx].lsu_slot_strobe[byte_idx] === 1'b1) begin
mem.pc = uops_tx_queue[uop_idx].uop_pc;
mem.load_byte(data_temp, uops_tx_queue[uop_idx].lsu_slot_addr[byte_idx]);
@@ -308,8 +311,7 @@
`uvm_fatal("TB_ISSUE", "LSU decode err.")
break;
end else if(uops_tx_queue[uop_idx].uop_done == 0) begin
- // for(int byte_idx=0; byte_idx<`VLENB; byte_idx++) begin
- for(int byte_idx=uops_tx_queue[uop_idx].data_vreg_byte_start; byte_idx<=uops_tx_queue[uop_idx].data_vreg_byte_end; byte_idx++) begin
+ for(int byte_idx=uops_tx_queue[uop_idx].data_vreg_byte_head; byte_idx<uops_tx_queue[uop_idx].data_vreg_byte_tail; byte_idx++) begin
if(uops_tx_queue[uop_idx].lsu_slot_strobe[byte_idx] === 1'b1) begin
data_temp = uops_tx_queue[uop_idx].lsu_slot_data[byte_idx];
mem.pc = uops_tx_queue[uop_idx].uop_pc;
@@ -432,17 +434,14 @@
endfunction
function int lsu_driver::lsu_uop_decode(ref rvs_transaction inst_tr);
- lsu_transaction uop_tr;
+ lsu_transaction uop_tr; // uop currently being built; NOTE(review): the old note said "segment max is 3" but MAX_SEG in this class is 8 - confirm the intended limit
// vtype decode
- int sew;
- int lsu_eew;
- real lmul;
int elm_idx_max;
int lsu_nf;
- int seg_idx_max;
+ int seg_num;
int evl;
int vstart;
- int uop_vstart;
+ int uop_vstart[MAX_SEG];
int data_eew;
real data_emul;
@@ -452,15 +451,19 @@
real emul_max;
// uop info
- int uops_cnt;
int uops_num;
- int data_byte_idx;
- int vidx_byte_idx;
- int temp_idx;
int data_vreg_idx_base;
int vidx_vreg_idx_base;
int data_vreg_idx_last;
int vidx_vreg_idx_last;
+ int elm_per_uop; // number of elements walked per uop, computed as `VLEN / eew_max
+ int elm_idx_head[MAX_SEG]; // per segment: first element index handled by that segment's next uop
+ int elm_idx_tail[MAX_SEG]; // per segment: exclusive end element index of that segment's next uop
+ int data_byte_idx[MAX_SEG]; // per segment: running byte offset in the data vreg, wrapped modulo `VLENB after each uop
+ int vidx_byte_idx[MAX_SEG]; // per segment: running byte offset in the index vreg, wrapped modulo `VLENB after each uop
+ int seg_idx; // segment that the uop being generated belongs to
+ int switch_seg; // non-zero when data_byte_idx of the current segment has reached seg_switch_gap
+ int seg_switch_gap; // data-byte count at which generation moves to the next segment: data_emul * `VLENB when data_emul < 1, otherwise `VLENB
// load/store addres info
int addr;
@@ -468,87 +471,66 @@
// Decode ----------------------------------------------------------------------
`uvm_info("LSU_DRV","Start decode vtype",UVM_HIGH)
- sew = 8 << inst_tr.vsew;
- lsu_eew = inst_tr.lsu_eew;
- lmul = 2.0 ** $signed(inst_tr.vlmul);
- addr_base = inst_tr.rs1_data;
- lsu_nf = inst_tr.lsu_nf;
- vstart = inst_tr.vstart;
+ addr_base = inst_tr.rs1_data;
+ evl = inst_tr.evl;
+ vstart = inst_tr.vstart;
+ lsu_nf = inst_tr.lsu_nf;
+ eew_max = inst_tr.eew_max;
+ emul_max = inst_tr.emul_max;
+
+ elm_idx_max = inst_tr.elm_idx_max;
+ seg_num = inst_tr.seg_num;
+
+ uops_num = int'($ceil(emul_max)) * (seg_num);
+ elm_per_uop = `VLEN / eew_max;
+
+ case(inst_tr.inst_type)
+ LD: begin
+ data_eew = inst_tr.dest_eew;
+ vidx_eew = inst_tr.src2_eew;
+ data_emul = inst_tr.dest_emul;
+ vidx_emul = inst_tr.src2_emul;
+ end
+ ST: begin
+ data_eew = inst_tr.src3_eew;
+ vidx_eew = inst_tr.src2_eew;
+ data_emul = inst_tr.src3_emul;
+ vidx_emul = inst_tr.src2_emul;
+ end
+ endcase
case(inst_tr.lsu_mop)
LSU_US : begin
case(inst_tr.lsu_umop)
MASK: begin
- data_eew = EEW8;
- data_emul = EMUL1;
- vidx_eew = EEW32;
- vidx_emul = EMUL1;
- eew_max = EEW8;
- emul_max = EMUL1;
const_stride = (lsu_nf+1) * data_eew/8;
- seg_idx_max = lsu_nf + 1;
- evl = int'($ceil(inst_tr.vl / 8.0));
end
WHOLE_REG: begin
- data_eew = lsu_eew;
- data_emul = lsu_nf + 1;
- vidx_eew = EEW32;
- vidx_emul = EMUL1;
- eew_max = lsu_eew;
- emul_max = data_emul;
const_stride = data_eew/8;
- seg_idx_max = 1;
- evl = data_emul * `VLEN / data_eew;
end
default: begin
- data_eew = lsu_eew;
- data_emul = data_eew * lmul / sew;
- vidx_eew = EEW32;
- vidx_emul = EMUL1;
- eew_max = lsu_eew;
- emul_max = eew_max * lmul / sew;
const_stride = (lsu_nf+1) * data_eew/8;
- seg_idx_max = lsu_nf + 1;
- evl = inst_tr.vl;
end
endcase
end
LSU_CS : begin
- data_eew = lsu_eew;
- data_emul = data_eew * lmul / sew;
- vidx_eew = EEW32;
- vidx_emul = EMUL1;
- eew_max = lsu_eew;
- emul_max = eew_max * lmul / sew;
const_stride = inst_tr.rs2_data;
- seg_idx_max = lsu_nf + 1;
- evl = inst_tr.vl;
end
LSU_UI,
LSU_OI: begin
- data_eew = sew;
- data_emul = data_eew * lmul / sew;
- vidx_eew = lsu_eew;
- vidx_emul = vidx_eew * lmul / sew;
- eew_max = (data_eew > vidx_eew) ? data_eew : vidx_eew;
- emul_max = eew_max * lmul / sew;
const_stride = 0;
- seg_idx_max = lsu_nf + 1;
- evl = inst_tr.vl;
end
endcase
- uops_num = int'($ceil(emul_max)) * (seg_idx_max);
- elm_idx_max = int'($ceil(emul_max)) * `VLEN / eew_max;
if(inst_tr.inst_type == LD) begin
data_vreg_idx_base = inst_tr.dest_idx;
- data_vreg_idx_last = inst_tr.dest_idx + (seg_idx_max) * int'($ceil(data_emul)) - 1;
+ data_vreg_idx_last = inst_tr.dest_idx + (seg_num) * int'($ceil(data_emul)) - 1;
vidx_vreg_idx_base = inst_tr.src2_idx;
vidx_vreg_idx_last = inst_tr.src2_idx + int'($ceil(vidx_emul)) - 1;
end else if(inst_tr.inst_type == ST) begin
data_vreg_idx_base = inst_tr.src3_idx;
- data_vreg_idx_last = inst_tr.src3_idx + (seg_idx_max) * int'($ceil(data_emul)) - 1;
+ data_vreg_idx_last = inst_tr.src3_idx + (seg_num) * int'($ceil(data_emul)) - 1;
vidx_vreg_idx_base = inst_tr.src2_idx;
vidx_vreg_idx_last = inst_tr.src2_idx + int'($ceil(vidx_emul)) - 1;
end else begin
@@ -557,164 +539,198 @@
`uvm_info("LSU_DRV", $sformatf("eew_max=%0d, emul_max=%.2f, elm_idx_max=%0d", eew_max, emul_max, elm_idx_max), UVM_HIGH)
// Uops Gen --------------------------------------------------------------------
- `uvm_info("LSU_DRV","Start gen uops",UVM_HIGH)
- uops_cnt = 0;
- for(int seg_idx=0; seg_idx<seg_idx_max; seg_idx++) begin
- uop_vstart = inst_tr.vstart;
- data_byte_idx = vstart * data_eew / 8;
- vidx_byte_idx = vstart * vidx_eew / 8;
- for(int elm_idx=0; elm_idx<elm_idx_max; elm_idx++) begin
-
- `uvm_info("LSU_DRV",$sformatf("seg_idx=%0d, elm_idx=%0d", seg_idx, elm_idx),UVM_HIGH)
- if(elm_idx * eew_max % `VLEN == 0) begin
- `uvm_info("LSU_DRV","Gen new uop",UVM_HIGH)
- uop_tr = new();
- // Gen delay
- case(delay_mode_rvv2lsu)
- delay_mode_pkg::SLOW: begin
- uop_tr.c_rvv2lsu_delay.constraint_mode(0);
- assert(uop_tr.randomize(rvv2lsu_delay) with {
- rvv2lsu_delay dist {
- [1:50] :/ 20,
- [50:100] :/ 80
- };
- });
- end
- delay_mode_pkg::NORMAL: begin
- assert(uop_tr.randomize(rvv2lsu_delay) with {
- rvv2lsu_delay dist {
- [0:10] :/ 50,
- [10:20] :/ 30,
- [20:50] :/ 20
- };
- });
- end
- delay_mode_pkg::FAST: begin
- assert(uop_tr.randomize(rvv2lsu_delay) with {
- rvv2lsu_delay dist {
- 0 := 80,
- [1:5] :/ 15,
- [5:20] :/ 5
- };
- });
- end
- endcase
- case(delay_mode_lsu2rvv)
- delay_mode_pkg::SLOW: begin
- uop_tr.c_lsu2rvv_delay.constraint_mode(0);
- assert(uop_tr.randomize(lsu2rvv_delay) with {
- lsu2rvv_delay dist {
- [1:50] :/ 20,
- [50:100] :/ 80
- };
- });
- end
- delay_mode_pkg::NORMAL: begin
- assert(uop_tr.randomize(lsu2rvv_delay) with {
- lsu2rvv_delay dist {
- [0:10] :/ 50,
- [10:20] :/ 30,
- [20:50] :/ 20
- };
- });
- end
- delay_mode_pkg::FAST: begin
- assert(uop_tr.randomize(lsu2rvv_delay) with {
- lsu2rvv_delay dist {
- 0 := 80,
- [1:5] :/ 15,
- [5:20] :/ 5
- };
- });
- end
- endcase
- // Gen trap
- if(trap_en) begin
- if(always_trap) begin
- assert(uop_tr.randomize(trap_occured) with {
- trap_occured == 1;
- });
- end else begin
- assert(uop_tr.randomize(trap_occured) with {
- trap_occured dist {
- // 0 := 99,
- 0 := 9,
- 1 := 1
- };
- });
- end
- end else begin
- assert(uop_tr.randomize(trap_occured) with {
- trap_occured == 0;
- });
- end
- uops_cnt++;
- uop_tr.inst_string = inst_tr.asm_string;
- if(inst_tr.inst_type == LD) begin
- uop_tr.kind = lsu_transaction::LOAD;
- end else if(inst_tr.inst_type == ST) begin
- uop_tr.kind = lsu_transaction::STORE;
- end else begin
- `uvm_fatal("TB_ISSUE", "Decode inst_tr which is not load/store in lsu_driver.")
- end
- uop_tr.uop_pc = inst_tr.pc;
- uop_tr.uop_index = uops_cnt-1;
+ `uvm_info("LSU_DRV","Start gen uops",UVM_HIGH)
+ if(data_emul < 1) begin
+ seg_switch_gap = data_emul * `VLENB;
+ end else begin
+ seg_switch_gap = `VLENB;
+ end
- uop_tr.is_last_uop = (uops_cnt == uops_num) ? 1: 0;
- uop_tr.is_indexed = (inst_tr.lsu_mop inside {LSU_UI, LSU_OI}) ? 1 : 0;
- uop_tr.total_uops_num = uops_num;
- uop_tr.base_addr = addr_base;
- uop_tr.vstart = uop_vstart;
+ for(int seg_idx=0; seg_idx<seg_num; seg_idx++) begin
+ elm_idx_head[seg_idx] = 0;
+ elm_idx_tail[seg_idx] = elm_per_uop;
+ data_byte_idx[seg_idx] = 0;
+ vidx_byte_idx[seg_idx] = 0;
+ uop_vstart[seg_idx] = vstart;
+ end
+ seg_idx = 0;
+ for(int uops_idx=0; uops_idx<uops_num; uops_idx++) begin
+ `uvm_info("LSU_DRV","Gen new uop",UVM_HIGH)
+ uop_tr = new();
+ `uvm_info("LSU_DRV", $sformatf("seg_idx = %0d\n", seg_idx ), UVM_HIGH)
+ `uvm_info("LSU_DRV", $sformatf("seg_switch_gap = %0d\n", seg_switch_gap), UVM_HIGH)
+ `uvm_info("LSU_DRV", $sformatf("elm_idx_head[%0d] = %0d\n", seg_idx, elm_idx_head[seg_idx]), UVM_HIGH)
+ `uvm_info("LSU_DRV", $sformatf("elm_idx_tail[%0d] = %0d\n", seg_idx, elm_idx_tail[seg_idx]), UVM_HIGH)
- uop_tr.vm = inst_tr.vm;
- uop_tr.lsu_slot_strobe = '0;
-
- uop_tr.data_vreg_valid = 1;
- uop_tr.data_vreg_idx = data_vreg_idx_base + elm_idx * (data_eew/8) / `VLENB + seg_idx * int'($ceil(data_emul));
- uop_tr.data_vreg_eew = data_eew;
- uop_tr.data_vreg_byte_start = data_byte_idx % `VLENB;
+ lsu_uop_gen_delay(uop_tr);
+ lsu_uop_gen_trap(uop_tr);
- uop_tr.vidx_vreg_valid = (inst_tr.lsu_mop inside {LSU_UI, LSU_OI}) ? 1 : 0;
- uop_tr.vidx_vreg_idx = vidx_vreg_idx_base + elm_idx * (vidx_eew/8) / `VLENB;
- uop_tr.vidx_vreg_eew = vidx_eew;
- uop_tr.vidx_vreg_byte_start = vidx_byte_idx % `VLENB;
+ uop_tr.inst_string = inst_tr.asm_string;
+ if(inst_tr.inst_type == LD) begin
+ uop_tr.kind = lsu_transaction::LOAD;
+ end else if(inst_tr.inst_type == ST) begin
+ uop_tr.kind = lsu_transaction::STORE;
+ end else begin
+ `uvm_fatal("TB_ISSUE", "Decode inst_tr which is not load/store in lsu_driver.")
+ end
+ uop_tr.uop_pc = inst_tr.pc;
+ uop_tr.uop_index = uops_idx;
+
+ uop_tr.is_last_uop = (uops_idx == uops_num-1) ? 1: 0;
+ uop_tr.is_indexed = (inst_tr.lsu_mop inside {LSU_UI, LSU_OI}) ? 1 : 0;
+ uop_tr.total_uops_num = uops_num;
+ uop_tr.base_addr = addr_base;
+ uop_tr.vstart = uop_vstart[seg_idx];
+
+ uop_tr.vm = inst_tr.vm;
+ uop_tr.lsu_slot_strobe = '0;
+
+ uop_tr.data_vreg_valid = 1;
+ uop_tr.data_vreg_idx = data_vreg_idx_base + elm_idx_head[seg_idx] * (data_eew/8) / `VLENB + seg_idx * int'($ceil(data_emul));
+ uop_tr.data_vreg_eew = data_eew;
+ uop_tr.data_vreg_byte_head = data_byte_idx[seg_idx];
+
+ uop_tr.vidx_vreg_valid = (inst_tr.lsu_mop inside {LSU_UI, LSU_OI}) ? 1 : 0;
+ uop_tr.vidx_vreg_idx = vidx_vreg_idx_base + elm_idx_head[seg_idx] * (vidx_eew/8) / `VLENB;
+ uop_tr.vidx_vreg_eew = vidx_eew;
+ uop_tr.vidx_vreg_byte_head = vidx_byte_idx[seg_idx];
+
+ for(int elm_idx=elm_idx_head[seg_idx]; elm_idx<elm_idx_tail[seg_idx]; elm_idx++) begin
+ if(elm_idx == vstart) begin
+ uop_tr.data_vreg_byte_head = vstart * data_eew / 8 %`VLENB;
+ uop_tr.vidx_vreg_byte_head = vstart * vidx_eew / 8 %`VLENB;
end
-
if(elm_idx >= vstart && elm_idx < evl) begin
for(int byte_idx=0; byte_idx<data_eew/8; byte_idx++) begin
addr = addr_base + const_stride * elm_idx + data_eew / 8 * seg_idx + byte_idx;
- temp_idx = data_byte_idx % `VLENB;
- uop_tr.lsu_slot_addr[temp_idx] = addr;
- uop_tr.lsu_slot_strobe[temp_idx] = 1'b1;
- data_byte_idx++;
- // `uvm_info("LSU_DRV",$sformatf("addr=%0x, data_byte_idx = %0d, temp_idx=%0d", addr, data_byte_idx, temp_idx),UVM_HIGH)
- // uop_tr.print();
+ uop_tr.lsu_slot_addr[data_byte_idx[seg_idx]] = addr;
+ uop_tr.lsu_slot_strobe[data_byte_idx[seg_idx]] = 1'b1;
+ data_byte_idx[seg_idx]++;
end
- vidx_byte_idx += vidx_eew/8;
+ vidx_byte_idx[seg_idx] += vidx_eew/8;
+ end else begin
+ data_byte_idx[seg_idx] += data_eew/8;
+ vidx_byte_idx[seg_idx] += vidx_eew/8;
end
if(elm_idx >= vstart) begin
- uop_vstart++;
+ uop_vstart[seg_idx]++;
end
+ end // elm-loop
- if(elm_idx * eew_max % `VLEN == `VLEN - eew_max) begin
- if(elm_idx >= vstart && elm_idx < evl) begin
- uop_tr.data_vreg_byte_end = (data_byte_idx-1) % `VLENB;
- uop_tr.vidx_vreg_byte_end = (vidx_byte_idx-1) % `VLENB;
- end else begin
- uop_tr.data_vreg_byte_end = (data_byte_idx) % `VLENB;
- uop_tr.vidx_vreg_byte_end = (vidx_byte_idx) % `VLENB;
- end
- if(inst_tr.lsu_mop inside {LSU_US, LSU_CS} && inst_tr.vm == 1) begin
- uop_tr.lsu_slot_addr_valid = 1;
- end
- `uvm_info("LSU_DRV",$sformatf("Decode uop_tr to uops_rx_queque:\n%s",uop_tr.sprint()),UVM_HIGH)
- uops_rx_queue.push_back(uop_tr);
+ uop_tr.data_vreg_byte_tail = data_byte_idx[seg_idx];
+ uop_tr.vidx_vreg_byte_tail = vidx_byte_idx[seg_idx];
+
+ if(inst_tr.lsu_mop inside {LSU_US, LSU_CS} && inst_tr.vm == 1) begin
+ uop_tr.lsu_slot_addr_valid = 1;
+ end
+ `uvm_info("LSU_DRV",$sformatf("Decode uop_tr to uops_rx_queque:\n%s",uop_tr.sprint()),UVM_HIGH)
+ uops_rx_queue.push_back(uop_tr);
+
+ `uvm_info("LSU_DRV", $sformatf("data_byte_idx[%0d] = %0d\n", seg_idx, data_byte_idx[seg_idx]), UVM_HIGH)
+
+ switch_seg = data_byte_idx[seg_idx] >= seg_switch_gap;
+
+ data_byte_idx[seg_idx] = (data_byte_idx[seg_idx] % `VLENB);
+ vidx_byte_idx[seg_idx] = (vidx_byte_idx[seg_idx] % `VLENB);
+
+ elm_idx_head[seg_idx] += elm_per_uop;
+ elm_idx_tail[seg_idx] += elm_per_uop;
+
+ if(switch_seg) begin
+ if(seg_idx == seg_num-1) begin
+ seg_idx = 0;
+ end else begin
+ seg_idx += 1;
end
end
- end
+ end // uop-loop
`uvm_info("LSU_DRV","Decode done",UVM_HIGH)
endfunction: lsu_uop_decode
+function void lsu_driver::lsu_uop_gen_delay(ref lsu_transaction uop_tr); // Randomize uop_tr.rvv2lsu_delay and uop_tr.lsu2rvv_delay according to the driver's two delay modes. randomize() is checked with if(!...) instead of assert() so it still runs when assertions are disabled.
+  case(delay_mode_rvv2lsu) // rvv2lsu_delay distribution, selected by delay_mode_rvv2lsu
+    delay_mode_pkg::SLOW: begin
+      uop_tr.c_rvv2lsu_delay.constraint_mode(0); // turn off the transaction's own constraint; presumably it would conflict with the SLOW range - confirm
+      if(!uop_tr.randomize(rvv2lsu_delay) with {
+        rvv2lsu_delay dist {
+          [1:50] :/ 20,
+          [50:100] :/ 80
+        };
+      }) `uvm_error("LSU_DRV", "Failed to randomize rvv2lsu_delay in SLOW mode")
+    end
+    delay_mode_pkg::NORMAL: begin
+      if(!uop_tr.randomize(rvv2lsu_delay) with {
+        rvv2lsu_delay dist {
+          [0:10] :/ 50,
+          [10:20] :/ 30,
+          [20:50] :/ 20
+        };
+      }) `uvm_error("LSU_DRV", "Failed to randomize rvv2lsu_delay in NORMAL mode")
+    end
+    delay_mode_pkg::FAST: begin
+      if(!uop_tr.randomize(rvv2lsu_delay) with {
+        rvv2lsu_delay dist {
+          0 := 80,
+          [1:5] :/ 15,
+          [5:20] :/ 5
+        };
+      }) `uvm_error("LSU_DRV", "Failed to randomize rvv2lsu_delay in FAST mode")
+    end
+  endcase
+  case(delay_mode_lsu2rvv) // lsu2rvv_delay distribution, selected by delay_mode_lsu2rvv
+    delay_mode_pkg::SLOW: begin
+      uop_tr.c_lsu2rvv_delay.constraint_mode(0); // turn off the transaction's own constraint; presumably it would conflict with the SLOW range - confirm
+      if(!uop_tr.randomize(lsu2rvv_delay) with {
+        lsu2rvv_delay dist {
+          [1:50] :/ 20,
+          [50:100] :/ 80
+        };
+      }) `uvm_error("LSU_DRV", "Failed to randomize lsu2rvv_delay in SLOW mode")
+    end
+    delay_mode_pkg::NORMAL: begin
+      if(!uop_tr.randomize(lsu2rvv_delay) with {
+        lsu2rvv_delay dist {
+          [0:10] :/ 50,
+          [10:20] :/ 30,
+          [20:50] :/ 20
+        };
+      }) `uvm_error("LSU_DRV", "Failed to randomize lsu2rvv_delay in NORMAL mode")
+    end
+    delay_mode_pkg::FAST: begin
+      if(!uop_tr.randomize(lsu2rvv_delay) with {
+        lsu2rvv_delay dist {
+          0 := 80,
+          [1:5] :/ 15,
+          [5:20] :/ 5
+        };
+      }) `uvm_error("LSU_DRV", "Failed to randomize lsu2rvv_delay in FAST mode")
+    end
+  endcase
+endfunction: lsu_uop_gen_delay
+
+function void lsu_driver::lsu_uop_gen_trap(ref lsu_transaction uop_tr);
+  // Randomize uop_tr.trap_occured: forced to 0 when trap_en is off, forced to 1 when always_trap is set, otherwise weighted 9:1 towards no trap. randomize() is checked with if(!...) instead of assert() so it still runs when assertions are disabled.
+  if(trap_en) begin
+    if(always_trap) begin
+      if(!uop_tr.randomize(trap_occured) with {
+        trap_occured == 1;
+      }) `uvm_error("LSU_DRV", "Failed to randomize trap_occured (always_trap)")
+    end else begin
+      if(!uop_tr.randomize(trap_occured) with {
+        trap_occured dist {
+          // 0 := 99,
+          0 := 9,
+          1 := 1
+        };
+      }) `uvm_error("LSU_DRV", "Failed to randomize trap_occured (weighted)")
+    end
+  end else begin
+    if(!uop_tr.randomize(trap_occured) with {
+      trap_occured == 0;
+    }) `uvm_error("LSU_DRV", "Failed to randomize trap_occured (traps disabled)")
+  end
+endfunction: lsu_uop_gen_trap
+
function void lsu_driver::final_phase(uvm_phase phase);
super.final_phase(phase);
if(inst_queue.size()>0) begin
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_transaction.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_transaction.sv
index 783c12f..5e44515 100644
--- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_transaction.sv
+++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_transaction.sv
@@ -25,15 +25,15 @@
bit data_vreg_valid;
int data_vreg_idx;
eew_e data_vreg_eew;
- int data_vreg_byte_start;
- int data_vreg_byte_end;
+ int data_vreg_byte_head;
+ int data_vreg_byte_tail;
// vs2
bit vidx_vreg_valid;
int vidx_vreg_idx;
eew_e vidx_vreg_eew;
- int vidx_vreg_byte_start;
- int vidx_vreg_byte_end;
+ int vidx_vreg_byte_head;
+ int vidx_vreg_byte_tail;
/* info about load/store address/data */
bit lsu_slot_addr_valid;
@@ -82,15 +82,15 @@
`uvm_field_int(data_vreg_idx,UVM_ALL_ON)
`uvm_field_enum(eew_e,data_vreg_eew,UVM_ALL_ON)
- `uvm_field_int(data_vreg_byte_start,UVM_ALL_ON)
- `uvm_field_int(data_vreg_byte_end ,UVM_ALL_ON)
+ `uvm_field_int(data_vreg_byte_head,UVM_ALL_ON)
+ `uvm_field_int(data_vreg_byte_tail,UVM_ALL_ON)
`uvm_field_int(data_vreg_valid,UVM_ALL_ON)
if(is_indexed) begin
`uvm_field_int(vidx_vreg_idx ,UVM_ALL_ON)
`uvm_field_enum(eew_e, vidx_vreg_eew,UVM_ALL_ON)
- `uvm_field_int(vidx_vreg_byte_start,UVM_ALL_ON)
- `uvm_field_int(vidx_vreg_byte_end ,UVM_ALL_ON)
+ `uvm_field_int(vidx_vreg_byte_head,UVM_ALL_ON)
+ `uvm_field_int(vidx_vreg_byte_tail,UVM_ALL_ON)
`uvm_field_int(vidx_vreg_valid,UVM_ALL_ON)
end
`uvm_field_int(lsu_slot_addr_valid, UVM_ALL_ON)
@@ -125,14 +125,14 @@
data_vreg_valid = 0;
data_vreg_idx = 0;
data_vreg_eew = EEW_NONE;
- data_vreg_byte_start = 0;
- data_vreg_byte_end = 0;
+ data_vreg_byte_head = 0;
+ data_vreg_byte_tail = 0;
vidx_vreg_valid = 0;
vidx_vreg_idx = 0;
vidx_vreg_eew = EEW_NONE;
- vidx_vreg_byte_start = 0;
- vidx_vreg_byte_end = 0;
+ vidx_vreg_byte_head = 0;
+ vidx_vreg_byte_tail = 0;
lsu_slot_addr_valid = 1'b0;
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv
index 71627c3..91c006e 100644
--- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv
+++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv
@@ -245,23 +245,32 @@
// VRF
if(rvs_if.rt_vrf_valid_rob2rt[rt_idx]) begin
+ int pos = 0;
vrf_overlap = 0;
rt_vrf_byte_strobe = rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_strobe;
for(int i=0; i<`VLENB; i++) begin
rt_vrf_bit_strobe[i*8 +: 8] = {8{rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_strobe[i]}};
end
foreach(tr.rt_vrf_index[i]) begin
+ // merge: this retire writes a vrf index already recorded in tr, so combine its strobe and data into that entry
if(tr.rt_vrf_index[i] == rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index) begin
tr.rt_vrf_strobe[i] |= rt_vrf_byte_strobe;
tr.rt_vrf_data[i] = rt_vrf_bit_strobe & rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_data | ~rt_vrf_bit_strobe & tr.rt_vrf_data[i];
vrf_overlap = 1;
`uvm_info(get_type_name(), $sformatf("Uops %0d also write vrf[%0d].", rt_idx, rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index), UVM_HIGH)
end
+ // sort: track the insert position that keeps tr.rt_vrf_index in ascending order
+ if(tr.rt_vrf_index[i] > rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index) begin
+ pos = i;
+ break;
+ end else begin
+ pos = i+1;
+ end
end
if(!vrf_overlap) begin
- tr.rt_vrf_index.push_back(rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index);
- tr.rt_vrf_strobe.push_back(rt_vrf_byte_strobe);
- tr.rt_vrf_data.push_back(rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_data);
+ tr.rt_vrf_index.insert(pos, rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index);
+ tr.rt_vrf_strobe.insert(pos, rt_vrf_byte_strobe);
+ tr.rt_vrf_data.insert(pos, rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_data);
end
end
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv
index 7286bb1..49a77a1 100644
--- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv
+++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv
@@ -2728,7 +2728,7 @@
if(this.lsu_nf == NF1) begin
inst = $sformatf("%se%0d", inst, lsu_eew);
end else begin
- inst = $sformatf("%s%0de%0d", inst, lsu_nf+1, lsu_eew);
+ inst = $sformatf("%sseg%0de%0d", inst, lsu_nf+1, lsu_eew);
end
end
endcase
@@ -2737,7 +2737,7 @@
if(this.lsu_nf == NF1) begin
inst = $sformatf("%se%0d", inst, lsu_eew);
end else begin
- inst = $sformatf("%s%0de%0d", inst, lsu_nf+1, lsu_eew);
+ inst = $sformatf("%sseg%0de%0d", inst, lsu_nf+1, lsu_eew);
end
end
LSU_UI,
@@ -2745,7 +2745,7 @@
if(this.lsu_nf == NF1) begin
inst = $sformatf("%sei%0d", inst, lsu_eew);
end else begin
- inst = $sformatf("%s%0dei%0d", inst, lsu_nf+1, lsu_eew);
+ inst = $sformatf("%sseg%0dei%0d", inst, lsu_nf+1, lsu_eew);
end
end
endcase
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv
index 4337683..5c87f1a 100644
--- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv
+++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv
@@ -413,6 +413,7 @@
end
`uvm_info("MDL",$sformatf("Prepare done!\nelm_idx_max=%0d\ndest_eew=%0d\nsrc2_eew=%0d\nsrc1_eew=%0d\ndest_emul=%2.4f\nsrc2_emul=%2.4f\nsrc1_emul=%2.4f\n",elm_idx_max,dest_eew,src2_eew,src1_eew,dest_emul,src2_emul,src1_emul),UVM_LOW)
+ `uvm_info("MDL",$sformatf("pc = 0x%8x, v0 = 0x%16x\n", inst_tr.pc, this.vrf[0]),UVM_LOW)
// 2.2 Check VRF index
dest_reg_idx_base = inst_tr.dest_idx_base;
@@ -1773,6 +1774,7 @@
//------------------------------------------------------------------------------
class lsu_processor extends uvm_component;
+ parameter int MAX_SEG = 8;
`uvm_component_utils(lsu_processor)
int dest_eew; real dest_emul;
@@ -1781,6 +1783,9 @@
int src1_eew; real src1_emul;
int src0_eew; real src0_emul;
+ int data_eew; real data_emul;
+ int vidx_eew; real vidx_emul;
+
vrf_t [31:0] vrf_temp;
int dest_reg_idx_base = 0;
@@ -1800,11 +1805,16 @@
int address;
int elm_idx_max;
+ int seg_idx;
int seg_num;
int seg_size; // byte size
int data_size; // byte size
int vidx_size; // byte size
+ int elm_idx_head[MAX_SEG]; // elm pointer
+ int elm_idx_tail[MAX_SEG]; // elm pointer
+ int data_byte_idx[MAX_SEG];
+ int vidx_byte_idx[MAX_SEG];
int uops_num;
int elm_per_uop;
@@ -1815,12 +1825,28 @@
endfunction: new
function void exe(rvv_behavior_model rvm, ref rvs_transaction inst_tr);
- int uops_cnt = 0;
+ int seg_switch_gap = 0;
+ int switch_seg = 0;
+
decode(inst_tr);
`uvm_info("MDL/LSU", "LSU decode done", UVM_HIGH)
`uvm_info("MDL/LSU", $sformatf("\n%s", inst_tr.sprint()), UVM_HIGH);
+ if(data_emul < 1) begin
+ seg_switch_gap = data_emul * `VLENB;
+ end else begin
+ seg_switch_gap = `VLENB;
+ end
+
for(int seg_idx=0; seg_idx<seg_num; seg_idx++) begin
+ elm_idx_head[seg_idx] = 0;
+ elm_idx_tail[seg_idx] = elm_per_uop;
+ data_byte_idx[seg_idx] = 0;
+ vidx_byte_idx[seg_idx] = 0;
+ end
+
+ seg_idx = 0;
+ for(int uops_idx=0; uops_idx<uops_num; uops_idx++) begin
dest_reg_idx_base = (inst_tr.dest_type == VRF) ? (inst_tr.dest_idx + seg_idx * int'($ceil(dest_emul))) : (inst_tr.dest_idx);
src3_reg_idx_base = (inst_tr.src3_type == VRF) ? (inst_tr.src3_idx + seg_idx * int'($ceil(src3_emul))) : (inst_tr.src3_idx);
src2_reg_idx_base = (inst_tr.src2_idx);
@@ -1829,7 +1855,13 @@
`uvm_info("MDL/LSU", $sformatf("seg_idx=%0d: dest_reg_idx_base=%0d, src3_reg_idx_base=%0d, src2_reg_idx_base=%0d, src1_reg_idx_base=%0d",
seg_idx, dest_reg_idx_base, src3_reg_idx_base, src2_reg_idx_base, src1_reg_idx_base), UVM_HIGH)
`uvm_info("MDL/LSU", $sformatf("vreg[0]=0x%16h", rvm.vrf[0]), UVM_HIGH)
- for(int elm_idx=0; elm_idx<elm_idx_max; elm_idx++) begin
+
+ `uvm_info("MDL/LSU", $sformatf("seg_idx = %0d\n", seg_idx ), UVM_HIGH)
+ `uvm_info("MDL/LSU", $sformatf("seg_switch_gap = %0d\n", seg_switch_gap), UVM_HIGH)
+ `uvm_info("MDL/LSU", $sformatf("elm_idx_head[%0d] = %0d\n", seg_idx, elm_idx_head[seg_idx]), UVM_HIGH)
+ `uvm_info("MDL/LSU", $sformatf("elm_idx_tail[%0d] = %0d\n", seg_idx, elm_idx_tail[seg_idx]), UVM_HIGH)
+
+ for(int elm_idx=elm_idx_head[seg_idx]; elm_idx<elm_idx_tail[seg_idx]; elm_idx++) begin
// fetch
dest = rvm.elm_fetch(inst_tr.dest_type, dest_reg_idx_base, elm_idx, dest_eew);
src3 = rvm.elm_fetch(inst_tr.src3_type, src3_reg_idx_base, elm_idx, src3_eew);
@@ -1841,7 +1873,8 @@
`uvm_info("MDL/LSU", $sformatf("dest=0x%8x, src3=0x%8x, src2=0x%8x, src1=0x%8x, src0=0x%8x", dest, src3, src2, src1, src0), UVM_HIGH);
update_addr(inst_tr, seg_idx, seg_size, elm_idx, data_size, src2, src1);
- if(rvm.trap_occured && uops_cnt<rvm.trap_occured_uop || !rvm.trap_occured) begin
+
+ if(rvm.trap_occured && uops_idx<rvm.trap_occured_uop || !rvm.trap_occured) begin
if(elm_idx<vstart) begin
// pre-start
case(inst_tr.inst_type)
@@ -1884,10 +1917,28 @@
else
rvm.vstart = rvm.trap_queue[0].vstart;
end
- if(elm_idx%elm_per_uop == elm_per_uop-1) uops_cnt++;
+
+ data_byte_idx[seg_idx] += data_eew/8;
+ vidx_byte_idx[seg_idx] += vidx_eew/8;
`uvm_info("MDL/LSU", "\n---------------------------------------------------------------------------------------------------------------------------------\n", UVM_HIGH)
+ end // elm-loop
+
+ switch_seg = data_byte_idx[seg_idx] >= seg_switch_gap;
+
+ data_byte_idx[seg_idx] = (data_byte_idx[seg_idx] % `VLENB);
+ vidx_byte_idx[seg_idx] = (vidx_byte_idx[seg_idx] % `VLENB);
+
+ elm_idx_head[seg_idx] += elm_per_uop;
+ elm_idx_tail[seg_idx] += elm_per_uop;
+
+ if(switch_seg) begin
+ if(seg_idx == seg_num-1) begin
+ seg_idx = 0;
+ end else begin
+ seg_idx += 1;
+ end
end
- end // seg-loop
+ end // uops-loop
endfunction
function bit decode(ref rvs_transaction inst_tr);
@@ -1927,14 +1978,22 @@
case(inst_tr.inst_type)
LD: begin
- seg_size = (seg_num) * dest_eew / 8;
+ seg_size = (seg_num) * dest_eew / 8;
data_size = dest_eew / 8;
vidx_size = src2_eew / 8;
+ data_eew = dest_eew;
+ vidx_eew = src2_eew;
+ data_emul = dest_emul;
+ vidx_emul = src2_emul;
end
ST: begin
seg_size = (seg_num) * src3_eew / 8;
data_size = src3_eew / 8;
vidx_size = src2_eew / 8;
+ data_eew = src3_eew;
+ vidx_eew = src2_eew;
+ data_emul = src3_emul;
+ vidx_emul = src2_emul;
end
endcase
return 0;
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv
index fc83666..ea0e6a3 100644
--- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv
+++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv
@@ -362,6 +362,10 @@
lsu_tr = mem_queue_lsu.pop_front();
mdl_tr = mem_queue_mdl.pop_front();
`uvm_info("MEM_RECORDER", $sformatf("\nMEM check start. ====================================================================================================\n"),UVM_HIGH)
+ `uvm_info("MEM_RECORDER", "lsu memory tr:", UVM_HIGH)
+ `uvm_info("MEM_RECORDER", lsu_tr.sprint(), UVM_HIGH)
+ `uvm_info("MEM_RECORDER", "mdl memory tr:", UVM_HIGH)
+ `uvm_info("MEM_RECORDER", mdl_tr.sprint(), UVM_HIGH)
if(lsu_tr.kind != mdl_tr.kind) begin
`uvm_error("MEM_CHCKER", $sformatf("Memory access kind mismatch: lsu = %s, mdl = %s", lsu_tr.kind.name(), mdl_tr.kind.name()))
err++;