Adjust vector register order in Decoder for segment load/store. Update rvv_backend_tb for lsu changes. Change-Id: I60d55196d033b70f5e64baddf17ae2d40e96c574
diff --git a/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv b/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv index 61b3e2c..dd4febd 100644 --- a/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv +++ b/hdl/verilog/rvv/design/rvv_backend_decode_unit_ari.sv
@@ -102,15 +102,18 @@ logic [`NUM_DE_UOP-1:0] force_vta_agnostic; logic [`NUM_DE_UOP-1:0] vm; logic [`NUM_DE_UOP-1:0] v0_valid; - logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vd_index; + logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vd_index; + logic [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0] vd_offset; EEW_e [`NUM_DE_UOP-1:0] vd_eew; logic [`NUM_DE_UOP-1:0] vd_valid; logic [`NUM_DE_UOP-1:0] vs3_valid; - logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs1; + logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs1; + logic [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0] vs1_offset; EEW_e [`NUM_DE_UOP-1:0] vs1_eew; logic [`NUM_DE_UOP-1:0] vs1_index_valid; logic [`NUM_DE_UOP-1:0] vs1_opcode_valid; - logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs2_index; + logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs2_index; + logic [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0] vs2_offset; EEW_e [`NUM_DE_UOP-1:0] vs2_eew; logic [`NUM_DE_UOP-1:0] vs2_valid; logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] rd_index; @@ -4182,14 +4185,12 @@ end end - // update vd_index, eew and valid + // update vd_offset and valid always_comb begin - // initial - vd_index = 'b0; - vd_eew = EEW_NONE; - vd_valid = 'b0; - - for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD + vd_offset = 'b0; + vd_valid = 'b0; + + for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD_OFFSET case(1'b1) valid_opi: begin case(funct6_ari.ari_funct6) @@ -4212,9 +4213,8 @@ OPIVV, OPIVX, OPIVI: begin - vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vd_valid[i] = 1'b1; end endcase end @@ -4230,9 +4230,8 @@ case(inst_funct3) OPIVV, OPIVX: begin - vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vd_valid[i] = 1'b1; end endcase end @@ -4242,9 +4241,8 @@ case(inst_funct3) OPIVX, OPIVI: begin - 
vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vd_valid[i] = 1'b1; end endcase end @@ -4258,9 +4256,8 @@ OPIVV, OPIVX, OPIVI: begin - vd_index[i] = inst_vd; - vd_eew[i] = eew_vd; - vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; + vd_offset[i] = 'b0; + vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; end endcase end @@ -4271,9 +4268,8 @@ case(inst_funct3) OPIVV, OPIVX: begin - vd_index[i] = inst_vd; - vd_eew[i] = eew_vd; - vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; + vd_offset[i] = 'b0; + vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; end endcase end @@ -4283,9 +4279,8 @@ case(inst_funct3) OPIVX, OPIVI: begin - vd_index[i] = inst_vd; - vd_eew[i] = eew_vd; - vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; + vd_offset[i] = 'b0; + vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; end endcase end @@ -4298,9 +4293,8 @@ OPIVV, OPIVX, OPIVI: begin - vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:1]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; + vd_valid[i] = 1'b1; end endcase end @@ -4308,9 +4302,8 @@ VWREDSUMU, VWREDSUM: begin if(inst_funct3==OPIVV) begin - vd_index[i] = inst_vd; - vd_eew[i] = eew_vd; - vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; + vd_offset[i] = 'b0; + vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; end end @@ -4322,24 +4315,21 @@ {EMUL2,EMUL2}, {EMUL4,EMUL4}, {EMUL8,EMUL8}: begin - vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vd_valid[i] = 1'b1; end {EMUL2,EMUL1}, {EMUL4,EMUL2}, 
{EMUL8,EMUL4}: begin - vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:1]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; + vd_valid[i] = 1'b1; end endcase end OPIVX, OPIVI: begin - vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vd_valid[i] = 1'b1; end endcase end @@ -4382,9 +4372,8 @@ case(inst_funct3) OPMVV, OPMVX: begin - vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vd_valid[i] = 1'b1; end endcase end @@ -4393,9 +4382,8 @@ VCOMPRESS: begin case(inst_funct3) OPMVV: begin - vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vd_valid[i] = 1'b1; end endcase end @@ -4405,9 +4393,8 @@ VSLIDE1DOWN: begin case(inst_funct3) OPMVX: begin - vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vd_valid[i] = 1'b1; end endcase end @@ -4422,9 +4409,8 @@ VREDXOR: begin case(inst_funct3) OPMVV: begin - vd_index[i] = inst_vd; - vd_eew[i] = eew_vd; - vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; + vd_offset[i] = 'b0; + vd_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; end endcase end @@ -4439,9 +4425,8 @@ VMXNOR: begin case(inst_funct3) OPMVV: begin - vd_index[i] = inst_vd; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = 'b0; + vd_valid[i] = 1'b1; end endcase end @@ -4449,9 +4434,8 @@ VWXUNARY0: begin case(inst_funct3) OPMVX: begin - vd_index[i] = inst_vd; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = 'b0; + vd_valid[i] = 1'b1; end 
endcase end @@ -4463,15 +4447,13 @@ VMSBF, VMSIF, VMSOF: begin - vd_index[i] = inst_vd; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = 'b0; + vd_valid[i] = 1'b1; end VIOTA, VID: begin - vd_index[i] = inst_vd+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vd_eew[i] = eew_vd; - vd_valid[i] = 1'b1; + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vd_valid[i] = 1'b1; end endcase end @@ -4483,6 +4465,14 @@ end end + // update vd_index and eew + always_comb begin + for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD + vd_index[i] = inst_vd + {2'b0, vd_offset[i]}; + vd_eew[i] = eew_vd; + end + end + // some uop need vd as the vs3 vector operand always_comb begin // initial @@ -4610,14 +4600,12 @@ end end - // update vs1 + // update vs1_offset and valid always_comb begin - // initial - vs1 = 'b0; - vs1_eew = EEW_NONE; + vs1_offset = 'b0; vs1_index_valid = 'b0; - for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS1 + for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS1_OFFSET case(inst_funct3) OPIVV: begin case(funct6_ari.ari_funct6) @@ -4652,25 +4640,22 @@ VSSRL, VSSRA, VRGATHER: begin - vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = 1'b1; + vs1_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs1_index_valid[i] = 1'b1; end VNSRL, VNSRA, VNCLIPU, VNCLIP: begin - vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1]; - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = 1'b1; + vs1_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; + vs1_index_valid[i] = 1'b1; end VWREDSUMU, VWREDSUM: begin - vs1[i] = inst_vs1; - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; + vs1_offset[i] = 'b0; + vs1_index_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; end VSLIDEUP_RGATHEREI16: begin @@ -4679,16 +4664,14 @@ {EMUL2,EMUL2}, {EMUL4,EMUL4}, {EMUL8,EMUL8}: begin - vs1[i] = 
inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = 1'b1; + vs1_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs1_index_valid[i] = 1'b1; end {EMUL2,EMUL1}, {EMUL4,EMUL2}, {EMUL8,EMUL4}: begin - vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1]; - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = 1'b1; + vs1_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; + vs1_index_valid[i] = 1'b1; end endcase end @@ -4711,17 +4694,15 @@ VWMACCU, VWMACC, VWMACCSU: begin - vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:1]; - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = 1'b1; + vs1_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; + vs1_index_valid[i] = 1'b1; end VXUNARY0, VWXUNARY0, VMUNARY0: begin - vs1[i] = inst_vs1; // vs1 is regarded as opcode - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = 'b0; + vs1_offset[i] = 'b0; // vs1 is regarded as opcode + vs1_index_valid[i] = 'b0; end VMUL, @@ -4740,9 +4721,8 @@ VAADD, VASUBU, VASUB: begin - vs1[i] = inst_vs1+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = 1'b1; + vs1_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs1_index_valid[i] = 1'b1; end // reduction @@ -4754,9 +4734,8 @@ VREDAND, VREDOR, VREDXOR: begin - vs1[i] = inst_vs1; - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; + vs1_offset[i] = 'b0; + vs1_index_valid[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_index_max; end VMAND, @@ -4767,16 +4746,14 @@ VMNOR, VMORN, VMXNOR: begin - vs1[i] = inst_vs1; - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = 1'b1; + vs1_offset[i] = 'b0; + vs1_index_valid[i] = 1'b1; end VCOMPRESS: begin if (uop_index_current[i][`UOP_INDEX_WIDTH-1:0] == uop_vstart) begin - vs1[i] = inst_vs1; - vs1_eew[i] = eew_vs1; - vs1_index_valid[i] = 1'b1; + vs1_offset[i] = 'b0; + vs1_index_valid[i] = 1'b1; end end endcase @@ -4785,6 +4762,14 @@ 
end end + // update vs1(index or opcode) and eew + always_comb begin + for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS1 + vs1[i] = inst_vs1 + {2'b0, vs1_offset[i]}; + vs1_eew[i] = eew_vs1; + end + end + // some uop will use vs1 field as an opcode to decode always_comb begin // initial @@ -4830,14 +4815,13 @@ end end - // update vs2 index, eew and valid + // update vs2 offset and valid always_comb begin // initial - vs2_index = 'b0; - vs2_eew = EEW_NONE; + vs2_offset = 'b0; vs2_valid = 'b0; - for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2 + for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2_OFFSET case(1'b1) valid_opi: begin // OPI* @@ -4869,9 +4853,8 @@ OPIVV, OPIVX, OPIVI: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs2_valid[i] = 1'b1; end endcase end @@ -4890,9 +4873,8 @@ case(inst_funct3) OPIVV, OPIVX: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs2_valid[i] = 1'b1; end endcase end @@ -4904,9 +4886,8 @@ case(inst_funct3) OPIVX, OPIVI: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs2_valid[i] = 1'b1; end endcase end @@ -4917,8 +4898,7 @@ OPIVX, OPIVI: begin if(inst_vm==1'b0) begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs2_eew[i] = eew_vs2; + vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; vs2_valid[i] = 1'b1; end end @@ -4929,9 +4909,8 @@ VWREDSUM: begin case(inst_funct3) OPIVV: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs2_valid[i] = 1'b1; end endcase end @@ 
-4944,24 +4923,21 @@ {EMUL2,EMUL2}, {EMUL4,EMUL4}, {EMUL8,EMUL8}: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs2_eew[i] = eew_vs2; + vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; vs2_valid[i] = 1'b1; end {EMUL2,EMUL1}, {EMUL4,EMUL2}, {EMUL8,EMUL4}: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1]; - vs2_eew[i] = eew_vs2; + vs2_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; vs2_valid[i] = 1'b1; end endcase end OPIVX, OPIVI: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs2_valid[i] = 1'b1; end endcase end @@ -4984,9 +4960,8 @@ case(inst_funct3) OPMVV, OPMVX: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; + vs2_valid[i] = 1'b1; end endcase end @@ -5014,9 +4989,8 @@ case(inst_funct3) OPMVV, OPMVX: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs2_valid[i] = 1'b1; end endcase end @@ -5028,20 +5002,17 @@ {EMUL1,EMUL1}, {EMUL2,EMUL1}, {EMUL4,EMUL1}: begin - vs2_index[i] = inst_vs2; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = 'b0; + vs2_valid[i] = 1'b1; end {EMUL4,EMUL2}, {EMUL8,EMUL4}: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; + vs2_valid[i] = 1'b1; end {EMUL8,EMUL2}: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:2]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = {2'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:2]}; + vs2_valid[i] = 1'b1; end endcase 
end @@ -5051,9 +5022,8 @@ VWMACCUS: begin case(inst_funct3) OPMVX: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:1]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; + vs2_valid[i] = 1'b1; end endcase end @@ -5070,9 +5040,8 @@ VCOMPRESS: begin case(inst_funct3) OPMVV: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs2_valid[i] = 1'b1; end endcase end @@ -5087,9 +5056,8 @@ VMXNOR: begin case(inst_funct3) OPMVV: begin - vs2_index[i] = inst_vs2; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = 'b0; + vs2_valid[i] = 1'b1; end endcase end @@ -5102,9 +5070,8 @@ VMSIF, VMSOF, VIOTA: begin - vs2_index[i] = inst_vs2; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = 'b0; + vs2_valid[i] = 1'b1; end endcase end @@ -5115,9 +5082,8 @@ VSLIDE1DOWN: begin case(inst_funct3) OPMVX: begin - vs2_index[i] = inst_vs2+uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vs2_valid[i] = 1'b1; end endcase end @@ -5127,6 +5093,14 @@ end end + // update vs2 index and eew + always_comb begin + for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2 + vs2_index[i] = inst_vs2 + {2'b0, vs2_offset[i]}; + vs2_eew[i] = eew_vs2; + end + end + // update rd_index and valid always_comb begin // initial
diff --git a/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv b/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv index a940fe2..c39222e 100644 --- a/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv +++ b/hdl/verilog/rvv/design/rvv_backend_decode_unit_lsu.sv
@@ -95,6 +95,7 @@ logic [`NUM_DE_UOP-1:0] vm; logic [`NUM_DE_UOP-1:0] v0_valid; logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vd_index; + logic [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0] vd_offset; EEW_e [`NUM_DE_UOP-1:0] vd_eew; logic [`NUM_DE_UOP-1:0] vd_valid; logic [`NUM_DE_UOP-1:0] vs3_valid; @@ -103,6 +104,7 @@ logic [`NUM_DE_UOP-1:0] vs1_index_valid; logic [`NUM_DE_UOP-1:0] vs1_opcode_valid; logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] vs2_index; + logic [`NUM_DE_UOP-1:0][`UOP_INDEX_WIDTH-1:0] vs2_offset; EEW_e [`NUM_DE_UOP-1:0] vs2_eew; logic [`NUM_DE_UOP-1:0] vs2_valid; logic [`NUM_DE_UOP-1:0][`REGFILE_INDEX_WIDTH-1:0] rd_index; @@ -228,10 +230,11 @@ US_US, US_FF: begin case(inst_nf) - // EMUL_vd = ceil( inst_funct3/csr_sew*csr_lmul ) - // emul_max_vd_vs2 = EMUL_vd - // emul_vd_nf = EMUL_vd*NF - // EMUL_max = NF*emul_max_vd_vs2 + // emul_vd = ceil(inst_funct3/csr_sew*csr_lmul) + // emul_vs2: no emul_vs2 for unit + // emul_max_vd_vs2 = max(emul_vd,emul_vs2) = emul_vd + // emul_vd_nf = NF*emul_vd + // emul_max = NF*emul_max_vd_vs2 NF1: begin case({inst_funct3,csr_sew}) // 1:1 @@ -1028,8 +1031,11 @@ CS: begin case(inst_nf) - // EMUL_vd = ceil( inst_funct3/csr_sew*csr_lmul ) - // EMUL_max = NF*EMUL_vd + // emul_vd = ceil(inst_funct3/csr_sew*csr_lmul) + // emul_vs2: no emul_vs2 for stride + // emul_max_vd_vs2 = max(emul_vd,emul_vs2) = emul_vd + // emul_vd_nf = NF*emul_vd + // emul_max = NF*emul_max_vd_vs2 NF1: begin case({inst_funct3,csr_sew}) // 1:1 @@ -1782,10 +1788,11 @@ IU, IO: begin case(inst_nf) - // EMUL_vd = ceil( csr_lmul ) - // EMUL_vs2 = ceil( inst_funct3/csr_sew*csr_lmul ) + // emul_vd = ceil(csr_lmul) + // emul_vs2 = ceil(inst_funct3/csr_sew*csr_lmul) // emul_max_vd_vs2 = max(EMUL_vd,EMUL_vs2) - // EMUL_max = NF*emul_max_vd_vs2 + // emul_vd_nf = NF*emul_vd + // emul_max = NF*emul_max_vd_vs2 NF1: begin case({inst_funct3,csr_sew}) // 1:1 @@ -3233,63 +3240,194 @@ end end - // update vd_index and eew + // update vd_offset always_comb begin - 
for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD + for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD_OFFSET // initial - vd_index[i] = 'b0; - vd_eew[i] = eew_vd; + vd_offset[i] = 'b0; case(inst_funct6[2:0]) UNIT_STRIDE: begin case(inst_umop) US_REGULAR, - US_FAULT_FIRST, + US_FAULT_FIRST: begin + case({inst_nf,emul_vd}) + {NF2,EMUL4}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd4; + 3'd2 : vd_offset[i] = 3'd1; + 3'd3 : vd_offset[i] = 3'd5; + 3'd4 : vd_offset[i] = 3'd2; + 3'd5 : vd_offset[i] = 3'd6; + 3'd6 : vd_offset[i] = 3'd3; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + {NF2,EMUL2}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd2; + 3'd2 : vd_offset[i] = 3'd1; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + {NF3,EMUL2}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd2; + 3'd2 : vd_offset[i] = 3'd4; + 3'd3 : vd_offset[i] = 3'd1; + 3'd4 : vd_offset[i] = 3'd3; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + {NF4,EMUL2}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd2; + 3'd2 : vd_offset[i] = 3'd4; + 3'd3 : vd_offset[i] = 3'd6; + 3'd4 : vd_offset[i] = 3'd1; + 3'd5 : vd_offset[i] = 3'd3; + 3'd6 : vd_offset[i] = 3'd5; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + default: + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end US_WHOLE_REGISTER: begin - vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; end US_MASK: begin - vd_index[i] = inst_vd; + vd_offset[i] = 'b0; end endcase end CONSTANT_STRIDE: begin - vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + case({inst_nf,emul_vd}) + {NF2,EMUL4}: begin + 
case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd4; + 3'd2 : vd_offset[i] = 3'd1; + 3'd3 : vd_offset[i] = 3'd5; + 3'd4 : vd_offset[i] = 3'd2; + 3'd5 : vd_offset[i] = 3'd6; + 3'd6 : vd_offset[i] = 3'd3; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + {NF2,EMUL2}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd2; + 3'd2 : vd_offset[i] = 3'd1; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + {NF3,EMUL2}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd2; + 3'd2 : vd_offset[i] = 3'd4; + 3'd3 : vd_offset[i] = 3'd1; + 3'd4 : vd_offset[i] = 3'd3; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + {NF4,EMUL2}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd2; + 3'd2 : vd_offset[i] = 3'd4; + 3'd3 : vd_offset[i] = 3'd6; + 3'd4 : vd_offset[i] = 3'd1; + 3'd5 : vd_offset[i] = 3'd3; + 3'd6 : vd_offset[i] = 3'd5; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + default: + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase end UNORDERED_INDEX, ORDERED_INDEX: begin - case({inst_funct3,csr_sew}) + case({eew_vs2,eew_vd}) // EEW_vs2:EEW_vd=1:1 - {SEW_8,SEW8}, - {SEW_16,SEW16}, - {SEW_32,SEW32}, + {EEW8,EEW8}, + {EEW16,EEW16}, + {EEW32,EEW32}, // 1:2 - {SEW_8,SEW16}, - {SEW_16,SEW32}, + {EEW8,EEW16}, + {EEW16,EEW32}, // 1:4 - {SEW_8,SEW32}: begin - vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + {EEW8,EEW32}: begin + case({inst_nf,emul_vd}) + {NF2,EMUL4}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd4; + 3'd2 : vd_offset[i] = 3'd1; + 3'd3 : vd_offset[i] = 3'd5; + 3'd4 : vd_offset[i] = 3'd2; + 3'd5 : vd_offset[i] = 3'd6; + 3'd6 : vd_offset[i] = 3'd3; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + 
endcase + end + {NF2,EMUL2}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd2; + 3'd2 : vd_offset[i] = 3'd1; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + {NF3,EMUL2}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd2; + 3'd2 : vd_offset[i] = 3'd4; + 3'd3 : vd_offset[i] = 3'd1; + 3'd4 : vd_offset[i] = 3'd3; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + {NF4,EMUL2}: begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:0]) + 3'd1 : vd_offset[i] = 3'd2; + 3'd2 : vd_offset[i] = 3'd4; + 3'd3 : vd_offset[i] = 3'd6; + 3'd4 : vd_offset[i] = 3'd1; + 3'd5 : vd_offset[i] = 3'd3; + 3'd6 : vd_offset[i] = 3'd5; + default: vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase + end + default: + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; + endcase end // 2:1 - {SEW_16,SEW8}, - {SEW_32,SEW16}, + {EEW16,EEW8}, + {EEW32,EEW16}, // 4:1 - {SEW_32,SEW8}: begin + {EEW32,EEW8}: begin case({emul_vs2,emul_vd}) - {EMUL1,EMUL1}: begin - vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; - end + {EMUL1,EMUL1}: + vd_offset[i] = uop_index_current[i][`UOP_INDEX_WIDTH-1:0]; {EMUL2,EMUL1}, - {EMUL4,EMUL2}, - {EMUL8,EMUL4}: begin - vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:1]; + {EMUL8,EMUL4}: + vd_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; + {EMUL4,EMUL2}: begin + if (inst_nf==NF2) begin + case(uop_index_current[i][`UOP_INDEX_WIDTH-1:1]) + 2'd1 : vd_offset[i] = 3'd2; + 2'd2 : vd_offset[i] = 3'd1; + default: vd_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; + endcase + end + else + vd_offset[i] = {1'b0, uop_index_current[i][`UOP_INDEX_WIDTH-1:1]}; end {EMUL4,EMUL1}, - {EMUL8,EMUL2}: begin - vd_index[i] = inst_vd + uop_index_current[i][`UOP_INDEX_WIDTH-1:2]; - end + {EMUL8,EMUL2}: + vd_offset[i] = {2'b0, 
uop_index_current[i][`UOP_INDEX_WIDTH-1:2]}; endcase end endcase @@ -3298,6 +3436,14 @@ end end + // update vd_index and eew + always_comb begin + for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VD + vd_index[i] = inst_vd + {2'b0, vd_offset[i]}; + vd_eew[i] = eew_vd; + end + end + // update vd_valid and vs3_valid // some uop need vd as the vs3 vector operand always_comb begin @@ -3330,83 +3476,126 @@ end end - // update vs2 index, eew and valid + // update vs2 offset and valid always_comb begin // initial - vs2_index = 'b0; - vs2_eew = EEW_NONE; - vs2_valid = 'b0; + vs2_offset = 'b0; + vs2_valid = 'b0; - for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2 + for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2_OFFSET case(inst_funct6[2:0]) UNORDERED_INDEX, ORDERED_INDEX: begin - case({inst_funct3,csr_sew}) + case({eew_vs2,eew_vd}) // EEW_vs2:EEW_vd=1:1 - {SEW_8,SEW8}, - {SEW_16,SEW16}, - {SEW_32,SEW32}, - // 2:1 - {SEW_16,SEW8}, - {SEW_32,SEW16}, - // 4:1 - {SEW_32,SEW8}: begin + {EEW8,EEW8}, + {EEW16,EEW16}, + {EEW32,EEW32}: begin case(emul_vs2) - EMUL1: begin - vs2_index[i] = inst_vs2; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; - end EMUL2: begin - vs2_index[i] = inst_vs2+uop_index_current[i][0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + case(inst_nf) + NF2: vs2_offset[i] = {2'b0, uop_index_current[i][1]}; + NF3: vs2_offset[i] = (uop_index_current[i]>='d3) ? 3'd1 : 3'b0; + NF4: vs2_offset[i] = {2'b0, uop_index_current[i][2]}; + default: vs2_offset[i] = {2'b0, uop_index_current[i][0]}; + endcase + vs2_valid[i] = 1'b1; end EMUL4: begin - vs2_index[i] = inst_vs2+uop_index_current[i][1:0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = (inst_nf==NF2) ? 
{1'b0, uop_index_current[i][2:1]} : {1'b0, uop_index_current[i][1:0]}; + vs2_valid[i] = 1'b1; end EMUL8: begin - vs2_index[i] = inst_vs2+uop_index_current[i][2:0]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = uop_index_current[i][2:0]; + vs2_valid[i] = 1'b1; + end + default: begin //EMUL1 + vs2_offset[i] = 'b0; + vs2_valid[i] = 1'b1; + end + endcase + end + // 2:1 + {EEW16,EEW8}, + {EEW32,EEW16}: begin + case(emul_vs2) + EMUL2: begin + case(inst_nf) + NF2: vs2_offset[i] = {1'b0, uop_index_current[i][2], uop_index_current[i][0]}; + NF3, + NF4: vs2_offset[i] = {2'b0, uop_index_current[i][0]}; + default: vs2_offset[i] = uop_index_current[i]; //NF1 + endcase + vs2_valid[i] = 1'b1; + end + EMUL4: begin + vs2_offset[i] = (inst_nf==NF2) ? {1'b0, uop_index_current[i][2], uop_index_current[i][0]} : uop_index_current[i]; + vs2_valid[i] = 1'b1; + end + EMUL8: begin + vs2_offset[i] = uop_index_current[i]; + vs2_valid[i] = 1'b1; + end + default: begin //EMUL1 + vs2_offset[i] = 'b0; + vs2_valid[i] = 1'b1; + end + endcase + end + // 4:1 + {EEW32,EEW8}: begin + case(emul_vs2) + EMUL2: begin + case(inst_nf) + NF2: vs2_offset[i] = {1'b0, uop_index_current[i][2], uop_index_current[i][0]}; + NF3, + NF4: vs2_offset[i] = {2'b0, uop_index_current[i][0]}; + default: vs2_offset[i] = uop_index_current[i]; //NF1 + endcase + vs2_valid[i] = 1'b1; + end + EMUL4: begin + vs2_offset[i] = (inst_nf==NF2) ? 
{1'b0, uop_index_current[i][1:0]} : uop_index_current[i]; + vs2_valid[i] = 1'b1; + end + EMUL8: begin + vs2_offset[i] = uop_index_current[i]; + vs2_valid[i] = 1'b1; + end + default: begin //EMUL1 + vs2_offset[i] = 'b0; + vs2_valid[i] = 1'b1; end endcase end // 1:2 - {SEW_8,SEW16}, - {SEW_16,SEW32}: begin + {EEW8,EEW16}, + {EEW16,EEW32}: begin case(emul_vs2) EMUL1: begin - vs2_index[i] = inst_vs2; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = 'b0; + vs2_valid[i] = 1'b1; end EMUL2: begin - vs2_index[i] = inst_vs2+uop_index_current[i][1]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = (inst_nf==NF2) ? {2'b0, uop_index_current[i][2]} : {2'b0, uop_index_current[i][1]}; + vs2_valid[i] = 1'b1; end EMUL4: begin - vs2_index[i] = inst_vs2+uop_index_current[i][2:1]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = {1'b0, uop_index_current[i][2:1]}; + vs2_valid[i] = 1'b1; end endcase end // 1:4 - {SEW_8,SEW32}: begin + {EEW8,EEW32}: begin case(emul_vs2) EMUL1: begin - vs2_index[i] = inst_vs2; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = 'b0; + vs2_valid[i] = 1'b1; end EMUL2: begin - vs2_index[i] = inst_vs2+uop_index_current[i][2]; - vs2_eew[i] = eew_vs2; - vs2_valid[i] = 1'b1; + vs2_offset[i] = {2'b0, uop_index_current[i][2]}; + vs2_valid[i] = 1'b1; end endcase end @@ -3416,6 +3605,14 @@ end end + // update vs2 index and eew + always_comb begin + for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_VS2 + vs2_index[i] = inst_vs2 + {2'b0, vs2_offset[i]}; + vs2_eew[i] = eew_vs2; + end + end + // update rd_index and valid always_comb begin for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_RD @@ -3450,24 +3647,39 @@ // update segment_index valid always_comb begin for(int i=0;i<`NUM_DE_UOP;i++) begin: GET_SEG_INDEX - // initial - seg_field_index[i] = 'b0; + // default + if (inst_nf==NF2) + seg_field_index[i] = {1'b0,uop_index_current[i][2:1]}; + else if (inst_nf==NF3) + seg_field_index[i] = (uop_index_current[i]>=4'd3) ? 
'd1 : 'b0; + else if (inst_nf==NF4) + seg_field_index[i] = {2'b0,uop_index_current[i][2]}; + else + seg_field_index[i] = 'b0; - if(funct6_lsu.lsu_funct6.lsu_is_seg==IS_SEGMENT) begin - case(inst_nf) - NF2: begin - case(emul_max_vd_vs2) - EMUL2: seg_field_index[i] = {1'b0,uop_index_current[i][0]}; - EMUL4: seg_field_index[i] = uop_index_current[i][1:0]; - endcase - end - NF3, - NF4: begin - if (emul_max_vd_vs2==EMUL2) - seg_field_index[i] = {1'b0,uop_index_current[i][0]}; - end - endcase - end + // EEW_vs2>EEW_vd for index load/store + case(inst_funct6[2:0]) + UNORDERED_INDEX, + ORDERED_INDEX: begin + case({eew_vs2,eew_vd}) + // 2:1 + {EEW16,EEW8}, + {EEW32,EEW16}: begin + case(emul_vs2) + EMUL2: seg_field_index[i] = {2'b0, uop_index_current[i][0]}; + EMUL4: seg_field_index[i] = {1'b0, uop_index_current[i][2], uop_index_current[i][0]}; + endcase + end + // 4:1 + {EEW32,EEW8}: begin + case(emul_vs2) + EMUL2: seg_field_index[i] = {2'b0, uop_index_current[i][0]}; + EMUL4: seg_field_index[i] = {1'b0, uop_index_current[i][1:0]}; + endcase + end + endcase + end + endcase end end
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_driver.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_driver.sv index 0e12dd4..0e50dfa 100644 --- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_driver.sv +++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_driver.sv
@@ -8,6 +8,7 @@ class lsu_driver extends uvm_driver # (lsu_transaction); + parameter int MAX_SEG = 8; typedef virtual lsu_interface v_if; v_if lsu_if; @@ -42,6 +43,8 @@ // receive & decode inst from rvs extern function void write_lsu_inst(rvs_transaction inst_tr); extern function int lsu_uop_decode(ref rvs_transaction inst_tr); + extern protected function void lsu_uop_gen_delay(ref lsu_transaction uop_tr); + extern protected function void lsu_uop_gen_trap(ref lsu_transaction uop_tr); endclass: lsu_driver @@ -153,17 +156,17 @@ // update address for indexed-stride from vidx_data if(uop_tr.is_indexed == 1) begin if(lsu_if.uop_lsu_rvv2lsu[i].vidx_valid !== 1) begin - `uvm_fatal("LSU_DRV", "Uop is indexed but vidx_valid is not") + `uvm_error("LSU_DRV", "Uop is indexed but vidx_valid is not") continue; end else if(uop_tr.lsu_slot_addr_valid === 1) begin `uvm_fatal("TB_ISSUE", "Decode error") continue; end else if(uop_tr.vidx_vreg_idx !== lsu_if.uop_lsu_rvv2lsu[i].vidx_addr) begin - `uvm_fatal("LSU_DRV", $sformatf("vidx_addr mismatch: lsu=%0d, dut=%0d", uop_tr.vidx_vreg_idx, lsu_if.uop_lsu_rvv2lsu[i].vidx_addr)) + `uvm_error("LSU_DRV", $sformatf("vidx_addr mismatch: lsu=%0d, dut=%0d", uop_tr.vidx_vreg_idx, lsu_if.uop_lsu_rvv2lsu[i].vidx_addr)) continue; end else begin `uvm_info("LSU_DRV", $sformatf("Got vreg[%0d]=0x%16x from dut.", lsu_if.uop_lsu_rvv2lsu[i].vidx_addr, lsu_if.uop_lsu_rvv2lsu[i].vidx_data), UVM_HIGH); - for(int byte_idx=uop_tr.vidx_vreg_byte_start; byte_idx<=uop_tr.vidx_vreg_byte_end; byte_idx += uop_tr.vidx_vreg_eew/8) begin + for(int byte_idx=uop_tr.vidx_vreg_byte_head; byte_idx<uop_tr.vidx_vreg_byte_tail; byte_idx += uop_tr.vidx_vreg_eew/8) begin case(uop_tr.vidx_vreg_eew) // For indexed-stride, the stride from vrf should be zero-extended to `XLEN. 
EEW8 : stride_temp = $unsigned(lsu_if.uop_lsu_rvv2lsu[i].vidx_data[byte_idx*8 +: 8 ]); @@ -173,7 +176,7 @@ indexed_stride.push_back(stride_temp); `uvm_info("LSU_DRV", $sformatf("byte[%0d]: push stride=0x%8x to indexed_stride(size: %0d).", byte_idx, stride_temp, indexed_stride.size()), UVM_HIGH) end - for(int byte_idx=uop_tr.data_vreg_byte_start; byte_idx<=uop_tr.data_vreg_byte_end; byte_idx++) begin + for(int byte_idx=uop_tr.data_vreg_byte_head; byte_idx<uop_tr.data_vreg_byte_tail; byte_idx++) begin if(byte_idx % (uop_tr.data_vreg_eew/8) == 0) begin stride_temp = indexed_stride.pop_front(); `uvm_info("LSU_DRV", $sformatf("byte[%0d]: pop stride=0x%8x from indexed_stride(size: %0d).", byte_idx, stride_temp, indexed_stride.size()), UVM_HIGH) @@ -198,7 +201,7 @@ `uvm_fatal("TB_ISSUE", $sformatf("vregfile_read_addr mismatch: lsu=%0d, dut=%0d", uop_tr.data_vreg_idx, lsu_if.uop_lsu_rvv2lsu[i].vregfile_read_addr)) continue; end else begin - for(int byte_idx=uop_tr.data_vreg_byte_start; byte_idx<=uop_tr.data_vreg_byte_end; byte_idx++) begin + for(int byte_idx=uop_tr.data_vreg_byte_head; byte_idx<uop_tr.data_vreg_byte_tail; byte_idx++) begin uop_tr.lsu_slot_data[byte_idx] = lsu_if.uop_lsu_rvv2lsu[i].vregfile_read_data[byte_idx*8 +: 8]; end uop_tr.lsu_slot_data_valid = 1; @@ -211,6 +214,7 @@ `uvm_fatal("LSU_DRV", "Uops need v0_data but v0_valid is 0") continue; end else begin + `uvm_info("LSU_DRV", $sformatf("uop_pc:0x%8x, v0_data=0x%016x", uop_tr.uop_pc, lsu_if.uop_lsu_rvv2lsu[i].v0_data), UVM_HIGH) uop_tr.lsu_slot_strobe = lsu_if.uop_lsu_rvv2lsu[i].v0_data; uop_tr.lsu_slot_addr_valid = 1; end @@ -286,8 +290,7 @@ `uvm_fatal("TB_ISSUE", "LSU decode err.") break; end else if(uops_tx_queue[uop_idx].uop_done == 0) begin - // for(int byte_idx=0; byte_idx<`VLENB; byte_idx++) begin - for(int byte_idx=uops_tx_queue[uop_idx].data_vreg_byte_start; byte_idx<=uops_tx_queue[uop_idx].data_vreg_byte_end; byte_idx++) begin + for(int byte_idx=uops_tx_queue[uop_idx].data_vreg_byte_head; 
byte_idx<uops_tx_queue[uop_idx].data_vreg_byte_tail; byte_idx++) begin if(uops_tx_queue[uop_idx].lsu_slot_strobe[byte_idx] === 1'b1) begin mem.pc = uops_tx_queue[uop_idx].uop_pc; mem.load_byte(data_temp, uops_tx_queue[uop_idx].lsu_slot_addr[byte_idx]); @@ -308,8 +311,7 @@ `uvm_fatal("TB_ISSUE", "LSU decode err.") break; end else if(uops_tx_queue[uop_idx].uop_done == 0) begin - // for(int byte_idx=0; byte_idx<`VLENB; byte_idx++) begin - for(int byte_idx=uops_tx_queue[uop_idx].data_vreg_byte_start; byte_idx<=uops_tx_queue[uop_idx].data_vreg_byte_end; byte_idx++) begin + for(int byte_idx=uops_tx_queue[uop_idx].data_vreg_byte_head; byte_idx<uops_tx_queue[uop_idx].data_vreg_byte_tail; byte_idx++) begin if(uops_tx_queue[uop_idx].lsu_slot_strobe[byte_idx] === 1'b1) begin data_temp = uops_tx_queue[uop_idx].lsu_slot_data[byte_idx]; mem.pc = uops_tx_queue[uop_idx].uop_pc; @@ -432,17 +434,14 @@ endfunction function int lsu_driver::lsu_uop_decode(ref rvs_transaction inst_tr); - lsu_transaction uop_tr; + lsu_transaction uop_tr; // segment max is 3 // vtype decode - int sew; - int lsu_eew; - real lmul; int elm_idx_max; int lsu_nf; - int seg_idx_max; + int seg_num; int evl; int vstart; - int uop_vstart; + int uop_vstart[MAX_SEG]; int data_eew; real data_emul; @@ -452,15 +451,19 @@ real emul_max; // uop info - int uops_cnt; int uops_num; - int data_byte_idx; - int vidx_byte_idx; - int temp_idx; int data_vreg_idx_base; int vidx_vreg_idx_base; int data_vreg_idx_last; int vidx_vreg_idx_last; + int elm_per_uop; + int elm_idx_head[MAX_SEG]; // elm pointer + int elm_idx_tail[MAX_SEG]; // elm pointer + int data_byte_idx[MAX_SEG]; + int vidx_byte_idx[MAX_SEG]; + int seg_idx; + int switch_seg; + int seg_switch_gap; // num of bytes per data vreg // load/store addres info int addr; @@ -468,87 +471,66 @@ // Decode ---------------------------------------------------------------------- `uvm_info("LSU_DRV","Start decode vtype",UVM_HIGH) - sew = 8 << inst_tr.vsew; - lsu_eew = inst_tr.lsu_eew; - 
lmul = 2.0 ** $signed(inst_tr.vlmul); - addr_base = inst_tr.rs1_data; - lsu_nf = inst_tr.lsu_nf; - vstart = inst_tr.vstart; + addr_base = inst_tr.rs1_data; + evl = inst_tr.evl; + vstart = inst_tr.vstart; + lsu_nf = inst_tr.lsu_nf; + eew_max = inst_tr.eew_max; + emul_max = inst_tr.emul_max; + + elm_idx_max = inst_tr.elm_idx_max; + seg_num = inst_tr.seg_num; + + uops_num = int'($ceil(emul_max)) * (seg_num); + elm_per_uop = `VLEN / eew_max; + + case(inst_tr.inst_type) + LD: begin + data_eew = inst_tr.dest_eew; + vidx_eew = inst_tr.src2_eew; + data_emul = inst_tr.dest_emul; + vidx_emul = inst_tr.src2_emul; + end + ST: begin + data_eew = inst_tr.src3_eew; + vidx_eew = inst_tr.src2_eew; + data_emul = inst_tr.src3_emul; + vidx_emul = inst_tr.src2_emul; + end + endcase case(inst_tr.lsu_mop) LSU_US : begin case(inst_tr.lsu_umop) MASK: begin - data_eew = EEW8; - data_emul = EMUL1; - vidx_eew = EEW32; - vidx_emul = EMUL1; - eew_max = EEW8; - emul_max = EMUL1; const_stride = (lsu_nf+1) * data_eew/8; - seg_idx_max = lsu_nf + 1; - evl = int'($ceil(inst_tr.vl / 8.0)); end WHOLE_REG: begin - data_eew = lsu_eew; - data_emul = lsu_nf + 1; - vidx_eew = EEW32; - vidx_emul = EMUL1; - eew_max = lsu_eew; - emul_max = data_emul; const_stride = data_eew/8; - seg_idx_max = 1; - evl = data_emul * `VLEN / data_eew; end default: begin - data_eew = lsu_eew; - data_emul = data_eew * lmul / sew; - vidx_eew = EEW32; - vidx_emul = EMUL1; - eew_max = lsu_eew; - emul_max = eew_max * lmul / sew; const_stride = (lsu_nf+1) * data_eew/8; - seg_idx_max = lsu_nf + 1; - evl = inst_tr.vl; end endcase end LSU_CS : begin - data_eew = lsu_eew; - data_emul = data_eew * lmul / sew; - vidx_eew = EEW32; - vidx_emul = EMUL1; - eew_max = lsu_eew; - emul_max = eew_max * lmul / sew; const_stride = inst_tr.rs2_data; - seg_idx_max = lsu_nf + 1; - evl = inst_tr.vl; end LSU_UI, LSU_OI: begin - data_eew = sew; - data_emul = data_eew * lmul / sew; - vidx_eew = lsu_eew; - vidx_emul = vidx_eew * lmul / sew; - eew_max = 
(data_eew > vidx_eew) ? data_eew : vidx_eew; - emul_max = eew_max * lmul / sew; const_stride = 0; - seg_idx_max = lsu_nf + 1; - evl = inst_tr.vl; end endcase - uops_num = int'($ceil(emul_max)) * (seg_idx_max); - elm_idx_max = int'($ceil(emul_max)) * `VLEN / eew_max; if(inst_tr.inst_type == LD) begin data_vreg_idx_base = inst_tr.dest_idx; - data_vreg_idx_last = inst_tr.dest_idx + (seg_idx_max) * int'($ceil(data_emul)) - 1; + data_vreg_idx_last = inst_tr.dest_idx + (seg_num) * int'($ceil(data_emul)) - 1; vidx_vreg_idx_base = inst_tr.src2_idx; vidx_vreg_idx_last = inst_tr.src2_idx + int'($ceil(vidx_emul)) - 1; end else if(inst_tr.inst_type == ST) begin data_vreg_idx_base = inst_tr.src3_idx; - data_vreg_idx_last = inst_tr.src3_idx + (seg_idx_max) * int'($ceil(data_emul)) - 1; + data_vreg_idx_last = inst_tr.src3_idx + (seg_num) * int'($ceil(data_emul)) - 1; vidx_vreg_idx_base = inst_tr.src2_idx; vidx_vreg_idx_last = inst_tr.src2_idx + int'($ceil(vidx_emul)) - 1; end else begin @@ -557,164 +539,198 @@ `uvm_info("LSU_DRV", $sformatf("eew_max=%0d, emul_max=%.2f, elm_idx_max=%0d", eew_max, emul_max, elm_idx_max), UVM_HIGH) // Uops Gen -------------------------------------------------------------------- - `uvm_info("LSU_DRV","Start gen uops",UVM_HIGH) - uops_cnt = 0; - for(int seg_idx=0; seg_idx<seg_idx_max; seg_idx++) begin - uop_vstart = inst_tr.vstart; - data_byte_idx = vstart * data_eew / 8; - vidx_byte_idx = vstart * vidx_eew / 8; - for(int elm_idx=0; elm_idx<elm_idx_max; elm_idx++) begin - - `uvm_info("LSU_DRV",$sformatf("seg_idx=%0d, elm_idx=%0d", seg_idx, elm_idx),UVM_HIGH) - if(elm_idx * eew_max % `VLEN == 0) begin - `uvm_info("LSU_DRV","Gen new uop",UVM_HIGH) - uop_tr = new(); - // Gen delay - case(delay_mode_rvv2lsu) - delay_mode_pkg::SLOW: begin - uop_tr.c_rvv2lsu_delay.constraint_mode(0); - assert(uop_tr.randomize(rvv2lsu_delay) with { - rvv2lsu_delay dist { - [1:50] :/ 20, - [50:100] :/ 80 - }; - }); - end - delay_mode_pkg::NORMAL: begin - 
assert(uop_tr.randomize(rvv2lsu_delay) with { - rvv2lsu_delay dist { - [0:10] :/ 50, - [10:20] :/ 30, - [20:50] :/ 20 - }; - }); - end - delay_mode_pkg::FAST: begin - assert(uop_tr.randomize(rvv2lsu_delay) with { - rvv2lsu_delay dist { - 0 := 80, - [1:5] :/ 15, - [5:20] :/ 5 - }; - }); - end - endcase - case(delay_mode_lsu2rvv) - delay_mode_pkg::SLOW: begin - uop_tr.c_lsu2rvv_delay.constraint_mode(0); - assert(uop_tr.randomize(lsu2rvv_delay) with { - lsu2rvv_delay dist { - [1:50] :/ 20, - [50:100] :/ 80 - }; - }); - end - delay_mode_pkg::NORMAL: begin - assert(uop_tr.randomize(lsu2rvv_delay) with { - lsu2rvv_delay dist { - [0:10] :/ 50, - [10:20] :/ 30, - [20:50] :/ 20 - }; - }); - end - delay_mode_pkg::FAST: begin - assert(uop_tr.randomize(lsu2rvv_delay) with { - lsu2rvv_delay dist { - 0 := 80, - [1:5] :/ 15, - [5:20] :/ 5 - }; - }); - end - endcase - // Gen trap - if(trap_en) begin - if(always_trap) begin - assert(uop_tr.randomize(trap_occured) with { - trap_occured == 1; - }); - end else begin - assert(uop_tr.randomize(trap_occured) with { - trap_occured dist { - // 0 := 99, - 0 := 9, - 1 := 1 - }; - }); - end - end else begin - assert(uop_tr.randomize(trap_occured) with { - trap_occured == 0; - }); - end - uops_cnt++; - uop_tr.inst_string = inst_tr.asm_string; - if(inst_tr.inst_type == LD) begin - uop_tr.kind = lsu_transaction::LOAD; - end else if(inst_tr.inst_type == ST) begin - uop_tr.kind = lsu_transaction::STORE; - end else begin - `uvm_fatal("TB_ISSUE", "Decode inst_tr which is not load/store in lsu_driver.") - end - uop_tr.uop_pc = inst_tr.pc; - uop_tr.uop_index = uops_cnt-1; + `uvm_info("LSU_DRV","Start gen uops",UVM_HIGH) + if(data_emul < 1) begin + seg_switch_gap = data_emul * `VLENB; + end else begin + seg_switch_gap = `VLENB; + end - uop_tr.is_last_uop = (uops_cnt == uops_num) ? 1: 0; - uop_tr.is_indexed = (inst_tr.lsu_mop inside {LSU_UI, LSU_OI}) ? 
1 : 0; - uop_tr.total_uops_num = uops_num; - uop_tr.base_addr = addr_base; - uop_tr.vstart = uop_vstart; + for(int seg_idx=0; seg_idx<seg_num; seg_idx++) begin + elm_idx_head[seg_idx] = 0; + elm_idx_tail[seg_idx] = elm_per_uop; + data_byte_idx[seg_idx] = 0; + vidx_byte_idx[seg_idx] = 0; + uop_vstart[seg_idx] = vstart; + end + seg_idx = 0; + for(int uops_idx=0; uops_idx<uops_num; uops_idx++) begin + `uvm_info("LSU_DRV","Gen new uop",UVM_HIGH) + uop_tr = new(); + `uvm_info("LSU_DRV", $sformatf("seg_idx = %0d\n", seg_idx ), UVM_HIGH) + `uvm_info("LSU_DRV", $sformatf("seg_switch_gap = %0d\n", seg_switch_gap), UVM_HIGH) + `uvm_info("LSU_DRV", $sformatf("elm_idx_head[%0d] = %0d\n", seg_idx, elm_idx_head[seg_idx]), UVM_HIGH) + `uvm_info("LSU_DRV", $sformatf("elm_idx_tail[%0d] = %0d\n", seg_idx, elm_idx_tail[seg_idx]), UVM_HIGH) - uop_tr.vm = inst_tr.vm; - uop_tr.lsu_slot_strobe = '0; - - uop_tr.data_vreg_valid = 1; - uop_tr.data_vreg_idx = data_vreg_idx_base + elm_idx * (data_eew/8) / `VLENB + seg_idx * int'($ceil(data_emul)); - uop_tr.data_vreg_eew = data_eew; - uop_tr.data_vreg_byte_start = data_byte_idx % `VLENB; + lsu_uop_gen_delay(uop_tr); + lsu_uop_gen_trap(uop_tr); - uop_tr.vidx_vreg_valid = (inst_tr.lsu_mop inside {LSU_UI, LSU_OI}) ? 1 : 0; - uop_tr.vidx_vreg_idx = vidx_vreg_idx_base + elm_idx * (vidx_eew/8) / `VLENB; - uop_tr.vidx_vreg_eew = vidx_eew; - uop_tr.vidx_vreg_byte_start = vidx_byte_idx % `VLENB; + uop_tr.inst_string = inst_tr.asm_string; + if(inst_tr.inst_type == LD) begin + uop_tr.kind = lsu_transaction::LOAD; + end else if(inst_tr.inst_type == ST) begin + uop_tr.kind = lsu_transaction::STORE; + end else begin + `uvm_fatal("TB_ISSUE", "Decode inst_tr which is not load/store in lsu_driver.") + end + uop_tr.uop_pc = inst_tr.pc; + uop_tr.uop_index = uops_idx; + + uop_tr.is_last_uop = (uops_idx == uops_num-1) ? 1: 0; + uop_tr.is_indexed = (inst_tr.lsu_mop inside {LSU_UI, LSU_OI}) ? 
1 : 0; + uop_tr.total_uops_num = uops_num; + uop_tr.base_addr = addr_base; + uop_tr.vstart = uop_vstart[seg_idx]; + + uop_tr.vm = inst_tr.vm; + uop_tr.lsu_slot_strobe = '0; + + uop_tr.data_vreg_valid = 1; + uop_tr.data_vreg_idx = data_vreg_idx_base + elm_idx_head[seg_idx] * (data_eew/8) / `VLENB + seg_idx * int'($ceil(data_emul)); + uop_tr.data_vreg_eew = data_eew; + uop_tr.data_vreg_byte_head = data_byte_idx[seg_idx]; + + uop_tr.vidx_vreg_valid = (inst_tr.lsu_mop inside {LSU_UI, LSU_OI}) ? 1 : 0; + uop_tr.vidx_vreg_idx = vidx_vreg_idx_base + elm_idx_head[seg_idx] * (vidx_eew/8) / `VLENB; + uop_tr.vidx_vreg_eew = vidx_eew; + uop_tr.vidx_vreg_byte_head = vidx_byte_idx[seg_idx]; + + for(int elm_idx=elm_idx_head[seg_idx]; elm_idx<elm_idx_tail[seg_idx]; elm_idx++) begin + if(elm_idx == vstart) begin + uop_tr.data_vreg_byte_head = vstart * data_eew / 8 %`VLENB; + uop_tr.vidx_vreg_byte_head = vstart * vidx_eew / 8 %`VLENB; end - if(elm_idx >= vstart && elm_idx < evl) begin for(int byte_idx=0; byte_idx<data_eew/8; byte_idx++) begin addr = addr_base + const_stride * elm_idx + data_eew / 8 * seg_idx + byte_idx; - temp_idx = data_byte_idx % `VLENB; - uop_tr.lsu_slot_addr[temp_idx] = addr; - uop_tr.lsu_slot_strobe[temp_idx] = 1'b1; - data_byte_idx++; - // `uvm_info("LSU_DRV",$sformatf("addr=%0x, data_byte_idx = %0d, temp_idx=%0d", addr, data_byte_idx, temp_idx),UVM_HIGH) - // uop_tr.print(); + uop_tr.lsu_slot_addr[data_byte_idx[seg_idx]] = addr; + uop_tr.lsu_slot_strobe[data_byte_idx[seg_idx]] = 1'b1; + data_byte_idx[seg_idx]++; end - vidx_byte_idx += vidx_eew/8; + vidx_byte_idx[seg_idx] += vidx_eew/8; + end else begin + data_byte_idx[seg_idx] += data_eew/8; + vidx_byte_idx[seg_idx] += vidx_eew/8; end if(elm_idx >= vstart) begin - uop_vstart++; + uop_vstart[seg_idx]++; end + end // elm-loop - if(elm_idx * eew_max % `VLEN == `VLEN - eew_max) begin - if(elm_idx >= vstart && elm_idx < evl) begin - uop_tr.data_vreg_byte_end = (data_byte_idx-1) % `VLENB; - 
uop_tr.vidx_vreg_byte_end = (vidx_byte_idx-1) % `VLENB; - end else begin - uop_tr.data_vreg_byte_end = (data_byte_idx) % `VLENB; - uop_tr.vidx_vreg_byte_end = (vidx_byte_idx) % `VLENB; - end - if(inst_tr.lsu_mop inside {LSU_US, LSU_CS} && inst_tr.vm == 1) begin - uop_tr.lsu_slot_addr_valid = 1; - end - `uvm_info("LSU_DRV",$sformatf("Decode uop_tr to uops_rx_queque:\n%s",uop_tr.sprint()),UVM_HIGH) - uops_rx_queue.push_back(uop_tr); + uop_tr.data_vreg_byte_tail = data_byte_idx[seg_idx]; + uop_tr.vidx_vreg_byte_tail = vidx_byte_idx[seg_idx]; + + if(inst_tr.lsu_mop inside {LSU_US, LSU_CS} && inst_tr.vm == 1) begin + uop_tr.lsu_slot_addr_valid = 1; + end + `uvm_info("LSU_DRV",$sformatf("Decode uop_tr to uops_rx_queque:\n%s",uop_tr.sprint()),UVM_HIGH) + uops_rx_queue.push_back(uop_tr); + + `uvm_info("LSU_DRV", $sformatf("data_byte_idx[%0d] = %0d\n", seg_idx, data_byte_idx[seg_idx]), UVM_HIGH) + + switch_seg = data_byte_idx[seg_idx] >= seg_switch_gap; + + data_byte_idx[seg_idx] = (data_byte_idx[seg_idx] % `VLENB); + vidx_byte_idx[seg_idx] = (vidx_byte_idx[seg_idx] % `VLENB); + + elm_idx_head[seg_idx] += elm_per_uop; + elm_idx_tail[seg_idx] += elm_per_uop; + + if(switch_seg) begin + if(seg_idx == seg_num-1) begin + seg_idx = 0; + end else begin + seg_idx += 1; end end - end + end // uop-loop `uvm_info("LSU_DRV","Decode done",UVM_HIGH) endfunction: lsu_uop_decode +function void lsu_driver::lsu_uop_gen_delay(ref lsu_transaction uop_tr); // Gen delay + case(delay_mode_rvv2lsu) + delay_mode_pkg::SLOW: begin + uop_tr.c_rvv2lsu_delay.constraint_mode(0); + assert(uop_tr.randomize(rvv2lsu_delay) with { + rvv2lsu_delay dist { + [1:50] :/ 20, + [50:100] :/ 80 + }; + }); + end + delay_mode_pkg::NORMAL: begin + assert(uop_tr.randomize(rvv2lsu_delay) with { + rvv2lsu_delay dist { + [0:10] :/ 50, + [10:20] :/ 30, + [20:50] :/ 20 + }; + }); + end + delay_mode_pkg::FAST: begin + assert(uop_tr.randomize(rvv2lsu_delay) with { + rvv2lsu_delay dist { + 0 := 80, + [1:5] :/ 15, + [5:20] :/ 5 + 
}; + }); + end + endcase + case(delay_mode_lsu2rvv) + delay_mode_pkg::SLOW: begin + uop_tr.c_lsu2rvv_delay.constraint_mode(0); + assert(uop_tr.randomize(lsu2rvv_delay) with { + lsu2rvv_delay dist { + [1:50] :/ 20, + [50:100] :/ 80 + }; + }); + end + delay_mode_pkg::NORMAL: begin + assert(uop_tr.randomize(lsu2rvv_delay) with { + lsu2rvv_delay dist { + [0:10] :/ 50, + [10:20] :/ 30, + [20:50] :/ 20 + }; + }); + end + delay_mode_pkg::FAST: begin + assert(uop_tr.randomize(lsu2rvv_delay) with { + lsu2rvv_delay dist { + 0 := 80, + [1:5] :/ 15, + [5:20] :/ 5 + }; + }); + end + endcase +endfunction: lsu_uop_gen_delay + +function void lsu_driver::lsu_uop_gen_trap(ref lsu_transaction uop_tr); + // Gen trap + if(trap_en) begin + if(always_trap) begin + assert(uop_tr.randomize(trap_occured) with { + trap_occured == 1; + }); + end else begin + assert(uop_tr.randomize(trap_occured) with { + trap_occured dist { + // 0 := 99, + 0 := 9, + 1 := 1 + }; + }); + end + end else begin + assert(uop_tr.randomize(trap_occured) with { + trap_occured == 0; + }); + end +endfunction: lsu_uop_gen_trap + function void lsu_driver::final_phase(uvm_phase phase); super.final_phase(phase); if(inst_queue.size()>0) begin
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_transaction.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_transaction.sv index 783c12f..5e44515 100644 --- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_transaction.sv +++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/lsu_agent_lsu_transaction.sv
@@ -25,15 +25,15 @@ bit data_vreg_valid; int data_vreg_idx; eew_e data_vreg_eew; - int data_vreg_byte_start; - int data_vreg_byte_end; + int data_vreg_byte_head; + int data_vreg_byte_tail; // vs2 bit vidx_vreg_valid; int vidx_vreg_idx; eew_e vidx_vreg_eew; - int vidx_vreg_byte_start; - int vidx_vreg_byte_end; + int vidx_vreg_byte_head; + int vidx_vreg_byte_tail; /* info about load/store address/data */ bit lsu_slot_addr_valid; @@ -82,15 +82,15 @@ `uvm_field_int(data_vreg_idx,UVM_ALL_ON) `uvm_field_enum(eew_e,data_vreg_eew,UVM_ALL_ON) - `uvm_field_int(data_vreg_byte_start,UVM_ALL_ON) - `uvm_field_int(data_vreg_byte_end ,UVM_ALL_ON) + `uvm_field_int(data_vreg_byte_head,UVM_ALL_ON) + `uvm_field_int(data_vreg_byte_tail,UVM_ALL_ON) `uvm_field_int(data_vreg_valid,UVM_ALL_ON) if(is_indexed) begin `uvm_field_int(vidx_vreg_idx ,UVM_ALL_ON) `uvm_field_enum(eew_e, vidx_vreg_eew,UVM_ALL_ON) - `uvm_field_int(vidx_vreg_byte_start,UVM_ALL_ON) - `uvm_field_int(vidx_vreg_byte_end ,UVM_ALL_ON) + `uvm_field_int(vidx_vreg_byte_head,UVM_ALL_ON) + `uvm_field_int(vidx_vreg_byte_tail,UVM_ALL_ON) `uvm_field_int(vidx_vreg_valid,UVM_ALL_ON) end `uvm_field_int(lsu_slot_addr_valid, UVM_ALL_ON) @@ -125,14 +125,14 @@ data_vreg_valid = 0; data_vreg_idx = 0; data_vreg_eew = EEW_NONE; - data_vreg_byte_start = 0; - data_vreg_byte_end = 0; + data_vreg_byte_head = 0; + data_vreg_byte_tail = 0; vidx_vreg_valid = 0; vidx_vreg_idx = 0; vidx_vreg_eew = EEW_NONE; - vidx_vreg_byte_start = 0; - vidx_vreg_byte_end = 0; + vidx_vreg_byte_head = 0; + vidx_vreg_byte_tail = 0; lsu_slot_addr_valid = 1'b0;
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv index 71627c3..91c006e 100644 --- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv +++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_monitor.sv
@@ -245,23 +245,32 @@ // VRF if(rvs_if.rt_vrf_valid_rob2rt[rt_idx]) begin + int pos = 0; vrf_overlap = 0; rt_vrf_byte_strobe = rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_strobe; for(int i=0; i<`VLENB; i++) begin rt_vrf_bit_strobe[i*8 +: 8] = {8{rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_strobe[i]}}; end foreach(tr.rt_vrf_index[i]) begin + // merge same vrf if(tr.rt_vrf_index[i] == rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index) begin tr.rt_vrf_strobe[i] |= rt_vrf_byte_strobe; tr.rt_vrf_data[i] = rt_vrf_bit_strobe & rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_data | ~rt_vrf_bit_strobe & tr.rt_vrf_data[i]; vrf_overlap = 1; `uvm_info(get_type_name(), $sformatf("Uops %0d also write vrf[%0d].", rt_idx, rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index), UVM_HIGH) end + // sort vrf + if(tr.rt_vrf_index[i] > rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index) begin + pos = i; + break; + end else begin + pos = i+1; + end end if(!vrf_overlap) begin - tr.rt_vrf_index.push_back(rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index); - tr.rt_vrf_strobe.push_back(rt_vrf_byte_strobe); - tr.rt_vrf_data.push_back(rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_data); + tr.rt_vrf_index.insert(pos, rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_index); + tr.rt_vrf_strobe.insert(pos, rt_vrf_byte_strobe); + tr.rt_vrf_data.insert(pos, rvs_if.rt_vrf_data_rob2rt[rt_idx].rt_data); end end
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv index 7286bb1..49a77a1 100644 --- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv +++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvs_agent_rvs_transaction.sv
@@ -2728,7 +2728,7 @@ if(this.lsu_nf == NF1) begin inst = $sformatf("%se%0d", inst, lsu_eew); end else begin - inst = $sformatf("%s%0de%0d", inst, lsu_nf+1, lsu_eew); + inst = $sformatf("%sseg%0de%0d", inst, lsu_nf+1, lsu_eew); end end endcase @@ -2737,7 +2737,7 @@ if(this.lsu_nf == NF1) begin inst = $sformatf("%se%0d", inst, lsu_eew); end else begin - inst = $sformatf("%s%0de%0d", inst, lsu_nf+1, lsu_eew); + inst = $sformatf("%sseg%0de%0d", inst, lsu_nf+1, lsu_eew); end end LSU_UI, @@ -2745,7 +2745,7 @@ if(this.lsu_nf == NF1) begin inst = $sformatf("%sei%0d", inst, lsu_eew); end else begin - inst = $sformatf("%s%0dei%0d", inst, lsu_nf+1, lsu_eew); + inst = $sformatf("%sseg%0dei%0d", inst, lsu_nf+1, lsu_eew); end end endcase
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv index 4337683..5c87f1a 100644 --- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv +++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_behavior_model.sv
@@ -413,6 +413,7 @@ end `uvm_info("MDL",$sformatf("Prepare done!\nelm_idx_max=%0d\ndest_eew=%0d\nsrc2_eew=%0d\nsrc1_eew=%0d\ndest_emul=%2.4f\nsrc2_emul=%2.4f\nsrc1_emul=%2.4f\n",elm_idx_max,dest_eew,src2_eew,src1_eew,dest_emul,src2_emul,src1_emul),UVM_LOW) + `uvm_info("MDL",$sformatf("pc = 0x%8x, v0 = 0x%16x\n", inst_tr.pc, this.vrf[0]),UVM_LOW) // 2.2 Check VRF index dest_reg_idx_base = inst_tr.dest_idx_base; @@ -1773,6 +1774,7 @@ //------------------------------------------------------------------------------ class lsu_processor extends uvm_component; + parameter int MAX_SEG = 8; `uvm_component_utils(lsu_processor) int dest_eew; real dest_emul; @@ -1781,6 +1783,9 @@ int src1_eew; real src1_emul; int src0_eew; real src0_emul; + int data_eew; real data_emul; + int vidx_eew; real vidx_emul; + vrf_t [31:0] vrf_temp; int dest_reg_idx_base = 0; @@ -1800,11 +1805,16 @@ int address; int elm_idx_max; + int seg_idx; int seg_num; int seg_size; // byte size int data_size; // byte size int vidx_size; // byte size + int elm_idx_head[MAX_SEG]; // elm pointer + int elm_idx_tail[MAX_SEG]; // elm pointer + int data_byte_idx[MAX_SEG]; + int vidx_byte_idx[MAX_SEG]; int uops_num; int elm_per_uop; @@ -1815,12 +1825,28 @@ endfunction: new function void exe(rvv_behavior_model rvm, ref rvs_transaction inst_tr); - int uops_cnt = 0; + int seg_switch_gap = 0; + int switch_seg = 0; + decode(inst_tr); `uvm_info("MDL/LSU", "LSU decode done", UVM_HIGH) `uvm_info("MDL/LSU", $sformatf("\n%s", inst_tr.sprint()), UVM_HIGH); + if(data_emul < 1) begin + seg_switch_gap = data_emul * `VLENB; + end else begin + seg_switch_gap = `VLENB; + end + for(int seg_idx=0; seg_idx<seg_num; seg_idx++) begin + elm_idx_head[seg_idx] = 0; + elm_idx_tail[seg_idx] = elm_per_uop; + data_byte_idx[seg_idx] = 0; + vidx_byte_idx[seg_idx] = 0; + end + + seg_idx = 0; + for(int uops_idx=0; uops_idx<uops_num; uops_idx++) begin dest_reg_idx_base = (inst_tr.dest_type == VRF) ? 
(inst_tr.dest_idx + seg_idx * int'($ceil(dest_emul))) : (inst_tr.dest_idx); src3_reg_idx_base = (inst_tr.src3_type == VRF) ? (inst_tr.src3_idx + seg_idx * int'($ceil(src3_emul))) : (inst_tr.src3_idx); src2_reg_idx_base = (inst_tr.src2_idx); @@ -1829,7 +1855,13 @@ `uvm_info("MDL/LSU", $sformatf("seg_idx=%0d: dest_reg_idx_base=%0d, src3_reg_idx_base=%0d, src2_reg_idx_base=%0d, src1_reg_idx_base=%0d", seg_idx, dest_reg_idx_base, src3_reg_idx_base, src2_reg_idx_base, src1_reg_idx_base), UVM_HIGH) `uvm_info("MDL/LSU", $sformatf("vreg[0]=0x%16h", rvm.vrf[0]), UVM_HIGH) - for(int elm_idx=0; elm_idx<elm_idx_max; elm_idx++) begin + + `uvm_info("MDL/LSU", $sformatf("seg_idx = %0d\n", seg_idx ), UVM_HIGH) + `uvm_info("MDL/LSU", $sformatf("seg_switch_gap = %0d\n", seg_switch_gap), UVM_HIGH) + `uvm_info("MDL/LSU", $sformatf("elm_idx_head[%0d] = %0d\n", seg_idx, elm_idx_head[seg_idx]), UVM_HIGH) + `uvm_info("MDL/LSU", $sformatf("elm_idx_tail[%0d] = %0d\n", seg_idx, elm_idx_tail[seg_idx]), UVM_HIGH) + + for(int elm_idx=elm_idx_head[seg_idx]; elm_idx<elm_idx_tail[seg_idx]; elm_idx++) begin // fetch dest = rvm.elm_fetch(inst_tr.dest_type, dest_reg_idx_base, elm_idx, dest_eew); src3 = rvm.elm_fetch(inst_tr.src3_type, src3_reg_idx_base, elm_idx, src3_eew); @@ -1841,7 +1873,8 @@ `uvm_info("MDL/LSU", $sformatf("dest=0x%8x, src3=0x%8x, src2=0x%8x, src1=0x%8x, src0=0x%8x", dest, src3, src2, src1, src0), UVM_HIGH); update_addr(inst_tr, seg_idx, seg_size, elm_idx, data_size, src2, src1); - if(rvm.trap_occured && uops_cnt<rvm.trap_occured_uop || !rvm.trap_occured) begin + + if(rvm.trap_occured && uops_idx<rvm.trap_occured_uop || !rvm.trap_occured) begin if(elm_idx<vstart) begin // pre-start case(inst_tr.inst_type) @@ -1884,10 +1917,28 @@ else rvm.vstart = rvm.trap_queue[0].vstart; end - if(elm_idx%elm_per_uop == elm_per_uop-1) uops_cnt++; + + data_byte_idx[seg_idx] += data_eew/8; + vidx_byte_idx[seg_idx] += vidx_eew/8; `uvm_info("MDL/LSU", 
"\n---------------------------------------------------------------------------------------------------------------------------------\n", UVM_HIGH) + end // elm-loop + + switch_seg = data_byte_idx[seg_idx] >= seg_switch_gap; + + data_byte_idx[seg_idx] = (data_byte_idx[seg_idx] % `VLENB); + vidx_byte_idx[seg_idx] = (vidx_byte_idx[seg_idx] % `VLENB); + + elm_idx_head[seg_idx] += elm_per_uop; + elm_idx_tail[seg_idx] += elm_per_uop; + + if(switch_seg) begin + if(seg_idx == seg_num-1) begin + seg_idx = 0; + end else begin + seg_idx += 1; + end end - end // seg-loop + end // uops-loop endfunction function bit decode(ref rvs_transaction inst_tr); @@ -1927,14 +1978,22 @@ case(inst_tr.inst_type) LD: begin - seg_size = (seg_num) * dest_eew / 8; + seg_size = (seg_num) * dest_eew / 8; data_size = dest_eew / 8; vidx_size = src2_eew / 8; + data_eew = dest_eew; + vidx_eew = src2_eew; + data_emul = dest_emul; + vidx_emul = src2_emul; end ST: begin seg_size = (seg_num) * src3_eew / 8; data_size = src3_eew / 8; vidx_size = src2_eew / 8; + data_eew = src3_eew; + vidx_eew = src2_eew; + data_emul = src3_emul; + vidx_emul = src2_emul; end endcase return 0;
diff --git a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv index fc83666..ea0e6a3 100644 --- a/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv +++ b/hdl/verilog/rvv/sve/rvv_backend_tb/src/rvv_scoreboard.sv
@@ -362,6 +362,10 @@ lsu_tr = mem_queue_lsu.pop_front(); mdl_tr = mem_queue_mdl.pop_front(); `uvm_info("MEM_RECORDER", $sformatf("\nMEM check start. ====================================================================================================\n"),UVM_HIGH) + `uvm_info("MEM_RECORDER", "lsu memory tr:", UVM_HIGH) + `uvm_info("MEM_RECORDER", lsu_tr.sprint(), UVM_HIGH) + `uvm_info("MEM_RECORDER", "mdl memory tr:", UVM_HIGH) + `uvm_info("MEM_RECORDER", mdl_tr.sprint(), UVM_HIGH) if(lsu_tr.kind != mdl_tr.kind) begin `uvm_error("MEM_CHCKER", $sformatf("Memory access kind mismatch: lsu = %s, mdl = %s", lsu_tr.kind.name(), mdl_tr.kind.name())) err++;