Add RVV to tracing - Plumb the output from rvvCore's writeback into the RetirementBuffer. - If RVV is enabled, the RetirementBuffer is expanded to handle the full VLEN. - Pass the vector data along to rvviTrace as well. Change-Id: I418a9963e336ef9b916fc268371cc8339d0a88a8
diff --git a/hdl/chisel/src/kelvin/Interfaces.scala b/hdl/chisel/src/kelvin/Interfaces.scala index f6c9a09..9b6e1ba 100644 --- a/hdl/chisel/src/kelvin/Interfaces.scala +++ b/hdl/chisel/src/kelvin/Interfaces.scala
@@ -176,7 +176,7 @@ val pc = UInt(32.W) val inst = UInt(32.W) val idx = UInt(p.retirementBufferIdxWidth.W) - val data = UInt(32.W) + val data = if (p.enableRvv) UInt(p.rvvVlen.W) else UInt(32.W) })) } @@ -237,6 +237,11 @@ val data = Input(UInt(32.W)) } +class VectorWriteDataIO(p: Parameters) extends Bundle { + val addr = Input(UInt(5.W)) + val data = Input(UInt(p.lsuDataBits.W)) +} + class FabricIO(p: Parameters) extends Bundle { val readDataAddr = Output(Valid(UInt(p.axi2AddrBits.W))) val readData = Input(Valid(UInt(p.axi2DataBits.W)))
diff --git a/hdl/chisel/src/kelvin/Parameters.scala b/hdl/chisel/src/kelvin/Parameters.scala index 10f3b56..cb03f20 100644 --- a/hdl/chisel/src/kelvin/Parameters.scala +++ b/hdl/chisel/src/kelvin/Parameters.scala
@@ -105,11 +105,14 @@ val floatPulpDivsqrt = 0 // Retirement buffer + val floatRegfileBaseAddr = 32 + val rvvRegfileBaseAddr = 64 + val rvvRegCount = 32 val retirementBufferSize = 8 def retirementBufferIdxWidth: Int = { val scalarRegCount = 32 val floatRegCount = (if (enableFloat) { 32 } else { 0 }) - log2Ceil(scalarRegCount + floatRegCount + 1) + log2Ceil(scalarRegCount + floatRegCount + rvvRegCount + 1) } // L0ICache Fetch unit.
diff --git a/hdl/chisel/src/kelvin/RetirementBuffer.scala b/hdl/chisel/src/kelvin/RetirementBuffer.scala index 11cd32a..851e7f7 100644 --- a/hdl/chisel/src/kelvin/RetirementBuffer.scala +++ b/hdl/chisel/src/kelvin/RetirementBuffer.scala
@@ -25,12 +25,16 @@ val writeDataScalar = Input(Vec(p.instructionLanes + 2, Valid(new RegfileWriteDataIO))) val writeAddrFloat = Option.when(p.enableFloat)(Input(new RegfileWriteAddrIO)) val writeDataFloat = Option.when(p.enableFloat)(Input(Vec(2, Valid(new RegfileWriteDataIO)))) + val writeAddrVector = Option.when(p.enableRvv)(Input(Vec(p.instructionLanes, new RegfileWriteAddrIO))) + val writeDataVector = Option.when(p.enableRvv)(Input(Vec(p.instructionLanes, Valid(new VectorWriteDataIO(p))))) val nSpace = Output(UInt(32.W)) val debug = Output(new RetirementBufferDebugIO(p)) }) + if (p.enableRvv) { + dontTouch(io.writeAddrVector.get) + } val idxWidth = p.retirementBufferIdxWidth - val floatRegisterBase = 32.U val noWriteRegIdx = ~0.U(idxWidth.W) class Instruction extends Bundle { val addr = UInt(32.W) // Memory address @@ -58,12 +62,16 @@ val scalarValid = io.writeAddrScalar(i).valid val scalarAddr = io.writeAddrScalar(i).addr + val vectorValid = io.writeAddrVector.map(x => x(i).valid).getOrElse(false.B) + val vectorAddr = io.writeAddrVector.map(x => x(i).addr).getOrElse(0.U) + val instr = Wire(new Instruction) instr.addr := io.inst(i).bits.addr instr.inst := io.inst(i).bits.inst instr.idx := MuxCase(noWriteRegIdx, Seq( - floatValid -> (floatAddr +& floatRegisterBase), + floatValid -> (floatAddr +& p.floatRegfileBaseAddr.U), (scalarValid && scalarAddr =/= 0.U) -> scalarAddr, + (vectorValid && vectorAddr =/= 0.U) -> (vectorAddr +& p.rvvRegfileBaseAddr.U), )) instr } @@ -84,10 +92,11 @@ // Maintain a re-order buffer of instruction completion result. // The order and alignment of these buffers should correspond to the // output of `instBuffer`. - val resultBuffer = RegInit(VecInit(Seq.fill(bufferSize)(MakeInvalid(UInt(32.W))))) + val dataWidth = if (p.enableRvv) p.lsuDataBits else 32 + val resultBuffer = RegInit(VecInit(Seq.fill(bufferSize)(MakeInvalid(UInt(dataWidth.W))))) // Compute update based on register writeback. 
// Note: The shift when committing instructions will be handled in a later block. - val resultUpdate = Wire(Vec(bufferSize, Valid(UInt(32.W)))) + val resultUpdate = Wire(Vec(bufferSize, Valid(UInt(dataWidth.W)))) for (i <- 0 until bufferSize) { val bufferEntry = instBuffer.io.dataOut(i) @@ -95,7 +104,10 @@ val scalarWriteIdxMap = io.writeDataScalar.map( x => x.valid && (x.bits.addr === bufferEntry.idx)) val floatWriteIdxMap = io.writeDataFloat.map(y => y.map( - x => x.valid && ((x.bits.addr +& floatRegisterBase) === + x => x.valid && ((x.bits.addr +& p.floatRegfileBaseAddr.U) === + bufferEntry.idx))).getOrElse(Seq(false.B)) + val vectorWriteIdxMap = io.writeDataVector.map(y => y.map( + x => x.valid && ((x.bits.addr +& p.rvvRegfileBaseAddr.U) === bufferEntry.idx))).getOrElse(Seq(false.B)) // Check if this entry is an operation that doesn't require a register write (e.g., a store). val nonWritingInstr = bufferEntry.idx === noWriteRegIdx @@ -103,19 +115,24 @@ val validBufferEntry = (i.U < instBuffer.io.nEnqueued) && (!resultBuffer(i).valid) // If the entry is active and its data dependency is met (or it has no dependency)... - val updated = (validBufferEntry && (scalarWriteIdxMap.reduce(_|_) || floatWriteIdxMap.reduce(_|_) || nonWritingInstr)) + val updated = (validBufferEntry && (scalarWriteIdxMap.reduce(_|_) || floatWriteIdxMap.reduce(_|_) || vectorWriteIdxMap.reduce(_|_) || nonWritingInstr)) // Find the index of the first write port that provides the needed data. val scalarWriteIdx = PriorityEncoder(scalarWriteIdxMap) val floatWriteIdx = PriorityEncoder(floatWriteIdxMap) + val vectorWriteIdx = PriorityEncoder(vectorWriteIdxMap) // Select the actual data from the winning write port. 
val writeDataScalar = io.writeDataScalar(scalarWriteIdx).bits.data val writeDataFloat = io.writeDataFloat.map(x => x(floatWriteIdx).bits.data).getOrElse(0.U) + val writeDataVector = io.writeDataVector.map(x => x(vectorWriteIdx).bits.data).getOrElse(0.U) // If updated, mark this buffer entry as complete for the next cycle. resultUpdate(i).valid := Mux(updated, true.B, resultBuffer(i).valid) // true.B // Select the correct write-back data to store, if updated (FP has priority). + val sdata = if (p.enableRvv) Cat(0.U((p.lsuDataBits - 32).W), writeDataScalar) else writeDataScalar + val fdata = if (p.enableRvv) Cat(0.U((p.lsuDataBits - 32).W), writeDataFloat) else writeDataFloat resultUpdate(i).bits := Mux(updated, MuxCase(0.U, Seq( - floatWriteIdxMap.reduce(_|_) -> writeDataFloat, - scalarWriteIdxMap.reduce(_|_) -> writeDataScalar, + floatWriteIdxMap.reduce(_|_) -> fdata, + vectorWriteIdxMap.reduce(_|_) -> writeDataVector, + scalarWriteIdxMap.reduce(_|_) -> sdata, )), resultBuffer(i).bits) }
diff --git a/hdl/chisel/src/kelvin/RvviTrace.scala b/hdl/chisel/src/kelvin/RvviTrace.scala index fb72ee3..44b8d69 100644 --- a/hdl/chisel/src/kelvin/RvviTrace.scala +++ b/hdl/chisel/src/kelvin/RvviTrace.scala
@@ -197,15 +197,13 @@ x_wdata(i)(j) := MuxOR(x_wb_valid, wdata) x_wb(i)(j) := x_wb_valid - val f_wb_valid = valid && (wb_idx === j.U + (32.U)) + val f_wb_valid = valid && (wb_idx === j.U + p.floatRegfileBaseAddr.U) f_wdata(i)(j) := MuxOR(f_wb_valid, wdata) f_wb(i)(j) := f_wb_valid - /////////////////////////////////// - // TODO(atv): This is just generally not tracked. - /////////////////////////////////// - v_wdata(i)(j) := 0.U.asTypeOf(rvviTraceBlackBox.io.v_wdata_i(i)(j)) - v_wb(i)(j) := false.B + val v_wb_valid = valid && (wb_idx === j.U + p.rvvRegfileBaseAddr.U) + v_wdata(i)(j) := MuxOR(v_wb_valid, wdata) + v_wb(i)(j) := v_wb_valid } for (j <- 0 until 4096) {
diff --git a/hdl/chisel/src/kelvin/rvv/RvvCore.scala b/hdl/chisel/src/kelvin/rvv/RvvCore.scala index 24ded3a..f9529df 100644 --- a/hdl/chisel/src/kelvin/rvv/RvvCore.scala +++ b/hdl/chisel/src/kelvin/rvv/RvvCore.scala
@@ -40,6 +40,7 @@ | input [31:0] inst_GENI_bits_pc, | input [1:0] inst_GENI_bits_opcode, | input [24:0] inst_GENI_bits_bits, + | input [31:0] inst_GENI_bits_vd, |""".stripMargin.replaceAll("GENI", i.toString) } @@ -117,12 +118,29 @@ | output logic [3:0] queue_capacity, |""".stripMargin.replaceAll("VSTART_LEN", (log2Ceil(vlen) - 1).toString) + // Add rd_rob2rt_o interface outputs + for (i <- 0 until instructionLanes) { + moduleInterface += """ + | output rd_rob2rt_o_GENI_w_valid, + | output [4:0] rd_rob2rt_o_GENI_w_index, + | output [127:0] rd_rob2rt_o_GENI_w_data, + | output rd_rob2rt_o_GENI_w_type, + | output [15:0] rd_rob2rt_o_GENI_vd_type, + | output rd_rob2rt_o_GENI_trap_flag, + | output rd_rob2rt_o_GENI_vector_csr_vl, + | output rd_rob2rt_o_GENI_vector_csr_vstart, + | output rd_rob2rt_o_GENI_vector_csr_ma, + | output rd_rob2rt_o_GENI_vector_csr_ta, + | output rd_rob2rt_o_GENI_vector_csr_xrm, + | output rd_rob2rt_o_GENI_vector_csr_sew, + | output rd_rob2rt_o_GENI_vector_csr_lmul, + | output [15:0] rd_rob2rt_o_GENI_vxsaturate,""".stripMargin.replaceAll("GENI", i.toString) + } // Remove last comma/linebreak - moduleInterface = moduleInterface.dropRight(2) + moduleInterface = moduleInterface.dropRight(1) moduleInterface += "\n);\n" - // Inst valid var coreInstantiation = " logic [GENN-1:0] inst_valid;\n".replaceAll( "GENN", instructionLanes.toString) for (i <- 0 until instructionLanes) { @@ -222,6 +240,8 @@ coreInstantiation += """ RVVConfigState config_state; |""".stripMargin + coreInstantiation += " ROB2RT_t [3:0] rd_rob2rt_o;\n" + coreInstantiation += """ RvvCore#(.N (GENN)) core( | .clk(clk), | .rstn(rstn), @@ -261,11 +281,28 @@ | .config_state_valid(configStateValid), | .config_state(config_state), | .rvv_idle(rvv_idle), - | .queue_capacity(queue_capacity) + | .queue_capacity(queue_capacity), + | .rd_rob2rt_o(rd_rob2rt_o) |""".stripMargin.replaceAll("GENN", instructionLanes.toString) coreInstantiation += " );\n" - // Connect temp outputs + for (i <- 0 until 
instructionLanes) { + coreInstantiation += """ assign rd_rob2rt_o_GENI_w_valid = rd_rob2rt_o[GENI].w_valid; + | assign rd_rob2rt_o_GENI_w_index = rd_rob2rt_o[GENI].w_index; + | assign rd_rob2rt_o_GENI_w_data = rd_rob2rt_o[GENI].w_data; + | assign rd_rob2rt_o_GENI_w_type = rd_rob2rt_o[GENI].w_type; + | assign rd_rob2rt_o_GENI_vd_type = rd_rob2rt_o[GENI].vd_type; + | assign rd_rob2rt_o_GENI_trap_flag = rd_rob2rt_o[GENI].trap_flag; + | assign rd_rob2rt_o_GENI_vector_csr_vl = rd_rob2rt_o[GENI].vector_csr.vl; + | assign rd_rob2rt_o_GENI_vector_csr_vstart = rd_rob2rt_o[GENI].vector_csr.vstart; + | assign rd_rob2rt_o_GENI_vector_csr_ma = rd_rob2rt_o[GENI].vector_csr.ma; + | assign rd_rob2rt_o_GENI_vector_csr_ta = rd_rob2rt_o[GENI].vector_csr.ta; + | assign rd_rob2rt_o_GENI_vector_csr_xrm = rd_rob2rt_o[GENI].vector_csr.xrm; + | assign rd_rob2rt_o_GENI_vector_csr_sew = rd_rob2rt_o[GENI].vector_csr.sew; + | assign rd_rob2rt_o_GENI_vector_csr_lmul = rd_rob2rt_o[GENI].vector_csr.lmul; + | assign rd_rob2rt_o_GENI_vxsaturate = rd_rob2rt_o[GENI].vxsaturate; + |""".stripMargin.replaceAll("GENI", i.toString) + } for (i <- 0 until instructionLanes) { coreInstantiation += " assign inst_GENI_ready = inst_ready[GENI];\n".replaceAll("GENI", i.toString) } @@ -307,6 +344,8 @@ val async_rd = Decoupled(new RegfileWriteDataIO) + val rd_rob2rt_o = Vec(4, new Rob2Rt(p)) + val vcsr_valid = Output(Bool()) val vcsr_vstart = Output(UInt(7.W)) val vcsr_xrm = Output(UInt(2.W)) @@ -329,6 +368,7 @@ val rvv_idle = Output(Bool()) val queue_capacity = Output(UInt(4.W)) }) + dontTouch(io.rd_rob2rt_o) // Resources must be sorted topologically by dependency DAG addResource("hdl/verilog/rvv/inc/rvv_backend_config.svh") @@ -407,6 +447,7 @@ rvvCoreWrapper.io.rs <> io.rs rvvCoreWrapper.io.rd <> io.rd rvvCoreWrapper.io.async_rd <> io.async_rd + rvvCoreWrapper.io.rd_rob2rt_o <> io.rd_rob2rt_o rvvCoreWrapper.io.vstart := Mux( io.csr.vstart_write.valid, io.csr.vstart_write.bits, vstart)
diff --git a/hdl/chisel/src/kelvin/rvv/RvvDecode.scala b/hdl/chisel/src/kelvin/rvv/RvvDecode.scala index 0ac2602..092cb1a 100644 --- a/hdl/chisel/src/kelvin/rvv/RvvDecode.scala +++ b/hdl/chisel/src/kelvin/rvv/RvvDecode.scala
@@ -36,6 +36,7 @@ val pc = UInt(32.W) val opcode = RvvCompressedOpcode() val bits = UInt(25.W) + val vd = UInt(5.W) def funct6(): UInt = { bits(24, 19) @@ -75,6 +76,13 @@ // TODO(derekjchow): Add all cases that write scalar rd. } + def writesVectorRegister(): Bool = { + // A vector instruction writes to a vector register if it's an ALU operation + // or a load operation. Store operations do not write to a vector register. + // vset* instructions write to a scalar register (rd), not a vector register. + opcode === RvvCompressedOpcode.RVVLOAD || (opcode === RvvCompressedOpcode.RVVALU && !isVset()) + } + override def toPrintable: Printable = { cf"[opcode=$opcode, bits=$bits%b]" } @@ -110,6 +118,7 @@ _.bits.opcode -> new_opcode.bits, _.bits.pc -> pc, _.bits.bits -> bits, + _.bits.vd -> bits(4, 0), ) } }
diff --git a/hdl/chisel/src/kelvin/rvv/RvvInterface.scala b/hdl/chisel/src/kelvin/rvv/RvvInterface.scala index 1f6e8e5..0b12ae5 100644 --- a/hdl/chisel/src/kelvin/rvv/RvvInterface.scala +++ b/hdl/chisel/src/kelvin/rvv/RvvInterface.scala
@@ -69,6 +69,21 @@ val rvv_idle = Output(Bool()) val queue_capacity = Output(UInt(4.W)) + + // ROB to RT stage writes. + val rd_rob2rt_o = Vec(4, new Rob2Rt(p)) +} + + +class Rob2Rt(p: Parameters) extends Bundle { + val w_valid = Bool() + val w_index = UInt(5.W) + val w_data = UInt(p.rvvVlen.W) + val w_type = Bool() // 0 for VRF, 1 for XRF + val vd_type = UInt(p.rvvVlenb.W) + val trap_flag = Bool() + val vector_csr = new RvvConfigState(p) + val vxsaturate = UInt(p.rvvVlenb.W) } class RvvCsrIO(p: Parameters) extends Bundle { @@ -78,4 +93,4 @@ val vstart_write = Input(Valid(UInt(log2Ceil(p.rvvVlen).W))) val vxrm_write = Input(Valid(UInt(2.W))) val vxsat_write = Input(Valid(Bool())) -} \ No newline at end of file +}
diff --git a/hdl/chisel/src/kelvin/scalar/Decode.scala b/hdl/chisel/src/kelvin/scalar/Decode.scala index c5f5f77..c2f1435 100644 --- a/hdl/chisel/src/kelvin/scalar/Decode.scala +++ b/hdl/chisel/src/kelvin/scalar/Decode.scala
@@ -279,6 +279,7 @@ val rdMark = Vec(p.instructionLanes, Flipped(new RegfileWriteAddrIO)) val busRead = Vec(p.instructionLanes, Flipped(new RegfileBusAddrIO)) val rdMark_flt = Option.when(p.enableFloat)(Flipped(new RegfileWriteAddrIO)) + val rvvRdMark = Option.when(p.enableRvv)(Vec(p.instructionLanes, Flipped(new RegfileWriteAddrIO))) // ALU interface. val alu = Vec(p.instructionLanes, Valid(new AluCmd)) @@ -753,6 +754,13 @@ io.rdMark_flt.get.addr := rdAddr(i) } + // Set RVV vector registers to write + if (p.enableRvv) { + val rvvRdMark_valid = io.rvv.get(i).fire && d.rvv.get.bits.writesVectorRegister() + io.rvvRdMark.get(i).valid := rvvRdMark_valid + io.rvvRdMark.get(i).addr := d.rvv.get.bits.vd + } + // Register file bus address port. // Pointer chasing bypass if immediate is zero. // Load/Store immediate selection keys off bit5, and RET off bit6.
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala index 86c6299..83f4c54 100644 --- a/hdl/chisel/src/kelvin/scalar/SCore.scala +++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -73,6 +73,14 @@ retirement_buffer.get.io.writeDataScalar(i) := regfile.io.writeData(i) }) dispatch.io.retirement_buffer_nSpace.get := retirement_buffer.get.io.nSpace + if (p.enableRvv) { + retirement_buffer.get.io.writeAddrVector.get := dispatch.io.rvvRdMark.get + (0 until p.instructionLanes).foreach(i => { + retirement_buffer.get.io.writeDataVector.get(i).valid := io.rvvcore.get.rd_rob2rt_o(i).w_valid + retirement_buffer.get.io.writeDataVector.get(i).bits.addr := io.rvvcore.get.rd_rob2rt_o(i).w_index + retirement_buffer.get.io.writeDataVector.get(i).bits.data := io.rvvcore.get.rd_rob2rt_o(i).w_data + }) + } } if (p.useDebugModule) {
diff --git a/hdl/verilog/rvv/design/RvvCore.sv b/hdl/verilog/rvv/design/RvvCore.sv index 883dcc7..f7f1642 100644 --- a/hdl/verilog/rvv/design/RvvCore.sv +++ b/hdl/verilog/rvv/design/RvvCore.sv
@@ -77,7 +77,10 @@ // Idle output logic rvv_idle, - output logic [$clog2(2*N + 1)-1:0] queue_capacity + output logic [$clog2(2*N + 1)-1:0] queue_capacity, + + // Writeback from reorder buffer + output ROB2RT_t [`NUM_RT_UOP-1:0] rd_rob2rt_o ); logic [N-1:0] frontend_cmd_valid; RVVCmd [N-1:0] frontend_cmd_data; @@ -185,6 +188,9 @@ trap_valid_rvs2rvv = 0; end + ROB2RT_t [`NUM_RT_UOP-1:0] rd_rob2rt; + assign rd_rob2rt_o = rd_rob2rt; + logic [`ISSUE_LANE-1:0] insts_ready_cq2rvs; rvv_backend backend( .clk(clk), @@ -213,7 +219,8 @@ .vcsr_valid(vcsr_valid), .vector_csr(vector_csr), .vcsr_ready(vcsr_ready), - .rvv_idle(rvv_idle) + .rvv_idle(rvv_idle), + .rd_rob2rt_o(rd_rob2rt) ); endmodule
diff --git a/hdl/verilog/rvv/design/rvv_backend.sv b/hdl/verilog/rvv/design/rvv_backend.sv index 090412b..a4598f2 100755 --- a/hdl/verilog/rvv/design/rvv_backend.sv +++ b/hdl/verilog/rvv/design/rvv_backend.sv
@@ -38,7 +38,9 @@ vector_csr, vcsr_ready, - rvv_idle + rvv_idle, + + rd_rob2rt_o ); // global signal input logic clk; @@ -81,6 +83,7 @@ // rvv_backend is not active.(IDLE) output logic rvv_idle; + output ROB2RT_t [`NUM_RT_UOP-1:0] rd_rob2rt_o; `ifdef TB_BRINGUP // inst queue @@ -993,6 +996,7 @@ // rvv_backend IDLE assign rvv_idle = fifo_empty_cq2de&uq_empty&rob_empty; + assign rd_rob2rt_o = rd_rob2rt; `endif // TB_BRINGUP
diff --git a/tests/systemc/instruction_trace.cc b/tests/systemc/instruction_trace.cc index 4521e0f..2dffa3b 100644 --- a/tests/systemc/instruction_trace.cc +++ b/tests/systemc/instruction_trace.cc
@@ -79,7 +79,11 @@ for (auto& in : retirement_buffer_) { if (in.completed) continue; if (valid && (addr == in.reg)) { - in.data = data; + in.data.resize(4); + in.data[0] = (data >> 24) & 0xff; + in.data[1] = (data >> 16) & 0xff; + in.data[2] = (data >> 8) & 0xff; + in.data[3] = data & 0xff; in.completed = true; break; } @@ -104,7 +108,9 @@ } } -void InstructionTrace::TraceInstructionRaw(uint32_t pc, uint32_t inst, uint32_t reg, uint32_t data) { +void InstructionTrace::TraceInstructionRaw(uint32_t pc, uint32_t inst, + uint32_t reg, + const std::vector<uint8_t>& data) { Instruction in(pc, inst, reg); in.data = data; committed_insts_.push_back(in); @@ -112,6 +118,10 @@ void InstructionTrace::PrintTrace() const { for (auto& inst : committed_insts_) { - printf("0x%08x,0x%08x,0x%02x,0x%08x\n", inst.pc, inst.inst, inst.reg, inst.data); + printf("0x%08x,0x%08x,0x%02x,0x", inst.pc, inst.inst, inst.reg); + for (auto d : inst.data) { + printf("%02x", d); + } + printf("\n"); } }
diff --git a/tests/systemc/instruction_trace.h b/tests/systemc/instruction_trace.h index 5912299..8699d8a 100644 --- a/tests/systemc/instruction_trace.h +++ b/tests/systemc/instruction_trace.h
@@ -33,7 +33,8 @@ const std::vector<uint32_t>& writeDataAddrs, const std::vector<uint32_t>& writeDataDatas, const std::vector<int>& executeRegBases); - void TraceInstructionRaw(uint32_t pc, uint32_t inst, uint32_t reg, uint32_t data); + void TraceInstructionRaw(uint32_t pc, uint32_t inst, uint32_t reg, + const std::vector<uint8_t>& data); void PrintTrace() const; static const int kScalarBaseReg = 0; @@ -49,13 +50,12 @@ pc(pc), inst(inst), reg(reg), - data(0), completed(false) {} uint32_t pc; uint32_t inst; uint32_t reg; - uint32_t data; + std::vector<uint8_t> data; bool completed; }; std::vector<Instruction> committed_insts_;
diff --git a/tests/verilator_sim/kelvin/core_mini_axi_tb.cc b/tests/verilator_sim/kelvin/core_mini_axi_tb.cc index 90862c3..4ab9b61 100644 --- a/tests/verilator_sim/kelvin/core_mini_axi_tb.cc +++ b/tests/verilator_sim/kelvin/core_mini_axi_tb.cc
@@ -397,13 +397,22 @@ void CoreMiniAxi_tb::TraceInstructions() { #if (KP_useRetirementBuffer == true) #define TRACE_INSTRUCTION(x) do { \ - uint32_t pc, inst, idx, data; \ + uint32_t pc, inst, idx; \ pc = debug_io_.rb_inst_##x##_bits_pc.read().get_word(0); \ inst = debug_io_.rb_inst_##x##_bits_inst.read().get_word(0); \ idx = debug_io_.rb_inst_##x##_bits_idx.read().get_word(0); \ - data = debug_io_.rb_inst_##x##_bits_data.read().get_word(0); \ if (debug_io_.rb_inst_##x##_valid.read()) { \ - tracer_.TraceInstructionRaw(pc, inst, idx, data); \ + auto data = debug_io_.rb_inst_##x##_bits_data.read(); \ + std::vector<uint8_t> data_vec(data.length() / 8); \ + int num_words = data.length() / 32; \ + for (int i = 0; i < num_words; ++i) { \ + uint32_t word = data.get_word((num_words - 1) - i); \ + data_vec[i*4+0] = (word >> 24) & 0xff; \ + data_vec[i*4+1] = (word >> 16) & 0xff; \ + data_vec[i*4+2] = (word >> 8) & 0xff; \ + data_vec[i*4+3] = word & 0xff; \ + } \ + tracer_.TraceInstructionRaw(pc, inst, idx, data_vec); \ } \ } while (0); REPEAT(TRACE_INSTRUCTION, KP_retirementBufferSize);
diff --git a/tests/verilator_sim/kelvin/core_mini_axi_tb.h b/tests/verilator_sim/kelvin/core_mini_axi_tb.h index 40ebfbf..70d1a79 100644 --- a/tests/verilator_sim/kelvin/core_mini_axi_tb.h +++ b/tests/verilator_sim/kelvin/core_mini_axi_tb.h
@@ -130,14 +130,20 @@ sc_signal<sc_bv<32>> float_writeData_1_bits_data; #endif #if (KP_useRetirementBuffer == true) +#if (KP_enableRvv == true) +#define RB_DEBUG_IO_DATA_WIDTH KP_rvvVlen +#else +#define RB_DEBUG_IO_DATA_WIDTH 32 +#endif #define RB_DEBUG_IO(x) \ sc_signal<bool> rb_inst_##x##_valid; \ sc_signal<sc_bv<32>> rb_inst_##x##_bits_pc; \ sc_signal<sc_bv<32>> rb_inst_##x##_bits_inst; \ sc_signal<sc_bv<KP_retirementBufferIdxWidth>> rb_inst_##x##_bits_idx; \ - sc_signal<sc_bv<32>> rb_inst_##x##_bits_data; + sc_signal<sc_bv<RB_DEBUG_IO_DATA_WIDTH>> rb_inst_##x##_bits_data; REPEAT(RB_DEBUG_IO, KP_retirementBufferSize); #undef RB_DEBUG_IO +#undef RB_DEBUG_IO_DATA_WIDTH #endif };