Add minstret CSR - Track proxy signals for retiring instructions (scalar and vector register-file writes, scalar and vector stores, and branches) and use them to populate the minstret/minstreth CSRs. Change-Id: Idfe9046d17520463c27c5214e0e8d045eba8ea13
diff --git a/hdl/chisel/src/kelvin/scalar/Csr.scala b/hdl/chisel/src/kelvin/scalar/Csr.scala index 346c9dc..9536f11 100644 --- a/hdl/chisel/src/kelvin/scalar/Csr.scala +++ b/hdl/chisel/src/kelvin/scalar/Csr.scala
@@ -44,6 +44,14 @@ val out = new CsrOutIO(p) } +class CsrCounters(p: Parameters) extends Bundle { /* Event counts that Csr sums into minstret on every update. */ + val rfwriteCount = UInt(3.W) + val storeCount = UInt(2.W) + val branchCount = UInt(1.W) + val vrfwriteCount = UInt(3.W) + val vstoreCount = UInt(2.W) +} + class CsrBruIO(p: Parameters) extends Bundle { val in = new Bundle { val mode = Valid(Bool()) @@ -88,6 +96,8 @@ // Vector core. val vcore = Input(new Bundle { val undef = Bool() }) + val counters = Input(new CsrCounters(p)) + // Pipeline Control. val halted = Output(Bool()) val fault = Output(Bool()) @@ -132,6 +142,7 @@ val mhartid = RegInit(0.U(32.W)) val mcycle = RegInit(0.U(64.W)) + val minstret = RegInit(0.U(64.W)) /* 64-bit counter; low word is read at 0xB02, high word at 0xB82. */ // 32-bit MXLEN, I,M,X extensions val misa = RegInit(0x40801100.U(32.W)) @@ -170,7 +181,9 @@ val mspEn = index === 0x7E1.U // M-mode performance CSRs. val mcycleEn = index === 0xB00.U + val minstretEn = index === 0xB02.U val mcyclehEn = index === 0xB80.U + val minstrethEn = index === 0xB82.U // M-mode information CSRs. val mvendoridEn = index === 0xF11.U val marchidEn = index === 0xF12.U @@ -231,6 +244,8 @@ MuxOR(mspEn, msp) | MuxOR(mcycleEn, mcycle(31,0)) | MuxOR(mcyclehEn, mcycle(63,32)) | + MuxOR(minstretEn, minstret(31,0)) | + MuxOR(minstrethEn, minstret(63,32)) | MuxOR(mvendoridEn, mvendorid) | MuxOR(marchidEn, marchid) | MuxOR(mimpidEn, mimpid) | @@ -274,6 +289,17 @@ val mcycle_t = Cat(mcycle_th, mcycle_tl) mcycle := Mux(valid, mcycle_t, mcycle) + 1.U + + val minstret_th = Mux(minstrethEn, wdata, minstret(63,32)) + val minstret_tl = Mux(minstretEn, wdata, minstret(31,0)) + val minstret_t = Cat(minstret_th, minstret_tl) /* wdata replaces whichever half is addressed; the other half keeps its value. */ + minstret := Mux(valid, minstret_t, minstret) + + io.counters.rfwriteCount + + io.counters.storeCount + + io.counters.branchCount + + io.counters.vrfwriteCount + + io.counters.vstoreCount /* The counts are added on top of the value selected by `valid`. */ + when (io.bru.in.mode.valid) { mode := io.bru.in.mode.bits }
diff --git a/hdl/chisel/src/kelvin/scalar/Lsu.scala b/hdl/chisel/src/kelvin/scalar/Lsu.scala index 92b8628..65a1751 100644 --- a/hdl/chisel/src/kelvin/scalar/Lsu.scala +++ b/hdl/chisel/src/kelvin/scalar/Lsu.scala
@@ -107,6 +107,8 @@ // Vector switch. val vldst = Output(Bool()) + + val storeCount = Output(UInt(2.W)) /* Number of dbus/ubus requests that are valid writes (0..2). */ }) val lsu = new LsuOp() @@ -222,6 +224,11 @@ assert(!(io.ubus.valid && io.dbus.addr(31))) assert(!(io.ubus.valid && io.dbus.adrx(31))) + io.storeCount := PopCount(Cat( + io.dbus.valid && io.dbus.write, + io.ubus.valid && io.ubus.write + )) + io.flush.valid := ctrl.io.out.valid && (ctrl.io.out.bits.fencei || ctrl.io.out.bits.flushat || ctrl.io.out.bits.flushall) io.flush.all := ctrl.io.out.bits.fencei || ctrl.io.out.bits.flushall io.flush.clean := true.B
diff --git a/hdl/chisel/src/kelvin/scalar/Regfile.scala b/hdl/chisel/src/kelvin/scalar/Regfile.scala index 4179602..6629505 100644 --- a/hdl/chisel/src/kelvin/scalar/Regfile.scala +++ b/hdl/chisel/src/kelvin/scalar/Regfile.scala
@@ -95,6 +95,8 @@ val regd = Output(UInt(32.W)) val comb = Output(UInt(32.W)) } + + val rfwriteCount = Output(UInt(6.W)) /* NOTE(review): SCore drives the 3-bit CsrCounters.rfwriteCount from this 6-bit port - confirm the value never exceeds 7. */ }) @@ -177,6 +179,17 @@ } } + // We care if someone tried to write x0 (e.g. nop is encoded this way), but want + // it separate for above mentioned optimization. + val x0 = + (0 until 4).map(x => + io.writeData(x).valid && + io.writeData(x).addr === 0.U && + !io.writeMask(x).valid) ++ + (4 until 6).map(x => io.writeData(x).valid && io.writeData(x).addr === 0.U) + + io.rfwriteCount := PopCount(writeValid) - writeValid(0) + PopCount(x0) /* x0 writes are tallied from `x0` instead of writeValid(0). */ + // *************************************************************************** // Read ports with write forwarding. // ***************************************************************************
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala index 0926de8..8dbdabd 100644 --- a/hdl/chisel/src/kelvin/scalar/SCore.scala +++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -172,6 +172,13 @@ io.iflush.valid := iflush + // Instruction counters + csr.io.counters.rfwriteCount := regfile.io.rfwriteCount + csr.io.counters.storeCount := lsu.io.storeCount + csr.io.counters.branchCount := bru(0).io.taken.valid /* NOTE(review): only bru(0).io.taken.valid feeds the branch count - confirm this covers every retired branch. */ + csr.io.counters.vrfwriteCount := io.vcore.vrfwriteCount + csr.io.counters.vstoreCount := io.vcore.vstoreCount + // --------------------------------------------------------------------------- // Control Status Unit csr.io.csr <> io.csr
diff --git a/hdl/chisel/src/kelvin/vector/VCore.scala b/hdl/chisel/src/kelvin/vector/VCore.scala index 029900a..58bbab6 100644 --- a/hdl/chisel/src/kelvin/vector/VCore.scala +++ b/hdl/chisel/src/kelvin/vector/VCore.scala
@@ -39,6 +39,9 @@ // Faults. val undef = Output(Bool()) + + val vrfwriteCount = Output(UInt(3.W)) + val vstoreCount = Output(UInt(2.W)) } class VCore(p: Parameters) extends Module { @@ -72,6 +75,9 @@ val vst = VSt(p) val vrf = VRegfile(p) + io.score.vrfwriteCount := vrf.io.vrfwriteCount + io.score.vstoreCount := vst.io.vstoreCount +& vldst.io.vstoreCount /* +& keeps the carry: both operands are 1 bit wide, so a plain + would wrap 1+1 to 0. */ + vinst.io.in <> io.score.vinst vinst.io.rs <> io.score.rs vinst.io.rd <> io.score.rd
diff --git a/hdl/chisel/src/kelvin/vector/VLdSt.scala b/hdl/chisel/src/kelvin/vector/VLdSt.scala index df6fadc..d2d9853 100644 --- a/hdl/chisel/src/kelvin/vector/VLdSt.scala +++ b/hdl/chisel/src/kelvin/vector/VLdSt.scala
@@ -41,8 +41,11 @@ // Bus. val dbus = new DBusIO(p) val last = Output(Bool()) + + val vstoreCount = Output(UInt(1.W)) }) + // A usable amount of outstanding transactions. val cmdqDepth = 8 @@ -253,6 +256,7 @@ ctrl.io.in.bits.write := q.io.out.bits.IsStore() ctrl.io.in.bits.widx := q.io.out.bits.vd.addr assert(!(ctrl.io.in.valid && !ctrl.io.in.ready)) + io.vstoreCount := ctrl.io.in.valid && ctrl.io.in.ready /* NOTE(review): asserted for every accepted ctrl entry; ctrl.io.in.bits.write is not checked, so loads are counted too - confirm intended. */ data.io.in.valid := rdataEn data.io.in.bits.wdata := Swizzle(false, 8, rdataAshf, io.read.data)
diff --git a/hdl/chisel/src/kelvin/vector/VRegfile.scala b/hdl/chisel/src/kelvin/vector/VRegfile.scala index e9f74a5..ac67ff0 100644 --- a/hdl/chisel/src/kelvin/vector/VRegfile.scala +++ b/hdl/chisel/src/kelvin/vector/VRegfile.scala
@@ -125,6 +125,7 @@ val conv = Flipped(new VRegfileConvIO(p)) val transpose = Flipped(new VRegfileTransposeIO(p)) val vrfsb = Flipped(new VRegfileScoreboardIO) + val vrfwriteCount = Output(UInt(3.W)) }) val segcnt = p.vectorBits / 32 @@ -176,6 +177,8 @@ } } + io.vrfwriteCount := writevalid(0) /* NOTE(review): only bit 0 of writevalid drives the 3-bit count, so it is 0 or 1 - confirm other bits should not contribute. */ + // --------------------------------------------------------------------------- // Write ports. for (i <- 0 until writePorts) {
diff --git a/hdl/chisel/src/kelvin/vector/VSt.scala b/hdl/chisel/src/kelvin/vector/VSt.scala index 9dcd9de..f730fec 100644 --- a/hdl/chisel/src/kelvin/vector/VSt.scala +++ b/hdl/chisel/src/kelvin/vector/VSt.scala
@@ -42,6 +42,8 @@ // Status. val nempty = Output(Bool()) + + val vstoreCount = Output(UInt(1.W)) /* Driven directly from ctrl.io.out.valid. */ }) // A usable depth of outstanding commands. @@ -299,6 +301,8 @@ assert(io.axi.addr.valid === io.axi.data.valid) assert(io.axi.addr.ready === io.axi.data.ready) + io.vstoreCount := ctrl.io.out.valid + // --------------------------------------------------------------------------- // Active. io.active := q.io.active
diff --git a/tests/verilator_sim/kelvin/vldst_tb.cc b/tests/verilator_sim/kelvin/vldst_tb.cc index 83e1edf..9539c6a 100644 --- a/tests/verilator_sim/kelvin/vldst_tb.cc +++ b/tests/verilator_sim/kelvin/vldst_tb.cc
@@ -186,6 +186,7 @@ sc_in<sc_bv<kVector / 8> > io_dbus_wmask; sc_out<sc_bv<kVector> > io_dbus_rdata; sc_in<bool> io_last; + sc_in<bool> io_vstoreCount; /* 1-bit vstoreCount output of the VLdSt DUT. */ using Sysc_tb::Sysc_tb; @@ -722,6 +723,7 @@ sc_signal<sc_bv<kVector / 8> > io_dbus_wmask; sc_signal<sc_bv<kVector> > io_dbus_rdata; sc_signal<bool> io_last; + sc_signal<bool> io_vstoreCount; VLdSt_tb tb("VLdSt_tb", loops, true /* random */); VVLdSt ldst(name); @@ -891,6 +893,7 @@ BIND2(tb, ldst, io_dbus_wmask); BIND2(tb, ldst, io_dbus_rdata); BIND2(tb, ldst, io_last); + BIND2(tb, ldst, io_vstoreCount); if (trace) { tb.trace(ldst);
diff --git a/tests/verilator_sim/kelvin/vregfile_tb.cc b/tests/verilator_sim/kelvin/vregfile_tb.cc index cf45955..25218d5 100644 --- a/tests/verilator_sim/kelvin/vregfile_tb.cc +++ b/tests/verilator_sim/kelvin/vregfile_tb.cc
@@ -97,6 +97,7 @@ sc_out<bool> io_vrfsb_set_valid; sc_out<sc_bv<128> > io_vrfsb_set_bits; sc_in<sc_bv<128> > io_vrfsb_data; + sc_in<sc_bv<3> > io_vrfwriteCount; /* 3-bit vrfwriteCount output of the VRegfile DUT. */ using Sysc_tb::Sysc_tb; @@ -594,6 +595,7 @@ sc_signal<sc_bv<128> > io_vrfsb_set_bits; sc_signal<sc_bv<128> > io_vrfsb_data; sc_signal<bool> io_vrfsb_set_valid; + sc_signal<sc_bv<3> > io_vrfwriteCount; VRegfile_tb tb("VRegfile_tb", loops, true /*random*/); VVRegfile vrf(name); @@ -684,6 +686,7 @@ BIND2(tb, vrf, io_vrfsb_set_valid); BIND2(tb, vrf, io_vrfsb_set_bits); BIND2(tb, vrf, io_vrfsb_data); + BIND2(tb, vrf, io_vrfwriteCount); tb.start(); }
diff --git a/tests/verilator_sim/kelvin/vst_tb.cc b/tests/verilator_sim/kelvin/vst_tb.cc index f425a10..79567e7 100644 --- a/tests/verilator_sim/kelvin/vst_tb.cc +++ b/tests/verilator_sim/kelvin/vst_tb.cc
@@ -92,6 +92,7 @@ sc_in<bool> io_axi_resp_ready; sc_out<bool> io_axi_resp_valid; sc_in<bool> io_nempty; + sc_in<bool> io_vstoreCount; /* 1-bit vstoreCount output of the VSt DUT. */ sc_out<sc_bv<7> > io_in_bits_0_bits_op; sc_out<sc_bv<3> > io_in_bits_0_bits_f2; sc_out<sc_bv<3> > io_in_bits_0_bits_sz; @@ -572,6 +573,7 @@ sc_signal<bool> io_axi_resp_ready; sc_signal<bool> io_axi_resp_valid; sc_signal<bool> io_nempty; + sc_signal<bool> io_vstoreCount; sc_signal<sc_bv<7> > io_in_bits_0_bits_op; sc_signal<sc_bv<3> > io_in_bits_0_bits_f2; sc_signal<sc_bv<3> > io_in_bits_0_bits_sz; @@ -741,6 +743,7 @@ BIND2(tb, st, io_axi_resp_ready); BIND2(tb, st, io_axi_resp_valid); BIND2(tb, st, io_nempty); + BIND2(tb, st, io_vstoreCount); BIND2(tb, st, io_in_bits_0_bits_op); BIND2(tb, st, io_in_bits_0_bits_f2); BIND2(tb, st, io_in_bits_0_bits_sz);