Add minstret CSR
- Track proxy signals for instructions retiring, and use these to
populate the value of the minstret register.
Change-Id: Idfe9046d17520463c27c5214e0e8d045eba8ea13
diff --git a/hdl/chisel/src/kelvin/scalar/Csr.scala b/hdl/chisel/src/kelvin/scalar/Csr.scala
index 346c9dc..9536f11 100644
--- a/hdl/chisel/src/kelvin/scalar/Csr.scala
+++ b/hdl/chisel/src/kelvin/scalar/Csr.scala
@@ -44,6 +44,14 @@
val out = new CsrOutIO(p)
}
+class CsrCounters(p: Parameters) extends Bundle { // Event counts that Csr sums into the minstret register on every clock.
+ val rfwriteCount = UInt(3.W) // Driven in SCore from regfile.io.rfwriteCount. NOTE(review): that output is declared UInt(6.W); confirm the narrowing to 3 bits is intended.
+ val storeCount = UInt(2.W) // Driven in SCore from lsu.io.storeCount.
+ val branchCount = UInt(1.W) // Driven in SCore from bru(0).io.taken.valid.
+ val vrfwriteCount = UInt(3.W) // Driven in SCore from io.vcore.vrfwriteCount.
+ val vstoreCount = UInt(2.W) // Driven in SCore from io.vcore.vstoreCount.
+}
+
class CsrBruIO(p: Parameters) extends Bundle {
val in = new Bundle {
val mode = Valid(Bool())
@@ -88,6 +96,8 @@
// Vector core.
val vcore = Input(new Bundle { val undef = Bool() })
+ val counters = Input(new CsrCounters(p))
+
// Pipeline Control.
val halted = Output(Bool())
val fault = Output(Bool())
@@ -132,6 +142,7 @@
val mhartid = RegInit(0.U(32.W))
val mcycle = RegInit(0.U(64.W))
+ val minstret = RegInit(0.U(64.W))
// 32-bit MXLEN, I,M,X extensions
val misa = RegInit(0x40801100.U(32.W))
@@ -170,7 +181,9 @@
val mspEn = index === 0x7E1.U
// M-mode performance CSRs.
val mcycleEn = index === 0xB00.U
+ val minstretEn = index === 0xB02.U
val mcyclehEn = index === 0xB80.U
+ val minstrethEn = index === 0xB82.U
// M-mode information CSRs.
val mvendoridEn = index === 0xF11.U
val marchidEn = index === 0xF12.U
@@ -231,6 +244,8 @@
MuxOR(mspEn, msp) |
MuxOR(mcycleEn, mcycle(31,0)) |
MuxOR(mcyclehEn, mcycle(63,32)) |
+ MuxOR(minstretEn, minstret(31,0)) |
+ MuxOR(minstrethEn, minstret(63,32)) |
MuxOR(mvendoridEn, mvendorid) |
MuxOR(marchidEn, marchid) |
MuxOR(mimpidEn, mimpid) |
@@ -274,6 +289,17 @@
val mcycle_t = Cat(mcycle_th, mcycle_tl)
mcycle := Mux(valid, mcycle_t, mcycle) + 1.U
+
+ val minstret_th = Mux(minstrethEn, wdata, minstret(63,32)) // Upper word: take wdata when index 0xB82 is selected, else keep bits 63..32.
+ val minstret_tl = Mux(minstretEn, wdata, minstret(31,0)) // Lower word: take wdata when index 0xB02 is selected, else keep bits 31..0.
+ val minstret_t = Cat(minstret_th, minstret_tl)
+ minstret := Mux(valid, minstret_t, minstret) + // Use the (possibly written) value when `valid`, then add this cycle's counts on top.
+ io.counters.rfwriteCount +
+ io.counters.storeCount +
+ io.counters.branchCount +
+ io.counters.vrfwriteCount +
+ io.counters.vstoreCount
+
when (io.bru.in.mode.valid) {
mode := io.bru.in.mode.bits
}
diff --git a/hdl/chisel/src/kelvin/scalar/Lsu.scala b/hdl/chisel/src/kelvin/scalar/Lsu.scala
index 92b8628..65a1751 100644
--- a/hdl/chisel/src/kelvin/scalar/Lsu.scala
+++ b/hdl/chisel/src/kelvin/scalar/Lsu.scala
@@ -107,6 +107,8 @@
// Vector switch.
val vldst = Output(Bool())
+
+ val storeCount = Output(UInt(2.W))
})
val lsu = new LsuOp()
@@ -222,6 +224,11 @@
assert(!(io.ubus.valid && io.dbus.addr(31)))
assert(!(io.ubus.valid && io.dbus.adrx(31)))
+ io.storeCount := PopCount(Cat(
+ io.dbus.valid && io.dbus.write,
+ io.ubus.valid && io.ubus.write
+ ))
+
io.flush.valid := ctrl.io.out.valid && (ctrl.io.out.bits.fencei || ctrl.io.out.bits.flushat || ctrl.io.out.bits.flushall)
io.flush.all := ctrl.io.out.bits.fencei || ctrl.io.out.bits.flushall
io.flush.clean := true.B
diff --git a/hdl/chisel/src/kelvin/scalar/Regfile.scala b/hdl/chisel/src/kelvin/scalar/Regfile.scala
index 4179602..6629505 100644
--- a/hdl/chisel/src/kelvin/scalar/Regfile.scala
+++ b/hdl/chisel/src/kelvin/scalar/Regfile.scala
@@ -95,6 +95,8 @@
val regd = Output(UInt(32.W))
val comb = Output(UInt(32.W))
}
+
+ val rfwriteCount = Output(UInt(6.W))
})
@@ -177,6 +179,17 @@
}
}
+ // We care if someone tried to write x0 (e.g. nop is encoded this way), but want
+ // it separate for above mentioned optimization.
+ val x0 =
+ (0 until 4).map(x =>
+ io.writeData(x).valid &&
+ io.writeData(x).addr === 0.U &&
+ !io.writeMask(x).valid) ++
+ (4 until 6).map(x => io.writeData(x).valid && io.writeData(x).addr === 0.U)
+
+ io.rfwriteCount := PopCount(writeValid) - writeValid(0) + PopCount(x0)
+
// ***************************************************************************
// Read ports with write forwarding.
// ***************************************************************************
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala
index 0926de8..8dbdabd 100644
--- a/hdl/chisel/src/kelvin/scalar/SCore.scala
+++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -172,6 +172,13 @@
io.iflush.valid := iflush
+ // Instruction counters
+ csr.io.counters.rfwriteCount := regfile.io.rfwriteCount
+ csr.io.counters.storeCount := lsu.io.storeCount
+ csr.io.counters.branchCount := bru(0).io.taken.valid
+ csr.io.counters.vrfwriteCount := io.vcore.vrfwriteCount
+ csr.io.counters.vstoreCount := io.vcore.vstoreCount
+
// ---------------------------------------------------------------------------
// Control Status Unit
csr.io.csr <> io.csr
diff --git a/hdl/chisel/src/kelvin/vector/VCore.scala b/hdl/chisel/src/kelvin/vector/VCore.scala
index 029900a..58bbab6 100644
--- a/hdl/chisel/src/kelvin/vector/VCore.scala
+++ b/hdl/chisel/src/kelvin/vector/VCore.scala
@@ -39,6 +39,9 @@
// Faults.
val undef = Output(Bool())
+
+ val vrfwriteCount = Output(UInt(3.W))
+ val vstoreCount = Output(UInt(2.W))
}
class VCore(p: Parameters) extends Module {
@@ -72,6 +75,9 @@
val vst = VSt(p)
val vrf = VRegfile(p)
+ io.score.vrfwriteCount := vrf.io.vrfwriteCount
+ io.score.vstoreCount := vst.io.vstoreCount +& vldst.io.vstoreCount // +& widens to 2 bits; a plain + on two 1-bit values wraps 1+1 to 0.
+
vinst.io.in <> io.score.vinst
vinst.io.rs <> io.score.rs
vinst.io.rd <> io.score.rd
diff --git a/hdl/chisel/src/kelvin/vector/VLdSt.scala b/hdl/chisel/src/kelvin/vector/VLdSt.scala
index df6fadc..d2d9853 100644
--- a/hdl/chisel/src/kelvin/vector/VLdSt.scala
+++ b/hdl/chisel/src/kelvin/vector/VLdSt.scala
@@ -41,8 +41,11 @@
// Bus.
val dbus = new DBusIO(p)
val last = Output(Bool())
+
+ val vstoreCount = Output(UInt(1.W))
})
+
// A usable amount of outstanding transactions.
val cmdqDepth = 8
@@ -253,6 +256,7 @@
ctrl.io.in.bits.write := q.io.out.bits.IsStore()
ctrl.io.in.bits.widx := q.io.out.bits.vd.addr
assert(!(ctrl.io.in.valid && !ctrl.io.in.ready))
+ io.vstoreCount := ctrl.io.in.valid && ctrl.io.in.ready // NOTE(review): asserts for every accepted ctrl command, not only when bits.write is set; confirm intended.
data.io.in.valid := rdataEn
data.io.in.bits.wdata := Swizzle(false, 8, rdataAshf, io.read.data)
diff --git a/hdl/chisel/src/kelvin/vector/VRegfile.scala b/hdl/chisel/src/kelvin/vector/VRegfile.scala
index e9f74a5..ac67ff0 100644
--- a/hdl/chisel/src/kelvin/vector/VRegfile.scala
+++ b/hdl/chisel/src/kelvin/vector/VRegfile.scala
@@ -125,6 +125,7 @@
val conv = Flipped(new VRegfileConvIO(p))
val transpose = Flipped(new VRegfileTransposeIO(p))
val vrfsb = Flipped(new VRegfileScoreboardIO)
+ val vrfwriteCount = Output(UInt(3.W))
})
val segcnt = p.vectorBits / 32
@@ -176,6 +177,8 @@
}
}
+ io.vrfwriteCount := writevalid(0)
+
// ---------------------------------------------------------------------------
// Write ports.
for (i <- 0 until writePorts) {
diff --git a/hdl/chisel/src/kelvin/vector/VSt.scala b/hdl/chisel/src/kelvin/vector/VSt.scala
index 9dcd9de..f730fec 100644
--- a/hdl/chisel/src/kelvin/vector/VSt.scala
+++ b/hdl/chisel/src/kelvin/vector/VSt.scala
@@ -42,6 +42,8 @@
// Status.
val nempty = Output(Bool())
+
+ val vstoreCount = Output(UInt(1.W))
})
// A usable depth of outstanding commands.
@@ -299,6 +301,8 @@
assert(io.axi.addr.valid === io.axi.data.valid)
assert(io.axi.addr.ready === io.axi.data.ready)
+ io.vstoreCount := ctrl.io.out.valid
+
// ---------------------------------------------------------------------------
// Active.
io.active := q.io.active
diff --git a/tests/verilator_sim/kelvin/vldst_tb.cc b/tests/verilator_sim/kelvin/vldst_tb.cc
index 83e1edf..9539c6a 100644
--- a/tests/verilator_sim/kelvin/vldst_tb.cc
+++ b/tests/verilator_sim/kelvin/vldst_tb.cc
@@ -186,6 +186,7 @@
sc_in<sc_bv<kVector / 8> > io_dbus_wmask;
sc_out<sc_bv<kVector> > io_dbus_rdata;
sc_in<bool> io_last;
+ sc_in<bool> io_vstoreCount;
using Sysc_tb::Sysc_tb;
@@ -722,6 +723,7 @@
sc_signal<sc_bv<kVector / 8> > io_dbus_wmask;
sc_signal<sc_bv<kVector> > io_dbus_rdata;
sc_signal<bool> io_last;
+ sc_signal<bool> io_vstoreCount;
VLdSt_tb tb("VLdSt_tb", loops, true /* random */);
VVLdSt ldst(name);
@@ -891,6 +893,7 @@
BIND2(tb, ldst, io_dbus_wmask);
BIND2(tb, ldst, io_dbus_rdata);
BIND2(tb, ldst, io_last);
+ BIND2(tb, ldst, io_vstoreCount);
if (trace) {
tb.trace(ldst);
diff --git a/tests/verilator_sim/kelvin/vregfile_tb.cc b/tests/verilator_sim/kelvin/vregfile_tb.cc
index cf45955..25218d5 100644
--- a/tests/verilator_sim/kelvin/vregfile_tb.cc
+++ b/tests/verilator_sim/kelvin/vregfile_tb.cc
@@ -97,6 +97,7 @@
sc_out<bool> io_vrfsb_set_valid;
sc_out<sc_bv<128> > io_vrfsb_set_bits;
sc_in<sc_bv<128> > io_vrfsb_data;
+ sc_in<sc_bv<3> > io_vrfwriteCount;
using Sysc_tb::Sysc_tb;
@@ -594,6 +595,7 @@
sc_signal<sc_bv<128> > io_vrfsb_set_bits;
sc_signal<sc_bv<128> > io_vrfsb_data;
sc_signal<bool> io_vrfsb_set_valid;
+ sc_signal<sc_bv<3> > io_vrfwriteCount;
VRegfile_tb tb("VRegfile_tb", loops, true /*random*/);
VVRegfile vrf(name);
@@ -684,6 +686,7 @@
BIND2(tb, vrf, io_vrfsb_set_valid);
BIND2(tb, vrf, io_vrfsb_set_bits);
BIND2(tb, vrf, io_vrfsb_data);
+ BIND2(tb, vrf, io_vrfwriteCount);
tb.start();
}
diff --git a/tests/verilator_sim/kelvin/vst_tb.cc b/tests/verilator_sim/kelvin/vst_tb.cc
index f425a10..79567e7 100644
--- a/tests/verilator_sim/kelvin/vst_tb.cc
+++ b/tests/verilator_sim/kelvin/vst_tb.cc
@@ -92,6 +92,7 @@
sc_in<bool> io_axi_resp_ready;
sc_out<bool> io_axi_resp_valid;
sc_in<bool> io_nempty;
+ sc_in<bool> io_vstoreCount;
sc_out<sc_bv<7> > io_in_bits_0_bits_op;
sc_out<sc_bv<3> > io_in_bits_0_bits_f2;
sc_out<sc_bv<3> > io_in_bits_0_bits_sz;
@@ -572,6 +573,7 @@
sc_signal<bool> io_axi_resp_ready;
sc_signal<bool> io_axi_resp_valid;
sc_signal<bool> io_nempty;
+ sc_signal<bool> io_vstoreCount;
sc_signal<sc_bv<7> > io_in_bits_0_bits_op;
sc_signal<sc_bv<3> > io_in_bits_0_bits_f2;
sc_signal<sc_bv<3> > io_in_bits_0_bits_sz;
@@ -741,6 +743,7 @@
BIND2(tb, st, io_axi_resp_ready);
BIND2(tb, st, io_axi_resp_valid);
BIND2(tb, st, io_nempty);
+ BIND2(tb, st, io_vstoreCount);
BIND2(tb, st, io_in_bits_0_bits_op);
BIND2(tb, st, io_in_bits_0_bits_f2);
BIND2(tb, st, io_in_bits_0_bits_sz);