Make WFI actually stall the core until an interrupt
- Add top-level signals `wfi` and `irq`: `wfi` is an output signaling
that the core is waiting for an interrupt before it proceeds, and `irq`
is the input that delivers that interrupt. Once a WFI instruction is
decoded and issued, the pipeline is halted and `wfi` stays raised until
`irq` is asserted.
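- In CoreAxi, the top-level interrupt input is the active-low `irqn`; it
is inverted before being passed to Core. The core clock gate is also
disabled while `wfi` is raised and re-enabled when `irqn` is driven low,
and `iflush.ready` is now tied high.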
Change-Id: I49bb6e2e4ed07e0579d36f37f86ede7b33377852
diff --git a/hdl/chisel/src/kelvin/Core.scala b/hdl/chisel/src/kelvin/Core.scala
index 8acfaf2..7d2cbdc 100644
--- a/hdl/chisel/src/kelvin/Core.scala
+++ b/hdl/chisel/src/kelvin/Core.scala
@@ -40,6 +40,8 @@
val csr = new CsrInOutIO(p)
val halted = Output(Bool())
val fault = Output(Bool())
+ val wfi = Output(Bool())
+ val irq = Input(Bool())
val debug_req = Input(Bool())
// Bus between core and instruction memories.
@@ -66,6 +68,8 @@
io.ebus <> score.io.ebus
io.halted := score.io.halted
io.fault := score.io.fault
+ io.wfi := score.io.wfi
+ score.io.irq := io.irq
io.iflush <> score.io.iflush
io.dflush <> score.io.dflush
io.slog := score.io.slog
diff --git a/hdl/chisel/src/kelvin/CoreAxi.scala b/hdl/chisel/src/kelvin/CoreAxi.scala
index 7aae784..fe48b87 100644
--- a/hdl/chisel/src/kelvin/CoreAxi.scala
+++ b/hdl/chisel/src/kelvin/CoreAxi.scala
@@ -42,6 +42,8 @@
// Core status interrupts
val halted = Output(Bool())
val fault = Output(Bool())
+ val wfi = Output(Bool())
+ val irqn = Input(Bool())
// Debug data interface
val debug = new DebugIO(p)
// String logging interface
@@ -91,8 +93,8 @@
val csr = Module(new CoreAxiCSR(p))
val cg = Module(new ClockGate())
cg.io.clk_i := io.aclk
- cg.io.enable := !csr.io.cg
val core = withClockAndReset(cg.io.clk_o, csr.io.reset) { Core(p, coreModuleName) }
+ cg.io.enable := !io.irqn || (!csr.io.cg && !core.io.wfi)
csr.io.kelvin_csr := core.io.csr.out
val itcmBridge = Module(new AxiSlave2SRAM(p, log2Ceil(itcmEntries)))
@@ -128,6 +130,8 @@
itcmBridge.io.periBusy := core.io.ibus.valid
io.halted := core.io.halted
io.fault := core.io.fault
+ io.wfi := core.io.wfi
+ core.io.irq := !io.irqn
csr.io.halted := core.io.halted
csr.io.fault := core.io.fault
core.io.debug_req := true.B
@@ -196,6 +200,6 @@
// Tie-offs
core.io.dflush.ready := true.B
- core.io.iflush.ready := false.B
+ core.io.iflush.ready := true.B
}
}
diff --git a/hdl/chisel/src/kelvin/scalar/Bru.scala b/hdl/chisel/src/kelvin/scalar/Bru.scala
index 6d1a4dd..926d2bb 100644
--- a/hdl/chisel/src/kelvin/scalar/Bru.scala
+++ b/hdl/chisel/src/kelvin/scalar/Bru.scala
@@ -41,6 +41,7 @@
val MPAUSE = Value
val MRET = Value
val FENCEI = Value
+ val WFI = Value
val UNDEF = Value
}
@@ -127,7 +128,7 @@
mret -> io.csr.out.mepc,
ecall -> Cat(io.csr.out.mtvec(31,1), 0.U(1.W)),
call -> io.csr.out.mepc,
- (io.req.bits.fwd || (io.req.bits.op === BruOp.FENCEI)) -> pc4De,
+ (io.req.bits.fwd || (io.req.bits.op === BruOp.FENCEI) || (io.req.bits.op === BruOp.WFI)) -> pc4De,
(io.req.bits.op === BruOp.JALR) -> io.target.data,
))
stateReg.valid := io.req.valid
@@ -166,6 +167,7 @@
BruOp.BGE -> (ge =/= stateReg.bits.fwd),
BruOp.BLTU -> (ltu =/= stateReg.bits.fwd),
BruOp.BGEU -> (geu =/= stateReg.bits.fwd),
+ BruOp.WFI -> true.B,
))
io.taken.value := stateReg.bits.target
@@ -217,17 +219,18 @@
io.csr.in.mtval.valid := stateReg.valid && (undefFault || usageFault)
io.csr.in.mtval.bits := stateReg.bits.pcEx
- io.iflush := stateReg.valid && (op === BruOp.FENCEI)
+ io.iflush := stateReg.valid && op.isOneOf(BruOp.FENCEI, BruOp.WFI)
// Pipeline will be halted.
io.csr.in.halt := (stateReg.valid && (op === BruOp.MPAUSE) && (mode === CsrMode.Machine)) ||
io.csr.in.fault
io.csr.in.fault := (undefFault && (mode === CsrMode.Machine)) || (usageFault && (mode === CsrMode.Machine))
+ io.csr.in.wfi := stateReg.valid && (op === BruOp.WFI)
// Assertions.
val ignore = op.isOneOf(BruOp.JAL, BruOp.JALR, BruOp.EBREAK, BruOp.ECALL,
BruOp.EEXIT, BruOp.EYIELD, BruOp.ECTXSW, BruOp.MPAUSE,
- BruOp.MRET, BruOp.FENCEI, BruOp.UNDEF)
+ BruOp.MRET, BruOp.FENCEI, BruOp.UNDEF, BruOp.WFI)
assert(!(stateReg.valid && !io.rs1.valid) || ignore)
assert(!(stateReg.valid && !io.rs2.valid) || ignore)
diff --git a/hdl/chisel/src/kelvin/scalar/Csr.scala b/hdl/chisel/src/kelvin/scalar/Csr.scala
index 07ed2cd..88edcfe 100644
--- a/hdl/chisel/src/kelvin/scalar/Csr.scala
+++ b/hdl/chisel/src/kelvin/scalar/Csr.scala
@@ -55,6 +55,7 @@
val mtval = Valid(UInt(32.W))
val halt = Output(Bool())
val fault = Output(Bool())
+ val wfi = Output(Bool())
}
val out = new Bundle {
val mode = Input(CsrMode())
@@ -97,6 +98,8 @@
// Pipeline Control.
val halted = Output(Bool())
val fault = Output(Bool())
+ val wfi = Output(Bool())
+ val irq = Input(Bool())
})
// Control registers.
@@ -104,7 +107,8 @@
// Pipeline Control.
val halted = RegInit(false.B)
- val fault = RegInit(false.B)
+ val fault = RegInit(false.B)
+ val wfi = RegInit(false.B)
// Machine(0)/User(1) Mode.
val mode = RegInit(CsrMode.Machine)
@@ -194,10 +198,13 @@
fault := true.B
}
+ wfi := Mux(wfi, !io.irq, io.bru.in.wfi)
+
io.halted := halted
io.fault := fault
+ io.wfi := wfi
- assert(!(io.fault && !io.halted))
+ assert(!(io.fault && !io.halted && !io.wfi))
// Register state.
val rs1 = io.rs1.data
diff --git a/hdl/chisel/src/kelvin/scalar/Decode.scala b/hdl/chisel/src/kelvin/scalar/Decode.scala
index a42da52..d85e281 100644
--- a/hdl/chisel/src/kelvin/scalar/Decode.scala
+++ b/hdl/chisel/src/kelvin/scalar/Decode.scala
@@ -231,6 +231,8 @@
// The decode logic.
val d = DecodeInstruction(p, pipeline, io.inst.bits.addr, io.inst.bits.inst)
+ val wfi = d.wfi
+
val vldst = d.vld || d.vst
val vldst_wb = vldst && io.inst.bits.inst(28)
@@ -334,6 +336,7 @@
d.mpause -> MakeValid(true.B, BruOp.MPAUSE),
d.mret -> MakeValid(true.B, BruOp.MRET),
d.fencei -> MakeValid(true.B, BruOp.FENCEI),
+ d.wfi -> MakeValid(true.B, BruOp.WFI),
d.undef -> MakeValid(true.B, BruOp.UNDEF),
))
io.bru.valid := decodeEn && bru.valid
@@ -365,10 +368,10 @@
d.sb -> MakeValid(true.B, LsuOp.SB),
d.sh -> MakeValid(true.B, LsuOp.SH),
d.sw -> MakeValid(true.B, LsuOp.SW),
+ d.wfi -> MakeValid(true.B, LsuOp.FENCEI),
d.fencei -> MakeValid(true.B, LsuOp.FENCEI),
d.flushat -> MakeValid(true.B, LsuOp.FLUSHAT),
d.flushall -> MakeValid(true.B, LsuOp.FLUSHALL),
- d.wfi -> MakeValid(true.B, LsuOp.FENCEI),
(d.vld || d.vst) -> MakeValid(true.B, LsuOp.VLDST),
))
io.lsu.valid := decodeEn && lsu.valid
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala
index a077f88..efb7897 100644
--- a/hdl/chisel/src/kelvin/scalar/SCore.scala
+++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -32,6 +32,8 @@
val csr = new CsrInOutIO(p)
val halted = Output(Bool())
val fault = Output(Bool())
+ val wfi = Output(Bool())
+ val irq = Input(Bool())
val ibus = new IBusIO(p)
val dbus = new DBusIO(p)
@@ -108,7 +110,7 @@
decode(i).io.inst.bits.brchFwd := fetch.io.inst.lanes(i).bits.brchFwd
decode(i).io.branchTaken := branchTaken
- decode(i).io.halted := csr.io.halted
+ decode(i).io.halted := csr.io.halted || csr.io.wfi
}
// Interlock based on regfile write port dependencies.
@@ -184,6 +186,8 @@
// Status
io.halted := csr.io.halted
io.fault := csr.io.fault
+ io.wfi := csr.io.wfi
+ csr.io.irq := io.irq
// ---------------------------------------------------------------------------
// Load/Store Unit
diff --git a/hdl/chisel/src/matcha/Kelvin.scala b/hdl/chisel/src/matcha/Kelvin.scala
index 352940a..adeed49 100644
--- a/hdl/chisel/src/matcha/Kelvin.scala
+++ b/hdl/chisel/src/matcha/Kelvin.scala
@@ -91,6 +91,7 @@
finish := core.io.halted
host_req := false.B
fault := core.io.fault
+ core.io.irq := false.B
// -------------------------------------------------------------------------
// Scalar Core logging.
diff --git a/tests/renode/rv_core/kelvin_hello_world.c b/tests/renode/rv_core/kelvin_hello_world.c
index 255532e..50bd6ef 100644
--- a/tests/renode/rv_core/kelvin_hello_world.c
+++ b/tests/renode/rv_core/kelvin_hello_world.c
@@ -54,6 +54,7 @@
print_uint32(*our_pc_csr);
print_string("beefb0ba\n");
print_uint32(0xb0bacafeL);
+ asm volatile("wfi");
asm volatile(".word 0x26000077"); // flushall
return 0;
}
diff --git a/tests/renode/sim_main.cc b/tests/renode/sim_main.cc
index 637ff7e..366ad09 100644
--- a/tests/renode/sim_main.cc
+++ b/tests/renode/sim_main.cc
@@ -47,6 +47,7 @@
// First cycle, always evaluate regardless of what role asked.
if (main_time == 0) {
+ top->io_irqn = true;
top->eval();
main_time++;
return;
@@ -56,6 +57,18 @@
if (main_time == last_tick) {
return;
} else {
+ // On rising edges, check if the core is in WFI.
+ // If so, generate an interrupt pulse to wake it.
+ static bool irqn_state = true;
+ if (top->io_aclk) {
+ if (top->io_wfi && irqn_state) {
+ irqn_state = false;
+ }
+ if (!top->io_wfi && !irqn_state) {
+ irqn_state = true;
+ }
+ }
+ top->io_irqn = irqn_state;
top->eval();
last_tick = main_time;
main_time++;
diff --git a/tests/verilator_sim/kelvin/core_mini_axi_sim.cc b/tests/verilator_sim/kelvin/core_mini_axi_sim.cc
index 6fb62bd..e7655c2 100644
--- a/tests/verilator_sim/kelvin/core_mini_axi_sim.cc
+++ b/tests/verilator_sim/kelvin/core_mini_axi_sim.cc
@@ -178,6 +178,8 @@
struct CoreMiniAxi_tb : Sysc_tb {
sc_in<bool> io_halted;
sc_in<bool> io_fault;
+ sc_in<bool> io_wfi;
+ sc_out<bool> io_irqn;
CoreMiniAxi_tb(sc_module_name n, int loops, bool random, std::string binary)
: Sysc_tb(n, loops, random),
@@ -312,6 +314,17 @@
tg_.addTransfers(status_read_transfer_.get(), 0,
CoreMiniAxi_tb::status_read_transfer_done_cb);
}
+
+ static bool wfi_seen = false;
+ if (io_wfi && !wfi_seen) {
+ io_irqn = false;
+ wfi_seen = true;
+ } else if (wfi_seen) {
+ io_irqn = true;
+ wfi_seen = false;
+ } else {
+ io_irqn = true;
+ }
}
typedef AXISignals<KP_axi2AddrBits, // ADDR_WIDTH
@@ -400,13 +413,19 @@
sc_signal<bool> io_halted;
sc_signal<bool> io_fault;
+ sc_signal<bool> io_wfi;
+ sc_signal<bool> io_irqn;
tb.io_halted(io_halted);
tb.io_fault(io_fault);
+ tb.io_wfi(io_wfi);
+ tb.io_irqn(io_irqn);
core.io_aclk(tb.clock);
core.io_aresetn(tb.resetn);
core.io_halted(io_halted);
core.io_fault(io_fault);
+ core.io_wfi(io_wfi);
+ core.io_irqn(io_irqn);
SlogIO slog;
core.io_slog_valid(slog.valid);
diff --git a/tests/verilator_sim/kelvin/core_tb.cc b/tests/verilator_sim/kelvin/core_tb.cc
index 082b6a8..e53922e 100644
--- a/tests/verilator_sim/kelvin/core_tb.cc
+++ b/tests/verilator_sim/kelvin/core_tb.cc
@@ -56,6 +56,8 @@
sc_signal<bool> io_halted;
sc_signal<bool> io_fault;
+ sc_signal<bool> io_wfi;
+ sc_signal<bool> io_irq;
sc_signal<bool> io_debug_req;
sc_signal<bool> io_ibus_valid;
sc_signal<bool> io_ibus_ready;
@@ -129,6 +131,8 @@
core.reset(tb.reset);
core.io_halted(io_halted);
core.io_fault(io_fault);
+ core.io_wfi(io_wfi);
+ core.io_irq(io_irq);
core.io_debug_req(io_debug_req);
core.io_ibus_valid(io_ibus_valid);
core.io_ibus_ready(io_ibus_ready);