Allow LSU to accept multiple instructions in one cycle.
Change-Id: I6ff3d6e795182bcd921c40e24bce6af4aeda4e77
diff --git a/hdl/chisel/src/common/Aligner.scala b/hdl/chisel/src/common/Aligner.scala
index 79feb4e..936ce63 100644
--- a/hdl/chisel/src/common/Aligner.scala
+++ b/hdl/chisel/src/common/Aligner.scala
@@ -81,4 +81,13 @@
})
addResource("hdl/verilog/rvv/design/Aligner.sv")
setInline(s"$desiredName.sv", GenerateAlignerSource(t, n))
+}
+
+object Aligner {
+ def apply[T <: Data](in: Seq[ValidIO[T]]): Vec[ValidIO[T]] = {
+ val t = chiselTypeOf(in(0).bits)
+ val aligner = Module(new Aligner(t, in.length))
+ aligner.io.in := in.map(v => v.map(_.asUInt))
+ VecInit(aligner.io.out.map(v => v.map(_.asTypeOf(t))))
+ }
}
\ No newline at end of file
diff --git a/hdl/chisel/src/kelvin/BUILD b/hdl/chisel/src/kelvin/BUILD
index c410861..e8ad752 100644
--- a/hdl/chisel/src/kelvin/BUILD
+++ b/hdl/chisel/src/kelvin/BUILD
@@ -326,6 +326,8 @@
":retirement_buffer",
":rvvi_trace",
"//hdl/chisel/src/common",
+ "//hdl/chisel/src/common:aligner",
+ "//hdl/chisel/src/common:circular_buffer_multi",
"//hdl/chisel/src/common:fp",
"//hdl/chisel/src/common:instruction_buffer",
"//hdl/chisel/src/common:scatter_gather",
diff --git a/hdl/chisel/src/kelvin/scalar/Lsu.scala b/hdl/chisel/src/kelvin/scalar/Lsu.scala
index 8cbe82c..baa0958 100644
--- a/hdl/chisel/src/kelvin/scalar/Lsu.scala
+++ b/hdl/chisel/src/kelvin/scalar/Lsu.scala
@@ -955,8 +955,9 @@
io.vldst := 0.U
io.storeCount := 0.U
- val opQueue = Module(new Queue(new LsuUOp(p), 4))
- io.queueCapacity := opQueue.entries.U - opQueue.io.count
+ val opQueue = Module(new CircularBufferMulti(new LsuUOp(p), p.instructionLanes, 4))
+ opQueue.io.flush := false.B
+ io.queueCapacity := opQueue.io.nSpace
// Flush state
// DispatchV2 will only flush on first slot, when LSU is inactive.
@@ -977,26 +978,25 @@
))
- // Accept one instruction per cycle.
+ // Accept up to p.instructionLanes instructions per cycle, limited by free queue space.
- // TODO(derekjchow): Accept multiple when primitives are ready.
- val canAccept = opQueue.io.enq.ready
- val queueSpace = Mux(canAccept, 1.U, 0.U)
+ val queueSpace = opQueue.io.nSpace
val validSum = io.req.map(_.valid).scan(
0.U(log2Ceil(p.instructionLanes + 1).W))(_+_)
-
for (i <- 0 until p.instructionLanes) {
io.req(i).ready := (validSum(i) < queueSpace) && !flushCmd.valid
}
val ops = (0 until p.instructionLanes).map(i =>
- LsuUOp(p, i, io.req(i).bits, io.busPort, io.busPort_flt, io.rvvState))
- val enq = MuxCase(
- MakeInvalid(new LsuUOp(p)),
- (0 until p.instructionLanes).map(i =>
- ((io.req(i).fire && !io.req(i).bits.op.isOneOf(LsuOp.FENCEI, LsuOp.FLUSHAT, LsuOp.FLUSHALL)) -> MakeValid(true.B, ops(i)))))
- opQueue.io.enq.valid := enq.valid
- opQueue.io.enq.bits := enq.bits
+ MakeValid(
+ io.req(i).fire && !LsuOp.isFlush(io.req(i).bits.op),
+ LsuUOp(p, i, io.req(i).bits, io.busPort, io.busPort_flt, io.rvvState))
+ )
+ val alignedOps = Aligner(ops)
- val nextSlot = LsuSlot.fromLsuUOp(opQueue.io.deq.bits, p, 16)
+ opQueue.io.enqValid := PopCount(alignedOps.map(_.valid))
+ opQueue.io.enqData := alignedOps.map(_.bits)
+ assert(opQueue.io.enqValid <= opQueue.io.nSpace)
+
+ val nextSlot = LsuSlot.fromLsuUOp(opQueue.io.dataOut(0), p, 16)
// Tracks if a read has been fired last cycle.
val readFired = RegInit(MakeInvalid(new LsuRead(32 - nextSlot.elemBits)))
@@ -1142,7 +1142,7 @@
val writebackUpdatedSlot = slot.writebackUpdate(writebackFired)
// TODO(derekjchow): Improve timing?
- opQueue.io.deq.ready := slot.slotIdle()
+ opQueue.io.deqReady := Mux(slot.slotIdle() && (opQueue.io.nEnqueued > 0.U), 1.U, 0.U)
// ==========================================================================
// State transition
@@ -1152,7 +1152,7 @@
// Move to inactive if error.
io.fault.valid -> LsuSlot.inactive(p, 16),
// When inactive, dequeue if possible
- (slot.slotIdle() && opQueue.io.deq.valid) -> nextSlot,
+ (slot.slotIdle() && (opQueue.io.nEnqueued > 0.U)) -> nextSlot,
// Vector update.
slot.pendingVector -> vectorUpdatedSlot,
// Active transaction update.
@@ -1163,6 +1163,6 @@
slot := slotNext
- io.active := !slot.slotIdle() || (opQueue.io.count =/= 0.U)
+ io.active := !slot.slotIdle() || (opQueue.io.nEnqueued =/= 0.U)
}