Allow LSU to accept multiple instructions in one cycle. Change-Id: I6ff3d6e795182bcd921c40e24bce6af4aeda4e77
diff --git a/hdl/chisel/src/common/Aligner.scala b/hdl/chisel/src/common/Aligner.scala index 79feb4e..936ce63 100644 --- a/hdl/chisel/src/common/Aligner.scala +++ b/hdl/chisel/src/common/Aligner.scala
@@ -81,4 +81,13 @@ }) addResource("hdl/verilog/rvv/design/Aligner.sv") setInline(s"$desiredName.sv", GenerateAlignerSource(t, n)) +} + +object Aligner { + def apply[T <: Data](in: Seq[ValidIO[T]]): Vec[ValidIO[T]] = { + val t = chiselTypeOf(in(0).bits) + val aligner = Module(new Aligner(t, in.length)) + aligner.io.in := in.map(v => v.map(_.asUInt)) + VecInit(aligner.io.out.map(v => v.map(_.asTypeOf(t)))) + } } \ No newline at end of file
diff --git a/hdl/chisel/src/kelvin/BUILD b/hdl/chisel/src/kelvin/BUILD index c410861..e8ad752 100644 --- a/hdl/chisel/src/kelvin/BUILD +++ b/hdl/chisel/src/kelvin/BUILD
@@ -326,6 +326,8 @@ ":retirement_buffer", ":rvvi_trace", "//hdl/chisel/src/common", + "//hdl/chisel/src/common:aligner", + "//hdl/chisel/src/common:circular_buffer_multi", "//hdl/chisel/src/common:fp", "//hdl/chisel/src/common:instruction_buffer", "//hdl/chisel/src/common:scatter_gather",
diff --git a/hdl/chisel/src/kelvin/scalar/Lsu.scala b/hdl/chisel/src/kelvin/scalar/Lsu.scala index 8cbe82c..baa0958 100644 --- a/hdl/chisel/src/kelvin/scalar/Lsu.scala +++ b/hdl/chisel/src/kelvin/scalar/Lsu.scala
@@ -955,8 +955,9 @@ io.vldst := 0.U io.storeCount := 0.U - val opQueue = Module(new Queue(new LsuUOp(p), 4)) - io.queueCapacity := opQueue.entries.U - opQueue.io.count + val opQueue = Module(new CircularBufferMulti(new LsuUOp(p), p.instructionLanes, 4)) + opQueue.io.flush := false.B + io.queueCapacity := opQueue.io.nSpace // Flush state // DispatchV2 will only flush on first slot, when LSU is inactive. @@ -977,26 +978,25 @@ )) - // Accept one instruction per cycle. - // TODO(derekjchow): Accept multiple when primitives are ready. - val canAccept = opQueue.io.enq.ready - val queueSpace = Mux(canAccept, 1.U, 0.U) + // Accept as many instructions per cycle as the op queue has free entries. + val queueSpace = opQueue.io.nSpace val validSum = io.req.map(_.valid).scan( 0.U(log2Ceil(p.instructionLanes + 1).W))(_+_) - for (i <- 0 until p.instructionLanes) { io.req(i).ready := (validSum(i) < queueSpace) && !flushCmd.valid } val ops = (0 until p.instructionLanes).map(i => - LsuUOp(p, i, io.req(i).bits, io.busPort, io.busPort_flt, io.rvvState)) - val enq = MuxCase( - MakeInvalid(new LsuUOp(p)), - (0 until p.instructionLanes).map(i => - ((io.req(i).fire && !io.req(i).bits.op.isOneOf(LsuOp.FENCEI, LsuOp.FLUSHAT, LsuOp.FLUSHALL)) -> MakeValid(true.B, ops(i))))) - opQueue.io.enq.valid := enq.valid - opQueue.io.enq.bits := enq.bits + MakeValid( + io.req(i).fire && !LsuOp.isFlush(io.req(i).bits.op), + LsuUOp(p, i, io.req(i).bits, io.busPort, io.busPort_flt, io.rvvState)) + ) + val alignedOps = Aligner(ops) - val nextSlot = LsuSlot.fromLsuUOp(opQueue.io.deq.bits, p, 16) + opQueue.io.enqValid := PopCount(alignedOps.map(_.valid)) + opQueue.io.enqData := alignedOps.map(_.bits) + assert(opQueue.io.enqValid <= opQueue.io.nSpace) + + val nextSlot = LsuSlot.fromLsuUOp(opQueue.io.dataOut(0), p, 16) // Tracks if a read has been fired last cycle. val readFired = RegInit(MakeInvalid(new LsuRead(32 - nextSlot.elemBits))) @@ -1142,7 +1142,7 @@ val writebackUpdatedSlot = slot.writebackUpdate(writebackFired) // TODO(derekjchow): Improve timing? 
- opQueue.io.deq.ready := slot.slotIdle() + opQueue.io.deqReady := Mux(slot.slotIdle() && (opQueue.io.nEnqueued > 0.U), 1.U, 0.U) // ========================================================================== // State transition @@ -1152,7 +1152,7 @@ // Move to inactive if error. io.fault.valid -> LsuSlot.inactive(p, 16), // When inactive, dequeue if possible - (slot.slotIdle() && opQueue.io.deq.valid) -> nextSlot, + (slot.slotIdle() && (opQueue.io.nEnqueued > 0.U)) -> nextSlot, // Vector update. slot.pendingVector -> vectorUpdatedSlot, // Active transaction update. @@ -1163,6 +1163,6 @@ slot := slotNext - io.active := !slot.slotIdle() || (opQueue.io.count =/= 0.U) + io.active := !slot.slotIdle() || (opQueue.io.nEnqueued =/= 0.U) }