blob: 11cd32adc579abcfe2548a3c5b15a49d06c41774 [file]
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package kelvin
import chisel3._
import chisel3.util._
import common._
class RetirementBuffer(p: Parameters) extends Module {
val io = IO(new Bundle {
val inst = Input(Vec(p.instructionLanes, Decoupled(new FetchInstruction(p))))
val writeAddrScalar = Input(Vec(p.instructionLanes, new RegfileWriteAddrIO))
val writeDataScalar = Input(Vec(p.instructionLanes + 2, Valid(new RegfileWriteDataIO)))
val writeAddrFloat = Option.when(p.enableFloat)(Input(new RegfileWriteAddrIO))
val writeDataFloat = Option.when(p.enableFloat)(Input(Vec(2, Valid(new RegfileWriteDataIO))))
val nSpace = Output(UInt(32.W))
val debug = Output(new RetirementBufferDebugIO(p))
})
val idxWidth = p.retirementBufferIdxWidth
val floatRegisterBase = 32.U
val noWriteRegIdx = ~0.U(idxWidth.W)
class Instruction extends Bundle {
val addr = UInt(32.W) // Memory address
val inst = UInt(32.W) // Instruction bits
val idx = UInt(idxWidth.W) // Register Index
}
val bufferSize = p.retirementBufferSize
assert(bufferSize >= p.instructionLanes)
assert(bufferSize >= io.writeDataScalar.length)
// Construct a circular buffer of `bufferSize`, that can enqueue and dequeue `bufferSize` elements
// per cycle. This will be used to store information about dispatched instructions.
val instBuffer = Module(new CircularBufferMulti(new Instruction,
/* needs to be at least writeDataScalar count */ bufferSize,
/* chosen sort-of-arbitrarily */ bufferSize))
// Check that we see no instructions fire after the first non-fire.
val instFires = io.inst.map(_.fire)
val seenFalseV = (instFires.scanLeft(false.B) { (acc, curr) => acc || !curr }).drop(1)
assert(!(seenFalseV.zip(instFires).map({ case (seenFalse, fire) => seenFalse && fire }).reduce(_|_)))
// Create Instruction wires out of io.inst + io.writeAddrScalar, and align.
def WireInstruction(i: Int) = {
val floatValid = (i == 0).B && io.writeAddrFloat.map(x => x.valid).getOrElse(false.B)
val floatAddr = io.writeAddrFloat.map(x => x.addr).getOrElse(0.U)
val scalarValid = io.writeAddrScalar(i).valid
val scalarAddr = io.writeAddrScalar(i).addr
val instr = Wire(new Instruction)
instr.addr := io.inst(i).bits.addr
instr.inst := io.inst(i).bits.inst
instr.idx := MuxCase(noWriteRegIdx, Seq(
floatValid -> (floatAddr +& floatRegisterBase),
(scalarValid && scalarAddr =/= 0.U) -> scalarAddr,
))
instr
}
val insts = (0 until p.instructionLanes).map(x => WireInstruction(x))
val instsWithWriteFired = PopCount(io.inst.map(_.fire))
instBuffer.io.enqValid := instsWithWriteFired
instBuffer.io.flush := false.B
io.nSpace := instBuffer.io.nSpace
for (i <- 0 until p.instructionLanes) {
instBuffer.io.enqData(i) := insts(i)
}
for (i <- p.instructionLanes until bufferSize) {
instBuffer.io.enqData(i) := 0.U.asTypeOf(instBuffer.io.enqData(i))
}
// Maintain a re-order buffer of instruction completion result.
// The order and alignment of these buffers should correspond to the
// output of `instBuffer`.
val resultBuffer = RegInit(VecInit(Seq.fill(bufferSize)(MakeInvalid(UInt(32.W)))))
// Compute update based on register writeback.
// Note: The shift when committing instructions will be handled in a later block.
val resultUpdate = Wire(Vec(bufferSize, Valid(UInt(32.W))))
for (i <- 0 until bufferSize) {
val bufferEntry = instBuffer.io.dataOut(i)
// Check which incoming (scalar,float) write port matches this entry's needed address.
val scalarWriteIdxMap = io.writeDataScalar.map(
x => x.valid && (x.bits.addr === bufferEntry.idx))
val floatWriteIdxMap = io.writeDataFloat.map(y => y.map(
x => x.valid && ((x.bits.addr +& floatRegisterBase) ===
bufferEntry.idx))).getOrElse(Seq(false.B))
// Check if this entry is an operation that doesn't require a register write (e.g., a store).
val nonWritingInstr = bufferEntry.idx === noWriteRegIdx
// The entry is active if it's validly enqueued and not already complete.
val validBufferEntry = (i.U < instBuffer.io.nEnqueued) && (!resultBuffer(i).valid)
// If the entry is active and its data dependency is met (or it has no dependency)...
val updated = (validBufferEntry && (scalarWriteIdxMap.reduce(_|_) || floatWriteIdxMap.reduce(_|_) || nonWritingInstr))
// Find the index of the first write port that provides the needed data.
val scalarWriteIdx = PriorityEncoder(scalarWriteIdxMap)
val floatWriteIdx = PriorityEncoder(floatWriteIdxMap)
// Select the actual data from the winning write port.
val writeDataScalar = io.writeDataScalar(scalarWriteIdx).bits.data
val writeDataFloat = io.writeDataFloat.map(x => x(floatWriteIdx).bits.data).getOrElse(0.U)
// If updated, mark this buffer entry as complete for the next cycle.
resultUpdate(i).valid := Mux(updated, true.B, resultBuffer(i).valid) // true.B
// Select the correct write-back data to store, if updated (FP has priority).
resultUpdate(i).bits := Mux(updated, MuxCase(0.U, Seq(
floatWriteIdxMap.reduce(_|_) -> writeDataFloat,
scalarWriteIdxMap.reduce(_|_) -> writeDataScalar,
)), resultBuffer(i).bits)
}
val deqReady = Cto(VecInit(resultUpdate.map(_.valid)).asUInt)
instBuffer.io.deqReady := deqReady
resultBuffer := ShiftVectorRight(resultUpdate, deqReady)
for (i <- 0 until bufferSize) {
val valid = (i.U < instBuffer.io.deqReady)
io.debug.inst(i).valid := valid
io.debug.inst(i).bits.pc := MuxOR(valid, instBuffer.io.dataOut(i).addr)
io.debug.inst(i).bits.inst := MuxOR(valid, instBuffer.io.dataOut(i).inst)
io.debug.inst(i).bits.idx := MuxOR(valid, instBuffer.io.dataOut(i).idx)
io.debug.inst(i).bits.data := MuxOR(valid, resultUpdate(i).bits)
}
}