refactor(hdl): Make CoreAxiCSR bus-width agnostic

This change refactors the CoreAxiCSR module to be bus-width agnostic.
This is a precursor to adding TileLink support, which has a different
bus width than AXI.

Change-Id: I83424b4b587a5bd6833d9a5e9db862a613af2210
diff --git a/hdl/chisel/src/kelvin/CoreAxiCSR.scala b/hdl/chisel/src/kelvin/CoreAxiCSR.scala index 3aac4a8..9639090 100644 --- a/hdl/chisel/src/kelvin/CoreAxiCSR.scala +++ b/hdl/chisel/src/kelvin/CoreAxiCSR.scala
@@ -49,6 +49,8 @@ val resetReg = RegInit(3.U(p.fetchAddrBits.W)) val pcStartReg = RegInit(0.U(p.fetchAddrBits.W)) val statusReg = RegInit(0.U(p.fetchAddrBits.W)) + + // Debug module registers, conditionally present. val debugReqAddrReg = Option.when(p.useDebugModule)(RegInit(0.U(32.W))) val debugReqDataReg = Option.when(p.useDebugModule)(RegInit(0.U(32.W))) val debugReqOpReg = Option.when(p.useDebugModule)(RegInit(DmReqOp.NOP.asUInt)) @@ -57,20 +59,25 @@ val writeAddr = io.fabric.writeDataAddr.bits val writeData = io.fabric.writeDataBits + // Debug module handling logic. val rsp_queue = if (p.useDebugModule) { + // Queue for debug responses. val queue = Module(new Queue(new DebugModuleRspIO(p), 1)) queue.io.enq <> io.debug.get.rsp + // Pulse valid signal for a single cycle on a write to the op register. val req_valid_pulse = RegInit(false.B) val write_to_op_reg = writeEn && writeAddr === CoreCsrAddrs.DbgReqOp req_valid_pulse := Mux(write_to_op_reg && io.debug.get.req.ready, true.B, false.B) io.debug.get.req.valid := req_valid_pulse + // Wire up debug request signals. io.debug.get.req.bits.address := debugReqAddrReg.get io.debug.get.req.bits.data := debugReqDataReg.get val (req_op, req_op_valid) = DmReqOp.safe(debugReqOpReg.get) io.debug.get.req.bits.op := Mux(req_op_valid, req_op, DmReqOp.NOP) + // Dequeue from the response queue when the status register is written to. val write_to_status_reg = writeEn && writeAddr === CoreCsrAddrs.DbgStatus queue.io.deq.ready := write_to_status_reg Some(queue) @@ -78,53 +85,74 @@ None } + val readAddr = io.fabric.readDataAddr.bits + // Align the read address to the AXI data bus width. 
+ val alignedAddr = readAddr & ~((p.axi2DataBytes - 1).U(readAddr.getWidth.W)) + + val kRegWidthBits = 32 + val kRegWidthBytes = kRegWidthBits / 8 + val kCsrBaseAddr = 0x100 + + val regsPerBus = p.axi2DataBits / kRegWidthBits + val readData = Wire(Vec(regsPerBus, UInt(kRegWidthBits.W))) + for (i <- 0 until regsPerBus) { + readData(i) := 0.U + } + + // Map of core control registers. + val coreRegMap = Map( + 0x0 -> resetReg, + 0x4 -> pcStartReg, + 0x8 -> statusReg, + ) + + // Map of Kelvin's internal CSRs. + val csrRegs = io.kelvin_csr.value + val csrRegMap = (0 until p.csrOutCount).map { i => + (kCsrBaseAddr + i * kRegWidthBytes) -> csrRegs(i) + }.toMap + + // Map of debug registers, conditionally present. val debugReadMap = if (p.useDebugModule) { val debugStatusReg = Cat(rsp_queue.get.io.deq.valid, io.debug.get.req.ready) - Seq( - CoreCsrAddrs.DbgReqAddr -> Cat(0.U(96.W), debugReqAddrReg.get), - CoreCsrAddrs.DbgReqData -> Cat(0.U(64.W), debugReqDataReg.get, 0.U(32.W)), - CoreCsrAddrs.DbgReqOp -> Cat(0.U(32.W), debugReqOpReg.get, 0.U(64.W)), - CoreCsrAddrs.DbgRspData -> Cat(rsp_queue.get.io.deq.bits.data, 0.U(96.W)), - CoreCsrAddrs.DbgRspOp -> Cat(0.U(96.W), rsp_queue.get.io.deq.bits.op.asUInt), - CoreCsrAddrs.DbgStatus -> Cat(0.U(64.W), debugStatusReg, 0.U(32.W)), + val regs = Seq( + CoreCsrAddrs.DbgReqAddr -> debugReqAddrReg.get, + CoreCsrAddrs.DbgReqData -> debugReqDataReg.get, + CoreCsrAddrs.DbgReqOp -> debugReqOpReg.get, + CoreCsrAddrs.DbgRspData -> rsp_queue.get.io.deq.bits.data, + CoreCsrAddrs.DbgRspOp -> rsp_queue.get.io.deq.bits.op.asUInt, + CoreCsrAddrs.DbgStatus -> debugStatusReg, ) + regs.map { case (k, v) => k.litValue.toInt -> v }.toMap } else { - Seq() + Map[Int, Data]() } - val readData = - MuxLookup(io.fabric.readDataAddr.bits, 0.U)(Seq( - 0x0.U -> Cat(0.U(96.W), resetReg), - 0x4.U -> Cat(0.U(64.W), pcStartReg, 0.U(32.W)), - 0x8.U -> Cat(0.U(32.W), statusReg, 0.U(64.W)), - ) ++ debugReadMap - ++ ((0 until p.csrOutCount).map( - x => ((0x100 + 
4*x).U -> (io.kelvin_csr.value(x) << (32 * (x % 4)).U)) - ))) + // Combine all register maps. + val allReadRegs = coreRegMap ++ csrRegMap ++ debugReadMap - val debugReadValidMap = if (p.useDebugModule) { - Seq( - CoreCsrAddrs.DbgReqAddr -> true.B, - CoreCsrAddrs.DbgReqData -> true.B, - CoreCsrAddrs.DbgReqOp -> true.B, - CoreCsrAddrs.DbgRspData -> true.B, - CoreCsrAddrs.DbgRspOp -> true.B, - CoreCsrAddrs.DbgStatus -> true.B, - ) - } else { - Seq() + // Group registers by their aligned base address to prevent multiple writers. + val groupedRegs = allReadRegs.groupBy { case (offset, _) => + offset & ~(p.axi2DataBytes - 1) } - val readDataValid = - MuxLookup(io.fabric.readDataAddr.bits, false.B)(Seq( - 0x0.U -> true.B, - 0x4.U -> true.B, - 0x8.U -> true.B, - ) ++ debugReadValidMap - ++ ((0 until p.csrOutCount).map(x => ((0x100 + 4*x).U -> true.B)))) + // Generate read logic for all registers. + for ((base, regs) <- groupedRegs) { + when(alignedAddr === base.U) { + for ((offset, reg) <- regs) { + // Place the register value into the correct 32-bit lane of the output bus. + readData((offset % p.axi2DataBytes) / kRegWidthBytes) := reg + } + } + } - // Delay reads by one cycle - val readDataNext = Pipe(readDataValid, readData, 1) + // A read is valid if it hits any of the registers in our map. + val readDataValid = MuxLookup(readAddr, false.B)( + allReadRegs.keys.map(addr => (addr.U -> true.B)).toSeq + ) + + // Delay reads by one cycle for timing. + val readDataNext = Pipe(readDataValid, readData.asUInt, 1) io.fabric.readData := readDataNext io.reset := resetReg(0) @@ -132,7 +160,7 @@ io.pcStart := pcStartReg statusReg := Cat(io.fault, io.halted) - // Register writes + // Register write logic. 
resetReg := Mux(writeEn && writeAddr === 0x0.U, writeData(31,0), resetReg) pcStartReg := Mux(writeEn && writeAddr === 0x4.U, writeData(63,32), pcStartReg) if (p.useDebugModule) { @@ -141,21 +169,26 @@ debugReqOpReg.get := Mux(writeEn && writeAddr === CoreCsrAddrs.DbgReqOp, writeData(95,64), debugReqOpReg.get) } + // Map of valid write addresses for the debug module. val debugWriteValidMap = if (p.useDebugModule) { - Seq( - CoreCsrAddrs.DbgReqAddr -> true.B, - CoreCsrAddrs.DbgReqData -> true.B, - CoreCsrAddrs.DbgReqOp -> true.B, - CoreCsrAddrs.DbgStatus -> true.B, + Map( + CoreCsrAddrs.DbgReqAddr.litValue.toInt -> true.B, + CoreCsrAddrs.DbgReqData.litValue.toInt -> true.B, + CoreCsrAddrs.DbgReqOp.litValue.toInt -> true.B, + CoreCsrAddrs.DbgStatus.litValue.toInt -> true.B, ) } else { - Seq() + Map[Int, Bool]() } - io.fabric.writeResp := writeEn && MuxLookup(writeAddr, false.B)(Seq( - 0x0.U -> true.B, - 0x4.U -> true.B, - ) ++ debugWriteValidMap) + val allWriteRegs = Map( + 0x0 -> true.B, + 0x4 -> true.B, + ) ++ debugWriteValidMap + + io.fabric.writeResp := writeEn && MuxLookup(writeAddr, false.B)( + allWriteRegs.map { case (k, v) => k.U -> v }.toSeq + ) } class CoreAxiCSR(p: Parameters,
diff --git a/hdl/chisel/src/kelvin/CoreAxiCSRTest.scala b/hdl/chisel/src/kelvin/CoreAxiCSRTest.scala index b2b187a..8715c09 100644 --- a/hdl/chisel/src/kelvin/CoreAxiCSRTest.scala +++ b/hdl/chisel/src/kelvin/CoreAxiCSRTest.scala
@@ -126,7 +126,7 @@ while (dut.io.axi.read.data.valid.peek().litValue != 1) { dut.clock.step() } - dut.io.axi.read.data.bits.data.expect(BigInt(0x20000000) << 32) + assert((dut.io.axi.read.data.bits.data.peek().litValue >> 32) == 0x20000000) dut.io.axi.read.data.bits.last.expect(1) dut.io.axi.read.data.bits.resp.expect(0)