blob: 55cc19c29edfef5b4b7c6acdd2be44704bb42455 [file] [log] [blame]
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package kelvin
import chisel3._
import chisel3.util._
import common._
object L1ICache {
def apply(p: Parameters): L1ICache = {
return Module(new L1ICache(p))
}
}
class L1ICache(p: Parameters) extends Module {
// A relatively simple cache block. Only one transaction may post at a time.
// 2^8 * 256 / 8 = 8KiB 4-way Tag[31,12] + Index[11,6] + Data[5,0]
assert(p.axi0IdBits == 4)
assert(p.axi0DataBits == 256)
val slots = p.l1islots
val slotBits = log2Ceil(slots)
val assoc = 4 // 2, 4, 8, 16, slots
val sets = slots / assoc
val setLsb = log2Ceil(p.fetchDataBits / 8)
val setMsb = log2Ceil(sets) + setLsb - 1
val tagLsb = setMsb + 1
val tagMsb = 31
val io = IO(new Bundle {
val ibus = Flipped(new IBusIO(p))
val flush = Flipped(new IFlushIO(p))
val axi = new Bundle {
val read = new AxiMasterReadIO(p.axi0AddrBits, p.axi0DataBits, p.axi0IdBits)
}
val volt_sel = Input(Bool())
})
assert(assoc == 2 || assoc == 4 || assoc == 8 || assoc == 16 || assoc == slots)
assert(assoc != 2 || setLsb == 5 && setMsb == 11 && tagLsb == 12)
assert(assoc != 4 || setLsb == 5 && setMsb == 10 && tagLsb == 11)
assert(assoc != 8 || setLsb == 5 && setMsb == 9 && tagLsb == 10)
assert(assoc != 16 || setLsb == 5 && setMsb == 8 && tagLsb == 9)
assert(assoc != slots || tagLsb == 5)
class Sram_1rw_256x256 extends BlackBox {
val io = IO(new Bundle {
val clock = Input(Clock())
val valid = Input(Bool())
val write = Input(Bool())
val addr = Input(UInt(slotBits.W))
val wdata = Input(UInt(p.axi0DataBits.W))
val rdata = Output(UInt(p.axi0DataBits.W))
val volt_sel = Input(Bool())
})
}
// ---------------------------------------------------------------------------
// CAM state.
val valid = RegInit(VecInit(Seq.fill(slots)(false.B)))
val camaddr = Reg(Vec(slots, UInt(32.W)))
// val mem = Mem1RW(slots, UInt(p.axi0DataBits.W))
val mem = Module(new Sram_1rw_256x256())
val history = Reg(Vec(slots / assoc, Vec(assoc, UInt(log2Ceil(assoc).W))))
val matchSet = Wire(Vec(slots, Bool()))
val matchAddr = Wire(Vec(assoc, Bool()))
val matchSlotB = Wire(Vec(slots, Bool()))
val matchSlot = matchSlotB.asUInt
val replaceSlotB = Wire(Vec(slots, Bool()))
val replaceSlot = replaceSlotB.asUInt
// OR mux lookup of associative entries.
def camaddrRead(i: Int, value: UInt = 0.U(32.W)): UInt = {
if (i < slots) {
camaddrRead(i + assoc, value | MuxOR(matchSet(i), camaddr(i)))
} else {
value
}
}
for (i <- 0 until assoc) {
val ca = camaddrRead(i)
matchAddr(i) := io.ibus.addr(tagMsb, tagLsb) === ca(tagMsb, tagLsb)
}
for (i <- 0 until slots) {
val set = i / assoc
val setMatch = if (assoc == slots) true.B else io.ibus.addr(setMsb, setLsb) === set.U
matchSet(i) := setMatch
}
for (i <- 0 until slots) {
val set = i / assoc
val index = i % assoc
matchSlotB(i) := valid(i) && matchSet(i) && matchAddr(index)
val historyMatch = history(set)(index) === 0.U
replaceSlotB(i) := matchSet(i) && historyMatch
assert((i - set * assoc) == index)
}
assert(PopCount(matchSlot) <= 1.U)
assert(PopCount(replaceSlot) <= 1.U)
val found = io.ibus.valid && matchSlot =/= 0.U
val replaceNum = Wire(Vec(slots, UInt(slotBits.W)))
for (i <- 0 until slots) {
replaceNum(i) := MuxOR(replaceSlot(i), i.U)
}
val replaceId = VecOR(replaceNum, slots)
assert(replaceId.getWidth == slotBits)
val readNum = Wire(Vec(slots, UInt(slotBits.W)))
for (i <- 0 until slots) {
readNum(i) := MuxOR(matchSlotB(i), i.U)
}
val readId = VecOR(readNum, slots)
for (i <- 0 until slots / assoc) {
// Get the matched value from the OneHot encoding of the set.
val matchSet = matchSlot((i + 1) * assoc - 1, i * assoc)
assert(PopCount(matchSet) <= 1.U)
val matchIndices = Wire(Vec(assoc, UInt(log2Ceil(assoc).W)))
for (j <- 0 until assoc) {
matchIndices(j) := MuxOR(matchSet(j), j.U)
}
val matchIndex = VecOR(matchIndices, assoc)
assert(matchIndex.getWidth == log2Ceil(assoc))
val matchValue = history(i)(matchIndex)
// History based on count values so that high set size has less DFF usage.
when (io.ibus.valid && io.ibus.ready && (if (assoc == slots) true.B else io.ibus.addr(setMsb, setLsb) === i.U)) {
for (j <- 0 until assoc) {
when (matchSet(j)) {
history(i)(j) := (assoc - 1).U
} .elsewhen (history(i)(j) > matchValue) {
history(i)(j) := history(i)(j) - 1.U
assert(history(i)(j) > 0.U)
}
}
}
}
// Reset history to unique values within sets.
// Must be placed below all other assignments.
// Note the definition is Reg() so will generate an asynchronous reset.
when (reset.asBool) {
for (i <- 0 until slots / assoc) {
for (j <- 0 until assoc) {
history(i)(j) := j.U
}
}
}
// These checks are extremely slow to compile.
if (false) {
for (i <- 0 until slots / assoc) {
for (j <- 0 until assoc) {
for (k <- 0 until assoc) {
if (j != k) {
assert(history(i)(j) =/= history(i)(k))
}
}
}
}
}
// ---------------------------------------------------------------------------
// Core Instruction Bus.
io.ibus.ready := found
io.ibus.rdata := mem.io.rdata
// ---------------------------------------------------------------------------
// axi interface.
val axivalid = RegInit(false.B) // io.axi.read.addr.valid
val axiready = RegInit(false.B) // io.axi.read.data.ready
val axiaddr = Reg(UInt(32.W))
val replaceIdReg = Reg(UInt(slotBits.W))
when (io.ibus.valid && !io.ibus.ready && !axivalid && !axiready) {
replaceIdReg := replaceId
}
when (io.axi.read.addr.valid && io.axi.read.addr.ready) {
axivalid := false.B
} .elsewhen (io.ibus.valid && !io.ibus.ready && !axivalid && !axiready) {
axivalid := true.B
}
when (io.axi.read.data.valid && io.axi.read.data.ready) {
axiready := false.B
} .elsewhen (io.axi.read.addr.valid && io.axi.read.addr.ready && !axiready) {
axiready := true.B
}
when (io.flush.valid) {
for (i <- 0 until slots) {
valid(i) := false.B
}
} .elsewhen (io.ibus.valid && !io.ibus.ready && !axivalid && !axiready) {
valid(replaceId) := false.B
} .elsewhen (io.axi.read.data.valid && io.axi.read.data.ready) {
valid(replaceIdReg) := true.B
}
when (io.ibus.valid && !io.ibus.ready && !axivalid && !axiready) {
val alignedAddr = Cat(io.ibus.addr(31, setLsb), 0.U(setLsb.W))
axiaddr := alignedAddr
camaddr(replaceId) := alignedAddr
} .elsewhen (io.axi.read.addr.valid && io.axi.read.addr.ready) {
axiaddr := axiaddr + (p.axi0DataBits / 8).U
}
io.axi.read.defaults()
io.axi.read.addr.valid := axivalid
io.axi.read.addr.bits.addr := axiaddr
io.axi.read.addr.bits.id := 0.U
io.axi.read.data.ready := axiready
io.flush.ready := true.B
// IBus transaction must latch until completion.
val addrLatchActive = RegInit(false.B)
val addrLatchData = Reg(UInt(32.W))
when (io.flush.valid) {
addrLatchActive := false.B
} .elsewhen (io.ibus.valid && !io.ibus.ready && !addrLatchActive) {
addrLatchActive := true.B
addrLatchData := io.ibus.addr
} .elsewhen (addrLatchActive && io.ibus.ready) {
addrLatchActive := false.B
}
assert(!(addrLatchActive && !io.ibus.valid))
assert(!(addrLatchActive && addrLatchData =/= io.ibus.addr))
// ---------------------------------------------------------------------------
// Memory controls.
val memwrite = io.axi.read.data.valid && io.axi.read.data.ready
val memread = io.ibus.valid && !axivalid && !axiready
mem.io.clock := clock
mem.io.valid := memread || memwrite
mem.io.write := axiready
mem.io.addr := Mux(axiready, replaceIdReg, readId)
mem.io.wdata := io.axi.read.data.bits.data
mem.io.volt_sel := io.volt_sel
}
object EmitL1ICache extends App {
val p = new Parameters
(new chisel3.stage.ChiselStage).emitVerilog(new L1ICache(p), args)
}