blob: 0c51a55e9b177526adc964c3cad52cb25d7f590e [file] [log] [blame]
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Scalar Core Frontend
package kelvin
import chisel3._
import chisel3.util._
import common._
import _root_.circt.stage.ChiselStage
object SCore {
def apply(p: Parameters): SCore = {
return Module(new SCore(p))
}
}
class SCore(p: Parameters) extends Module {
val io = IO(new Bundle {
val csr = new CsrInOutIO(p)
val halted = Output(Bool())
val fault = Output(Bool())
val ibus = new IBusIO(p)
val dbus = new DBusIO(p)
val ubus = new DBusIO(p)
val vldst = Output(Bool())
val vcore = Flipped(new VCoreIO(p))
val iflush = new IFlushIO(p)
val dflush = new DFlushIO(p)
val slog = new SLogIO(p)
val debug = new DebugIO(p)
})
// The functional units that make up the core.
val regfile = Regfile(p)
val fetch = Fetch(p)
val decode = (0 until p.instructionLanes).map(x => Seq(Decode(p, x))).reduce(_ ++ _)
val alu = Seq.fill(p.instructionLanes)(Alu(p))
val bru = Seq.fill(p.instructionLanes)(Bru(p))
val csr = Csr(p)
val lsu = Lsu(p)
val mlu = Mlu(p)
val dvu = Dvu(p)
// Wire up the core.
val branchTaken = bru.map(x => x.io.taken.valid).reduce(_||_)
// ---------------------------------------------------------------------------
// IFlush
val iflush = RegInit(false.B)
when (bru(0).io.iflush) {
iflush := true.B
} .elsewhen (fetch.io.iflush.ready && io.iflush.ready &&
lsu.io.flush.ready && lsu.io.flush.fencei) {
iflush := false.B
}
io.dflush.valid := lsu.io.flush.valid
io.dflush.all := lsu.io.flush.all
io.dflush.clean := lsu.io.flush.clean
lsu.io.flush.ready := io.dflush.ready
for (i <- 1 until p.instructionLanes) {
assert(!bru(i).io.iflush)
}
// ---------------------------------------------------------------------------
// Fetch
fetch.io.csr := io.csr.in
for (i <- 0 until p.instructionLanes) {
fetch.io.branch(i) := bru(i).io.taken
}
fetch.io.linkPort := regfile.io.linkPort
fetch.io.iflush.valid := iflush
// ---------------------------------------------------------------------------
// Decode
val mask = VecInit(decode.map(_.io.inst.ready).scan(true.B)(_ && _))
for (i <- 0 until p.instructionLanes) {
decode(i).io.inst.valid := fetch.io.inst.lanes(i).valid && mask(i)
fetch.io.inst.lanes(i).ready := decode(i).io.inst.ready && mask(i)
decode(i).io.inst.addr := fetch.io.inst.lanes(i).addr
decode(i).io.inst.inst := fetch.io.inst.lanes(i).inst
decode(i).io.inst.brchFwd := fetch.io.inst.lanes(i).brchFwd
decode(i).io.branchTaken := branchTaken
decode(i).io.halted := csr.io.halted
}
// Interlock based on regfile write port dependencies.
decode(0).io.interlock := bru(0).io.interlock
for (i <- 1 until p.instructionLanes) {
decode(i).io.interlock := decode(i - 1).io.interlock
}
// Serialize opcodes with only one pipeline.
decode(0).io.serializeIn.defaults()
for (i <- 1 until p.instructionLanes) {
decode(i).io.serializeIn := decode(i - 1).io.serializeOut
}
// In decode update multi-issue scoreboard state.
val scoreboard_spec = decode.map(_.io.scoreboard.spec).scan(0.U)(_|_)
for (i <- 0 until p.instructionLanes) {
decode(i).io.scoreboard.comb := regfile.io.scoreboard.comb | scoreboard_spec(i)
decode(i).io.scoreboard.regd := regfile.io.scoreboard.regd | scoreboard_spec(i)
}
decode(0).io.mactive := io.vcore.mactive
for (i <- 1 until p.instructionLanes) {
decode(i).io.mactive := false.B
}
// ---------------------------------------------------------------------------
// ALU
for (i <- 0 until p.instructionLanes) {
alu(i).io.req := decode(i).io.alu
alu(i).io.rs1 := regfile.io.readData(2 * i + 0)
alu(i).io.rs2 := regfile.io.readData(2 * i + 1)
}
// ---------------------------------------------------------------------------
// Branch Unit
for (i <- 0 until p.instructionLanes) {
bru(i).io.req := decode(i).io.bru
bru(i).io.rs1 := regfile.io.readData(2 * i + 0)
bru(i).io.rs2 := regfile.io.readData(2 * i + 1)
bru(i).io.target := regfile.io.target(i)
}
bru(0).io.csr <> csr.io.bru
for (i <- 1 until p.instructionLanes) {
bru(i).io.csr.defaults()
}
io.iflush.valid := iflush
// Instruction counters
csr.io.counters.rfwriteCount := regfile.io.rfwriteCount
csr.io.counters.storeCount := lsu.io.storeCount
csr.io.counters.branchCount := bru(0).io.taken.valid
csr.io.counters.vrfwriteCount := io.vcore.vrfwriteCount
csr.io.counters.vstoreCount := io.vcore.vstoreCount
// ---------------------------------------------------------------------------
// Control Status Unit
csr.io.csr <> io.csr
csr.io.req <> decode(0).io.csr
csr.io.rs1 := regfile.io.readData(0)
csr.io.vcore.undef := io.vcore.undef
// ---------------------------------------------------------------------------
// Status
io.halted := csr.io.halted
io.fault := csr.io.fault
// ---------------------------------------------------------------------------
// Load/Store Unit
lsu.io.busPort := regfile.io.busPort
lsu.io.req <> decode.map(_.io.lsu)
// ---------------------------------------------------------------------------
// Multiplier Unit
for (i <- 0 until p.instructionLanes) {
mlu.io.req(i) := decode(i).io.mlu
mlu.io.rs1(i) := regfile.io.readData(2 * i)
mlu.io.rs2(i) := regfile.io.readData((2 * i) + 1)
}
// ---------------------------------------------------------------------------
// Divide Unit
dvu.io.req <> decode(0).io.dvu
dvu.io.rs1 := regfile.io.readData(0)
dvu.io.rs2 := regfile.io.readData(1)
dvu.io.rd.ready := !mlu.io.rd.valid
// TODO: make port conditional on pipeline index.
for (i <- 1 until p.instructionLanes) {
decode(i).io.dvu.ready := false.B
}
// ---------------------------------------------------------------------------
// Register File
for (i <- 0 until p.instructionLanes) {
regfile.io.readAddr(2 * i + 0) := decode(i).io.rs1Read
regfile.io.readAddr(2 * i + 1) := decode(i).io.rs2Read
regfile.io.readSet(2 * i + 0) := decode(i).io.rs1Set
regfile.io.readSet(2 * i + 1) := decode(i).io.rs2Set
regfile.io.writeAddr(i) := decode(i).io.rdMark
regfile.io.busAddr(i) := decode(i).io.busRead
val csr0Valid = if (i == 0) csr.io.rd.valid else false.B
val csr0Addr = if (i == 0) csr.io.rd.addr else 0.U
val csr0Data = if (i == 0) csr.io.rd.data else 0.U
regfile.io.writeData(i).valid := csr0Valid ||
alu(i).io.rd.valid || bru(i).io.rd.valid ||
io.vcore.rd(i).valid
regfile.io.writeData(i).addr :=
MuxOR(csr0Valid, csr0Addr) |
MuxOR(alu(i).io.rd.valid, alu(i).io.rd.addr) |
MuxOR(bru(i).io.rd.valid, bru(i).io.rd.addr) |
MuxOR(io.vcore.rd(i).valid, io.vcore.rd(i).addr)
regfile.io.writeData(i).data :=
MuxOR(csr0Valid, csr0Data) |
MuxOR(alu(i).io.rd.valid, alu(i).io.rd.data) |
MuxOR(bru(i).io.rd.valid, bru(i).io.rd.data) |
MuxOR(io.vcore.rd(i).valid, io.vcore.rd(i).data)
assert((csr0Valid +&
alu(i).io.rd.valid +& bru(i).io.rd.valid +&
io.vcore.rd(i).valid) <= 1.U)
}
val mluDvuOffset = p.instructionLanes
regfile.io.writeData(mluDvuOffset).valid := mlu.io.rd.valid || dvu.io.rd.valid
regfile.io.writeData(mluDvuOffset).addr := Mux(mlu.io.rd.valid, mlu.io.rd.addr, dvu.io.rd.addr)
regfile.io.writeData(mluDvuOffset).data := Mux(mlu.io.rd.valid, mlu.io.rd.data, dvu.io.rd.data)
assert(!(mlu.io.rd.valid && (dvu.io.rd.valid && dvu.io.rd.ready))) // TODO: stall dvu on mlu write
val lsuOffset = p.instructionLanes + 1
regfile.io.writeData(lsuOffset).valid := lsu.io.rd.valid
regfile.io.writeData(lsuOffset).addr := lsu.io.rd.addr
regfile.io.writeData(lsuOffset).data := lsu.io.rd.data
val writeMask = bru.map(_.io.taken.valid).scan(false.B)(_||_)
for (i <- 0 until p.instructionLanes) {
regfile.io.writeMask(i).valid := writeMask(i)
}
// ---------------------------------------------------------------------------
// Vector Extension
for (i <- 0 until p.instructionLanes) {
io.vcore.vinst(i) <> decode(i).io.vinst
}
for (i <- 0 until p.instructionLanes * 2) {
io.vcore.rs(i) := regfile.io.readData(i)
}
// ---------------------------------------------------------------------------
// Fetch Bus
io.ibus <> fetch.io.ibus
// ---------------------------------------------------------------------------
// Local Data Bus Port
io.dbus <> lsu.io.dbus
io.ubus <> lsu.io.ubus
io.vldst := lsu.io.vldst
// ---------------------------------------------------------------------------
// Scalar logging interface
val slogValid = RegInit(false.B)
val slogAddr = Reg(UInt(2.W))
val slogEn = decode(0).io.slog
slogValid := slogEn
when (slogEn) {
slogAddr := decode(0).io.inst.inst(14,12)
}
io.slog.valid := slogValid
io.slog.addr := MuxOR(slogValid, slogAddr)
io.slog.data := MuxOR(slogValid, regfile.io.readData(0).data)
// ---------------------------------------------------------------------------
// DEBUG
val cycles = RegInit(0.U(32.W))
cycles := cycles + 1.U
io.debug.cycles := cycles
val debugEn = RegInit(0.U(p.instructionLanes.W))
val debugAddr = Reg(Vec(p.instructionLanes, UInt(32.W)))
val debugInst = Reg(Vec(p.instructionLanes, UInt(32.W)))
val debugBrch = Cat(bru.map(_.io.taken.valid).scanRight(false.B)(_ || _))
debugEn := Cat(fetch.io.inst.lanes.map(x => x.valid && x.ready && !branchTaken))
for (i <- 0 until p.instructionLanes) {
debugAddr(i) := fetch.io.inst.lanes(i).addr
debugInst(i) := fetch.io.inst.lanes(i).inst
}
io.debug.en := debugEn & ~debugBrch
io.debug.addr <> debugAddr
io.debug.inst <> debugInst
}
object EmitSCore extends App {
val p = new Parameters
ChiselStage.emitSystemVerilogFile(new SCore(p), args)
}