blob: d9cc36e8db2f52507caef778f9767b06d0d35b07 [file] [log] [blame]
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kelvin
import chisel3._
import chisel3.util._
import common._
/** Companion factory for [[VCore]].
  *
  * Wraps construction in `Module(...)` so callers can write `VCore(p)` from
  * inside another module's body.
  */
object VCore {
  /** Instantiates a [[VCore]] as a hardware module.
    *
    * @param p configuration forwarded unchanged to the [[VCore]] constructor
    * @return the newly instantiated module
    */
  def apply(p: Parameters): VCore = Module(new VCore(p))
}
/**
 * Port bundle exposed by VCore as `io.score`.
 *
 * Vector sizes are derived from `p.instructionLanes`: one instruction port
 * and one `rd` port per lane, and two `rs` ports per lane.
 *
 * NOTE(review): the peer on the other side of this bundle is presumably the
 * scalar core - confirm against the module that instantiates VCore.
 */
class VCoreIO(p: Parameters) extends Bundle {
// Decode cycle.
// One flipped Decoupled instruction command port per lane; the owner of this
// bundle is the consumer of the valid/ready handshake.
val vinst = Vec(p.instructionLanes, Flipped(Decoupled(new VInstCmd)))
// Execute cycle.
// Two flipped register-file read-data ports per instruction lane.
val rs = Vec(p.instructionLanes * 2, Flipped(new RegfileReadDataIO))
// One flipped register-file write-data port per instruction lane.
val rd = Vec(p.instructionLanes, Flipped(new RegfileWriteDataIO))
// Status.
// Driven in VCore from the `nempty` flags of VInst, VDecode, VLd and VSt.
val mactive = Output(Bool())
// Faults.
// Driven in VCore from VDecode's `undef` output.
val undef = Output(Bool())
// NOTE(review): the two outputs below are counters rather than fault flags;
// they presumably report vector register-file writes and vector stores -
// confirm exact semantics against VRegfile / VSt / VLdSt.
// 3-bit count forwarded from VRegfile.
val vrfwriteCount = Output(UInt(3.W))
// 2-bit count; VCore drives it with the sum of the VSt and VLdSt store counts.
val vstoreCount = Output(UInt(2.W))
}
/** Vector core top level.
  *
  * Instantiates the vector instruction front end (VInst, VDecode), the
  * execution units (VAlu, VConvCtrl, VLdSt, VLd, VSt) and the vector register
  * file (VRegfile), and wires them together.
  *
  * Wiring relies on Chisel's last-connect semantics: register-file ports are
  * first tied off to inactive defaults, then overridden by the unit that owns
  * them. The relative order of connections to the same sink must therefore be
  * preserved when editing this class.
  *
  * @param p core configuration (lane count, AXI widths, ...)
  */
class VCore(p: Parameters) extends Module {
  val io = IO(new Bundle {
    // Score <> VCore
    val score = new VCoreIO(p)

    // Data bus interface.
    val dbus = new DBusIO(p)
    val last = Output(Bool())

    // AXI interface.
    val ld = new AxiMasterReadIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits)
    val st = new AxiMasterWriteIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits)
  })

  // Decode    : VInst.in
  // Execute+0 : VInst.slice
  // Execute+1 : VInst.out <> VDec::Fifo.in
  // Execute+2 : VDec::Fifo.out <> VDec::Shuffle.in
  // Execute+3 : VDec::Shuffle.out <> VCmdq::Fifo.in
  // Execute+4 : VCmdq::Fifo.out <> VCmdq::Reg.in
  // Execute+5 : VCmdq::Reg.out <> {VLdSt, VAlu, ...}

  val vinst = VInst(p)
  val vdec = VDecode(p)
  val valu = VAlu(p)
  val vconv = VConvCtrl(p)
  val vldst = VLdSt(p)
  val vld = VLd(p)
  val vst = VSt(p)
  val vrf = VRegfile(p)

  // Register-file port allocation (elaboration-time constants).
  // The ALU drives write ports [0, writePorts - 2); the last two write ports
  // are used for load write-back. The last read port is shared by the two
  // store paths (VSt and VLdSt).
  private val ldstWritePort = vrf.writePorts - 2
  private val ldWritePort = vrf.writePorts - 1
  private val storeReadPort = vrf.readPorts - 1

  io.score.vrfwriteCount := vrf.io.vrfwriteCount
  io.score.vstoreCount := vst.io.vstoreCount + vldst.io.vstoreCount

  vinst.io.in <> io.score.vinst
  vinst.io.rs <> io.score.rs
  vinst.io.rd <> io.score.rd

  // VSt and VLdSt share one register-file read port, so their read
  // handshakes must never both complete in the same cycle.
  assert(PopCount(Cat(vst.io.read.valid && vst.io.read.ready,
                      vldst.io.read.valid && vldst.io.read.ready)) <= 1.U)

  // ---------------------------------------------------------------------------
  // VDecode.
  vdec.io.vrfsb <> vrf.io.vrfsb
  vdec.io.active := valu.io.active | vconv.io.active | vldst.io.active | vst.io.active

  vdec.io.in.valid := vinst.io.out.valid
  vinst.io.out.ready := vdec.io.in.ready
  // The decoder input is expected never to be valid while not ready.
  assert(!(vdec.io.in.valid && !vdec.io.in.ready))

  vinst.io.out.stall := vdec.io.stall // decode backpressure

  for (i <- 0 until p.instructionLanes) {
    vdec.io.in.bits(i) := vinst.io.out.lane(i)
  }

  io.score.undef := vdec.io.undef

  // ---------------------------------------------------------------------------
  // VRegfile.
  // Inactive defaults for every register-file port; the sections below
  // override the ports that are actually used (last connect wins).
  for (i <- 0 until vrf.readPorts) {
    vrf.io.read(i).valid := false.B
    vrf.io.read(i).addr := 0.U
    vrf.io.read(i).tag := 0.U
  }

  for (i <- 0 until vrf.writePorts) {
    vrf.io.write(i).valid := false.B
    vrf.io.write(i).addr := 0.U
    vrf.io.write(i).data := 0.U
  }

  for (i <- 0 until vrf.whintPorts) {
    vrf.io.whint(i).valid := false.B
    vrf.io.whint(i).addr := 0.U
  }

  for (i <- 0 until vrf.scalarPorts) {
    vrf.io.scalar(i).valid := false.B
    vrf.io.scalar(i).data := 0.U
  }

  vrf.io.transpose.valid := false.B
  vrf.io.transpose.index := 0.U
  vrf.io.transpose.addr := 0.U

  // ---------------------------------------------------------------------------
  // VALU.
  // Per-lane dispatch: a lane is presented to the ALU when the decoder output
  // is valid and the lane's command-queue select flags it for the ALU.
  val aluvalid = (0 until p.instructionLanes).map(x => vdec.io.out(x).valid && vdec.io.cmdq(x).alu)
  val aluready = (0 until p.instructionLanes).map(x => valu.io.in.ready && vdec.io.cmdq(x).alu)

  valu.io.in.valid := aluvalid.reduce(_ || _)

  for (i <- 0 until p.instructionLanes) {
    valu.io.in.bits(i).valid := aluvalid(i)
    valu.io.in.bits(i).bits := vdec.io.out(i).bits
  }

  // All read ports are connected to the ALU here; the last one is re-driven
  // by the "Store read" section further down.
  for (i <- 0 until vrf.readPorts) {
    vrf.io.read(i).valid := valu.io.read(i).valid
    vrf.io.read(i).addr := valu.io.read(i).addr
    vrf.io.read(i).tag := valu.io.read(i).tag
  }

  for (i <- 0 until vrf.readPorts) {
    valu.io.read(i).data := vrf.io.read(i).data
  }

  // The ALU owns every write port except the last two (load write-back).
  for (i <- 0 until vrf.writePorts - 2) {
    vrf.io.write(i).valid := valu.io.write(i).valid
    vrf.io.write(i).addr := valu.io.write(i).addr
    vrf.io.write(i).data := valu.io.write(i).data
  }

  for (i <- 0 until vrf.whintPorts) {
    vrf.io.whint(i).valid := valu.io.whint(i).valid
    vrf.io.whint(i).addr := valu.io.whint(i).addr
  }

  for (i <- 0 until vrf.scalarPorts) {
    vrf.io.scalar(i).valid := valu.io.scalar(i).valid
    vrf.io.scalar(i).data := valu.io.scalar(i).data
  }

  valu.io.vrfsb := vrf.io.vrfsb.data

  // ---------------------------------------------------------------------------
  // VCONV.
  val convvalid = (0 until p.instructionLanes).map(x => vdec.io.out(x).valid && vdec.io.cmdq(x).conv)
  val convready = (0 until p.instructionLanes).map(x => vconv.io.in.ready && vdec.io.cmdq(x).conv)

  vconv.io.in.valid := convvalid.reduce(_ || _)

  for (i <- 0 until p.instructionLanes) {
    vconv.io.in.bits(i).valid := convvalid(i)
    vconv.io.in.bits(i).bits := vdec.io.out(i).bits
  }

  vrf.io.conv := vconv.io.out
  vconv.io.vrfsb := vrf.io.vrfsb.data

  // ---------------------------------------------------------------------------
  // VLdSt.
  val ldstvalid = (0 until p.instructionLanes).map(x => vdec.io.out(x).valid && vdec.io.cmdq(x).ldst)
  val ldstready = (0 until p.instructionLanes).map(x => vldst.io.in.ready && vdec.io.cmdq(x).ldst)

  vldst.io.in.valid := ldstvalid.reduce(_ || _)

  for (i <- 0 until p.instructionLanes) {
    vldst.io.in.bits(i).valid := ldstvalid(i)
    vldst.io.in.bits(i).bits := vdec.io.out(i).bits
  }

  // VSt has priority on the shared store read port; VLdSt may only read when
  // VSt is not requesting.
  vldst.io.read.ready := !vst.io.read.valid
  vldst.io.read.data := vrf.io.read(storeReadPort).data

  vldst.io.vrfsb := vrf.io.vrfsb.data

  io.dbus <> vldst.io.dbus
  io.last := vldst.io.last

  // ---------------------------------------------------------------------------
  // VLd.
  val ldvalid = (0 until p.instructionLanes).map(x => vdec.io.cmdq(x).ld && vdec.io.out(x).valid)
  val ldready = (0 until p.instructionLanes).map(x => vdec.io.cmdq(x).ld && vld.io.in.ready)

  vld.io.in.valid := ldvalid.reduce(_ || _)

  for (i <- 0 until p.instructionLanes) {
    vld.io.in.bits(i).valid := ldvalid(i)
    vld.io.in.bits(i).bits := vdec.io.out(i).bits
  }

  io.ld <> vld.io.axi

  // ---------------------------------------------------------------------------
  // VSt.
  val stvalid = (0 until p.instructionLanes).map(x => vdec.io.out(x).valid && vdec.io.cmdq(x).st)
  val stready = (0 until p.instructionLanes).map(x => vst.io.in.ready && vdec.io.cmdq(x).st)

  vst.io.in.valid := stvalid.reduce(_ || _)

  for (i <- 0 until p.instructionLanes) {
    vst.io.in.bits(i).valid := stvalid(i)
    vst.io.in.bits(i).bits := vdec.io.out(i).bits
  }

  io.st <> vst.io.axi

  vst.io.vrfsb := vrf.io.vrfsb.data

  // VSt always wins arbitration for the shared store read port.
  vst.io.read.ready := true.B
  vst.io.read.data := vrf.io.read(storeReadPort).data

  // ---------------------------------------------------------------------------
  // Load write.
  // Write ports are indexed from `writePorts`, matching the ALU loop above
  // that leaves the last two write ports free. (Indexing them via `readPorts`
  // is only equivalent when readPorts == writePorts + 1.)
  vrf.io.write(ldstWritePort).valid := vldst.io.write.valid
  vrf.io.write(ldstWritePort).addr := vldst.io.write.addr
  vrf.io.write(ldstWritePort).data := vldst.io.write.data

  vrf.io.write(ldWritePort).valid := vld.io.write.valid
  vrf.io.write(ldWritePort).addr := vld.io.write.addr
  vrf.io.write(ldWritePort).data := vld.io.write.data

  // ---------------------------------------------------------------------------
  // Store read.
  // Overrides the ALU connection made earlier for the last read port. VSt is
  // selected whenever it requests; otherwise VLdSt's request is forwarded.
  vrf.io.read(storeReadPort).valid := vst.io.read.valid || vldst.io.read.valid
  vrf.io.read(storeReadPort).addr := Mux(vst.io.read.valid, vst.io.read.addr,
                                         vldst.io.read.addr)
  vrf.io.read(storeReadPort).tag := Mux(vst.io.read.valid, vst.io.read.tag,
                                        vldst.io.read.tag)

  // ---------------------------------------------------------------------------
  // VDecode.
  // A decoder output lane is accepted when the unit selected for that lane
  // is ready.
  for (i <- 0 until p.instructionLanes) {
    vdec.io.out(i).ready := aluready(i) || convready(i) || ldstready(i) ||
                            ldready(i) || stready(i)
  }

  // ---------------------------------------------------------------------------
  // Memory active status.
  io.score.mactive := vinst.io.nempty || vdec.io.nempty ||
                      vld.io.nempty || vst.io.nempty
}