Refactor Decoder.
- Create "DecodedInstruction" data type.
- Convert "DecodedInstruction" module into "DecodeInstruction" function.
- Move Alu, Mlu, Csr, Dvu, Lsu and VCore to ChiselEnum.
- Introduce use of more Valid/Decoupled interfaces.
Change-Id: Ia0f1299b17bcf2e0dd4486b94d516297f9f15fb0
diff --git a/hdl/chisel/src/common/Library.scala b/hdl/chisel/src/common/Library.scala
index 3c69d1d..d722e89 100644
--- a/hdl/chisel/src/common/Library.scala
+++ b/hdl/chisel/src/common/Library.scala
@@ -26,3 +26,12 @@
Mux(valid, data, false.B)
}
}
+
+object MakeValid { // Helper that packs a valid flag and a payload into a ValidIO wire.
+ def apply[T <: Data](valid: Bool, bits: T): ValidIO[T] = { // valid: flag to drive; bits: payload to drive.
+ val result = Wire(Valid(chiselTypeOf(bits))) // New wire whose payload has the same Chisel type as `bits`.
+ result.valid := valid
+ result.bits := bits
+ result // Returned as a value so it can be used directly, e.g. as a MuxCase entry.
+ }
+}
\ No newline at end of file
diff --git a/hdl/chisel/src/kelvin/scalar/Alu.scala b/hdl/chisel/src/kelvin/scalar/Alu.scala
index 06818c1..ed63314 100644
--- a/hdl/chisel/src/kelvin/scalar/Alu.scala
+++ b/hdl/chisel/src/kelvin/scalar/Alu.scala
@@ -24,38 +24,36 @@
}
}
-case class AluOp() {
- val ADD = 0
- val SUB = 1
- val SLT = 2
- val SLTU = 3
- val XOR = 4
- val OR = 5
- val AND = 6
- val SLL = 7
- val SRL = 8
- val SRA = 9
- val LUI = 10
- val CLZ = 11
- val CTZ = 12
- val PCNT = 13
- val MIN = 14
- val MINU = 15
- val MAX = 16
- val MAXU = 17
- val Entries = 18
+object AluOp extends ChiselEnum { // ALU operation selector; carried in AluCmd.op and registered inside Alu.
+ val ADD = Value // rs1 + rs2 (also the reset value of the Alu op register)
+ val SUB = Value // rs1 - rs2
+ val SLT = Value // signed rs1 < rs2
+ val SLTU = Value // unsigned rs1 < rs2
+ val XOR = Value // rs1 ^ rs2
+ val OR = Value // rs1 | rs2
+ val AND = Value // rs1 & rs2
+ val SLL = Value // rs1 << shamt
+ val SRL = Value // rs1 >> shamt (unsigned)
+ val SRA = Value // rs1 >> shamt with rs1 treated as signed
+ val LUI = Value // passes rs2 through; rs1 is not required to be valid for this op
+ val CLZ = Value // Clz(rs1)
+ val CTZ = Value // Ctz(rs1)
+ val PCNT = Value // PopCount(rs1)
+ val MIN = Value // signed minimum of rs1, rs2
+ val MINU = Value // unsigned minimum of rs1, rs2
+ val MAX = Value // signed maximum of rs1, rs2
+ val MAXU = Value // unsigned maximum of rs1, rs2
}
-class AluIO(p: Parameters) extends Bundle {
- val valid = Input(Bool())
- val addr = Input(UInt(5.W))
- val op = Input(UInt(new AluOp().Entries.W))
+class AluCmd extends Bundle { // Payload of the decode-cycle ALU request (wrapped in Valid at the Alu/Decode ports).
+ val addr = UInt(5.W) // Destination register address; Alu drives io.rd.addr from the registered copy.
+ val op = AluOp() // Operation to perform in the execute cycle.
}
class Alu(p: Parameters) extends Module {
val io = IO(new Bundle {
// Decode cycle.
- val req = new AluIO(p)
+ val req = Flipped(Valid(new AluCmd))
// Execute cycle.
val rs1 = Flipped(new RegfileReadDataIO)
@@ -63,11 +61,9 @@
val rd = Flipped(new RegfileWriteDataIO)
})
- val alu = new AluOp()
-
val valid = RegInit(false.B)
val addr = Reg(UInt(5.W))
- val op = RegInit(0.U(alu.Entries.W))
+ val op = RegInit(AluOp.ADD)
// Pulse the cycle after the decoded request.
valid := io.req.valid
@@ -75,8 +71,8 @@
// Avoid output toggles by not updating state between uses.
// The Regfile has the same behavior, leaving read ports unchanged.
when (io.req.valid) {
- addr := io.req.addr
- op := io.req.op
+ addr := io.req.bits.addr
+ op := io.req.bits.op
}
val rs1 = io.rs1.data
@@ -85,26 +81,29 @@
io.rd.valid := valid
io.rd.addr := addr
- io.rd.data := MuxOR(op(alu.ADD), rs1 + rs2) |
- MuxOR(op(alu.SUB), rs1 - rs2) |
- MuxOR(op(alu.SLT), rs1.asSInt < rs2.asSInt) |
- MuxOR(op(alu.SLTU), rs1 < rs2) |
- MuxOR(op(alu.XOR), rs1 ^ rs2) |
- MuxOR(op(alu.OR), rs1 | rs2) |
- MuxOR(op(alu.AND), rs1 & rs2) |
- MuxOR(op(alu.SLL), rs1 << shamt) |
- MuxOR(op(alu.SRL), rs1 >> shamt) |
- MuxOR(op(alu.SRA), (rs1.asSInt >> shamt).asUInt) |
- MuxOR(op(alu.LUI), rs2) |
- MuxOR(op(alu.CLZ), Clz(rs1)) |
- MuxOR(op(alu.CTZ), Ctz(rs1)) |
- MuxOR(op(alu.PCNT), PopCount(rs1)) |
- MuxOR(op(alu.MIN), Mux(rs1.asSInt < rs2.asSInt, rs1, rs2)) |
- MuxOR(op(alu.MAX), Mux(rs1.asSInt > rs2.asSInt, rs1, rs2)) |
- MuxOR(op(alu.MINU), Mux(rs1 < rs2, rs1, rs2)) |
- MuxOR(op(alu.MAXU), Mux(rs1 > rs2, rs1, rs2))
+
+ io.rd.data := MuxLookup(op, 0.U)(Seq(
+ AluOp.ADD -> (rs1 + rs2),
+ AluOp.SUB -> (rs1 - rs2),
+ AluOp.SLT -> (rs1.asSInt < rs2.asSInt),
+ AluOp.SLTU -> (rs1 < rs2),
+ AluOp.XOR -> (rs1 ^ rs2),
+ AluOp.OR -> (rs1 | rs2),
+ AluOp.AND -> (rs1 & rs2),
+ AluOp.SLL -> (rs1 << shamt),
+ AluOp.SRL -> (rs1 >> shamt),
+ AluOp.SRA -> (rs1.asSInt >> shamt).asUInt,
+ AluOp.LUI -> rs2,
+ AluOp.CLZ -> Clz(rs1),
+ AluOp.CTZ -> Ctz(rs1),
+ AluOp.PCNT -> PopCount(rs1),
+ AluOp.MIN -> Mux(rs1.asSInt < rs2.asSInt, rs1, rs2),
+ AluOp.MAX -> Mux(rs1.asSInt > rs2.asSInt, rs1, rs2),
+ AluOp.MINU -> Mux(rs1 < rs2, rs1, rs2),
+ AluOp.MAXU -> Mux(rs1 > rs2, rs1, rs2)
+ ))
// Assertions.
- assert(!(valid && !io.rs1.valid && !op(alu.LUI)))
+ assert(!(valid && !io.rs1.valid && !op.isOneOf(AluOp.LUI)))
assert(!(valid && !io.rs2.valid))
}
diff --git a/hdl/chisel/src/kelvin/scalar/Csr.scala b/hdl/chisel/src/kelvin/scalar/Csr.scala
index b7c2315..18202d3 100644
--- a/hdl/chisel/src/kelvin/scalar/Csr.scala
+++ b/hdl/chisel/src/kelvin/scalar/Csr.scala
@@ -24,11 +24,10 @@
}
}
-case class CsrOp() {
- val CSRRW = 0
- val CSRRS = 1
- val CSRRC = 2
- val Entries = 3
+object CsrOp extends ChiselEnum { // CSR access kind; selects how the write data is formed in Csr.
+ val CSRRW = Value // write data = rs1
+ val CSRRS = Value // write data = rdata | rs1 (set bits)
+ val CSRRC = Value // write data = rdata & ~rs1 (clear bits)
}
class CsrInIO(p: Parameters) extends Bundle {
@@ -73,11 +72,10 @@
}
}
-class CsrIO(p: Parameters) extends Bundle {
- val valid = Input(Bool())
- val addr = Input(UInt(5.W))
- val index = Input(UInt(12.W))
- val op = Input(UInt(new CsrOp().Entries.W))
+class CsrCmd extends Bundle { // Payload of the decode-cycle CSR request (wrapped in Valid at the Csr/Decode ports).
+ val addr = UInt(5.W) // Destination register address; drives io.rd.addr after the request is piped.
+ val index = UInt(12.W) // CSR number; Decode fills it from instruction bits 31:20.
+ val op = CsrOp() // Access kind (CSRRW/CSRRS/CSRRC).
}
class Csr(p: Parameters) extends Module {
@@ -86,7 +84,7 @@
val csr = new CsrInOutIO(p)
// Decode cycle.
- val req = new CsrIO(p)
+ val req = Flipped(Valid(new CsrCmd))
// Execute cycle.
val rs1 = Flipped(new RegfileReadDataIO)
@@ -103,12 +101,8 @@
val fault = Output(Bool())
})
- val csr = new CsrOp()
-
- val valid = RegInit(false.B)
- val addr = Reg(UInt(5.W))
- val index = Reg(UInt(12.W))
- val op = RegInit(0.U(csr.Entries.W))
+ // Control registers.
+ val req = Pipe(io.req)
// Pipeline Control.
val halted = RegInit(false.B)
@@ -159,51 +153,38 @@
val fcsr = Cat(frm, fflags)
// Decode the Index.
- val fflagsEn = index === 0x001.U
- val frmEn = index === 0x002.U
- val fcsrEn = index === 0x003.U
- val misaEn = index === 0x301.U
- val mieEn = index === 0x304.U
- val mtvecEn = index === 0x305.U
- val mscratchEn = index === 0x340.U
- val mepcEn = index === 0x341.U
- val mcauseEn = index === 0x342.U
- val mtvalEn = index === 0x343.U
- val mcontext0En = index === 0x7C0.U
- val mcontext1En = index === 0x7C1.U
- val mcontext2En = index === 0x7C2.U
- val mcontext3En = index === 0x7C3.U
- val mcontext4En = index === 0x7C4.U
- val mcontext5En = index === 0x7C5.U
- val mcontext6En = index === 0x7C6.U
- val mcontext7En = index === 0x7C7.U
- val mpcEn = index === 0x7E0.U
- val mspEn = index === 0x7E1.U
+ val fflagsEn = req.bits.index === 0x001.U
+ val frmEn = req.bits.index === 0x002.U
+ val fcsrEn = req.bits.index === 0x003.U
+ val misaEn = req.bits.index === 0x301.U
+ val mieEn = req.bits.index === 0x304.U
+ val mtvecEn = req.bits.index === 0x305.U
+ val mscratchEn = req.bits.index === 0x340.U
+ val mepcEn = req.bits.index === 0x341.U
+ val mcauseEn = req.bits.index === 0x342.U
+ val mtvalEn = req.bits.index === 0x343.U
+ val mcontext0En = req.bits.index === 0x7C0.U
+ val mcontext1En = req.bits.index === 0x7C1.U
+ val mcontext2En = req.bits.index === 0x7C2.U
+ val mcontext3En = req.bits.index === 0x7C3.U
+ val mcontext4En = req.bits.index === 0x7C4.U
+ val mcontext5En = req.bits.index === 0x7C5.U
+ val mcontext6En = req.bits.index === 0x7C6.U
+ val mcontext7En = req.bits.index === 0x7C7.U
+ val mpcEn = req.bits.index === 0x7E0.U
+ val mspEn = req.bits.index === 0x7E1.U
// M-mode performance CSRs.
- val mcycleEn = index === 0xB00.U
- val minstretEn = index === 0xB02.U
- val mcyclehEn = index === 0xB80.U
- val minstrethEn = index === 0xB82.U
+ val mcycleEn = req.bits.index === 0xB00.U
+ val minstretEn = req.bits.index === 0xB02.U
+ val mcyclehEn = req.bits.index === 0xB80.U
+ val minstrethEn = req.bits.index === 0xB82.U
// M-mode information CSRs.
- val mvendoridEn = index === 0xF11.U
- val marchidEn = index === 0xF12.U
- val mimpidEn = index === 0xF13.U
- val mhartidEn = index === 0xF14.U
+ val mvendoridEn = req.bits.index === 0xF11.U
+ val marchidEn = req.bits.index === 0xF12.U
+ val mimpidEn = req.bits.index === 0xF13.U
+ val mhartidEn = req.bits.index === 0xF14.U
// Start of custom CSRs.
- val kisaEn = index === 0xFC0.U
-
- // Control registers.
- when (io.req.valid) {
- valid := io.req.valid
- addr := io.req.addr
- index := io.req.index
- op := io.req.op
- } .elsewhen (valid) {
- valid := false.B
- addr := 0.U
- index := 0.U
- op := 0.U
- }
+ val kisaEn = req.bits.index === 0xFC0.U
// Pipeline Control.
when (io.bru.in.halt || io.vcore.undef) {
@@ -252,11 +233,13 @@
MuxOR(mhartidEn, mhartid) |
MuxOR(kisaEn, kisa)
- val wdata = MuxOR(op(csr.CSRRW), rs1) |
- MuxOR(op(csr.CSRRS), rdata | rs1) |
- MuxOR(op(csr.CSRRC), rdata & ~rs1)
+ val wdata = MuxLookup(req.bits.op, 0.U)(Seq(
+ CsrOp.CSRRW -> rs1,
+ CsrOp.CSRRS -> (rdata | rs1),
+ CsrOp.CSRRC -> (rdata & ~rs1)
+ ))
- when (valid) {
+ when (req.valid) {
when (fflagsEn) { fflags := wdata }
when (frmEn) { frm := wdata }
when (fcsrEn) { fflags := wdata(4,0)
@@ -287,13 +270,13 @@
val mcycle_th = Mux(mcyclehEn, wdata, mcycle(63,32))
val mcycle_tl = Mux(mcycleEn, wdata, mcycle(31,0))
val mcycle_t = Cat(mcycle_th, mcycle_tl)
- mcycle := Mux(valid, mcycle_t, mcycle) + 1.U
+ mcycle := Mux(req.valid, mcycle_t, mcycle) + 1.U
val minstret_th = Mux(minstrethEn, wdata, minstret(63,32))
val minstret_tl = Mux(minstretEn, wdata, minstret(31,0))
val minstret_t = Cat(minstret_th, minstret_tl)
- minstret := Mux(valid, minstret_t, minstret) +
+ minstret := Mux(req.valid, minstret_t, minstret) +
io.counters.rfwriteCount +
io.counters.storeCount +
io.counters.branchCount +
@@ -349,10 +332,10 @@
io.csr.out.value(7) := mcontext3
// Write port.
- io.rd.valid := valid
- io.rd.addr := addr
+ io.rd.valid := req.valid
+ io.rd.addr := req.bits.addr
io.rd.data := rdata
// Assertions.
- assert(!(valid && !io.rs1.valid))
+ assert(!(req.valid && !io.rs1.valid))
}
diff --git a/hdl/chisel/src/kelvin/scalar/Decode.scala b/hdl/chisel/src/kelvin/scalar/Decode.scala
index 75ace28..37a0c86 100644
--- a/hdl/chisel/src/kelvin/scalar/Decode.scala
+++ b/hdl/chisel/src/kelvin/scalar/Decode.scala
@@ -46,6 +46,125 @@
}
}
+class DecodedInstruction extends Bundle { // Decode result: immediates plus one flag per recognised instruction; built by DecodeInstruction.
+ // Immediates, each held as a 32-bit value.
+ val imm12 = UInt(32.W) // inst(31,20), sign-extended
+ val imm20 = UInt(32.W) // inst(31,12) in the upper bits, low 12 bits zero
+ val immjal = UInt(32.W) // JAL offset, sign-extended, bit 0 zero
+ val immbr = UInt(32.W) // conditional-branch offset, sign-extended, bit 0 zero
+ val immcsr = UInt(32.W) // inst(19,15)
+ val immst = UInt(32.W) // store offset {inst(31,25), inst(11,7)}, sign-extended
+
+ // RV32I
+ val lui = Bool()
+ val auipc = Bool()
+ val jal = Bool()
+ val jalr = Bool()
+ val beq = Bool()
+ val bne = Bool()
+ val blt = Bool()
+ val bge = Bool()
+ val bltu = Bool()
+ val bgeu = Bool()
+ val csrrw = Bool()
+ val csrrs = Bool()
+ val csrrc = Bool()
+ val lb = Bool()
+ val lh = Bool()
+ val lw = Bool()
+ val lbu = Bool()
+ val lhu = Bool()
+ val sb = Bool()
+ val sh = Bool()
+ val sw = Bool()
+ val fence = Bool()
+ val addi = Bool()
+ val slti = Bool()
+ val sltiu = Bool()
+ val xori = Bool()
+ val ori = Bool()
+ val andi = Bool()
+ val slli = Bool()
+ val srli = Bool()
+ val srai = Bool()
+ val add = Bool()
+ val sub = Bool()
+ val slt = Bool()
+ val sltu = Bool()
+ val xor = Bool()
+ val or = Bool()
+ val and = Bool()
+ val sll = Bool()
+ val srl = Bool()
+ val sra = Bool()
+
+ // RV32M
+ val mul = Bool()
+ val mulh = Bool()
+ val mulhsu = Bool()
+ val mulhu = Bool()
+ val mulhr = Bool()
+ val mulhsur = Bool()
+ val mulhur = Bool()
+ val dmulh = Bool()
+ val dmulhr = Bool()
+ val div = Bool()
+ val divu = Bool()
+ val rem = Bool()
+ val remu = Bool()
+
+ // RV32B
+ val clz = Bool()
+ val ctz = Bool()
+ val pcnt = Bool()
+ val min = Bool()
+ val minu = Bool()
+ val max = Bool()
+ val maxu = Bool()
+
+ // Vector instructions.
+ val getvl = Bool()
+ val getmaxvl = Bool()
+ val vld = Bool()
+ val vst = Bool()
+ val viop = Bool()
+
+ // Core controls.
+ val ebreak = Bool()
+ val ecall = Bool()
+ val eexit = Bool()
+ val eyield = Bool()
+ val ectxsw = Bool()
+ val mpause = Bool()
+ val mret = Bool()
+ val undef = Bool()
+
+ // Fences.
+ val fencei = Bool()
+ val flushat = Bool()
+ val flushall = Bool()
+
+ // Scalar logging.
+ val slog = Bool()
+
+ def isAluImm(): Bool = { // Any ALU instruction taking an immediate operand.
+ addi || slti || sltiu || xori || ori || andi || slli || srli || srai
+ }
+ def isAluReg(): Bool = { // Any register-register ALU instruction.
+ add || sub || slt || sltu || xor || or || and || sll || srl || sra
+ }
+ def isAlu1Bit(): Bool = { clz || ctz || pcnt } // Bit-count group; Decode reads only rs1 for these.
+ def isAlu2Bit(): Bool = { min || minu || max || maxu } // Min/max group; Decode reads rs1 and rs2 for these.
+ def isCsr(): Bool = { csrrw || csrrs || csrrc } // Any CSR access.
+ def isCondBr(): Bool = { beq || bne || blt || bge || bltu || bgeu } // Any conditional branch.
+ def isLoad(): Bool = { lb || lh || lw || lbu || lhu } // Any scalar load.
+ def isStore(): Bool = { sb || sh || sw } // Any scalar store.
+ def isLsu(): Bool = { isLoad() || isStore() || vld || vst || flushat || flushall } // NOTE(review): fencei is not included here although Decode maps it to LsuOp.FENCEI - confirm intended.
+ def isMul(): Bool = { mul || mulh || mulhsu || mulhu || mulhr || mulhsur || mulhur || dmulh || dmulhr } // Any multiplier instruction.
+ def isDvu(): Bool = { div || divu || rem || remu } // Any divide/remainder instruction.
+ def isVector(): Bool = { vld || vst || viop || getvl || getmaxvl } // Any instruction issued on the vector interface.
+}
+
class Decode(p: Parameters, pipeline: Int) extends Module {
val io = IO(new Bundle {
// Core controls.
@@ -69,25 +188,25 @@
val busRead = Flipped(new RegfileBusAddrIO)
// ALU interface.
- val alu = Flipped(new AluIO(p))
+ val alu = Valid(new AluCmd)
// Branch interface.
val bru = Flipped(new BruIO(p))
// CSR interface.
- val csr = Flipped(new CsrIO(p))
+ val csr = Valid(new CsrCmd)
// LSU interface.
- val lsu = Flipped(new LsuIO(p))
+ val lsu = Decoupled(new LsuCmd)
// Multiplier interface.
- val mlu = Flipped(new MluIO(p))
+ val mlu = Valid(new MluCmd)
// Divide interface.
- val dvu = Flipped(new DvuIO(p))
+ val dvu = Decoupled(new DvuCmd)
// Vector interface.
- val vinst = Flipped(new VInstIO)
+ val vinst = Decoupled(new VInstCmd)
// Branch status.
val branchTaken = Input(Bool())
@@ -104,11 +223,9 @@
val decodeEn = io.inst.valid && io.inst.ready && !io.branchTaken
// The decode logic.
- val d = Module(new DecodedInstruction(p, pipeline))
- d.io.addr := io.inst.addr
- d.io.inst := io.inst.inst
+ val d = DecodeInstruction(pipeline, io.inst.addr, io.inst.inst)
- val vldst = d.io.vld || d.io.vst
+ val vldst = d.vld || d.vst
val vldst_wb = vldst && io.inst.inst(28)
val rdAddr = Mux(vldst, io.inst.inst(19,15), io.inst.inst(11,7))
@@ -116,94 +233,71 @@
val rs2Addr = io.inst.inst(24,20)
val rs3Addr = io.inst.inst(31,27)
- val isAluImm = d.io.addi || d.io.slti || d.io.sltiu || d.io.xori ||
- d.io.ori || d.io.andi || d.io.slli || d.io.srli || d.io.srai
+ val isCsrImm = d.isCsr() && io.inst.inst(14)
+ val isCsrReg = d.isCsr() && !io.inst.inst(14)
- val isAluReg = d.io.add || d.io.sub || d.io.slt || d.io.sltu || d.io.xor ||
- d.io.or || d.io.and || d.io.sll || d.io.srl || d.io.sra
-
- val isAlu1Bit = d.io.clz || d.io.ctz || d.io.pcnt
- val isAlu2Bit = d.io.min || d.io.minu || d.io.max || d.io.maxu
-
- val isCondBr = d.io.beq || d.io.bne || d.io.blt || d.io.bge ||
- d.io.bltu || d.io.bgeu
-
- val isCsr = d.io.csrrw || d.io.csrrs || d.io.csrrc
- val isCsrImm = isCsr && io.inst.inst(14)
- val isCsrReg = isCsr && !io.inst.inst(14)
-
- val isLoad = d.io.lb || d.io.lh || d.io.lw || d.io.lbu || d.io.lhu
- val isStore = d.io.sb || d.io.sh || d.io.sw
- val isLsu = isLoad || isStore || d.io.vld || d.io.vst || d.io.flushat || d.io.flushall
-
- val isMul = d.io.mul || d.io.mulh || d.io.mulhsu || d.io.mulhu || d.io.mulhr || d.io.mulhsur || d.io.mulhur || d.io.dmulh || d.io.dmulhr
-
- val isDvu = d.io.div || d.io.divu || d.io.rem || d.io.remu
-
- val isVIop = io.vinst.op(new VInstOp().VIOP)
+ val isVIop = (io.vinst.bits.op === VInstOp.VIOP)
val isVIopVs1 = isVIop
val isVIopVs2 = isVIop && io.inst.inst(1,0) === 0.U // exclude: .vv
val isVIopVs3 = isVIop && io.inst.inst(2,0) === 1.U // exclude: .vvv
// Use the forwarded scoreboard to interlock on multicycle operations.
- val aluRdEn = !io.scoreboard.comb(rdAddr) || isVIopVs1 || isStore || isCondBr
- val aluRs1En = !io.scoreboard.comb(rs1Addr) || isVIopVs1 || isLsu || d.io.auipc
- val aluRs2En = !io.scoreboard.comb(rs2Addr) || isVIopVs2 || isLsu || d.io.auipc || isAluImm || isAlu1Bit
+ val aluRdEn = !io.scoreboard.comb(rdAddr) || isVIopVs1 || d.isStore() || d.isCondBr()
+ val aluRs1En = !io.scoreboard.comb(rs1Addr) || isVIopVs1 || d.isLsu() || d.auipc
+ val aluRs2En = !io.scoreboard.comb(rs2Addr) || isVIopVs2 || d.isLsu() || d.auipc || d.isAluImm() || d.isAlu1Bit()
// val aluRs3En = !io.scoreboard.comb(rs3Addr) || isVIopVs3
// val aluEn = aluRdEn && aluRs1En && aluRs2En && aluRs3En // TODO: is aluRs3En needed?
val aluEn = aluRdEn && aluRs1En && aluRs2En
// Interlock jalr but special case return.
- val bruEn = !d.io.jalr || !io.scoreboard.regd(rs1Addr) ||
+ val bruEn = !d.jalr || !io.scoreboard.regd(rs1Addr) ||
io.inst.inst(31,20) === 0.U
// Require interlock on address generation as there is no write forwarding.
- val lsuEn = !isLsu ||
+ val lsuEn = !d.isLsu() ||
!io.serializeIn.lsu && io.lsu.ready &&
- (!isLsu || !io.serializeIn.brcond) && // TODO: can this line be removed?
+ (!d.isLsu() || !io.serializeIn.brcond) && // TODO: can this line be removed?
!(Mux(io.busRead.bypass, io.scoreboard.comb(rs1Addr),
io.scoreboard.regd(rs1Addr)) ||
- io.scoreboard.comb(rs2Addr) && (isStore || vldst))
+ io.scoreboard.comb(rs2Addr) && (d.isStore() || vldst))
// Interlock mul, only one lane accepted.
- val mulEn = (!isMul || !io.serializeIn.mul) && !io.serializeIn.brcond
+ val mulEn = (!d.isMul() || !io.serializeIn.mul) && !io.serializeIn.brcond
// Vector extension interlock.
val vinstEn = !(io.serializeIn.vinst || isVIop && io.serializeIn.brcond) &&
- !(io.vinst.op =/= 0.U && !io.vinst.ready)
+ !(d.isVector() && !io.vinst.ready)
// Fence interlock.
// Input mactive used passthrough, prefer to avoid registers in Decode.
- val fenceEn = !(d.io.fence && io.mactive)
+ val fenceEn = !(d.fence && io.mactive)
// ALU opcode.
- val alu = new AluOp()
- val aluOp = Wire(Vec(alu.Entries, Bool()))
- val aluValid = WiredOR(io.alu.op) // used without decodeEn
- io.alu.valid := decodeEn && aluValid
- io.alu.addr := rdAddr
- io.alu.op := aluOp.asUInt
-
- aluOp(alu.ADD) := d.io.auipc || d.io.addi || d.io.add
- aluOp(alu.SUB) := d.io.sub
- aluOp(alu.SLT) := d.io.slti || d.io.slt
- aluOp(alu.SLTU) := d.io.sltiu || d.io.sltu
- aluOp(alu.XOR) := d.io.xori || d.io.xor
- aluOp(alu.OR) := d.io.ori || d.io.or
- aluOp(alu.AND) := d.io.andi || d.io.and
- aluOp(alu.SLL) := d.io.slli || d.io.sll
- aluOp(alu.SRL) := d.io.srli || d.io.srl
- aluOp(alu.SRA) := d.io.srai || d.io.sra
- aluOp(alu.LUI) := d.io.lui
- aluOp(alu.CLZ) := d.io.clz
- aluOp(alu.CTZ) := d.io.ctz
- aluOp(alu.PCNT) := d.io.pcnt
- aluOp(alu.MIN) := d.io.min
- aluOp(alu.MINU) := d.io.minu
- aluOp(alu.MAX) := d.io.max
- aluOp(alu.MAXU) := d.io.maxu
+ val alu = MuxCase(MakeValid(false.B, AluOp.ADD), Seq(
+ (d.auipc || d.addi || d.add) -> MakeValid(true.B, AluOp.ADD),
+ d.sub -> MakeValid(true.B, AluOp.SUB),
+ (d.slti || d.slt) -> MakeValid(true.B, AluOp.SLT),
+ (d.sltiu || d.sltu) -> MakeValid(true.B, AluOp.SLTU),
+ (d.xori || d.xor) -> MakeValid(true.B, AluOp.XOR),
+ (d.ori || d.or) -> MakeValid(true.B, AluOp.OR),
+ (d.andi || d.and) -> MakeValid(true.B, AluOp.AND),
+ (d.slli || d.sll) -> MakeValid(true.B, AluOp.SLL),
+ (d.srli || d.srl) -> MakeValid(true.B, AluOp.SRL),
+ (d.srai || d.sra) -> MakeValid(true.B, AluOp.SRA),
+ d.lui -> MakeValid(true.B, AluOp.LUI),
+ d.clz -> MakeValid(true.B, AluOp.CLZ),
+ d.ctz -> MakeValid(true.B, AluOp.CTZ),
+ d.pcnt -> MakeValid(true.B, AluOp.PCNT),
+ d.min -> MakeValid(true.B, AluOp.MIN),
+ d.minu -> MakeValid(true.B, AluOp.MINU),
+ d.max -> MakeValid(true.B, AluOp.MAX),
+ d.maxu -> MakeValid(true.B, AluOp.MAXU)
+ ))
+ io.alu.valid := decodeEn && alu.valid
+ io.alu.bits.addr := rdAddr
+ io.alu.bits.op := alu.bits
// Branch conditional opcode.
val bru = new BruOp()
@@ -213,122 +307,109 @@
io.bru.fwd := io.inst.brchFwd
io.bru.op := bruOp.asUInt
io.bru.pc := io.inst.addr
- io.bru.target := io.inst.addr + Mux(io.inst.inst(2), d.io.immjal, d.io.immbr)
+ io.bru.target := io.inst.addr + Mux(io.inst.inst(2), d.immjal, d.immbr)
io.bru.link := rdAddr
- bruOp(bru.JAL) := d.io.jal
- bruOp(bru.JALR) := d.io.jalr
- bruOp(bru.BEQ) := d.io.beq
- bruOp(bru.BNE) := d.io.bne
- bruOp(bru.BLT) := d.io.blt
- bruOp(bru.BGE) := d.io.bge
- bruOp(bru.BLTU) := d.io.bltu
- bruOp(bru.BGEU) := d.io.bgeu
- bruOp(bru.EBREAK) := d.io.ebreak
- bruOp(bru.ECALL) := d.io.ecall
- bruOp(bru.EEXIT) := d.io.eexit
- bruOp(bru.EYIELD) := d.io.eyield
- bruOp(bru.ECTXSW) := d.io.ectxsw
- bruOp(bru.MPAUSE) := d.io.mpause
- bruOp(bru.MRET) := d.io.mret
- bruOp(bru.FENCEI) := d.io.fencei
- bruOp(bru.UNDEF) := d.io.undef
+ bruOp(bru.JAL) := d.jal
+ bruOp(bru.JALR) := d.jalr
+ bruOp(bru.BEQ) := d.beq
+ bruOp(bru.BNE) := d.bne
+ bruOp(bru.BLT) := d.blt
+ bruOp(bru.BGE) := d.bge
+ bruOp(bru.BLTU) := d.bltu
+ bruOp(bru.BGEU) := d.bgeu
+ bruOp(bru.EBREAK) := d.ebreak
+ bruOp(bru.ECALL) := d.ecall
+ bruOp(bru.EEXIT) := d.eexit
+ bruOp(bru.EYIELD) := d.eyield
+ bruOp(bru.ECTXSW) := d.ectxsw
+ bruOp(bru.MPAUSE) := d.mpause
+ bruOp(bru.MRET) := d.mret
+ bruOp(bru.FENCEI) := d.fencei
+ bruOp(bru.UNDEF) := d.undef
// CSR opcode.
- val csr = new CsrOp()
- val csrOp = Wire(Vec(csr.Entries, Bool()))
- val csrValid = WiredOR(io.csr.op) // used without decodeEn
- io.csr.valid := decodeEn && csrValid
- io.csr.addr := rdAddr
- io.csr.index := io.inst.inst(31,20)
- io.csr.op := csrOp.asUInt
-
- csrOp(csr.CSRRW) := d.io.csrrw
- csrOp(csr.CSRRS) := d.io.csrrs
- csrOp(csr.CSRRC) := d.io.csrrc
+ val csr = MuxCase(MakeValid(false.B, CsrOp.CSRRW), Seq(
+ d.csrrw -> MakeValid(true.B, CsrOp.CSRRW),
+ d.csrrs -> MakeValid(true.B, CsrOp.CSRRS),
+ d.csrrc -> MakeValid(true.B, CsrOp.CSRRC)
+ ))
+ io.csr.valid := decodeEn && csr.valid
+ io.csr.bits.addr := rdAddr
+ io.csr.bits.index := io.inst.inst(31,20)
+ io.csr.bits.op := csr.bits
// LSU opcode.
- val lsu = new LsuOp()
- val lsuOp = Wire(Vec(lsu.Entries, Bool()))
- val lsuValid = WiredOR(io.lsu.op) // used without decodeEn
- io.lsu.valid := decodeEn && lsuValid
- io.lsu.store := io.inst.inst(5)
- io.lsu.addr := rdAddr
- io.lsu.op := lsuOp.asUInt
-
- lsuOp(lsu.LB) := d.io.lb
- lsuOp(lsu.LH) := d.io.lh
- lsuOp(lsu.LW) := d.io.lw
- lsuOp(lsu.LBU) := d.io.lbu
- lsuOp(lsu.LHU) := d.io.lhu
- lsuOp(lsu.SB) := d.io.sb
- lsuOp(lsu.SH) := d.io.sh
- lsuOp(lsu.SW) := d.io.sw
- lsuOp(lsu.FENCEI) := d.io.fencei
- lsuOp(lsu.FLUSHAT) := d.io.flushat
- lsuOp(lsu.FLUSHALL) := d.io.flushall
-
- lsuOp(lsu.VLDST) := d.io.vld || d.io.vst
+ val lsu = MuxCase(MakeValid(false.B, LsuOp.LB), Seq(
+ d.lb -> MakeValid(true.B, LsuOp.LB),
+ d.lh -> MakeValid(true.B, LsuOp.LH),
+ d.lw -> MakeValid(true.B, LsuOp.LW),
+ d.lbu -> MakeValid(true.B, LsuOp.LBU),
+ d.lhu -> MakeValid(true.B, LsuOp.LHU),
+ d.sb -> MakeValid(true.B, LsuOp.SB),
+ d.sh -> MakeValid(true.B, LsuOp.SH),
+ d.sw -> MakeValid(true.B, LsuOp.SW),
+ d.fencei -> MakeValid(true.B, LsuOp.FENCEI),
+ d.flushat -> MakeValid(true.B, LsuOp.FLUSHAT),
+ d.flushall -> MakeValid(true.B, LsuOp.FLUSHALL),
+ (d.vld || d.vst) -> MakeValid(true.B, LsuOp.VLDST),
+ ))
+ io.lsu.valid := decodeEn && lsu.valid
+ io.lsu.bits.store := io.inst.inst(5)
+ io.lsu.bits.addr := rdAddr
+ io.lsu.bits.op := lsu.bits
// MLU opcode.
- val mlu = new MluOp()
- val mluOp = Wire(Vec(mlu.Entries, Bool()))
- val mluValid = WiredOR(io.mlu.op) // used without decodeEn
- io.mlu.valid := decodeEn && mluValid
- io.mlu.addr := rdAddr
- io.mlu.op := mluOp.asUInt
-
- mluOp(mlu.MUL) := d.io.mul
- mluOp(mlu.MULH) := d.io.mulh
- mluOp(mlu.MULHSU) := d.io.mulhsu
- mluOp(mlu.MULHU) := d.io.mulhu
- mluOp(mlu.MULHR) := d.io.mulhr
- mluOp(mlu.MULHSUR) := d.io.mulhsur
- mluOp(mlu.MULHUR) := d.io.mulhur
- mluOp(mlu.DMULH) := d.io.dmulh
- mluOp(mlu.DMULHR) := d.io.dmulhr
+ val mlu = MuxCase(MakeValid(false.B, MluOp.MUL), Seq(
+ d.mul -> MakeValid(true.B, MluOp.MUL),
+ d.mulh -> MakeValid(true.B, MluOp.MULH),
+ d.mulhsu -> MakeValid(true.B, MluOp.MULHSU),
+ d.mulhu -> MakeValid(true.B, MluOp.MULHU),
+ d.mulhr -> MakeValid(true.B, MluOp.MULHR),
+ d.mulhsur -> MakeValid(true.B, MluOp.MULHSUR),
+ d.mulhur -> MakeValid(true.B, MluOp.MULHUR),
+ d.dmulh -> MakeValid(true.B, MluOp.DMULH),
+ d.dmulhr -> MakeValid(true.B, MluOp.DMULHR),
+ ))
+ io.mlu.valid := decodeEn && mlu.valid
+ io.mlu.bits.addr := rdAddr
+ io.mlu.bits.op := mlu.bits
// DIV opcode.
- val dvu = new DvuOp()
- val dvuOp = Wire(Vec(dvu.Entries, Bool()))
- val dvuValid = WiredOR(io.dvu.op) // used without decodeEn
- io.dvu.valid := decodeEn && dvuValid
- io.dvu.addr := rdAddr
- io.dvu.op := dvuOp.asUInt
-
- dvuOp(dvu.DIV) := d.io.div
- dvuOp(dvu.DIVU) := d.io.divu
- dvuOp(dvu.REM) := d.io.rem
- dvuOp(dvu.REMU) := d.io.remu
-
- val dvuEn = WiredOR(io.dvu.op) === 0.U || io.dvu.ready
+ val dvu = MuxCase(MakeValid(false.B, DvuOp.DIV), Seq(
+ d.div -> MakeValid(true.B, DvuOp.DIV),
+ d.divu -> MakeValid(true.B, DvuOp.DIVU),
+ d.rem -> MakeValid(true.B, DvuOp.REM),
+ d.remu -> MakeValid(true.B, DvuOp.REMU)
+ ))
+ io.dvu.valid := decodeEn && dvu.valid
+ io.dvu.bits.addr := rdAddr
+ io.dvu.bits.op := dvu.bits
+ val dvuEn = !dvu.valid || io.dvu.ready
// Vector instructions.
- val vinst = new VInstOp()
- val vinstOp = Wire(Vec(vinst.Entries, Bool()))
- val vinstValid = WiredOR(vinstOp) // used without decodeEn
-
- io.vinst.valid := decodeEn && vinstValid
- io.vinst.addr := rdAddr
- io.vinst.inst := io.inst.inst
- io.vinst.op := vinstOp.asUInt
-
- vinstOp(vinst.VLD) := d.io.vld
- vinstOp(vinst.VST) := d.io.vst
- vinstOp(vinst.VIOP) := d.io.viop
- vinstOp(vinst.GETVL) := d.io.getvl
- vinstOp(vinst.GETMAXVL) := d.io.getmaxvl
+ val vinst = MuxCase(MakeValid(false.B, VInstOp.VLD), Seq(
+ d.vld -> MakeValid(true.B, VInstOp.VLD),
+ d.vst -> MakeValid(true.B, VInstOp.VST),
+ d.viop -> MakeValid(true.B, VInstOp.VIOP),
+ d.getvl -> MakeValid(true.B, VInstOp.GETVL),
+ d.getmaxvl -> MakeValid(true.B, VInstOp.GETMAXVL),
+ ))
+ io.vinst.valid := decodeEn && vinst.valid
+ io.vinst.bits.addr := rdAddr
+ io.vinst.bits.inst := io.inst.inst
+ io.vinst.bits.op := vinst.bits
// Scalar logging.
- io.slog := decodeEn && d.io.slog
+ io.slog := decodeEn && d.slog
// Register file read ports.
- io.rs1Read.valid := decodeEn && (isCondBr || isAluReg || isAluImm || isAlu1Bit || isAlu2Bit ||
- isCsrImm || isCsrReg || isMul || isDvu || d.io.slog ||
- d.io.getvl || d.io.vld || d.io.vst)
- io.rs2Read.valid := decodeEn && (isCondBr || isAluReg || isAlu2Bit || isStore ||
- isCsrReg || isMul || isDvu || d.io.slog || d.io.getvl ||
- d.io.vld || d.io.vst || d.io.viop)
+ io.rs1Read.valid := decodeEn && (d.isCondBr() || d.isAluReg() || d.isAluImm() || d.isAlu1Bit() || d.isAlu2Bit() ||
+ isCsrImm || isCsrReg || d.isMul() || d.isDvu() || d.slog ||
+ d.getvl || d.vld || d.vst)
+ io.rs2Read.valid := decodeEn && (d.isCondBr() || d.isAluReg() || d.isAlu2Bit() || d.isStore() ||
+ isCsrReg || d.isMul() || d.isDvu() || d.slog || d.getvl ||
+ d.vld || d.vst || d.viop)
// rs1 is on critical path to busPortAddr.
io.rs1Read.addr := Mux(io.inst.inst(0), rs1Addr, rs3Addr)
@@ -337,20 +418,20 @@
io.rs2Read.addr := rs2Addr
// Register file set ports.
- io.rs1Set.valid := decodeEn && (d.io.auipc || isCsrImm)
- io.rs2Set.valid := io.rs1Set.valid || decodeEn && (isAluImm || isAlu1Bit || d.io.lui)
+ io.rs1Set.valid := decodeEn && (d.auipc || isCsrImm)
+ io.rs2Set.valid := io.rs1Set.valid || decodeEn && (d.isAluImm() || d.isAlu1Bit() || d.lui)
- io.rs1Set.value := Mux(isCsr, d.io.immcsr, io.inst.addr) // Program Counter (PC)
+ io.rs1Set.value := Mux(d.isCsr(), d.immcsr, io.inst.addr) // CSR immediate for CSR ops, otherwise the Program Counter (PC)
- io.rs2Set.value := MuxCase(d.io.imm12,
- IndexedSeq((d.io.auipc || d.io.lui) -> d.io.imm20))
+ io.rs2Set.value := MuxCase(d.imm12,
+ IndexedSeq((d.auipc || d.lui) -> d.imm20))
// Register file write address ports. We speculate without knowing the decode
// enable status to improve timing, and under a branch is ignored anyway.
val rdMark_valid =
- aluValid || csrValid || mluValid || dvuValid && io.dvu.ready ||
- lsuValid && isLoad ||
- d.io.getvl || d.io.getmaxvl || vldst_wb ||
+ alu.valid || csr.valid || mlu.valid || dvu.valid && io.dvu.ready ||
+ lsu.valid && d.isLoad() ||
+ d.getvl || d.getmaxvl || vldst_wb ||
bruValid && (bruOp(bru.JAL) || bruOp(bru.JALR)) && rdAddr =/= 0.U
// val scoreboard_spec = Mux(rdMark_valid || d.io.vst, UIntToOH(rdAddr, 32), 0.U) // TODO: why was d.io.vst included?
@@ -363,317 +444,205 @@
// Register file bus address port.
// Pointer chasing bypass if immediate is zero.
// Load/Store immediate selection keys off bit5, and RET off bit6.
- io.busRead.valid := lsuValid
+ io.busRead.valid := lsu.valid
io.busRead.bypass := io.inst.inst(31,25) === 0.U &&
Mux(!io.inst.inst(5) || io.inst.inst(6), io.inst.inst(24,20) === 0.U,
io.inst.inst(11,7) === 0.U)
// SB,SH,SW 0100011
val storeSelect = io.inst.inst(6,3) === 4.U && io.inst.inst(1,0) === 3.U
- io.busRead.immen := !d.io.flushat
- io.busRead.immed := Cat(d.io.imm12(31,5),
- Mux(storeSelect, d.io.immst(4,0), d.io.imm12(4,0)))
+ io.busRead.immen := !d.flushat
+ io.busRead.immed := Cat(d.imm12(31,5),
+ Mux(storeSelect, d.immst(4,0), d.imm12(4,0)))
// Decode ready signalling to fetch.
// This must not factor branchTaken, which will be done directly in the
// fetch unit. Note above decodeEn resolves for branch for execute usage.
io.inst.ready := aluEn && bruEn && lsuEn && mulEn && dvuEn && vinstEn && fenceEn &&
!io.serializeIn.jump && !io.halted && !io.interlock &&
- (pipeline.U === 0.U || !d.io.undef)
+ (pipeline.U === 0.U || !d.undef)
// Serialize Interface.
- // io.serializeOut.lsu := io.serializeIn.lsu || lsuValid || vldst // vldst interlock for address generation cycle in vinst
+ // io.serializeOut.lsu := io.serializeIn.lsu || lsu.valid || vldst // vldst interlock for address generation cycle in vinst
// io.serializeOut.lsu := io.serializeIn.lsu || vldst // vldst interlock for address generation cycle in vinst
io.serializeOut.lsu := io.serializeIn.lsu
- io.serializeOut.mul := io.serializeIn.mul || mluValid
- io.serializeOut.jump := io.serializeIn.jump || d.io.jal || d.io.jalr ||
- d.io.ebreak || d.io.ecall || d.io.eexit ||
- d.io.eyield || d.io.ectxsw || d.io.mpause || d.io.mret
+ io.serializeOut.mul := io.serializeIn.mul || mlu.valid
+ io.serializeOut.jump := io.serializeIn.jump || d.jal || d.jalr ||
+ d.ebreak || d.ecall || d.eexit ||
+ d.eyield || d.ectxsw || d.mpause || d.mret
io.serializeOut.brcond := io.serializeIn.brcond |
- d.io.beq || d.io.bne || d.io.blt || d.io.bge || d.io.bltu || d.io.bgeu
+ d.beq || d.bne || d.blt || d.bge || d.bltu || d.bgeu
io.serializeOut.vinst := io.serializeIn.vinst
}
-class DecodedInstruction(p: Parameters, pipeline: Int) extends Module {
- val io = IO(new Bundle {
- val addr = Input(UInt(32.W))
- val inst = Input(UInt(32.W))
+object DecodeInstruction {
+ def apply(pipeline: Int, addr: UInt, op: UInt): DecodedInstruction = {
+ val d = Wire(new DecodedInstruction)
// Immediates
- val imm12 = Output(UInt(32.W))
- val imm20 = Output(UInt(32.W))
- val immjal = Output(UInt(32.W))
- val immbr = Output(UInt(32.W))
- val immcsr = Output(UInt(32.W))
- val immst = Output(UInt(32.W))
+ d.imm12 := Cat(Fill(20, op(31)), op(31,20))
+ d.imm20 := Cat(op(31,12), 0.U(12.W))
+ d.immjal := Cat(Fill(12, op(31)), op(19,12), op(20), op(30,21), 0.U(1.W))
+ d.immbr := Cat(Fill(20, op(31)), op(7), op(30,25), op(11,8), 0.U(1.W))
+ d.immcsr := op(19,15)
+ d.immst := Cat(Fill(20, op(31)), op(31,25), op(11,7))
// RV32I
- val lui = Output(Bool())
- val auipc = Output(Bool())
- val jal = Output(Bool())
- val jalr = Output(Bool())
- val beq = Output(Bool())
- val bne = Output(Bool())
- val blt = Output(Bool())
- val bge = Output(Bool())
- val bltu = Output(Bool())
- val bgeu = Output(Bool())
- val csrrw = Output(Bool())
- val csrrs = Output(Bool())
- val csrrc = Output(Bool())
- val lb = Output(Bool())
- val lh = Output(Bool())
- val lw = Output(Bool())
- val lbu = Output(Bool())
- val lhu = Output(Bool())
- val sb = Output(Bool())
- val sh = Output(Bool())
- val sw = Output(Bool())
- val fence = Output(Bool())
- val addi = Output(Bool())
- val slti = Output(Bool())
- val sltiu = Output(Bool())
- val xori = Output(Bool())
- val ori = Output(Bool())
- val andi = Output(Bool())
- val slli = Output(Bool())
- val srli = Output(Bool())
- val srai = Output(Bool())
- val add = Output(Bool())
- val sub = Output(Bool())
- val slt = Output(Bool())
- val sltu = Output(Bool())
- val xor = Output(Bool())
- val or = Output(Bool())
- val and = Output(Bool())
- val sll = Output(Bool())
- val srl = Output(Bool())
- val sra = Output(Bool())
+ d.lui := DecodeBits(op, "xxxxxxxxxxxxxxxxxxxx_xxxxx_0110111")
+ d.auipc := DecodeBits(op, "xxxxxxxxxxxxxxxxxxxx_xxxxx_0010111")
+ d.jal := DecodeBits(op, "xxxxxxxxxxxxxxxxxxxx_xxxxx_1101111")
+ d.jalr := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_000_xxxxx_1100111")
+ d.beq := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_000_xxxxx_1100011")
+ d.bne := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_001_xxxxx_1100011")
+ d.blt := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_100_xxxxx_1100011")
+ d.bge := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_101_xxxxx_1100011")
+ d.bltu := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_110_xxxxx_1100011")
+ d.bgeu := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_111_xxxxx_1100011")
+ d.csrrw := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_x01_xxxxx_1110011")
+ d.csrrs := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_x10_xxxxx_1110011")
+ d.csrrc := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_x11_xxxxx_1110011")
+ d.lb := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_000_xxxxx_0000011")
+ d.lh := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_001_xxxxx_0000011")
+ d.lw := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_010_xxxxx_0000011")
+ d.lbu := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_100_xxxxx_0000011")
+ d.lhu := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_101_xxxxx_0000011")
+ d.sb := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_000_xxxxx_0100011")
+ d.sh := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_001_xxxxx_0100011")
+ d.sw := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_010_xxxxx_0100011")
+ d.fence := DecodeBits(op, "0000_xxxx_xxxx_00000_000_00000_0001111")
+ d.addi := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_000_xxxxx_0010011")
+ d.slti := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_010_xxxxx_0010011")
+ d.sltiu := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_011_xxxxx_0010011")
+ d.xori := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_100_xxxxx_0010011")
+ d.ori := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_110_xxxxx_0010011")
+ d.andi := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_111_xxxxx_0010011")
+ d.slli := DecodeBits(op, "0000000_xxxxx_xxxxx_001_xxxxx_0010011")
+ d.srli := DecodeBits(op, "0000000_xxxxx_xxxxx_101_xxxxx_0010011")
+ d.srai := DecodeBits(op, "0100000_xxxxx_xxxxx_101_xxxxx_0010011")
+ d.add := DecodeBits(op, "0000000_xxxxx_xxxxx_000_xxxxx_0110011")
+ d.sub := DecodeBits(op, "0100000_xxxxx_xxxxx_000_xxxxx_0110011")
+ d.slt := DecodeBits(op, "0000000_xxxxx_xxxxx_010_xxxxx_0110011")
+ d.sltu := DecodeBits(op, "0000000_xxxxx_xxxxx_011_xxxxx_0110011")
+ d.xor := DecodeBits(op, "0000000_xxxxx_xxxxx_100_xxxxx_0110011")
+ d.or := DecodeBits(op, "0000000_xxxxx_xxxxx_110_xxxxx_0110011")
+ d.and := DecodeBits(op, "0000000_xxxxx_xxxxx_111_xxxxx_0110011")
+ d.sll := DecodeBits(op, "0000000_xxxxx_xxxxx_001_xxxxx_0110011")
+ d.srl := DecodeBits(op, "0000000_xxxxx_xxxxx_101_xxxxx_0110011")
+ d.sra := DecodeBits(op, "0100000_xxxxx_xxxxx_101_xxxxx_0110011")
// RV32M
- val mul = Output(Bool())
- val mulh = Output(Bool())
- val mulhsu = Output(Bool())
- val mulhu = Output(Bool())
- val mulhr = Output(Bool())
- val mulhsur = Output(Bool())
- val mulhur = Output(Bool())
- val dmulh = Output(Bool())
- val dmulhr = Output(Bool())
- val div = Output(Bool())
- val divu = Output(Bool())
- val rem = Output(Bool())
- val remu = Output(Bool())
+ d.mul := DecodeBits(op, "0000_001_xxxxx_xxxxx_000_xxxxx_0110011")
+ d.mulh := DecodeBits(op, "0000_001_xxxxx_xxxxx_001_xxxxx_0110011")
+ d.mulhsu := DecodeBits(op, "0000_001_xxxxx_xxxxx_010_xxxxx_0110011")
+ d.mulhu := DecodeBits(op, "0000_001_xxxxx_xxxxx_011_xxxxx_0110011")
+ d.mulhr := DecodeBits(op, "0010_001_xxxxx_xxxxx_001_xxxxx_0110011")
+ d.mulhsur := DecodeBits(op, "0010_001_xxxxx_xxxxx_010_xxxxx_0110011")
+ d.mulhur := DecodeBits(op, "0010_001_xxxxx_xxxxx_011_xxxxx_0110011")
+ d.dmulh := DecodeBits(op, "0000_010_xxxxx_xxxxx_001_xxxxx_0110011")
+ d.dmulhr := DecodeBits(op, "0010_010_xxxxx_xxxxx_001_xxxxx_0110011")
+ d.div := DecodeBits(op, "0000_001_xxxxx_xxxxx_100_xxxxx_0110011")
+ d.divu := DecodeBits(op, "0000_001_xxxxx_xxxxx_101_xxxxx_0110011")
+ d.rem := DecodeBits(op, "0000_001_xxxxx_xxxxx_110_xxxxx_0110011")
+ d.remu := DecodeBits(op, "0000_001_xxxxx_xxxxx_111_xxxxx_0110011")
// RV32B
- val clz = Output(Bool())
- val ctz = Output(Bool())
- val pcnt = Output(Bool())
- val min = Output(Bool())
- val minu = Output(Bool())
- val max = Output(Bool())
- val maxu = Output(Bool())
+ d.clz := DecodeBits(op, "0110000_00000_xxxxx_001_xxxxx_0010011")
+ d.ctz := DecodeBits(op, "0110000_00001_xxxxx_001_xxxxx_0010011")
+ d.pcnt := DecodeBits(op, "0110000_00010_xxxxx_001_xxxxx_0010011")
+ d.min := DecodeBits(op, "0000101_xxxxx_xxxxx_100_xxxxx_0110011")
+ d.minu := DecodeBits(op, "0000101_xxxxx_xxxxx_101_xxxxx_0110011")
+ d.max := DecodeBits(op, "0000101_xxxxx_xxxxx_110_xxxxx_0110011")
+ d.maxu := DecodeBits(op, "0000101_xxxxx_xxxxx_111_xxxxx_0110011")
+
+ // Decode scalar log.
+ val slog = DecodeBits(op, "01111_00_00000_xxxxx_0xx_00000_11101_11")
+
+ // Vector length.
+ d.getvl := DecodeBits(op, "0001x_xx_xxxxx_xxxxx_000_xxxxx_11101_11") && op(26,25) =/= 3.U && (op(24,20) =/= 0.U || op(19,15) =/= 0.U)
+ d.getmaxvl := DecodeBits(op, "0001x_xx_00000_00000_000_xxxxx_11101_11") && op(26,25) =/= 3.U
+
+ // Vector load/store.
+ d.vld := DecodeBits(op, "000xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vld
+
+ d.vst := DecodeBits(op, "001xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") || // vst
+ DecodeBits(op, "011xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vstq
+
+ // Convolution transfer accumulators to vregs. Also decodes acset/actr ops.
+ val vconv = DecodeBits(op, "010100_000000_000000_xx_xxxxxx_x_111_11")
+
+ // Duplicate
+ val vdup = DecodeBits(op, "01000x_0xxxxx_000000_xx_xxxxxx_x_111_11") && op(13,12) <= 2.U
+ val vdupi = vdup && op(26) === 0.U
// Vector instructions.
- val getvl = Output(Bool())
- val getmaxvl = Output(Bool())
- val vld = Output(Bool())
- val vst = Output(Bool())
- val viop = Output(Bool())
+ d.viop := op(0) === 0.U || // .vv .vx
+ op(1,0) === 1.U || // .vvv .vxv
+ vconv || vdupi
- // Core controls.
- val ebreak = Output(Bool())
- val ecall = Output(Bool())
- val eexit = Output(Bool())
- val eyield = Output(Bool())
- val ectxsw = Output(Bool())
- val mpause = Output(Bool())
- val mret = Output(Bool())
- val undef = Output(Bool())
+ // [extensions] Core controls.
+ d.ebreak := DecodeBits(op, "000000000001_00000_000_00000_11100_11")
+ d.ecall := DecodeBits(op, "000000000000_00000_000_00000_11100_11")
+ d.eexit := DecodeBits(op, "000000100000_00000_000_00000_11100_11")
+ d.eyield := DecodeBits(op, "000001000000_00000_000_00000_11100_11")
+ d.ectxsw := DecodeBits(op, "000001100000_00000_000_00000_11100_11")
+ d.mpause := DecodeBits(op, "000010000000_00000_000_00000_11100_11")
+ d.mret := DecodeBits(op, "001100000010_00000_000_00000_11100_11")
// Fences.
- val fencei = Output(Bool())
- val flushat = Output(Bool())
- val flushall = Output(Bool())
+ d.fencei := DecodeBits(op, "0000_0000_0000_00000_001_00000_0001111")
+ d.flushat := DecodeBits(op, "0010x_xx_00000_xxxxx_000_00000_11101_11") && op(19,15) =/= 0.U
+ d.flushall := DecodeBits(op, "0010x_xx_00000_00000_000_00000_11101_11")
- // Scalar logging.
- val slog = Output(Bool())
- })
+ // [extensions] Scalar logging.
+ d.slog := slog
- val op = io.inst
+ // Stub out decoder state not used beyond pipeline0; these later connections override the decodes assigned above.
+ if (pipeline > 0) {
+ d.csrrw := false.B
+ d.csrrs := false.B
+ d.csrrc := false.B
- // Immediates
- io.imm12 := Cat(Fill(20, op(31)), op(31,20))
- io.imm20 := Cat(op(31,12), 0.U(12.W))
- io.immjal := Cat(Fill(12, op(31)), op(19,12), op(20), op(30,21), 0.U(1.W))
- io.immbr := Cat(Fill(20, op(31)), op(7), op(30,25), op(11,8), 0.U(1.W))
- io.immcsr := op(19,15)
- io.immst := Cat(Fill(20, op(31)), op(31,25), op(11,7))
+ d.div := false.B
+ d.divu := false.B
+ d.rem := false.B
+ d.remu := false.B
- // RV32I
- io.lui := DecodeBits(op, "xxxxxxxxxxxxxxxxxxxx_xxxxx_0110111")
- io.auipc := DecodeBits(op, "xxxxxxxxxxxxxxxxxxxx_xxxxx_0010111")
- io.jal := DecodeBits(op, "xxxxxxxxxxxxxxxxxxxx_xxxxx_1101111")
- io.jalr := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_000_xxxxx_1100111")
- io.beq := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_000_xxxxx_1100011")
- io.bne := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_001_xxxxx_1100011")
- io.blt := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_100_xxxxx_1100011")
- io.bge := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_101_xxxxx_1100011")
- io.bltu := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_110_xxxxx_1100011")
- io.bgeu := DecodeBits(op, "xxxxxxx_xxxxx_xxxxx_111_xxxxx_1100011")
- io.csrrw := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_x01_xxxxx_1110011")
- io.csrrs := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_x10_xxxxx_1110011")
- io.csrrc := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_x11_xxxxx_1110011")
- io.lb := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_000_xxxxx_0000011")
- io.lh := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_001_xxxxx_0000011")
- io.lw := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_010_xxxxx_0000011")
- io.lbu := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_100_xxxxx_0000011")
- io.lhu := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_101_xxxxx_0000011")
- io.sb := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_000_xxxxx_0100011")
- io.sh := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_001_xxxxx_0100011")
- io.sw := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_010_xxxxx_0100011")
- io.fence := DecodeBits(op, "0000_xxxx_xxxx_00000_000_00000_0001111")
- io.addi := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_000_xxxxx_0010011")
- io.slti := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_010_xxxxx_0010011")
- io.sltiu := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_011_xxxxx_0010011")
- io.xori := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_100_xxxxx_0010011")
- io.ori := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_110_xxxxx_0010011")
- io.andi := DecodeBits(op, "xxxxxxxxxxxx_xxxxx_111_xxxxx_0010011")
- io.slli := DecodeBits(op, "0000000_xxxxx_xxxxx_001_xxxxx_0010011")
- io.srli := DecodeBits(op, "0000000_xxxxx_xxxxx_101_xxxxx_0010011")
- io.srai := DecodeBits(op, "0100000_xxxxx_xxxxx_101_xxxxx_0010011")
- io.add := DecodeBits(op, "0000000_xxxxx_xxxxx_000_xxxxx_0110011")
- io.sub := DecodeBits(op, "0100000_xxxxx_xxxxx_000_xxxxx_0110011")
- io.slt := DecodeBits(op, "0000000_xxxxx_xxxxx_010_xxxxx_0110011")
- io.sltu := DecodeBits(op, "0000000_xxxxx_xxxxx_011_xxxxx_0110011")
- io.xor := DecodeBits(op, "0000000_xxxxx_xxxxx_100_xxxxx_0110011")
- io.or := DecodeBits(op, "0000000_xxxxx_xxxxx_110_xxxxx_0110011")
- io.and := DecodeBits(op, "0000000_xxxxx_xxxxx_111_xxxxx_0110011")
- io.sll := DecodeBits(op, "0000000_xxxxx_xxxxx_001_xxxxx_0110011")
- io.srl := DecodeBits(op, "0000000_xxxxx_xxxxx_101_xxxxx_0110011")
- io.sra := DecodeBits(op, "0100000_xxxxx_xxxxx_101_xxxxx_0110011")
+ d.ebreak := false.B
+ d.ecall := false.B
+ d.eexit := false.B
+ d.eyield := false.B
+ d.ectxsw := false.B
+ d.mpause := false.B
+ d.mret := false.B
- // RV32M
- io.mul := DecodeBits(op, "0000_001_xxxxx_xxxxx_000_xxxxx_0110011")
- io.mulh := DecodeBits(op, "0000_001_xxxxx_xxxxx_001_xxxxx_0110011")
- io.mulhsu := DecodeBits(op, "0000_001_xxxxx_xxxxx_010_xxxxx_0110011")
- io.mulhu := DecodeBits(op, "0000_001_xxxxx_xxxxx_011_xxxxx_0110011")
- io.mulhr := DecodeBits(op, "0010_001_xxxxx_xxxxx_001_xxxxx_0110011")
- io.mulhsur := DecodeBits(op, "0010_001_xxxxx_xxxxx_010_xxxxx_0110011")
- io.mulhur := DecodeBits(op, "0010_001_xxxxx_xxxxx_011_xxxxx_0110011")
- io.dmulh := DecodeBits(op, "0000_010_xxxxx_xxxxx_001_xxxxx_0110011")
- io.dmulhr := DecodeBits(op, "0010_010_xxxxx_xxxxx_001_xxxxx_0110011")
- io.div := DecodeBits(op, "0000_001_xxxxx_xxxxx_100_xxxxx_0110011")
- io.divu := DecodeBits(op, "0000_001_xxxxx_xxxxx_101_xxxxx_0110011")
- io.rem := DecodeBits(op, "0000_001_xxxxx_xxxxx_110_xxxxx_0110011")
- io.remu := DecodeBits(op, "0000_001_xxxxx_xxxxx_111_xxxxx_0110011")
+ d.fence := false.B
+ d.fencei := false.B
+ d.flushat := false.B
+ d.flushall := false.B
- // RV32B
- io.clz := DecodeBits(op, "0110000_00000_xxxxx_001_xxxxx_0010011")
- io.ctz := DecodeBits(op, "0110000_00001_xxxxx_001_xxxxx_0010011")
- io.pcnt := DecodeBits(op, "0110000_00010_xxxxx_001_xxxxx_0010011")
- io.min := DecodeBits(op, "0000101_xxxxx_xxxxx_100_xxxxx_0110011")
- io.minu := DecodeBits(op, "0000101_xxxxx_xxxxx_101_xxxxx_0110011")
- io.max := DecodeBits(op, "0000101_xxxxx_xxxxx_110_xxxxx_0110011")
- io.maxu := DecodeBits(op, "0000101_xxxxx_xxxxx_111_xxxxx_0110011")
+ d.slog := false.B
+ }
- // Decode scalar log.
- val slog = DecodeBits(op, "01111_00_00000_xxxxx_0xx_00000_11101_11")
+ // Generate the undefined-opcode flag: the inverse of WiredOR over all decode bits below (i.e. no decode matched).
+ val decoded = Cat(d.lui, d.auipc,
+ d.jal, d.jalr,
+ d.beq, d.bne, d.blt, d.bge, d.bltu, d.bgeu,
+ d.csrrw, d.csrrs, d.csrrc,
+ d.lb, d.lh, d.lw, d.lbu, d.lhu,
+ d.sb, d.sh, d.sw, d.fence,
+ d.addi, d.slti, d.sltiu, d.xori, d.ori, d.andi,
+ d.add, d.sub, d.slt, d.sltu, d.xor, d.or, d.and,
+ d.slli, d.srli, d.srai, d.sll, d.srl, d.sra,
+ d.mul, d.mulh, d.mulhsu, d.mulhu, d.mulhr, d.mulhsur, d.mulhur, d.dmulh, d.dmulhr,
+ d.div, d.divu, d.rem, d.remu,
+ d.clz, d.ctz, d.pcnt, d.min, d.minu, d.max, d.maxu,
+ d.viop, d.vld, d.vst,
+ d.getvl, d.getmaxvl,
+ d.ebreak, d.ecall, d.eexit, d.eyield, d.ectxsw,
+ d.mpause, d.mret, d.fencei, d.flushat, d.flushall, d.slog)
- // Vector length.
- io.getvl := DecodeBits(op, "0001x_xx_xxxxx_xxxxx_000_xxxxx_11101_11") && op(26,25) =/= 3.U && (op(24,20) =/= 0.U || op(19,15) =/= 0.U)
- io.getmaxvl := DecodeBits(op, "0001x_xx_00000_00000_000_xxxxx_11101_11") && op(26,25) =/= 3.U
+ d.undef := !WiredOR(decoded)
- // Vector load/store.
- io.vld := DecodeBits(op, "000xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vld
-
- io.vst := DecodeBits(op, "001xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") || // vst
- DecodeBits(op, "011xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vstq
-
- // Convolution transfer accumulators to vregs. Also decodes acset/actr ops.
- val vconv = DecodeBits(op, "010100_000000_000000_xx_xxxxxx_x_111_11")
-
- // Duplicate
- val vdup = DecodeBits(op, "01000x_0xxxxx_000000_xx_xxxxxx_x_111_11") && op(13,12) <= 2.U
- val vdupi = vdup && op(26) === 0.U
-
- // Vector instructions.
- io.viop := op(0) === 0.U || // .vv .vx
- op(1,0) === 1.U || // .vvv .vxv
- vconv || vdupi
-
- // [extensions] Core controls.
- io.ebreak := DecodeBits(op, "000000000001_00000_000_00000_11100_11")
- io.ecall := DecodeBits(op, "000000000000_00000_000_00000_11100_11")
- io.eexit := DecodeBits(op, "000000100000_00000_000_00000_11100_11")
- io.eyield := DecodeBits(op, "000001000000_00000_000_00000_11100_11")
- io.ectxsw := DecodeBits(op, "000001100000_00000_000_00000_11100_11")
- io.mpause := DecodeBits(op, "000010000000_00000_000_00000_11100_11")
- io.mret := DecodeBits(op, "001100000010_00000_000_00000_11100_11")
-
- // Fences.
- io.fencei := DecodeBits(op, "0000_0000_0000_00000_001_00000_0001111")
- io.flushat := DecodeBits(op, "0010x_xx_00000_xxxxx_000_00000_11101_11") && op(19,15) =/= 0.U
- io.flushall := DecodeBits(op, "0010x_xx_00000_00000_000_00000_11101_11")
-
- // [extensions] Scalar logging.
- io.slog := slog
-
- // Stub out decoder state not used beyond pipeline0.
- if (pipeline > 0) {
- io.csrrw := false.B
- io.csrrs := false.B
- io.csrrc := false.B
-
- io.div := false.B
- io.divu := false.B
- io.rem := false.B
- io.remu := false.B
-
- io.ebreak := false.B
- io.ecall := false.B
- io.eexit := false.B
- io.eyield := false.B
- io.ectxsw := false.B
- io.mpause := false.B
- io.mret := false.B
-
- io.fence := false.B
- io.fencei := false.B
- io.flushat := false.B
- io.flushall := false.B
-
- io.slog := false.B
- }
-
- // Generate the undefined opcode.
- val decoded = Cat(io.lui, io.auipc,
- io.jal, io.jalr,
- io.beq, io.bne, io.blt, io.bge, io.bltu, io.bgeu,
- io.csrrw, io.csrrs, io.csrrc,
- io.lb, io.lh, io.lw, io.lbu, io.lhu,
- io.sb, io.sh, io.sw, io.fence,
- io.addi, io.slti, io.sltiu, io.xori, io.ori, io.andi,
- io.add, io.sub, io.slt, io.sltu, io.xor, io.or, io.and,
- io.slli, io.srli, io.srai, io.sll, io.srl, io.sra,
- io.mul, io.mulh, io.mulhsu, io.mulhu, io.mulhr, io.mulhsur, io.mulhur, io.dmulh, io.dmulhr,
- io.div, io.divu, io.rem, io.remu,
- io.clz, io.ctz, io.pcnt, io.min, io.minu, io.max, io.maxu,
- io.viop, io.vld, io.vst,
- io.getvl, io.getmaxvl,
- io.ebreak, io.ecall, io.eexit, io.eyield, io.ectxsw,
- io.mpause, io.mret, io.fencei, io.flushat, io.flushall, io.slog)
-
- io.undef := !WiredOR(decoded)
-
- // Delay the assert until the next cycle, so that logs appear on console.
- val onehot_failed = RegInit(false.B)
- assert(!onehot_failed)
-
- val onehot_decode = PopCount(decoded)
- when ((onehot_decode + io.undef) =/= 1.U) {
- onehot_failed := true.B
- printf("[FAIL] decode inst=%x addr=%x decoded=0b%b pipeline=%d\n",
- io.inst, io.addr, decoded, pipeline.U)
+ d
}
}
diff --git a/hdl/chisel/src/kelvin/scalar/Dvu.scala b/hdl/chisel/src/kelvin/scalar/Dvu.scala
index 479b489..312589c 100644
--- a/hdl/chisel/src/kelvin/scalar/Dvu.scala
+++ b/hdl/chisel/src/kelvin/scalar/Dvu.scala
@@ -25,25 +25,22 @@
}
}
-case class DvuOp() {
- val DIV = 0
- val DIVU = 1
- val REM = 2
- val REMU = 3
- val Entries = 4
+object DvuOp extends ChiselEnum {  // Divide-unit operation carried in DvuCmd.op; Dvu latches signed1/divide1 from it when a request is accepted.
+ val DIV = Value  // signed1 = true, divide1 = true
+ val DIVU = Value  // signed1 = false, divide1 = true
+ val REM = Value  // signed1 = true, divide1 = false
+ val REMU = Value  // signed1 = false, divide1 = false
}
-class DvuIO(p: Parameters) extends Bundle {
- val valid = Input(Bool())
- val ready = Output(Bool())
- val addr = Input(UInt(5.W))
- val op = Input(UInt(new DvuOp().Entries.W))
+class DvuCmd extends Bundle {
+ val addr = UInt(5.W)
+ val op = DvuOp()
}
class Dvu(p: Parameters) extends Module {
val io = IO(new Bundle {
// Decode cycle.
- val req = new DvuIO(p)
+ val req = Flipped(Decoupled(new DvuCmd))
// Execute cycle.
val rs1 = Flipped(new RegfileReadDataIO)
@@ -58,7 +55,6 @@
// This implemention differs to common::idiv by supporting early termination,
// and only performs one bit per cycle.
- val dvu = new DvuOp()
def Divide(prvDivide: UInt, prvRemain: UInt, denom: UInt): (UInt, UInt) = {
val shfRemain = Cat(prvRemain(30,0), prvDivide(31))
@@ -116,9 +112,9 @@
compute := active
when (io.req.valid && io.req.ready) {
- addr1 := io.req.addr
- signed1 := io.req.op(dvu.DIV) || io.req.op(dvu.REM)
- divide1 := io.req.op(dvu.DIV) || io.req.op(dvu.DIVU)
+ addr1 := io.req.bits.addr
+ signed1 := io.req.bits.op.isOneOf(DvuOp.DIV, DvuOp.REM)
+ divide1 := io.req.bits.op.isOneOf(DvuOp.DIV, DvuOp.DIVU)
}
when (active && !compute) {
diff --git a/hdl/chisel/src/kelvin/scalar/Lsu.scala b/hdl/chisel/src/kelvin/scalar/Lsu.scala
index b13364d..0da8e36 100644
--- a/hdl/chisel/src/kelvin/scalar/Lsu.scala
+++ b/hdl/chisel/src/kelvin/scalar/Lsu.scala
@@ -38,28 +38,25 @@
val rdata = Input(UInt(p.lsuDataBits.W))
}
-case class LsuOp() {
- val LB = 0
- val LH = 1
- val LW = 2
- val LBU = 3
- val LHU = 4
- val SB = 5
- val SH = 6
- val SW = 7
- val FENCEI = 8
- val FLUSHAT = 9
- val FLUSHALL = 10
- val VLDST = 11
- val Entries = 12
+object LsuOp extends ChiselEnum {  // Load/store-unit operation carried in LsuCmd.op; Lsu derives opstore/opiload/opsext/opsize and the fence/flush/vldst flags from it.
+ val LB = Value  // integer load (opiload), sign-extended (opsext), opsize bit 0
+ val LH = Value  // integer load (opiload), sign-extended (opsext), opsize bit 1
+ val LW = Value  // integer load (opiload), opsize bit 2
+ val LBU = Value  // integer load (opiload), no opsext, opsize bit 0
+ val LHU = Value  // integer load (opiload), no opsext, opsize bit 1
+ val SB = Value  // store (opstore), opsize bit 0
+ val SH = Value  // store (opstore), opsize bit 1
+ val SW = Value  // store (opstore), opsize bit 2
+ val FENCEI = Value  // sets opfencei in Lsu
+ val FLUSHAT = Value  // sets opflushat in Lsu
+ val FLUSHALL = Value  // sets opflushall in Lsu
+ val VLDST = Value  // sets opvldst in Lsu
}
-class LsuIO(p: Parameters) extends Bundle {
- val valid = Input(Bool())
- val ready = Output(Bool())
- val store = Input(Bool())
- val addr = Input(UInt(5.W))
- val op = Input(UInt(new LsuOp().Entries.W))
+class LsuCmd extends Bundle {
+ val store = Bool()
+ val addr = UInt(5.W)
+ val op = LsuOp()
}
class LsuCtrl(p: Parameters) extends Bundle {
@@ -92,7 +89,7 @@
class Lsu(p: Parameters) extends Module {
val io = IO(new Bundle {
// Decode cycle.
- val req = Vec(p.instructionLanes, new LsuIO(p))
+ val req = Vec(p.instructionLanes, Flipped(Decoupled(new LsuCmd)))
val busPort = Flipped(new RegfileBusPortIO(p))
// Execute cycle(s).
@@ -111,8 +108,6 @@
val storeCount = Output(UInt(2.W))
})
- val lsu = new LsuOp()
-
// AXI Queues.
val n = 8
val ctrl = FifoX(new LsuCtrl(p), p.instructionLanes, n)
@@ -143,25 +138,25 @@
val uncached = io.busPort.addr(i)(31) ||
(if (uncacheable.length > 0) uncacheable.map(x => (io.busPort.addr(i) >= x.memStart.U) && (io.busPort.addr(i) < (x.memStart + x.memSize).U)).reduce(_||_) else false.B)
- val opstore = io.req(i).op(lsu.SW) || io.req(i).op(lsu.SH) || io.req(i).op(lsu.SB)
- val opiload = io.req(i).op(lsu.LW) || io.req(i).op(lsu.LH) || io.req(i).op(lsu.LB) || io.req(i).op(lsu.LHU) || io.req(i).op(lsu.LBU)
+ val opstore = io.req(i).bits.op.isOneOf(LsuOp.SW, LsuOp.SH, LsuOp.SB)
+ val opiload = io.req(i).bits.op.isOneOf(LsuOp.LW, LsuOp.LH, LsuOp.LB, LsuOp.LHU, LsuOp.LBU)
val opload = opiload
- val opfencei = io.req(i).op(lsu.FENCEI)
- val opflushat = io.req(i).op(lsu.FLUSHAT)
- val opflushall = io.req(i).op(lsu.FLUSHALL)
+ val opfencei = (io.req(i).bits.op === LsuOp.FENCEI)
+ val opflushat = (io.req(i).bits.op === LsuOp.FLUSHAT)
+ val opflushall = (io.req(i).bits.op === LsuOp.FLUSHALL)
val opsldst = opstore || opload
- val opvldst = io.req(i).op(lsu.VLDST)
- val opsext = io.req(i).op(lsu.LB) || io.req(i).op(lsu.LH)
- val opsize = Cat(io.req(i).op(lsu.LW) || io.req(i).op(lsu.SW),
- io.req(i).op(lsu.LH) || io.req(i).op(lsu.LHU) || io.req(i).op(lsu.SH),
- io.req(i).op(lsu.LB) || io.req(i).op(lsu.LBU) || io.req(i).op(lsu.SB))
+ val opvldst = (io.req(i).bits.op === LsuOp.VLDST)
+ val opsext = io.req(i).bits.op.isOneOf(LsuOp.LB, LsuOp.LH)
+ val opsize = Cat(io.req(i).bits.op.isOneOf(LsuOp.LW, LsuOp.SW),
+ io.req(i).bits.op.isOneOf(LsuOp.LH, LsuOp.LHU, LsuOp.SH),
+ io.req(i).bits.op.isOneOf(LsuOp.LB, LsuOp.LBU, LsuOp.SB))
ctrl.io.in.bits(i).valid := io.req(i).valid && ctrlready(i) && !(opvldst && uncached)
ctrl.io.in.bits(i).bits.addr := io.busPort.addr(i)
ctrl.io.in.bits(i).bits.adrx := io.busPort.addr(i) + lineoffset.U
ctrl.io.in.bits(i).bits.data := io.busPort.data(i)
- ctrl.io.in.bits(i).bits.index := io.req(i).addr
+ ctrl.io.in.bits(i).bits.index := io.req(i).bits.addr
ctrl.io.in.bits(i).bits.sext := opsext
ctrl.io.in.bits(i).bits.size := opsize
ctrl.io.in.bits(i).bits.iload := opiload
diff --git a/hdl/chisel/src/kelvin/scalar/Mlu.scala b/hdl/chisel/src/kelvin/scalar/Mlu.scala
index b7ad953..335036f 100644
--- a/hdl/chisel/src/kelvin/scalar/Mlu.scala
+++ b/hdl/chisel/src/kelvin/scalar/Mlu.scala
@@ -25,29 +25,28 @@
}
}
-case class MluOp() {
- val MUL = 0
- val MULH = 1
- val MULHSU = 2
- val MULHU = 3
- val MULHR = 4
- val MULHSUR = 5
- val MULHUR = 6
- val DMULH = 7
- val DMULHR = 8
- val Entries = 9
+object MluOp extends ChiselEnum {  // Multiplier operation carried in MluCmd.op; Mlu derives operand signedness, rounding and result slice from it.
+ val MUL = Value  // result is prod(31,0)
+ val MULH = Value  // rs1 and rs2 signed; result is prod(63,32)
+ val MULHSU = Value  // rs1 signed, rs2 unsigned; result is prod(63,32)
+ val MULHU = Value  // rs1 and rs2 unsigned; result is prod(63,32)
+ val MULHR = Value  // as MULH, with round2 taken from prod(31)
+ val MULHSUR = Value  // as MULHSU, with round2 taken from prod(31)
+ val MULHUR = Value  // as MULHU, with round2 taken from prod(31)
+ val DMULH = Value  // rs1 and rs2 signed; handled by the DMULH/DMULHR branch in Mlu
+ val DMULHR = Value  // as DMULH, with round2 taken from prod(30)
+ val Entries = Value  // NOTE(review): looks like a leftover of the old `Entries = 9` count; here it declares a 10th enum member, unlike the other converted enums — confirm whether it should be dropped.
}
-class MluIO(p: Parameters) extends Bundle {
- val valid = Input(Bool())
- val addr = Input(UInt(5.W))
- val op = Input(UInt(new MluOp().Entries.W))
+class MluCmd extends Bundle {
+ val addr = UInt(5.W)
+ val op = MluOp()
}
class Mlu(p: Parameters) extends Module {
val io = IO(new Bundle {
// Decode cycle.
- val req = Vec(p.instructionLanes, new MluIO(p))
+ val req = Flipped(Vec(p.instructionLanes, Valid(new MluCmd)))
// Execute cycle.
val rs1 = Vec(p.instructionLanes, Flipped(new RegfileReadDataIO))
@@ -55,9 +54,7 @@
val rd = Flipped(new RegfileWriteDataIO)
})
- val mlu = new MluOp()
-
- val op = RegInit(0.U(mlu.Entries.W))
+ val op = Reg(MluOp())
val valid1 = RegInit(false.B)
val valid2 = RegInit(false.B)
val addr1 = Reg(UInt(5.W))
@@ -71,12 +68,9 @@
when (valids.reduce(_||_)) {
val idx = PriorityEncoder(valids)
- op := io.req(idx).op
- addr1 := io.req(idx).addr
+ op := io.req(idx).bits.op
+ addr1 := io.req(idx).bits.addr
sel := (1.U << idx)
- } .otherwise {
- op := 0.U
- sel := 0.U
}
val rs1 = (0 until p.instructionLanes).map(x => MuxOR(valid1 & sel(x), io.rs1(x).data)).reduce(_ | _)
@@ -87,22 +81,22 @@
val round2 = Reg(UInt(1.W))
when (valid1) {
- val rs2signed = op(mlu.MULH) || op(mlu.MULHR) || op(mlu.DMULH) || op(mlu.DMULHR)
- val rs1signed = op(mlu.MULHSU) || op(mlu.MULHSUR) || rs2signed
+ val rs2signed = op.isOneOf(MluOp.MULH, MluOp.MULHR, MluOp.DMULH, MluOp.DMULHR)
+ val rs1signed = op.isOneOf(MluOp.MULHSU, MluOp.MULHSUR) || rs2signed
val rs1s = Cat(rs1signed && rs1(31), rs1).asSInt
val rs2s = Cat(rs2signed && rs2(31), rs2).asSInt
val prod = rs1s.asSInt * rs2s.asSInt
assert(prod.getWidth == 66)
addr2 := addr1
- round2 := prod(30) && op(mlu.DMULHR) ||
- prod(31) && (op(mlu.MULHR) || op(mlu.MULHSUR) || op(mlu.MULHUR))
+ round2 := prod(30) && op.isOneOf(MluOp.DMULHR) ||
+ prod(31) && (op.isOneOf(MluOp.MULHR, MluOp.MULHSUR, MluOp.MULHUR))
- when (op(mlu.MUL)) {
+ when (op === MluOp.MUL) {
mul2 := prod(31,0)
- } .elsewhen (op(mlu.MULH) || op(mlu.MULHSU) || op(mlu.MULHU) || op(mlu.MULHR) || op(mlu.MULHSUR) || op(mlu.MULHUR)) {
+ } .elsewhen (op.isOneOf(MluOp.MULH, MluOp.MULHSU, MluOp.MULHU, MluOp.MULHR, MluOp.MULHSUR, MluOp.MULHUR)) {
mul2 := prod(63,32)
- } .elsewhen (op(mlu.DMULH) || op(mlu.DMULHR)) {
+ } .elsewhen (op.isOneOf(MluOp.DMULH, MluOp.DMULHR)) {
val maxneg = 2.U(2.W)
val halfneg = 1.U(2.W)
val sat = rs1(29,0) === 0.U && rs2(29,0) === 0.U &&
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala
index d9e2c32..0c51a55 100644
--- a/hdl/chisel/src/kelvin/scalar/SCore.scala
+++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -180,14 +180,7 @@
// ---------------------------------------------------------------------------
// Load/Store Unit
lsu.io.busPort := regfile.io.busPort
-
- for (i <- 0 until p.instructionLanes) {
- lsu.io.req(i).valid := decode(i).io.lsu.valid
- lsu.io.req(i).store := decode(i).io.lsu.store
- lsu.io.req(i).addr := decode(i).io.lsu.addr
- lsu.io.req(i).op := decode(i).io.lsu.op
- decode(i).io.lsu.ready := lsu.io.req(i).ready
- }
+ lsu.io.req <> decode.map(_.io.lsu)
// ---------------------------------------------------------------------------
// Multiplier Unit
diff --git a/hdl/chisel/src/kelvin/vector/VCore.scala b/hdl/chisel/src/kelvin/vector/VCore.scala
index 919cb27..d9cc36e 100644
--- a/hdl/chisel/src/kelvin/vector/VCore.scala
+++ b/hdl/chisel/src/kelvin/vector/VCore.scala
@@ -28,7 +28,7 @@
class VCoreIO(p: Parameters) extends Bundle {
// Decode cycle.
- val vinst = Vec(p.instructionLanes, new VInstIO)
+ val vinst = Vec(p.instructionLanes, Flipped(Decoupled(new VInstCmd)))
// Execute cycle.
val rs = Vec(p.instructionLanes * 2, Flipped(new RegfileReadDataIO))
diff --git a/hdl/chisel/src/kelvin/vector/VInst.scala b/hdl/chisel/src/kelvin/vector/VInst.scala
index 8757cea..a273bd9 100644
--- a/hdl/chisel/src/kelvin/vector/VInst.scala
+++ b/hdl/chisel/src/kelvin/vector/VInst.scala
@@ -26,22 +26,18 @@
}
}
-case class VInstOp() {
- val GETVL = 0
- val GETMAXVL = 1
- val VLD = 2
- val VST = 3
- val VIOP = 4
- val Entries = 5
- val Bits = log2Ceil(Entries)
+object VInstOp extends ChiselEnum {  // Vector-instruction operation carried in VInstCmd.op and consumed by VInst.
+ val GETVL = Value  // sets getvl(i) for an accepted request
+ val GETMAXVL = Value  // sets getmaxvl(i) for an accepted request
+ val VLD = Value  // marks nxtVinstValid; split into vld_o / vld_u by inst(28)
+ val VST = Value  // marks nxtVinstValid; split into vst_o / vst_u / vst_q by inst(28) and inst(30)
+ val VIOP = Value  // marks nxtVinstValid
}
-class VInstIO extends Bundle {
- val valid = Input(Bool())
- val ready = Output(Bool())
- val addr = Input(UInt(5.W))
- val inst = Input(UInt(32.W))
- val op = Input(UInt(new VInstOp().Entries.W))
+class VInstCmd extends Bundle {
+ val addr = UInt(5.W)
+ val inst = UInt(32.W)
+ val op = VInstOp()
}
class VectorInstructionIO(p: Parameters) extends Bundle {
@@ -68,7 +64,7 @@
class VInst(p: Parameters) extends Module {
val io = IO(new Bundle {
// Decode cycle.
- val in = Vec(p.instructionLanes, new VInstIO)
+ val in = Vec(p.instructionLanes, Flipped(Decoupled(new VInstCmd)))
// Execute cycle.
val rs = Vec(p.instructionLanes * 2, Flipped(new RegfileReadDataIO))
@@ -81,8 +77,6 @@
val nempty = Output(Bool())
})
- val vinst = new VInstOp()
-
val maxvlb = (p.vectorBits / 8).U(p.vectorCountBits.W)
val maxvlh = (p.vectorBits / 16).U(p.vectorCountBits.W)
val maxvlw = (p.vectorBits / 32).U(p.vectorCountBits.W)
@@ -98,7 +92,7 @@
}), true)
val reqvalid = VecInit(io.in.map(x => x.valid && x.ready))
- val reqaddr = VecInit(io.in.map(x => x.inst(19,15)))
+ val reqaddr = VecInit(io.in.map(_.bits.inst(19,15)))
// ---------------------------------------------------------------------------
// Response to Decode.
@@ -120,7 +114,7 @@
for (i <- 0 until p.instructionLanes) {
when (reqvalid(i)) {
- rdAddr(i) := io.in(i).addr
+ rdAddr(i) := io.in(i).bits.addr
}
}
@@ -134,23 +128,23 @@
vvalid := nxtVinstValid.asUInt =/= 0.U
for (i <- 0 until p.instructionLanes) {
- nxtVinstValid(i) := reqvalid(i) && (io.in(i).op(vinst.VLD) ||
- io.in(i).op(vinst.VST) ||
- io.in(i).op(vinst.VIOP))
+ nxtVinstValid(i) := reqvalid(i) && io.in(i).bits.op.isOneOf(
+ VInstOp.VLD, VInstOp.VST, VInstOp.VIOP)
vinstValid(i) := nxtVinstValid(i)
- vinstInst(i) := io.in(i).inst
+ vinstInst(i) := io.in(i).bits.inst
}
for (i <- 0 until p.instructionLanes) {
- val p = io.in(i).inst(28) // func2
- val q = io.in(i).inst(30) // func2
- vld_o(i) := reqvalid(i) && io.in(i).op(vinst.VLD) && !p
- vld_u(i) := reqvalid(i) && io.in(i).op(vinst.VLD) && p
- vst_o(i) := reqvalid(i) && io.in(i).op(vinst.VST) && !p
- vst_u(i) := reqvalid(i) && io.in(i).op(vinst.VST) && p && !q
- vst_q(i) := reqvalid(i) && io.in(i).op(vinst.VST) && p && q
- getvl(i) := reqvalid(i) && io.in(i).op(vinst.GETVL)
- getmaxvl(i) := reqvalid(i) && io.in(i).op(vinst.GETMAXVL)
+ val p = io.in(i).bits.inst(28) // func2
+ val q = io.in(i).bits.inst(30) // func2
+ val op = io.in(i).bits.op
+ vld_o(i) := reqvalid(i) && (op === VInstOp.VLD) && !p
+ vld_u(i) := reqvalid(i) && (op === VInstOp.VLD) && p
+ vst_o(i) := reqvalid(i) && (op === VInstOp.VST) && !p
+ vst_u(i) := reqvalid(i) && (op === VInstOp.VST) && p && !q
+ vst_q(i) := reqvalid(i) && (op === VInstOp.VST) && p && q
+ getvl(i) := reqvalid(i) && (op === VInstOp.GETVL)
+ getmaxvl(i) := reqvalid(i) && (op === VInstOp.GETMAXVL)
}
// ---------------------------------------------------------------------------