[Reland] Add option to disable VCore.
Change-Id: Ife4f7c63e0110ead0e34d55e7d562375b37652fa
diff --git a/hdl/chisel/src/kelvin/Core.scala b/hdl/chisel/src/kelvin/Core.scala
index 100078c..72b1c03 100644
--- a/hdl/chisel/src/kelvin/Core.scala
+++ b/hdl/chisel/src/kelvin/Core.scala
@@ -34,7 +34,9 @@
val ibus = new IBusIO(p)
val dbus = new DBusIO(p)
- val axi0 = new AxiMasterIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits)
+ val axi0 = if(p.enableVector) {
+ Some(new AxiMasterIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits))
+ } else { None }
val axi1 = new AxiMasterIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits)
val iflush = new IFlushIO(p)
@@ -45,8 +47,7 @@
})
val score = SCore(p)
- val vcore = VCore(p)
- val dbusmux = DBusMux(p)
+ val vcore = if (p.enableVector) { Some(VCore(p)) } else { None }
// ---------------------------------------------------------------------------
// Scalar Core outputs.
@@ -61,17 +62,22 @@
// ---------------------------------------------------------------------------
// Vector core.
- score.io.vcore <> vcore.io.score
+ if (p.enableVector) {
+ score.io.vcore.get <> vcore.get.io.score
+ }
// ---------------------------------------------------------------------------
// Local Data Bus Port
- dbusmux.io.vldst := score.io.vldst
- dbusmux.io.vlast := vcore.io.last
-
- dbusmux.io.vcore <> vcore.io.dbus
- dbusmux.io.score <> score.io.dbus
-
- io.dbus <> dbusmux.io.dbus
+ if (p.enableVector) {
+ val dbusmux = DBusMux(p)
+ dbusmux.io.vldst := score.io.vldst.get
+ dbusmux.io.vlast := vcore.get.io.last
+ dbusmux.io.vcore <> vcore.get.io.dbus
+ dbusmux.io.score <> score.io.dbus
+ io.dbus <> dbusmux.io.dbus
+ } else {
+ io.dbus <> score.io.dbus
+ }
// ---------------------------------------------------------------------------
// Scalar DBus to AXI.
@@ -80,8 +86,10 @@
// ---------------------------------------------------------------------------
// AXI ports.
- io.axi0.read <> vcore.io.ld
- io.axi0.write <> vcore.io.st
+ if (p.enableVector) {
+ io.axi0.get.read <> vcore.get.io.ld
+ io.axi0.get.write <> vcore.get.io.st
+ }
io.axi1 <> dbus2axi.io.axi
}
diff --git a/hdl/chisel/src/kelvin/Parameters.scala b/hdl/chisel/src/kelvin/Parameters.scala
index 3546a74..9e0efc4 100644
--- a/hdl/chisel/src/kelvin/Parameters.scala
+++ b/hdl/chisel/src/kelvin/Parameters.scala
@@ -56,6 +56,8 @@
val vectorCountBits = log2Ceil(vectorBits / 8) + 1 + 2 // +2 stripmine
+ // Enable the vector core (VCore); when false, VCore and its ports are not generated.
+ val enableVector = true
val vectorAluCount = 2
val vectorReadPorts = (vectorAluCount * 3) + 1
val vectorWritePorts = 6
@@ -112,12 +114,16 @@
println("#ifndef KELVIN_PARAMETERS_H_")
println("#define KELVIN_PARAMETERS_H_")
+ println("")
+ println("#include <stdbool.h>")
+ println("")
fields.foreach { x =>
val fieldMirror = instanceMirror.reflectField(x.asTerm)
val fieldType = x.asTerm.typeSignature
val value = fieldMirror.get
val ctype = fieldType match {
case t if t =:= ru.typeOf[Int] => Some("int")
+ case t if t =:= ru.typeOf[Boolean] => Some("bool")
case _ => None
}
if (ctype != None) {
diff --git a/hdl/chisel/src/kelvin/scalar/Csr.scala b/hdl/chisel/src/kelvin/scalar/Csr.scala
index 18202d3..6db1af5 100644
--- a/hdl/chisel/src/kelvin/scalar/Csr.scala
+++ b/hdl/chisel/src/kelvin/scalar/Csr.scala
@@ -47,8 +47,12 @@
val rfwriteCount = UInt(3.W)
val storeCount = UInt(2.W)
val branchCount = UInt(1.W)
- val vrfwriteCount = UInt(3.W)
- val vstoreCount = UInt(2.W)
+ val vrfwriteCount = if (p.enableVector) {
+ Some(UInt(3.W))
+ } else { None }
+ val vstoreCount = if (p.enableVector) {
+ Some(UInt(2.W))
+ } else { None }
}
class CsrBruIO(p: Parameters) extends Bundle {
@@ -92,7 +96,9 @@
val bru = Flipped(new CsrBruIO(p))
// Vector core.
- val vcore = Input(new Bundle { val undef = Bool() })
+ val vcore = (if (p.enableVector) {
+ Some(Input(new Bundle { val undef = Bool() }))
+ } else { None })
val counters = Input(new CsrCounters(p))
@@ -187,11 +193,12 @@
val kisaEn = req.bits.index === 0xFC0.U
// Pipeline Control.
- when (io.bru.in.halt || io.vcore.undef) {
+ val vcoreUndef = if (p.enableVector) { io.vcore.get.undef } else { false.B }
+ when (io.bru.in.halt || vcoreUndef) {
halted := true.B
}
- when (io.bru.in.fault || io.vcore.undef) {
+ when (io.bru.in.fault || vcoreUndef) {
fault := true.B
}
@@ -280,8 +287,10 @@
io.counters.rfwriteCount +
io.counters.storeCount +
io.counters.branchCount +
- io.counters.vrfwriteCount +
- io.counters.vstoreCount
+ (if (p.enableVector) {
+ io.counters.vrfwriteCount.get +
+ io.counters.vstoreCount.get
+ } else { 0.U })
when (io.bru.in.mode.valid) {
mode := io.bru.in.mode.bits
diff --git a/hdl/chisel/src/kelvin/scalar/Decode.scala b/hdl/chisel/src/kelvin/scalar/Decode.scala
index 37a0c86..206c516 100644
--- a/hdl/chisel/src/kelvin/scalar/Decode.scala
+++ b/hdl/chisel/src/kelvin/scalar/Decode.scala
@@ -206,7 +206,9 @@
val dvu = Decoupled(new DvuCmd)
// Vector interface.
- val vinst = Decoupled(new VInstCmd)
+ val vinst = if (p.enableVector) {
+ Some(Decoupled(new VInstCmd))
+ } else { None }
// Branch status.
val branchTaken = Input(Bool())
@@ -223,7 +225,7 @@
val decodeEn = io.inst.valid && io.inst.ready && !io.branchTaken
// The decode logic.
- val d = DecodeInstruction(pipeline, io.inst.addr, io.inst.inst)
+ val d = DecodeInstruction(p, pipeline, io.inst.addr, io.inst.inst)
val vldst = d.vld || d.vst
val vldst_wb = vldst && io.inst.inst(28)
@@ -236,7 +238,9 @@
val isCsrImm = d.isCsr() && io.inst.inst(14)
val isCsrReg = d.isCsr() && !io.inst.inst(14)
- val isVIop = (io.vinst.bits.op === VInstOp.VIOP)
+ val isVIop = if (p.enableVector) {
+ io.vinst.get.bits.op === VInstOp.VIOP
+ } else { false.B }
val isVIopVs1 = isVIop
val isVIopVs2 = isVIop && io.inst.inst(1,0) === 0.U // exclude: .vv
@@ -267,8 +271,10 @@
// Vector extension interlock.
- val vinstEn = !(io.serializeIn.vinst || isVIop && io.serializeIn.brcond) &&
- !(d.isVector() && !io.vinst.ready)
+ val vinstEn = if (p.enableVector) {
+ !(io.serializeIn.vinst || isVIop && io.serializeIn.brcond) &&
+ !(d.isVector() && !io.vinst.get.ready)
+ } else { true.B } // No vector unit: nothing to interlock on, so never block here.
// Fence interlock.
// Input mactive used passthrough, prefer to avoid registers in Decode.
@@ -395,10 +401,12 @@
d.getvl -> MakeValid(true.B, VInstOp.GETVL),
d.getmaxvl -> MakeValid(true.B, VInstOp.GETMAXVL),
))
- io.vinst.valid := decodeEn && vinst.valid
- io.vinst.bits.addr := rdAddr
- io.vinst.bits.inst := io.inst.inst
- io.vinst.bits.op := vinst.bits
+ if (p.enableVector) {
+ io.vinst.get.valid := decodeEn && vinst.valid
+ io.vinst.get.bits.addr := rdAddr
+ io.vinst.get.bits.inst := io.inst.inst
+ io.vinst.get.bits.op := vinst.bits
+ }
// Scalar logging.
io.slog := decodeEn && d.slog
@@ -476,7 +484,7 @@
}
object DecodeInstruction {
- def apply(pipeline: Int, addr: UInt, op: UInt): DecodedInstruction = {
+ def apply(p: Parameters, pipeline: Int, addr: UInt, op: UInt): DecodedInstruction = {
val d = Wire(new DecodedInstruction)
// Immediates
@@ -557,27 +565,35 @@
// Decode scalar log.
val slog = DecodeBits(op, "01111_00_00000_xxxxx_0xx_00000_11101_11")
- // Vector length.
- d.getvl := DecodeBits(op, "0001x_xx_xxxxx_xxxxx_000_xxxxx_11101_11") && op(26,25) =/= 3.U && (op(24,20) =/= 0.U || op(19,15) =/= 0.U)
- d.getmaxvl := DecodeBits(op, "0001x_xx_00000_00000_000_xxxxx_11101_11") && op(26,25) =/= 3.U
+ if (p.enableVector) {
+ // Vector length.
+ d.getvl := DecodeBits(op, "0001x_xx_xxxxx_xxxxx_000_xxxxx_11101_11") && op(26,25) =/= 3.U && (op(24,20) =/= 0.U || op(19,15) =/= 0.U)
+ d.getmaxvl := DecodeBits(op, "0001x_xx_00000_00000_000_xxxxx_11101_11") && op(26,25) =/= 3.U
- // Vector load/store.
- d.vld := DecodeBits(op, "000xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vld
+ // Vector load/store.
+ d.vld := DecodeBits(op, "000xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vld
- d.vst := DecodeBits(op, "001xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") || // vst
- DecodeBits(op, "011xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vstq
+ d.vst := DecodeBits(op, "001xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") || // vst
+ DecodeBits(op, "011xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vstq
- // Convolution transfer accumulators to vregs. Also decodes acset/actr ops.
- val vconv = DecodeBits(op, "010100_000000_000000_xx_xxxxxx_x_111_11")
+ // Convolution transfer accumulators to vregs. Also decodes acset/actr ops.
+ val vconv = DecodeBits(op, "010100_000000_000000_xx_xxxxxx_x_111_11")
- // Duplicate
- val vdup = DecodeBits(op, "01000x_0xxxxx_000000_xx_xxxxxx_x_111_11") && op(13,12) <= 2.U
- val vdupi = vdup && op(26) === 0.U
+ // Duplicate
+ val vdup = DecodeBits(op, "01000x_0xxxxx_000000_xx_xxxxxx_x_111_11") && op(13,12) <= 2.U
+ val vdupi = vdup && op(26) === 0.U
- // Vector instructions.
- d.viop := op(0) === 0.U || // .vv .vx
- op(1,0) === 1.U || // .vvv .vxv
- vconv || vdupi
+ // Vector instructions.
+ d.viop := op(0) === 0.U || // .vv .vx
+ op(1,0) === 1.U || // .vvv .vxv
+ vconv || vdupi
+ } else {
+ d.getvl := false.B
+ d.getmaxvl := false.B
+ d.vld := false.B
+ d.vst := false.B
+ d.viop := false.B
+ }
// [extensions] Core controls.
d.ebreak := DecodeBits(op, "000000000001_00000_000_00000_11100_11")
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala
index 0c51a55..03c8dd7 100644
--- a/hdl/chisel/src/kelvin/scalar/SCore.scala
+++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -36,9 +36,11 @@
val ibus = new IBusIO(p)
val dbus = new DBusIO(p)
val ubus = new DBusIO(p)
- val vldst = Output(Bool())
- val vcore = Flipped(new VCoreIO(p))
+ val vldst = if (p.enableVector) { Some(Output(Bool())) } else { None }
+ val vcore = if (p.enableVector) {
+ Some(Flipped(new VCoreIO(p)))
+ } else { None }
val iflush = new IFlushIO(p)
val dflush = new DFlushIO(p)
@@ -127,7 +129,7 @@
decode(i).io.scoreboard.regd := regfile.io.scoreboard.regd | scoreboard_spec(i)
}
- decode(0).io.mactive := io.vcore.mactive
+ decode(0).io.mactive := (if (p.enableVector) { io.vcore.get.mactive } else { false.B })
for (i <- 1 until p.instructionLanes) {
decode(i).io.mactive := false.B
}
@@ -160,8 +162,10 @@
csr.io.counters.rfwriteCount := regfile.io.rfwriteCount
csr.io.counters.storeCount := lsu.io.storeCount
csr.io.counters.branchCount := bru(0).io.taken.valid
- csr.io.counters.vrfwriteCount := io.vcore.vrfwriteCount
- csr.io.counters.vstoreCount := io.vcore.vstoreCount
+ if (p.enableVector) {
+ csr.io.counters.vrfwriteCount.get := io.vcore.get.vrfwriteCount
+ csr.io.counters.vstoreCount.get := io.vcore.get.vstoreCount
+ }
// ---------------------------------------------------------------------------
// Control Status Unit
@@ -170,7 +174,9 @@
csr.io.req <> decode(0).io.csr
csr.io.rs1 := regfile.io.readData(0)
- csr.io.vcore.undef := io.vcore.undef
+ if (p.enableVector) {
+ csr.io.vcore.get.undef := io.vcore.get.undef
+ }
// ---------------------------------------------------------------------------
// Status
@@ -219,23 +225,34 @@
regfile.io.writeData(i).valid := csr0Valid ||
alu(i).io.rd.valid || bru(i).io.rd.valid ||
- io.vcore.rd(i).valid
+ (if (p.enableVector) {
+ io.vcore.get.rd(i).valid
+ } else { false.B })
regfile.io.writeData(i).addr :=
MuxOR(csr0Valid, csr0Addr) |
MuxOR(alu(i).io.rd.valid, alu(i).io.rd.addr) |
MuxOR(bru(i).io.rd.valid, bru(i).io.rd.addr) |
- MuxOR(io.vcore.rd(i).valid, io.vcore.rd(i).addr)
+ (if (p.enableVector) {
+ MuxOR(io.vcore.get.rd(i).valid, io.vcore.get.rd(i).addr)
+ } else { 0.U }) // UInt zero is the identity for this OR-chain of addresses.
regfile.io.writeData(i).data :=
MuxOR(csr0Valid, csr0Data) |
MuxOR(alu(i).io.rd.valid, alu(i).io.rd.data) |
MuxOR(bru(i).io.rd.valid, bru(i).io.rd.data) |
- MuxOR(io.vcore.rd(i).valid, io.vcore.rd(i).data)
+ (if (p.enableVector) {
+ MuxOR(io.vcore.get.rd(i).valid, io.vcore.get.rd(i).data)
+ } else { 0.U }) // UInt zero is the identity for this OR-chain of data words.
- assert((csr0Valid +&
- alu(i).io.rd.valid +& bru(i).io.rd.valid +&
- io.vcore.rd(i).valid) <= 1.U)
+ if (p.enableVector) {
+ assert((csr0Valid +&
+ alu(i).io.rd.valid +& bru(i).io.rd.valid +&
+ io.vcore.get.rd(i).valid) <= 1.U)
+ } else {
+ assert((csr0Valid +&
+ alu(i).io.rd.valid +& bru(i).io.rd.valid) <= 1.U)
+ }
}
val mluDvuOffset = p.instructionLanes
@@ -256,12 +273,9 @@
// ---------------------------------------------------------------------------
// Vector Extension
- for (i <- 0 until p.instructionLanes) {
- io.vcore.vinst(i) <> decode(i).io.vinst
- }
-
- for (i <- 0 until p.instructionLanes * 2) {
- io.vcore.rs(i) := regfile.io.readData(i)
+ if (p.enableVector) {
+ io.vcore.get.vinst <> decode.map(_.io.vinst.get)
+ io.vcore.get.rs := regfile.io.readData
}
// ---------------------------------------------------------------------------
@@ -273,7 +287,9 @@
io.dbus <> lsu.io.dbus
io.ubus <> lsu.io.ubus
- io.vldst := lsu.io.vldst
+ if (p.enableVector) {
+ io.vldst.get := lsu.io.vldst
+ }
// ---------------------------------------------------------------------------
// Scalar logging interface
diff --git a/hdl/chisel/src/matcha/Kelvin.scala b/hdl/chisel/src/matcha/Kelvin.scala
index 5fd07aa..ad5a386 100644
--- a/hdl/chisel/src/matcha/Kelvin.scala
+++ b/hdl/chisel/src/matcha/Kelvin.scala
@@ -120,7 +120,9 @@
// -------------------------------------------------------------------------
// Bus Mux.
- bus.io.in0 <> core.io.axi0
+ if (p.enableVector) {
+ bus.io.in0 <> core.io.axi0.get
+ }
bus.io.in1 <> core.io.axi1
bus.io.in2 <> l1d.io.axi
bus.io.in3.read <> l1i.io.axi.read
diff --git a/tests/verilator_sim/kelvin/core_tb.cc b/tests/verilator_sim/kelvin/core_tb.cc
index 140d0d5..45d2bc4 100644
--- a/tests/verilator_sim/kelvin/core_tb.cc
+++ b/tests/verilator_sim/kelvin/core_tb.cc
@@ -92,6 +92,7 @@
sc_signal<sc_bv<32> > io_slog_data;
sc_signal<sc_bv<4> > io_debug_en;
sc_signal<sc_bv<32> > io_debug_cycles;
+#if KP_enableVector
sc_signal<bool> io_axi0_write_addr_ready;
sc_signal<bool> io_axi0_write_addr_valid;
sc_signal<sc_bv<32> > io_axi0_write_addr_bits_addr;
@@ -113,6 +114,7 @@
sc_signal<sc_bv<2> > io_axi0_read_data_bits_resp;
sc_signal<sc_bv<kUncId> > io_axi0_read_data_bits_id;
sc_signal<sc_bv<kUncBits> > io_axi0_read_data_bits_data;
+#endif // KP_enableVector
sc_signal<bool> io_axi1_write_addr_ready;
sc_signal<bool> io_axi1_write_addr_valid;
sc_signal<sc_bv<32> > io_axi1_write_addr_bits_addr;
@@ -228,6 +230,7 @@
#define BINDAXI(a) \
core.a(a); \
mif.a(a)
+#if KP_enableVector
BINDAXI(io_axi0_write_addr_ready);
BINDAXI(io_axi0_write_addr_valid);
BINDAXI(io_axi0_write_addr_bits_addr);
@@ -249,6 +252,7 @@
BINDAXI(io_axi0_read_data_bits_resp);
BINDAXI(io_axi0_read_data_bits_id);
BINDAXI(io_axi0_read_data_bits_data);
+#endif // KP_enableVector
BINDAXI(io_axi1_write_addr_ready);
BINDAXI(io_axi1_write_addr_valid);
BINDAXI(io_axi1_write_addr_bits_addr);