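Add option to disable VCore.

Add a Parameters.enableVector flag (default: true). When it is false, the
VCore, the DBusMux and the axi0 port are not instantiated, the vector-facing
IOs in SCore, Csr and Decode become None, and the vector decode flags are
forced to false. The Verilator testbench still binds axi0 unconditionally
(see the TODO in core_tb.cc).

Change-Id: Ie139a62fb0ac755dc5fbbec61b8b7aa68c17425e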
diff --git a/hdl/chisel/src/kelvin/Core.scala b/hdl/chisel/src/kelvin/Core.scala index 100078c..72b1c03 100644 --- a/hdl/chisel/src/kelvin/Core.scala +++ b/hdl/chisel/src/kelvin/Core.scala
@@ -34,7 +34,9 @@ val ibus = new IBusIO(p) val dbus = new DBusIO(p) - val axi0 = new AxiMasterIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits) + val axi0 = if(p.enableVector) { + Some(new AxiMasterIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits)) + } else { None } val axi1 = new AxiMasterIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits) val iflush = new IFlushIO(p) @@ -45,8 +47,7 @@ }) val score = SCore(p) - val vcore = VCore(p) - val dbusmux = DBusMux(p) + val vcore = if (p.enableVector) { Some(VCore(p)) } else { None } // --------------------------------------------------------------------------- // Scalar Core outputs. @@ -61,17 +62,22 @@ // --------------------------------------------------------------------------- // Vector core. - score.io.vcore <> vcore.io.score + if (p.enableVector) { + score.io.vcore.get <> vcore.get.io.score + } // --------------------------------------------------------------------------- // Local Data Bus Port - dbusmux.io.vldst := score.io.vldst - dbusmux.io.vlast := vcore.io.last - - dbusmux.io.vcore <> vcore.io.dbus - dbusmux.io.score <> score.io.dbus - - io.dbus <> dbusmux.io.dbus + if (p.enableVector) { + val dbusmux = DBusMux(p) + dbusmux.io.vldst := score.io.vldst.get + dbusmux.io.vlast := vcore.get.io.last + dbusmux.io.vcore <> vcore.get.io.dbus + dbusmux.io.score <> score.io.dbus + io.dbus <> dbusmux.io.dbus + } else { + io.dbus <> score.io.dbus + } // --------------------------------------------------------------------------- // Scalar DBus to AXI. @@ -80,8 +86,10 @@ // --------------------------------------------------------------------------- // AXI ports. - io.axi0.read <> vcore.io.ld - io.axi0.write <> vcore.io.st + if (p.enableVector) { + io.axi0.get.read <> vcore.get.io.ld + io.axi0.get.write <> vcore.get.io.st + } io.axi1 <> dbus2axi.io.axi }
diff --git a/hdl/chisel/src/kelvin/Parameters.scala b/hdl/chisel/src/kelvin/Parameters.scala index 3546a74..a93f69e 100644 --- a/hdl/chisel/src/kelvin/Parameters.scala +++ b/hdl/chisel/src/kelvin/Parameters.scala
@@ -56,6 +56,8 @@ val vectorCountBits = log2Ceil(vectorBits / 8) + 1 + 2 // +2 stripmine + // Enable Vector + val enableVector = true val vectorAluCount = 2 val vectorReadPorts = (vectorAluCount * 3) + 1 val vectorWritePorts = 6
diff --git a/hdl/chisel/src/kelvin/scalar/Csr.scala b/hdl/chisel/src/kelvin/scalar/Csr.scala index 18202d3..621de70 100644 --- a/hdl/chisel/src/kelvin/scalar/Csr.scala +++ b/hdl/chisel/src/kelvin/scalar/Csr.scala
@@ -47,8 +47,12 @@ val rfwriteCount = UInt(3.W) val storeCount = UInt(2.W) val branchCount = UInt(1.W) - val vrfwriteCount = UInt(3.W) - val vstoreCount = UInt(2.W) + val vrfwriteCount = if (p.enableVector) { + Some(UInt(3.W)) + } else { None } + val vstoreCount = if (p.enableVector) { + Some(UInt(2.W)) + } else { None } } class CsrBruIO(p: Parameters) extends Bundle { @@ -92,7 +96,9 @@ val bru = Flipped(new CsrBruIO(p)) // Vector core. - val vcore = Input(new Bundle { val undef = Bool() }) + val vcore = (if (p.enableVector) { + Some(Input(new Bundle { val undef = Bool() })) + } else { None }) val counters = Input(new CsrCounters(p)) @@ -187,13 +193,15 @@ val kisaEn = req.bits.index === 0xFC0.U // Pipeline Control. - when (io.bru.in.halt || io.vcore.undef) { + val vcoreUndef = if (p.enableVector) { io.vcore.get.undef } else { false.B } + when (io.bru.in.halt || vcoreUndef) { halted := true.B } - when (io.bru.in.fault || io.vcore.undef) { + when (io.bru.in.fault || vcoreUndef) { fault := true.B } + io.halted := halted io.fault := fault @@ -280,8 +288,10 @@ io.counters.rfwriteCount + io.counters.storeCount + io.counters.branchCount + - io.counters.vrfwriteCount + - io.counters.vstoreCount + (if (p.enableVector) { + io.counters.vrfwriteCount.get + + io.counters.vstoreCount.get + } else { 0.U }) when (io.bru.in.mode.valid) { mode := io.bru.in.mode.bits
diff --git a/hdl/chisel/src/kelvin/scalar/Decode.scala b/hdl/chisel/src/kelvin/scalar/Decode.scala index 37a0c86..206c516 100644 --- a/hdl/chisel/src/kelvin/scalar/Decode.scala +++ b/hdl/chisel/src/kelvin/scalar/Decode.scala
@@ -206,7 +206,9 @@ val dvu = Decoupled(new DvuCmd) // Vector interface. - val vinst = Decoupled(new VInstCmd) + val vinst = if (p.enableVector) { + Some(Decoupled(new VInstCmd)) + } else { None } // Branch status. val branchTaken = Input(Bool()) @@ -223,7 +225,7 @@ val decodeEn = io.inst.valid && io.inst.ready && !io.branchTaken // The decode logic. - val d = DecodeInstruction(pipeline, io.inst.addr, io.inst.inst) + val d = DecodeInstruction(p, pipeline, io.inst.addr, io.inst.inst) val vldst = d.vld || d.vst val vldst_wb = vldst && io.inst.inst(28) @@ -236,7 +238,9 @@ val isCsrImm = d.isCsr() && io.inst.inst(14) val isCsrReg = d.isCsr() && !io.inst.inst(14) - val isVIop = (io.vinst.bits.op === VInstOp.VIOP) + val isVIop = if (p.enableVector) { + io.vinst.get.bits.op === VInstOp.VIOP + } else { false.B } val isVIopVs1 = isVIop val isVIopVs2 = isVIop && io.inst.inst(1,0) === 0.U // exclude: .vv @@ -267,8 +271,10 @@ // Vector extension interlock. - val vinstEn = !(io.serializeIn.vinst || isVIop && io.serializeIn.brcond) && - !(d.isVector() && !io.vinst.ready) + val vinstEn = if (p.enableVector) { + !(io.serializeIn.vinst || isVIop && io.serializeIn.brcond) && + !(d.isVector() && !io.vinst.get.ready) + } else { true.B } // No vector unit: the interlock must never block. // Fence interlock. // Input mactive used passthrough, prefer to avoid registers in Decode. @@ -395,10 +401,12 @@ d.getvl -> MakeValid(true.B, VInstOp.GETVL), d.getmaxvl -> MakeValid(true.B, VInstOp.GETMAXVL), )) - io.vinst.valid := decodeEn && vinst.valid - io.vinst.bits.addr := rdAddr - io.vinst.bits.inst := io.inst.inst - io.vinst.bits.op := vinst.bits + if (p.enableVector) { + io.vinst.get.valid := decodeEn && vinst.valid + io.vinst.get.bits.addr := rdAddr + io.vinst.get.bits.inst := io.inst.inst + io.vinst.get.bits.op := vinst.bits + } // Scalar logging. 
io.slog := decodeEn && d.slog @@ -476,7 +484,7 @@ } object DecodeInstruction { - def apply(pipeline: Int, addr: UInt, op: UInt): DecodedInstruction = { + def apply(p: Parameters, pipeline: Int, addr: UInt, op: UInt): DecodedInstruction = { val d = Wire(new DecodedInstruction) // Immediates @@ -557,27 +565,35 @@ // Decode scalar log. val slog = DecodeBits(op, "01111_00_00000_xxxxx_0xx_00000_11101_11") - // Vector length. - d.getvl := DecodeBits(op, "0001x_xx_xxxxx_xxxxx_000_xxxxx_11101_11") && op(26,25) =/= 3.U && (op(24,20) =/= 0.U || op(19,15) =/= 0.U) - d.getmaxvl := DecodeBits(op, "0001x_xx_00000_00000_000_xxxxx_11101_11") && op(26,25) =/= 3.U + if (p.enableVector) { + // Vector length. + d.getvl := DecodeBits(op, "0001x_xx_xxxxx_xxxxx_000_xxxxx_11101_11") && op(26,25) =/= 3.U && (op(24,20) =/= 0.U || op(19,15) =/= 0.U) + d.getmaxvl := DecodeBits(op, "0001x_xx_00000_00000_000_xxxxx_11101_11") && op(26,25) =/= 3.U - // Vector load/store. - d.vld := DecodeBits(op, "000xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vld + // Vector load/store. + d.vld := DecodeBits(op, "000xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vld - d.vst := DecodeBits(op, "001xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") || // vst - DecodeBits(op, "011xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vstq + d.vst := DecodeBits(op, "001xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") || // vst + DecodeBits(op, "011xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") // vstq - // Convolution transfer accumulators to vregs. Also decodes acset/actr ops. - val vconv = DecodeBits(op, "010100_000000_000000_xx_xxxxxx_x_111_11") + // Convolution transfer accumulators to vregs. Also decodes acset/actr ops. 
+ val vconv = DecodeBits(op, "010100_000000_000000_xx_xxxxxx_x_111_11") - // Duplicate - val vdup = DecodeBits(op, "01000x_0xxxxx_000000_xx_xxxxxx_x_111_11") && op(13,12) <= 2.U - val vdupi = vdup && op(26) === 0.U + // Duplicate + val vdup = DecodeBits(op, "01000x_0xxxxx_000000_xx_xxxxxx_x_111_11") && op(13,12) <= 2.U + val vdupi = vdup && op(26) === 0.U - // Vector instructions. - d.viop := op(0) === 0.U || // .vv .vx - op(1,0) === 1.U || // .vvv .vxv - vconv || vdupi + // Vector instructions. + d.viop := op(0) === 0.U || // .vv .vx + op(1,0) === 1.U || // .vvv .vxv + vconv || vdupi + } else { + d.getvl := false.B + d.getmaxvl := false.B + d.vld := false.B + d.vst := false.B + d.viop := false.B + } // [extensions] Core controls. d.ebreak := DecodeBits(op, "000000000001_00000_000_00000_11100_11")
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala index 0c51a55..6c9c47e 100644 --- a/hdl/chisel/src/kelvin/scalar/SCore.scala +++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -36,9 +36,11 @@ val ibus = new IBusIO(p) val dbus = new DBusIO(p) val ubus = new DBusIO(p) - val vldst = Output(Bool()) - val vcore = Flipped(new VCoreIO(p)) + val vldst = if (p.enableVector) { Some(Output(Bool())) } else { None } + val vcore = if (p.enableVector) { + Some(Flipped(new VCoreIO(p))) + } else { None } val iflush = new IFlushIO(p) val dflush = new DFlushIO(p) @@ -127,7 +129,7 @@ decode(i).io.scoreboard.regd := regfile.io.scoreboard.regd | scoreboard_spec(i) } - decode(0).io.mactive := io.vcore.mactive + decode(0).io.mactive := (if (p.enableVector) { io.vcore.get.mactive } else { false.B }) for (i <- 1 until p.instructionLanes) { decode(i).io.mactive := false.B } @@ -160,8 +162,10 @@ csr.io.counters.rfwriteCount := regfile.io.rfwriteCount csr.io.counters.storeCount := lsu.io.storeCount csr.io.counters.branchCount := bru(0).io.taken.valid - csr.io.counters.vrfwriteCount := io.vcore.vrfwriteCount - csr.io.counters.vstoreCount := io.vcore.vstoreCount + if (p.enableVector) { + csr.io.counters.vrfwriteCount.get := io.vcore.get.vrfwriteCount + csr.io.counters.vstoreCount.get := io.vcore.get.vstoreCount + } // --------------------------------------------------------------------------- // Control Status Unit @@ -170,7 +174,9 @@ csr.io.req <> decode(0).io.csr csr.io.rs1 := regfile.io.readData(0) - csr.io.vcore.undef := io.vcore.undef + if (p.enableVector) { + csr.io.vcore.get.undef := io.vcore.get.undef + } // --------------------------------------------------------------------------- // Status @@ -219,23 +225,35 @@ regfile.io.writeData(i).valid := csr0Valid || alu(i).io.rd.valid || bru(i).io.rd.valid || - io.vcore.rd(i).valid + (if (p.enableVector) { + io.vcore.get.rd(i).valid + } else { false.B }) regfile.io.writeData(i).addr := MuxOR(csr0Valid, csr0Addr) | MuxOR(alu(i).io.rd.valid, alu(i).io.rd.addr) | MuxOR(bru(i).io.rd.valid, bru(i).io.rd.addr) | - MuxOR(io.vcore.rd(i).valid, io.vcore.rd(i).addr) + (if (p.enableVector) { + MuxOR(io.vcore.get.rd(i).valid, 
io.vcore.get.rd(i).addr) + } else { false.B }) + regfile.io.writeData(i).data := MuxOR(csr0Valid, csr0Data) | MuxOR(alu(i).io.rd.valid, alu(i).io.rd.data) | MuxOR(bru(i).io.rd.valid, bru(i).io.rd.data) | - MuxOR(io.vcore.rd(i).valid, io.vcore.rd(i).data) + (if (p.enableVector) { + MuxOR(io.vcore.get.rd(i).valid, io.vcore.get.rd(i).data) + } else { false.B }) - assert((csr0Valid +& - alu(i).io.rd.valid +& bru(i).io.rd.valid +& - io.vcore.rd(i).valid) <= 1.U) + if (p.enableVector) { + assert((csr0Valid +& + alu(i).io.rd.valid +& bru(i).io.rd.valid +& + io.vcore.get.rd(i).valid) <= 1.U) + } else { + assert((csr0Valid +& + alu(i).io.rd.valid +& bru(i).io.rd.valid) <= 1.U) + } } val mluDvuOffset = p.instructionLanes @@ -256,12 +274,9 @@ // --------------------------------------------------------------------------- // Vector Extension - for (i <- 0 until p.instructionLanes) { - io.vcore.vinst(i) <> decode(i).io.vinst - } - - for (i <- 0 until p.instructionLanes * 2) { - io.vcore.rs(i) := regfile.io.readData(i) + if (p.enableVector) { + io.vcore.get.vinst <> decode.map(_.io.vinst.get) + io.vcore.get.rs := regfile.io.readData } // --------------------------------------------------------------------------- @@ -273,7 +288,9 @@ io.dbus <> lsu.io.dbus io.ubus <> lsu.io.ubus - io.vldst := lsu.io.vldst + if (p.enableVector) { + io.vldst.get := lsu.io.vldst + } // --------------------------------------------------------------------------- // Scalar logging interface
diff --git a/tests/verilator_sim/kelvin/core_tb.cc b/tests/verilator_sim/kelvin/core_tb.cc index 73396ab..9fff817 100644 --- a/tests/verilator_sim/kelvin/core_tb.cc +++ b/tests/verilator_sim/kelvin/core_tb.cc
@@ -98,6 +98,7 @@ sc_signal<sc_bv<32> > io_debug_inst2; sc_signal<sc_bv<32> > io_debug_inst3; sc_signal<sc_bv<32> > io_debug_cycles; +#if 1 sc_signal<bool> io_axi0_write_addr_ready; sc_signal<bool> io_axi0_write_addr_valid; sc_signal<sc_bv<32> > io_axi0_write_addr_bits_addr; @@ -119,6 +120,7 @@ sc_signal<sc_bv<2> > io_axi0_read_data_bits_resp; sc_signal<sc_bv<kUncId> > io_axi0_read_data_bits_id; sc_signal<sc_bv<kUncBits> > io_axi0_read_data_bits_data; +#endif // TODO: Disable if no VCore sc_signal<bool> io_axi1_write_addr_ready; sc_signal<bool> io_axi1_write_addr_valid; sc_signal<sc_bv<32> > io_axi1_write_addr_bits_addr; @@ -230,6 +232,7 @@ #define BINDAXI(a) \ core.a(a); \ mif.a(a) +#if 1 BINDAXI(io_axi0_write_addr_ready); BINDAXI(io_axi0_write_addr_valid); BINDAXI(io_axi0_write_addr_bits_addr); @@ -251,6 +254,7 @@ BINDAXI(io_axi0_read_data_bits_resp); BINDAXI(io_axi0_read_data_bits_id); BINDAXI(io_axi0_read_data_bits_data); +#endif // TODO: Disable if no VCore BINDAXI(io_axi1_write_addr_ready); BINDAXI(io_axi1_write_addr_valid); BINDAXI(io_axi1_write_addr_bits_addr);