[Reland] Add option to disable VCore.

Change-Id: Ife4f7c63e0110ead0e34d55e7d562375b37652fa
diff --git a/hdl/chisel/src/kelvin/Core.scala b/hdl/chisel/src/kelvin/Core.scala
index 100078c..72b1c03 100644
--- a/hdl/chisel/src/kelvin/Core.scala
+++ b/hdl/chisel/src/kelvin/Core.scala
@@ -34,7 +34,9 @@
 
     val ibus = new IBusIO(p)
     val dbus = new DBusIO(p)
-    val axi0 = new AxiMasterIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits)
+    val axi0 = if(p.enableVector) {
+      Some(new AxiMasterIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits))
+    } else { None }
     val axi1 = new AxiMasterIO(p.axi2AddrBits, p.axi2DataBits, p.axi2IdBits)
 
     val iflush = new IFlushIO(p)
@@ -45,8 +47,7 @@
   })
 
   val score = SCore(p)
-  val vcore = VCore(p)
-  val dbusmux = DBusMux(p)
+  val vcore = if (p.enableVector) { Some(VCore(p)) } else { None }
 
   // ---------------------------------------------------------------------------
   // Scalar Core outputs.
@@ -61,17 +62,22 @@
 
   // ---------------------------------------------------------------------------
   // Vector core.
-  score.io.vcore <> vcore.io.score
+  if (p.enableVector) {
+    score.io.vcore.get <> vcore.get.io.score
+  }
 
   // ---------------------------------------------------------------------------
   // Local Data Bus Port
-  dbusmux.io.vldst := score.io.vldst
-  dbusmux.io.vlast := vcore.io.last
-
-  dbusmux.io.vcore <> vcore.io.dbus
-  dbusmux.io.score <> score.io.dbus
-
-  io.dbus <> dbusmux.io.dbus
+  if (p.enableVector) {
+    val dbusmux = DBusMux(p)
+    dbusmux.io.vldst := score.io.vldst.get
+    dbusmux.io.vlast := vcore.get.io.last
+    dbusmux.io.vcore <> vcore.get.io.dbus
+    dbusmux.io.score <> score.io.dbus
+    io.dbus <> dbusmux.io.dbus
+  } else {
+    io.dbus <> score.io.dbus
+  }
 
   // ---------------------------------------------------------------------------
   // Scalar DBus to AXI.
@@ -80,8 +86,10 @@
 
   // ---------------------------------------------------------------------------
   // AXI ports.
-  io.axi0.read  <> vcore.io.ld
-  io.axi0.write <> vcore.io.st
+  if (p.enableVector) {
+    io.axi0.get.read  <> vcore.get.io.ld
+    io.axi0.get.write <> vcore.get.io.st
+  }
 
   io.axi1 <> dbus2axi.io.axi
 }
diff --git a/hdl/chisel/src/kelvin/Parameters.scala b/hdl/chisel/src/kelvin/Parameters.scala
index 3546a74..9e0efc4 100644
--- a/hdl/chisel/src/kelvin/Parameters.scala
+++ b/hdl/chisel/src/kelvin/Parameters.scala
@@ -56,6 +56,8 @@
 
   val vectorCountBits = log2Ceil(vectorBits / 8) + 1 + 2  // +2 stripmine
 
+  // Enable Vector
+  val enableVector = true
   val vectorAluCount = 2
   val vectorReadPorts = (vectorAluCount * 3) + 1
   val vectorWritePorts = 6
@@ -112,12 +114,16 @@
 
   println("#ifndef KELVIN_PARAMETERS_H_")
   println("#define KELVIN_PARAMETERS_H_")
+  println("")
+  println("#include <stdbool.h>")
+  println("")
   fields.foreach { x =>
     val fieldMirror = instanceMirror.reflectField(x.asTerm)
     val fieldType = x.asTerm.typeSignature
     val value = fieldMirror.get
     val ctype = fieldType match {
       case t if t =:= ru.typeOf[Int] => Some("int")
+      case t if t =:= ru.typeOf[Boolean] => Some("bool")
       case _ => None
     }
     if (ctype != None) {
diff --git a/hdl/chisel/src/kelvin/scalar/Csr.scala b/hdl/chisel/src/kelvin/scalar/Csr.scala
index 18202d3..6db1af5 100644
--- a/hdl/chisel/src/kelvin/scalar/Csr.scala
+++ b/hdl/chisel/src/kelvin/scalar/Csr.scala
@@ -47,8 +47,12 @@
   val rfwriteCount = UInt(3.W)
   val storeCount = UInt(2.W)
   val branchCount = UInt(1.W)
-  val vrfwriteCount = UInt(3.W)
-  val vstoreCount = UInt(2.W)
+  val vrfwriteCount = if (p.enableVector) {
+    Some(UInt(3.W))
+  } else { None }
+  val vstoreCount = if (p.enableVector) {
+    Some(UInt(2.W))
+  } else { None }
 }
 
 class CsrBruIO(p: Parameters) extends Bundle {
@@ -92,7 +96,9 @@
     val bru = Flipped(new CsrBruIO(p))
 
     // Vector core.
-    val vcore = Input(new Bundle { val undef = Bool() })
+    val vcore = (if (p.enableVector) {
+      Some(Input(new Bundle { val undef = Bool() }))
+    } else { None })
 
     val counters = Input(new CsrCounters(p))
 
@@ -187,11 +193,12 @@
   val kisaEn      = req.bits.index === 0xFC0.U
 
   // Pipeline Control.
-  when (io.bru.in.halt || io.vcore.undef) {
+  val vcoreUndef = if (p.enableVector) { io.vcore.get.undef } else { false.B }
+  when (io.bru.in.halt || vcoreUndef) {
     halted := true.B
   }
 
-  when (io.bru.in.fault || io.vcore.undef) {
+  when (io.bru.in.fault || vcoreUndef) {
     fault := true.B
   }
 
@@ -280,8 +287,10 @@
     io.counters.rfwriteCount +
     io.counters.storeCount +
     io.counters.branchCount +
-    io.counters.vrfwriteCount +
-    io.counters.vstoreCount
+    (if (p.enableVector) {
+      io.counters.vrfwriteCount.get +
+      io.counters.vstoreCount.get
+    } else { 0.U })
 
   when (io.bru.in.mode.valid) {
     mode := io.bru.in.mode.bits
diff --git a/hdl/chisel/src/kelvin/scalar/Decode.scala b/hdl/chisel/src/kelvin/scalar/Decode.scala
index 37a0c86..206c516 100644
--- a/hdl/chisel/src/kelvin/scalar/Decode.scala
+++ b/hdl/chisel/src/kelvin/scalar/Decode.scala
@@ -206,7 +206,9 @@
     val dvu = Decoupled(new DvuCmd)
 
     // Vector interface.
-    val vinst = Decoupled(new VInstCmd)
+    val vinst = if (p.enableVector) {
+      Some(Decoupled(new VInstCmd))
+    } else { None }
 
     // Branch status.
     val branchTaken = Input(Bool())
@@ -223,7 +225,7 @@
   val decodeEn = io.inst.valid && io.inst.ready && !io.branchTaken
 
   // The decode logic.
-  val d = DecodeInstruction(pipeline, io.inst.addr, io.inst.inst)
+  val d = DecodeInstruction(p, pipeline, io.inst.addr, io.inst.inst)
 
   val vldst = d.vld || d.vst
   val vldst_wb = vldst && io.inst.inst(28)
@@ -236,7 +238,9 @@
   val isCsrImm = d.isCsr() &&  io.inst.inst(14)
   val isCsrReg = d.isCsr() && !io.inst.inst(14)
 
-  val isVIop = (io.vinst.bits.op === VInstOp.VIOP)
+  val isVIop = if (p.enableVector) {
+    io.vinst.get.bits.op === VInstOp.VIOP
+  } else { false.B }
 
   val isVIopVs1 = isVIop
   val isVIopVs2 = isVIop && io.inst.inst(1,0) === 0.U  // exclude: .vv
@@ -267,8 +271,10 @@
 
 
   // Vector extension interlock.
-  val vinstEn = !(io.serializeIn.vinst || isVIop && io.serializeIn.brcond) &&
-                !(d.isVector() && !io.vinst.ready)
+  val vinstEn = if (p.enableVector) {  // True when no vector interlock blocks this instruction.
+      !(io.serializeIn.vinst || isVIop && io.serializeIn.brcond) &&
+      !(d.isVector() && !io.vinst.get.ready)
+  } else { true.B }  // No vector unit => no vector interlock; matches the enabled path for non-vector ops.
 
   // Fence interlock.
   // Input mactive used passthrough, prefer to avoid registers in Decode.
@@ -395,10 +401,12 @@
     d.getvl    -> MakeValid(true.B, VInstOp.GETVL),
     d.getmaxvl -> MakeValid(true.B, VInstOp.GETMAXVL),
   ))
-  io.vinst.valid := decodeEn && vinst.valid
-  io.vinst.bits.addr := rdAddr
-  io.vinst.bits.inst := io.inst.inst
-  io.vinst.bits.op := vinst.bits
+  if (p.enableVector) {
+    io.vinst.get.valid := decodeEn && vinst.valid
+    io.vinst.get.bits.addr := rdAddr
+    io.vinst.get.bits.inst := io.inst.inst
+    io.vinst.get.bits.op := vinst.bits
+  }
 
   // Scalar logging.
   io.slog := decodeEn && d.slog
@@ -476,7 +484,7 @@
 }
 
 object DecodeInstruction {
-  def apply(pipeline: Int, addr: UInt, op: UInt): DecodedInstruction = {
+  def apply(p: Parameters, pipeline: Int, addr: UInt, op: UInt): DecodedInstruction = {
     val d = Wire(new DecodedInstruction)
 
     // Immediates
@@ -557,27 +565,35 @@
     // Decode scalar log.
     val slog = DecodeBits(op, "01111_00_00000_xxxxx_0xx_00000_11101_11")
 
-    // Vector length.
-    d.getvl    := DecodeBits(op, "0001x_xx_xxxxx_xxxxx_000_xxxxx_11101_11") && op(26,25) =/= 3.U && (op(24,20) =/= 0.U || op(19,15) =/= 0.U)
-    d.getmaxvl := DecodeBits(op, "0001x_xx_00000_00000_000_xxxxx_11101_11") && op(26,25) =/= 3.U
+    if (p.enableVector) {
+      // Vector length.
+      d.getvl    := DecodeBits(op, "0001x_xx_xxxxx_xxxxx_000_xxxxx_11101_11") && op(26,25) =/= 3.U && (op(24,20) =/= 0.U || op(19,15) =/= 0.U)
+      d.getmaxvl := DecodeBits(op, "0001x_xx_00000_00000_000_xxxxx_11101_11") && op(26,25) =/= 3.U
 
-    // Vector load/store.
-    d.vld := DecodeBits(op, "000xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11")     // vld
+      // Vector load/store.
+      d.vld := DecodeBits(op, "000xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11")     // vld
 
-    d.vst := DecodeBits(op, "001xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") ||  // vst
-             DecodeBits(op, "011xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11")     // vstq
+      d.vst := DecodeBits(op, "001xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11") ||  // vst
+               DecodeBits(op, "011xxx_0xxxxx_xxxxx0_xx_xxxxxx_x_111_11")     // vstq
 
-    // Convolution transfer accumulators to vregs. Also decodes acset/actr ops.
-    val vconv = DecodeBits(op, "010100_000000_000000_xx_xxxxxx_x_111_11")
+      // Convolution transfer accumulators to vregs. Also decodes acset/actr ops.
+      val vconv = DecodeBits(op, "010100_000000_000000_xx_xxxxxx_x_111_11")
 
-    // Duplicate
-    val vdup = DecodeBits(op, "01000x_0xxxxx_000000_xx_xxxxxx_x_111_11") && op(13,12) <= 2.U
-    val vdupi = vdup && op(26) === 0.U
+      // Duplicate
+      val vdup = DecodeBits(op, "01000x_0xxxxx_000000_xx_xxxxxx_x_111_11") && op(13,12) <= 2.U
+      val vdupi = vdup && op(26) === 0.U
 
-    // Vector instructions.
-    d.viop := op(0) === 0.U ||     // .vv .vx
-              op(1,0) === 1.U ||  // .vvv .vxv
-              vconv || vdupi
+      // Vector instructions.
+      d.viop := op(0) === 0.U ||     // .vv .vx
+                op(1,0) === 1.U ||  // .vvv .vxv
+                vconv || vdupi
+    } else {
+      d.getvl    := false.B
+      d.getmaxvl := false.B
+      d.vld      := false.B
+      d.vst      := false.B
+      d.viop     := false.B
+    }
 
     // [extensions] Core controls.
     d.ebreak := DecodeBits(op, "000000000001_00000_000_00000_11100_11")
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala
index 0c51a55..03c8dd7 100644
--- a/hdl/chisel/src/kelvin/scalar/SCore.scala
+++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -36,9 +36,11 @@
     val ibus = new IBusIO(p)
     val dbus = new DBusIO(p)
     val ubus = new DBusIO(p)
-    val vldst = Output(Bool())
 
-    val vcore = Flipped(new VCoreIO(p))
+    val vldst = if (p.enableVector) { Some(Output(Bool())) } else { None }
+    val vcore = if (p.enableVector) {
+        Some(Flipped(new VCoreIO(p)))
+    } else { None }
 
     val iflush = new IFlushIO(p)
     val dflush = new DFlushIO(p)
@@ -127,7 +129,7 @@
     decode(i).io.scoreboard.regd := regfile.io.scoreboard.regd | scoreboard_spec(i)
   }
 
-  decode(0).io.mactive := io.vcore.mactive
+  decode(0).io.mactive := (if (p.enableVector) { io.vcore.get.mactive } else { false.B })
   for (i <- 1 until p.instructionLanes) {
     decode(i).io.mactive := false.B
   }
@@ -160,8 +162,10 @@
   csr.io.counters.rfwriteCount := regfile.io.rfwriteCount
   csr.io.counters.storeCount := lsu.io.storeCount
   csr.io.counters.branchCount := bru(0).io.taken.valid
-  csr.io.counters.vrfwriteCount := io.vcore.vrfwriteCount
-  csr.io.counters.vstoreCount := io.vcore.vstoreCount
+  if (p.enableVector) {
+    csr.io.counters.vrfwriteCount.get := io.vcore.get.vrfwriteCount
+    csr.io.counters.vstoreCount.get := io.vcore.get.vstoreCount
+  }
 
   // ---------------------------------------------------------------------------
   // Control Status Unit
@@ -170,7 +174,9 @@
   csr.io.req <> decode(0).io.csr
   csr.io.rs1 := regfile.io.readData(0)
 
-  csr.io.vcore.undef := io.vcore.undef
+  if (p.enableVector) {
+    csr.io.vcore.get.undef := io.vcore.get.undef
+  }
 
   // ---------------------------------------------------------------------------
   // Status
@@ -219,23 +225,34 @@
 
     regfile.io.writeData(i).valid := csr0Valid ||
                                      alu(i).io.rd.valid || bru(i).io.rd.valid ||
-                                     io.vcore.rd(i).valid
+                                     (if (p.enableVector) {
+                                        io.vcore.get.rd(i).valid
+                                      } else { false.B })
 
     regfile.io.writeData(i).addr :=
         MuxOR(csr0Valid, csr0Addr) |
         MuxOR(alu(i).io.rd.valid, alu(i).io.rd.addr) |
         MuxOR(bru(i).io.rd.valid, bru(i).io.rd.addr) |
-        MuxOR(io.vcore.rd(i).valid, io.vcore.rd(i).addr)
+        (if (p.enableVector) {  // Vector write-back source exists only with the vector core.
+           MuxOR(io.vcore.get.rd(i).valid, io.vcore.get.rd(i).addr)
+         } else { 0.U })  // UInt zero is the identity of this OR-reduction.
 
     regfile.io.writeData(i).data :=
         MuxOR(csr0Valid, csr0Data) |
         MuxOR(alu(i).io.rd.valid, alu(i).io.rd.data) |
         MuxOR(bru(i).io.rd.valid, bru(i).io.rd.data) |
-        MuxOR(io.vcore.rd(i).valid, io.vcore.rd(i).data)
+        (if (p.enableVector) {
+           MuxOR(io.vcore.get.rd(i).valid, io.vcore.get.rd(i).data)
+         } else { false.B })
 
-    assert((csr0Valid +&
-            alu(i).io.rd.valid +& bru(i).io.rd.valid +&
-            io.vcore.rd(i).valid) <= 1.U)
+    if (p.enableVector) {
+      assert((csr0Valid +&
+              alu(i).io.rd.valid +& bru(i).io.rd.valid +&
+              io.vcore.get.rd(i).valid) <= 1.U)
+    } else {
+      assert((csr0Valid +&
+              alu(i).io.rd.valid +& bru(i).io.rd.valid) <= 1.U)
+    }
   }
 
   val mluDvuOffset = p.instructionLanes
@@ -256,12 +273,9 @@
 
   // ---------------------------------------------------------------------------
   // Vector Extension
-  for (i <- 0 until p.instructionLanes) {
-    io.vcore.vinst(i) <> decode(i).io.vinst
-  }
-
-  for (i <- 0 until p.instructionLanes * 2) {
-    io.vcore.rs(i) := regfile.io.readData(i)
+  if (p.enableVector) {  // io.vcore and decode(i).io.vinst are Options; present only when vector is enabled.
+    io.vcore.get.vinst <> decode.map(_.io.vinst.get)  // Bulk-connect every decode lane's vinst port.
+    io.vcore.get.rs := regfile.io.readData  // NOTE(review): whole-Vec connect; the removed loop wired only instructionLanes * 2 entries — confirm lengths match.
   }
 
   // ---------------------------------------------------------------------------
@@ -273,7 +287,9 @@
   io.dbus <> lsu.io.dbus
   io.ubus <> lsu.io.ubus
 
-  io.vldst := lsu.io.vldst
+  if (p.enableVector) {
+    io.vldst.get := lsu.io.vldst
+  }
 
   // ---------------------------------------------------------------------------
   // Scalar logging interface
diff --git a/hdl/chisel/src/matcha/Kelvin.scala b/hdl/chisel/src/matcha/Kelvin.scala
index 5fd07aa..ad5a386 100644
--- a/hdl/chisel/src/matcha/Kelvin.scala
+++ b/hdl/chisel/src/matcha/Kelvin.scala
@@ -120,7 +120,9 @@
 
     // -------------------------------------------------------------------------
     // Bus Mux.
-    bus.io.in0 <> core.io.axi0
+    if (p.enableVector) {  // core.io.axi0 is an Option; it exists only when the vector core is built.
+      bus.io.in0 <> core.io.axi0.get
+    }  // NOTE(review): no else branch — bus.io.in0 has no driver in this hunk when enableVector is false; confirm it is tied off.
     bus.io.in1 <> core.io.axi1
     bus.io.in2 <> l1d.io.axi
     bus.io.in3.read <> l1i.io.axi.read
diff --git a/tests/verilator_sim/kelvin/core_tb.cc b/tests/verilator_sim/kelvin/core_tb.cc
index 140d0d5..45d2bc4 100644
--- a/tests/verilator_sim/kelvin/core_tb.cc
+++ b/tests/verilator_sim/kelvin/core_tb.cc
@@ -92,6 +92,7 @@
   sc_signal<sc_bv<32> > io_slog_data;
   sc_signal<sc_bv<4> > io_debug_en;
   sc_signal<sc_bv<32> > io_debug_cycles;
+#if KP_enableVector
   sc_signal<bool> io_axi0_write_addr_ready;
   sc_signal<bool> io_axi0_write_addr_valid;
   sc_signal<sc_bv<32> > io_axi0_write_addr_bits_addr;
@@ -113,6 +114,7 @@
   sc_signal<sc_bv<2> > io_axi0_read_data_bits_resp;
   sc_signal<sc_bv<kUncId> > io_axi0_read_data_bits_id;
   sc_signal<sc_bv<kUncBits> > io_axi0_read_data_bits_data;
+#endif  // KP_enableVector
   sc_signal<bool> io_axi1_write_addr_ready;
   sc_signal<bool> io_axi1_write_addr_valid;
   sc_signal<sc_bv<32> > io_axi1_write_addr_bits_addr;
@@ -228,6 +230,7 @@
 #define BINDAXI(a) \
   core.a(a);       \
   mif.a(a)
+#if KP_enableVector
   BINDAXI(io_axi0_write_addr_ready);
   BINDAXI(io_axi0_write_addr_valid);
   BINDAXI(io_axi0_write_addr_bits_addr);
@@ -249,6 +252,7 @@
   BINDAXI(io_axi0_read_data_bits_resp);
   BINDAXI(io_axi0_read_data_bits_id);
   BINDAXI(io_axi0_read_data_bits_data);
+#endif  // KP_enableVector
   BINDAXI(io_axi1_write_addr_ready);
   BINDAXI(io_axi1_write_addr_valid);
   BINDAXI(io_axi1_write_addr_bits_addr);