More Kelvin cleanups.

Change-Id: Iedda0ea86791bf5e0195afcaa1e105961160166d
diff --git a/hdl/chisel/src/common/Fifo4.scala b/hdl/chisel/src/common/Fifo4.scala
index a89ab0f..a01963f 100644
--- a/hdl/chisel/src/common/Fifo4.scala
+++ b/hdl/chisel/src/common/Fifo4.scala
@@ -89,8 +89,7 @@
   val iactive = Cat(io.in.bits(3).valid, io.in.bits(2).valid,
                     io.in.bits(1).valid, io.in.bits(0).valid).asUInt
 
-  val icount = io.in.bits(0).valid +& io.in.bits(1).valid +
-               io.in.bits(2).valid +& io.in.bits(3).valid
+  val icount = PopCount(iactive)
 
   // ---------------------------------------------------------------------------
   // Fifo Control.
diff --git a/hdl/chisel/src/common/Fifo4e.scala b/hdl/chisel/src/common/Fifo4e.scala
index 0d8a1cd..392e7ee 100644
--- a/hdl/chisel/src/common/Fifo4e.scala
+++ b/hdl/chisel/src/common/Fifo4e.scala
@@ -60,8 +60,7 @@
   val iactive = Cat(io.in.bits(3).valid, io.in.bits(2).valid,
                     io.in.bits(1).valid, io.in.bits(0).valid).asUInt
 
-  val icount = io.in.bits(0).valid +& io.in.bits(1).valid +
-               io.in.bits(2).valid +& io.in.bits(3).valid
+  val icount = PopCount(iactive)
 
   // ---------------------------------------------------------------------------
   // Fifo Control.
diff --git a/hdl/chisel/src/kelvin/BUILD b/hdl/chisel/src/kelvin/BUILD
index a6a2085..b206de7 100644
--- a/hdl/chisel/src/kelvin/BUILD
+++ b/hdl/chisel/src/kelvin/BUILD
@@ -65,8 +65,6 @@
     ],
 )
 
-
-
 chisel_cc_library(
     name = "core_cc_library",
     chisel_lib = ":kelvin",
diff --git a/hdl/chisel/src/kelvin/scalar/Fetch.scala b/hdl/chisel/src/kelvin/scalar/Fetch.scala
index 073f62a..cb21c9d 100644
--- a/hdl/chisel/src/kelvin/scalar/Fetch.scala
+++ b/hdl/chisel/src/kelvin/scalar/Fetch.scala
@@ -265,40 +265,10 @@
                  fetchEn(0) && !fetchEn(1) && !fetchEn(2) && !fetchEn(3),
                  !fetchEn(0) && !fetchEn(1) && !fetchEn(2) && !fetchEn(3))
 
-  val nxtInstAddr0 = instAddr(0)          // 0
-  val nxtInstAddr1 = instAddr(1)          // 4
-  val nxtInstAddr2 = instAddr(2)          // 8
-  val nxtInstAddr3 = instAddr(3)          // 12
-  val nxtInstAddr4 = instAddr(0) + 16.U   // 16
-  val nxtInstAddr5 = instAddr(1) + 16.U   // 20
-  val nxtInstAddr6 = instAddr(2) + 16.U   // 24
-  val nxtInstAddr7 = instAddr(3) + 16.U   // 28
-
-  val nxtInstAddr = Wire(Vec(4, UInt(p.instructionBits.W)))
-
-  nxtInstAddr(0) := Mux(fsel(4), nxtInstAddr4, 0.U) |
-                    Mux(fsel(3), nxtInstAddr3, 0.U) |
-                    Mux(fsel(2), nxtInstAddr2, 0.U) |
-                    Mux(fsel(1), nxtInstAddr1, 0.U) |
-                    Mux(fsel(0), nxtInstAddr0, 0.U)
-
-  nxtInstAddr(1) := Mux(fsel(4), nxtInstAddr5, 0.U) |
-                    Mux(fsel(3), nxtInstAddr4, 0.U) |
-                    Mux(fsel(2), nxtInstAddr3, 0.U) |
-                    Mux(fsel(1), nxtInstAddr2, 0.U) |
-                    Mux(fsel(0), nxtInstAddr1, 0.U)
-
-  nxtInstAddr(2) := Mux(fsel(4), nxtInstAddr6, 0.U) |
-                    Mux(fsel(3), nxtInstAddr5, 0.U) |
-                    Mux(fsel(2), nxtInstAddr4, 0.U) |
-                    Mux(fsel(1), nxtInstAddr3, 0.U) |
-                    Mux(fsel(0), nxtInstAddr2, 0.U)
-
-  nxtInstAddr(3) := Mux(fsel(4), nxtInstAddr7, 0.U) |
-                    Mux(fsel(3), nxtInstAddr6, 0.U) |
-                    Mux(fsel(2), nxtInstAddr5, 0.U) |
-                    Mux(fsel(1), nxtInstAddr4, 0.U) |
-                    Mux(fsel(0), nxtInstAddr3, 0.U)
+  val nxtInstAddrOffset = instAddr.map(x => x) ++ instAddr.map(x => x + 16.U)
+  val nxtInstAddr = (0 until 4).map(i =>
+      (0 until 5).map(
+          j => MuxOR(fsel(j), nxtInstAddrOffset(j + i))).reduce(_|_))
 
   val nxtInstIndex0 = nxtInstAddr(0)(indexMsb, indexLsb)
   val nxtInstIndex1 = nxtInstAddr(3)(indexMsb, indexLsb)
diff --git a/hdl/chisel/src/kelvin/scalar/Lsu.scala b/hdl/chisel/src/kelvin/scalar/Lsu.scala
index 65a1751..520693b 100644
--- a/hdl/chisel/src/kelvin/scalar/Lsu.scala
+++ b/hdl/chisel/src/kelvin/scalar/Lsu.scala
@@ -124,10 +124,9 @@
                       ctrl.io.count <= (n - 2).U,
                       ctrl.io.count <= (n - 1).U)
 
-  io.req(0).ready := ctrlready(0) && data.io.in.ready
-  io.req(1).ready := ctrlready(1) && data.io.in.ready
-  io.req(2).ready := ctrlready(2) && data.io.in.ready
-  io.req(3).ready := ctrlready(3) && data.io.in.ready
+  for (i <- 0 until 4) {
+    io.req(i).ready := ctrlready(i) && data.io.in.ready
+  }
 
   // Address phase must use simple logic to resolve mask for unaligned address.
   val linebit = log2Ceil(p.lsuDataBits / 8)
@@ -135,8 +134,7 @@
 
   // ---------------------------------------------------------------------------
   // Control Port Inputs.
-  ctrl.io.in.valid := io.req(0).valid || io.req(1).valid ||
-                      io.req(2).valid || io.req(3).valid
+  ctrl.io.in.valid := io.req.map(_.valid).reduce(_||_)
 
   for (i <- 0 until 4) {
     val uncached = io.busPort.addr(i)(31)
diff --git a/hdl/chisel/src/kelvin/scalar/Mlu.scala b/hdl/chisel/src/kelvin/scalar/Mlu.scala
index 94293d7..349104d 100644
--- a/hdl/chisel/src/kelvin/scalar/Mlu.scala
+++ b/hdl/chisel/src/kelvin/scalar/Mlu.scala
@@ -64,8 +64,7 @@
   val addr2 = Reg(UInt(5.W))
   val sel = Reg(UInt(4.W))
 
-  valid1 := io.req(0).valid || io.req(1).valid ||
-            io.req(2).valid || io.req(3).valid
+  valid1 := io.req.map(_.valid).reduce(_||_)
   valid2 := valid1
 
   when (io.req(0).valid) {
diff --git a/hdl/chisel/src/kelvin/scalar/Regfile.scala b/hdl/chisel/src/kelvin/scalar/Regfile.scala
index 6629505..6dfdd00 100644
--- a/hdl/chisel/src/kelvin/scalar/Regfile.scala
+++ b/hdl/chisel/src/kelvin/scalar/Regfile.scala
@@ -160,12 +160,7 @@
                     io.writeData(0).valid && io.writeData(0).addr === i.U &&
                       !io.writeMask(0).valid)
 
-    val data  = MuxOR(valid(0), io.writeData(0).data) |
-                MuxOR(valid(1), io.writeData(1).data) |
-                MuxOR(valid(2), io.writeData(2).data) |
-                MuxOR(valid(3), io.writeData(3).data) |
-                MuxOR(valid(4), io.writeData(4).data) |
-                MuxOR(valid(5), io.writeData(5).data)
+    val data  = (0 until 6).map(x => MuxOR(valid(x), io.writeData(x).data)).reduce(_|_)
 
     writeValid(i) := valid =/= 0.U
     writeData(i)  := data
diff --git a/hdl/chisel/src/kelvin/scalar/SCore.scala b/hdl/chisel/src/kelvin/scalar/SCore.scala
index 8dbdabd..3f0f678 100644
--- a/hdl/chisel/src/kelvin/scalar/SCore.scala
+++ b/hdl/chisel/src/kelvin/scalar/SCore.scala
@@ -59,8 +59,7 @@
   val dvu = Dvu(p)
 
   // Wire up the core.
-  val branchTaken = bru(0).io.taken.valid || bru(1).io.taken.valid ||
-                    bru(2).io.taken.valid || bru(3).io.taken.valid
+  val branchTaken = bru.map(x => x.io.taken.valid).reduce(_||_)
 
   // ---------------------------------------------------------------------------
   // IFlush
@@ -96,11 +95,7 @@
 
   // ---------------------------------------------------------------------------
   // Decode
-  val mask = VecInit(true.B,
-                     decode(0).io.inst.ready,
-                     decode(0).io.inst.ready && decode(1).io.inst.ready,
-                     decode(0).io.inst.ready && decode(1).io.inst.ready &&
-                       decode(2).io.inst.ready)
+  val mask = VecInit(decode.map(_.io.inst.ready).scan(true.B)(_ && _))
 
   for (i <- 0 until 4) {
     decode(i).io.inst.valid := fetch.io.inst.lanes(i).valid && mask(i)
@@ -126,22 +121,11 @@
   decode(3).io.serializeIn := decode(2).io.serializeOut
 
   // In decode update multi-issue scoreboard state.
-  val scoreboard_spec1 = decode(0).io.scoreboard.spec
-  val scoreboard_spec2 = decode(1).io.scoreboard.spec | scoreboard_spec1
-  val scoreboard_spec3 = decode(2).io.scoreboard.spec | scoreboard_spec2
-  assert(scoreboard_spec1.getWidth == 32)
-  assert(scoreboard_spec2.getWidth == 32)
-  assert(scoreboard_spec3.getWidth == 32)
-
-  decode(0).io.scoreboard.comb := regfile.io.scoreboard.comb
-  decode(0).io.scoreboard.regd := regfile.io.scoreboard.regd
-  decode(1).io.scoreboard.comb := regfile.io.scoreboard.comb | scoreboard_spec1
-  decode(1).io.scoreboard.regd := regfile.io.scoreboard.regd | scoreboard_spec1
-  decode(2).io.scoreboard.comb := regfile.io.scoreboard.comb | scoreboard_spec2
-  decode(2).io.scoreboard.regd := regfile.io.scoreboard.regd | scoreboard_spec2
-  decode(3).io.scoreboard.comb := regfile.io.scoreboard.comb | scoreboard_spec3
-  decode(3).io.scoreboard.regd := regfile.io.scoreboard.regd | scoreboard_spec3
-
+  val scoreboard_spec = decode.map(_.io.scoreboard.spec).scan(0.U)(_|_)
+  for (i <- 0 until 4) {
+    decode(i).io.scoreboard.comb := regfile.io.scoreboard.comb | scoreboard_spec(i)
+    decode(i).io.scoreboard.regd := regfile.io.scoreboard.regd | scoreboard_spec(i)
+  }
 
   decode(0).io.mactive := io.vcore.mactive
   decode(1).io.mactive := false.B
@@ -207,18 +191,11 @@
 
   // ---------------------------------------------------------------------------
   // Multiplier Unit
-  mlu.io.req(0) := decode(0).io.mlu
-  mlu.io.req(1) := decode(1).io.mlu
-  mlu.io.req(2) := decode(2).io.mlu
-  mlu.io.req(3) := decode(3).io.mlu
-  mlu.io.rs1(0) := regfile.io.readData(0)
-  mlu.io.rs1(1) := regfile.io.readData(2)
-  mlu.io.rs1(2) := regfile.io.readData(4)
-  mlu.io.rs1(3) := regfile.io.readData(6)
-  mlu.io.rs2(0) := regfile.io.readData(1)
-  mlu.io.rs2(1) := regfile.io.readData(3)
-  mlu.io.rs2(2) := regfile.io.readData(5)
-  mlu.io.rs2(3) := regfile.io.readData(7)
+  for (i <- 0 until 4) {
+    mlu.io.req(i) := decode(i).io.mlu
+    mlu.io.rs1(i) := regfile.io.readData(2 * i)
+    mlu.io.rs2(i) := regfile.io.readData((2 * i) + 1)
+  }
 
   // ---------------------------------------------------------------------------
   // Divide Unit
@@ -277,13 +254,10 @@
   regfile.io.writeData(5).addr  := lsu.io.rd.addr
   regfile.io.writeData(5).data  := lsu.io.rd.data
 
-  regfile.io.writeMask(0).valid := false.B
-  regfile.io.writeMask(1).valid := regfile.io.writeMask(0).valid ||
-                                     bru(0).io.taken.valid
-  regfile.io.writeMask(2).valid := regfile.io.writeMask(1).valid ||
-                                     bru(1).io.taken.valid
-  regfile.io.writeMask(3).valid := regfile.io.writeMask(2).valid ||
-                                     bru(2).io.taken.valid
+  val writeMask = bru.map(_.io.taken.valid).scan(false.B)(_||_)
+  for (i <- 0 until 4) {
+    regfile.io.writeMask(i).valid := writeMask(i)
+  }
 
   // ---------------------------------------------------------------------------
   // Vector Extension
@@ -342,14 +316,10 @@
                  fetch.io.inst.lanes(1).valid && fetch.io.inst.lanes(1).ready && !branchTaken,
                  fetch.io.inst.lanes(0).valid && fetch.io.inst.lanes(0).ready && !branchTaken)
 
-  debugAddr(0) := fetch.io.inst.lanes(0).addr
-  debugAddr(1) := fetch.io.inst.lanes(1).addr
-  debugAddr(2) := fetch.io.inst.lanes(2).addr
-  debugAddr(3) := fetch.io.inst.lanes(3).addr
-  debugInst(0) := fetch.io.inst.lanes(0).inst
-  debugInst(1) := fetch.io.inst.lanes(1).inst
-  debugInst(2) := fetch.io.inst.lanes(2).inst
-  debugInst(3) := fetch.io.inst.lanes(3).inst
+  for (i <- 0 until 4) {
+    debugAddr(i) := fetch.io.inst.lanes(i).addr
+    debugInst(i) := fetch.io.inst.lanes(i).inst
+  }
 
   io.debug.en := debugEn & ~debugBrch