[rvvi] Handle vd conflicts for vector tracing

- Multiple vector instructions can be dispatched concurrently with the
  same destination register (vd). Update the retirement logic to consume
  at most one write to a given register per cycle.

Change-Id: I1acf2ac2db19d3d048f295ac754d9ffc23cbd075
diff --git a/hdl/chisel/src/kelvin/RetirementBuffer.scala b/hdl/chisel/src/kelvin/RetirementBuffer.scala
index 851e7f7..f776b7b 100644
--- a/hdl/chisel/src/kelvin/RetirementBuffer.scala
+++ b/hdl/chisel/src/kelvin/RetirementBuffer.scala
@@ -114,12 +114,15 @@
     // The entry is active if it's validly enqueued and not already complete.
     val validBufferEntry = (i.U < instBuffer.io.nEnqueued) && (!resultBuffer(i).valid)
 
-    // If the entry is active and its data dependency is met (or it has no dependency)...
-    val updated = (validBufferEntry && (scalarWriteIdxMap.reduce(_|_) || floatWriteIdxMap.reduce(_|_) || vectorWriteIdxMap.reduce(_|_) || nonWritingInstr))
     // Find the index of the first write port that provides the needed data.
     val scalarWriteIdx = PriorityEncoder(scalarWriteIdxMap)
     val floatWriteIdx = PriorityEncoder(floatWriteIdxMap)
     val vectorWriteIdx = PriorityEncoder(vectorWriteIdxMap)
+
+    // If the entry is active and its data dependency is met (or it has no dependency)...
+    // Special care here for vector, as multiple instructions are allowed to be dispatched for the same destination register.
+    // This differs from how the scalar/float scoreboards restrict dispatch.
+    val updated = (validBufferEntry && (scalarWriteIdxMap.reduce(_|_) || floatWriteIdxMap.reduce(_|_) || (vectorWriteIdxMap.reduce(_|_) && vectorWriteIdx === i.U) || nonWritingInstr))
     // Select the actual data from the winning write port.
     val writeDataScalar = io.writeDataScalar(scalarWriteIdx).bits.data
     val writeDataFloat = io.writeDataFloat.map(x => x(floatWriteIdx).bits.data).getOrElse(0.U)