[dvsim] Do weighted scheduling of jobs

This patch adds rough weights to each target (build, run, cov, etc.).
If the slots are fully utilized and just one becomes available, the
weighted scheduling prefers earlier targets over later ones. If
multiple tests complete and more slots become available, it
distributes the available slots to each remaining target in proportion
to their assigned weights.
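
As a rough sketch of the allocation scheme (the target names and
weights below are hypothetical; the actual scheduler walks its queued
targets in dispatch order):

    def allocate(slots, weights):
        """Split free slots across targets, consuming all of them."""
        total = sum(weights.values())
        alloc, sum_weight, filled = {}, 0, 0
        for target, weight in weights.items():
            # Round the cumulative share and subtract prior
            # allocations, so rounding error never accumulates and
            # every slot gets used.
            sum_weight += weight
            num = round(slots * sum_weight / total) - filled
            alloc[target] = num
            filled += num
        return alloc

    # allocate(4, {"build": 5, "cov": 10, "run": 1})
    #   -> {"build": 1, "cov": 3, "run": 0}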

The weights are set such that coverage tasks have the highest
preference, followed by builds, followed by runs. The reasoning is as
follows:

Let's say that all builds are complete and only the simulation and
coverage jobs are queued up. If all tests of AES are complete, its
coverage tasks would end up starving until ALL tests in ALL other
blocks are complete. With weighted allocation of dispatch slots, the
AES coverage tasks complete sooner, giving us an early preview of the
results rather than having to wait 3+ hours for the rest of the
simulation to finish.

Weights are currently set based on relative importance. We can always
adjust them as needed.
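
To see why the allocation uses a running sum rather than rounding each
target's share independently, consider a hypothetical case of 4 free
slots and three equally weighted targets. Independent rounding gives
round(4 * 1/3) = 1 slot apiece, dispatching only 3 of the 4 slots. The
running-sum form instead yields:

    round(4 * 1/3) - 0 = 1
    round(4 * 2/3) - 1 = 2
    round(4 * 3/3) - 3 = 1

which consumes all 4 slots.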

Signed-off-by: Srikrishna Iyer <sriyer@google.com>
diff --git a/util/dvsim/Deploy.py b/util/dvsim/Deploy.py
index e055b81..e209801 100644
--- a/util/dvsim/Deploy.py
+++ b/util/dvsim/Deploy.py
@@ -26,6 +26,15 @@
     # be joined with '&&' instead of a space.
     cmds_list_vars = []
 
+    # Represents the weight with which a job of this target is scheduled.
+    # The initial weights set for the targets below are roughly inversely
+    # proportional to their average runtimes: the lower the runtime, the
+    # higher the chance a job gets scheduled. These are subject to change
+    # in future. It is only useful to customize this for targets that may
+    # coexist at any given time.
+    # TODO: Allow these to be set in the HJson.
+    weight = 1
+
     def __str__(self):
         return (pprint.pformat(self.__dict__)
                 if log.getLogger().isEnabledFor(VERBOSE) else self.full_name)
@@ -265,6 +274,7 @@
 
     target = "build"
     cmds_list_vars = ["pre_build_cmds", "post_build_cmds"]
+    weight = 5
 
     def __init__(self, build_mode, sim_cfg):
         self.build_mode_obj = build_mode
@@ -480,6 +490,7 @@
     """Abstraction for merging coverage databases."""
 
     target = "cov_merge"
+    weight = 10
 
     def __init__(self, run_items, sim_cfg):
         # Construct the cov_db_dirs right away from the run_items. This is a
@@ -536,6 +547,7 @@
     """Abstraction for coverage report generation. """
 
     target = "cov_report"
+    weight = 10
 
     def __init__(self, merge_job, sim_cfg):
         super().__init__(sim_cfg)
diff --git a/util/dvsim/Scheduler.py b/util/dvsim/Scheduler.py
index 17b400c..c686902 100644
--- a/util/dvsim/Scheduler.py
+++ b/util/dvsim/Scheduler.py
@@ -268,11 +268,9 @@
         '''Check for running items that have finished
 
         Returns True if something changed.
-
         '''
 
         changed = False
-
         for target in self._scheduled:
             to_pass = []
             to_fail = []
@@ -319,19 +317,50 @@
         def __sum(d):
             return sum([len(d[k]) for k in d])
 
-        num_slots = min(Scheduler.max_parallel - __sum(self._running),
-                        __sum(self._queued))
-        if num_slots <= 0:
+        slots = min(Scheduler.max_parallel - __sum(self._running),
+                    __sum(self._queued))
+        if slots <= 0:
             return
 
+        # Compute how many slots to allocate to each target based on their
+        # weights.
+        sum_weight = 0
+        slots_filled = 0
+        total_weight = sum(self._queued[t][0].weight for t in self._queued
+                           if self._queued[t])
+
         for target in self._scheduled:
-            num_slots_per_target = min(
-                num_slots, Scheduler.max_parallel - len(self._running[target]))
-            if num_slots_per_target <= 0:
+            if not self._queued[target]:
                 continue
 
+            # N slots are allocated to M targets, each with weight W(m),
+            # using the formula:
+            #
+            # N(m) = N * W(m) / T, where
+            #   T is the sum total of all weights.
+            #
+            # This is, however, problematic due to fractions. Even after
+            # rounding to the nearest integer, the slots may not be fully
+            # utilized (one may be left over). An alternate approach that
+            # avoids this problem is as follows:
+            #
+            # N(m) = round(N * S(W(m)) / T) - F(m), where
+            #   S(W(m)) is the running sum of weights up to current target m.
+            #   F(m) is the running total of slots filled so far.
+            #
+            # The computed slots per target are nearly identical to the first
+            # solution, except that slot allocation is prioritized towards
+            # targets that appear earlier in the list, such that in the end,
+            # all slots are fully consumed.
+            sum_weight += self._queued[target][0].weight
+            target_slots = round(
+                (slots * sum_weight) / total_weight) - slots_filled
+            if target_slots <= 0:
+                continue
+            slots_filled += target_slots
+
             to_dispatch = []
-            while self._queued[target] and num_slots_per_target > 0:
+            while self._queued[target] and target_slots > 0:
                 next_item = self._queued[target].pop(0)
                 if not self._ok_to_run(next_item):
                     self._cancel_item(next_item, cancel_successors=False)
@@ -339,8 +368,7 @@
                     continue
 
                 to_dispatch.append(next_item)
-                num_slots_per_target -= 1
-                num_slots -= 1
+                target_slots -= 1
 
             if not to_dispatch:
                 continue