[dvsim] Create jobs with dependencies instead of sub-jobs

This looks a bit more like a normal scheduler, where you have a big
pool of jobs with dependencies between them. One benefit of doing
things this way is that it simplifies the process of actually running
jobs: everything is created up front, so there is nothing new to kick
off when a job finishes.

The main visible change, however, is that if a build fails then we no
longer run every dependent job for a second before giving up on
it. (We could have retrofitted that into the existing design, but I'm
trying to move things towards a more "standard" shape as I go.) With
the new code, when we're about to dispatch a job, we check whether
its dependencies have all run successfully. If not, we kill the job
instead of dispatching it.
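
In essence, the extra logic in Scheduler.dispatch() (see the diff
below) boils down to a check like this; a simplified sketch, not the
exact code:

    # Sketch: only dispatch a job whose dependencies all passed ('P').
    # A failed ('F') or killed ('K') dependency poisons the job, so we
    # kill it too rather than dispatching it.
    if any(dep.status in ['F', 'K'] for dep in next_item.dependencies):
        next_item.status = 'K'
    else:
        to_dispatch.append(next_item)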

Note that this logic is made easier because dvsim runs in
phases (called "targets" in the code). We ensure that dependencies
always belong to an earlier phase, so we know that they will have run
to completion or failed before any dependent job is started. In code,
this is the assertion that dep.status is 'P', 'F' or 'K' in
Scheduler.dispatch().
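
Concretely, because we only dispatch one target's jobs at a time and
dependencies always belong to an earlier target, a dependency can
never still be in flight when we look at it:

    # Sketch: by dispatch time, every dependency has already finished
    # (passed, failed or been killed); it cannot still be queued or
    # running.
    for dep in next_item.dependencies:
        assert dep.status in ['P', 'F', 'K']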

The only other change in this patch is to printing. Because we now
create jobs for future phases/targets up front, we don't want to
print both a "[build]: ..." line and a "[run]: ..." line every
time. To avoid that, we skip any target whose jobs are all still
queued, provided we've already printed something for an earlier
target.
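
In the scheduler's status-printing loop, that skip is just an extra
term in the print condition (simplified from the diff below):

    # Sketch: a target where every job is still queued ('Q') prints
    # nothing, as long as an earlier target has already printed a line.
    all_queued = tgt_status.counters['Q'] == tgt_status.counters['T']
    should_print = (print_status and
                    not (was_done and is_done) and
                    not (printed_something and all_queued))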

Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/util/dvsim/Deploy.py b/util/dvsim/Deploy.py
index 1bf07e1..a11e1a2 100644
--- a/util/dvsim/Deploy.py
+++ b/util/dvsim/Deploy.py
@@ -32,17 +32,10 @@
     # be joined with '&&' instead of a space.
     cmds_list_vars = []
 
-    def __self_str__(self):
-        if log.getLogger().isEnabledFor(VERBOSE):
-            return pprint.pformat(self.__dict__)
-        else:
-            ret = self.cmd
-            if self.sub != []:
-                ret += "\nSub:\n" + str(self.sub)
-            return ret
-
     def __str__(self):
-        return self.__self_str__()
+        return (pprint.pformat(self.__dict__)
+                if log.getLogger().isEnabledFor(VERBOSE)
+                else self.cmd)
 
     def __init__(self, sim_cfg):
         '''Initialize common class members.'''
@@ -67,8 +60,8 @@
         # List of vars required to be exported to sub-shell
         self.exports = None
 
-        # Deploy sub commands
-        self.sub = []
+        # A list of jobs on which this job depends
+        self.dependencies = []
 
         # Process
         self.process = None
@@ -382,12 +375,6 @@
                     self.status = 'F'
                     break
 
-    # Recursively set sub-item's status if parent item fails
-    def set_sub_status(self, status):
-        for sub_item in self.sub:
-            sub_item.status = status
-            sub_item.set_sub_status(status)
-
     def link_odir(self):
         if self.status == '.':
             log.error("Method unexpectedly called!")
@@ -429,10 +416,6 @@
             if self.log_fd:
                 self.log_fd.close()
             self.status = "K"
-        # recurisvely kill sub target
-        elif len(self.sub):
-            for item in self.sub:
-                item.kill()
 
     def kill_remote_job(self):
         '''
@@ -585,7 +568,7 @@
 
     cmds_list_vars = ["pre_run_cmds", "post_run_cmds"]
 
-    def __init__(self, index, test, sim_cfg):
+    def __init__(self, index, test, build_job, sim_cfg):
         # Initialize common vars.
         super().__init__(sim_cfg)
 
@@ -615,6 +598,9 @@
             "run_fail_patterns": False
         })
 
+        if build_job is not None:
+            self.dependencies.append(build_job)
+
         self.index = index
         self.seed = RunTest.get_seed()
 
@@ -810,10 +796,12 @@
     # Register all builds with the class
     items = []
 
-    def __init__(self, sim_cfg):
+    def __init__(self, merge_job, sim_cfg):
         # Initialize common vars.
         super().__init__(sim_cfg)
 
+        self.dependencies.append(merge_job)
+
         self.target = "cov_report"
         self.pass_patterns = []
         self.fail_patterns = []
diff --git a/util/dvsim/Scheduler.py b/util/dvsim/Scheduler.py
index ccbe889..af586c1 100644
--- a/util/dvsim/Scheduler.py
+++ b/util/dvsim/Scheduler.py
@@ -105,18 +105,12 @@
                 continue
 
             if item.status != "P":
-                # Kill its sub items if item did not pass.
-                item.set_sub_status("K")
                 log.error("[%s]: [%s]: [status] [%s: %s]",
                           hms, item.target, item.identifier, item.status)
             else:
                 log.log(VERBOSE, "[%s]: [%s]: [status] [%s: %s]",
                         hms, item.target, item.identifier, item.status)
 
-            # Queue items' sub-items if it is done.
-            for sub_item in item.sub:
-                self.add_item(sub_item)
-
         return status_changed
 
     def dispatch(self):
@@ -127,12 +121,48 @@
         if not num_slots:
             return
 
-        items = self.queued_items[0:num_slots]
-        self.queued_items = self.queued_items[num_slots:]
-        self.dispatched_items.extend(items)
+        # We only dispatch things for one target at a time.
+        cur_tgt = None
+        for item in self.dispatched_items:
+            if item.status == 'D':
+                cur_tgt = item.target
+                break
+
+        to_dispatch = []
+        while len(to_dispatch) < num_slots and self.queued_items:
+            next_item = self.queued_items[0]
+
+            # Keep track of the current target to make sure we dispatch things
+            # in phases.
+            if cur_tgt is None:
+                cur_tgt = next_item.target
+            if next_item.target != cur_tgt:
+                break
+
+            self.queued_items = self.queued_items[1:]
+
+            # Does next_item have any dependencies? Since we dispatch jobs
+            # target by target, we can assume that each of those dependencies
+            # appears earlier in the list than next_item itself.
+            has_failed_dep = False
+            for dep in next_item.dependencies:
+                assert dep.status in ['P', 'F', 'K']
+                if dep.status in ['F', 'K']:
+                    has_failed_dep = True
+                    break
+
+            # If has_failed_dep is set, at least one of the dependencies has
+            # been cancelled or has run and failed. Give up on this item too.
+            if has_failed_dep:
+                next_item.status = 'K'
+                continue
+
+            to_dispatch.append(next_item)
+
+        self.dispatched_items.extend(to_dispatch)
 
         tgt_names = OrderedDict()
-        for item in items:
+        for item in to_dispatch:
             if item.status is None:
                 tgt_names.setdefault(item.target, []).append(item.identifier)
                 item.dispatch_cmd()
@@ -155,20 +185,26 @@
         hms = self.timer.hms()
 
         all_done = True
+        printed_something = False
         for target, tgt_status in self.status.items():
             was_done = tgt_status.done
             tgt_status.check_if_done()
             is_done = tgt_status.done
+            all_queued = tgt_status.counters['Q'] == tgt_status.counters['T']
 
             all_done &= is_done
 
-            if print_status and not (was_done and is_done):
+            should_print = (print_status and
+                            not (was_done and is_done) and
+                            not (printed_something and all_queued))
+            if should_print:
                 stats = tgt_status.counters
                 width = "0{}d".format(len(str(stats["T"])))
                 msg = "["
                 for s in stats.keys():
                     msg += s + ": {:{}}, ".format(stats[s], width)
                 msg = msg[:-2] + "]"
+                printed_something = True
                 log.info("[%s]: [%s]: %s", hms, target, msg)
         return all_done
 
diff --git a/util/dvsim/SimCfg.py b/util/dvsim/SimCfg.py
index 5e7bece..07b0ddc 100644
--- a/util/dvsim/SimCfg.py
+++ b/util/dvsim/SimCfg.py
@@ -69,10 +69,6 @@
         if item.target == "run":
             self._add_run(item)
 
-        # Recurse to any sub-items
-        for child in item.sub:
-            self._add_item(child)
-
     def _add_run(self, item):
         '''Add an entry to table for item'''
         row = self._name_to_row.get(item.name)
@@ -485,27 +481,27 @@
         tests A, B with reseed values of 5 and 2, respectively, then the list
         will be ABABAAA).
 
-        build_map is a dictionary from build name to a CompileSim object. Each
-        test is added to the CompileSim item that it depends on (signifying
-        that the test should be built once the build on which it depends is
-        done).
+        build_map is either None or a dictionary from build mode to a
+        CompileSim object. If None, this means that we're in "run only" mode,
+        so there are no builds involved at all. Otherwise, the build_mode of
+        each test appears in the map, signifying the test's dependency on its
+        corresponding CompileSim item (a test cannot run until it has been
+        compiled).
+
         '''
         tagged = []
         for test in self.run_list:
+            build_job = (build_map[test.build_mode]
+                         if build_map is not None else None)
             for idx in range(test.reseed):
-                tagged.append((idx, test, RunTest(idx, test, self)))
+                tagged.append((idx, RunTest(idx, test, build_job, self)))
 
         # Stably sort the tagged list by the 1st coordinate
         tagged.sort(key=lambda x: x[0])
 
-        # Now iterate over it again, adding tests to build_map (in the
-        # interleaved order) and collecting up the RunTest objects.
-        runs = []
-        for _, test, run in tagged:
-            build_map[test.build_mode].sub.append(run)
-            runs.append(run)
-
-        return runs
+        # Return the sorted list of RunTest objects, discarding the indices by
+        # which we sorted it.
+        return [run for _, run in tagged]
 
     def _create_deploy_objects(self):
         '''Create deploy objects from the build and run lists.
@@ -514,44 +510,48 @@
         # Create the build and run list first
         self._create_build_and_run_list()
 
-        self.builds = []
-        build_map = {}
-        for build_mode_obj in self.build_list:
-            new_build = CompileSim(build_mode_obj, self)
+        if self.run_only:
+            self.builds = []
+            self.runs = self._expand_run_list(None)
+        else:
+            self.builds = []
+            build_map = {}
+            for build_mode_obj in self.build_list:
+                new_build = CompileSim(build_mode_obj, self)
 
-            # It is possible for tests to supply different build modes, but
-            # those builds may differ only under specific circumstances, such
-            # as coverage being enabled. If coverage is not enabled, then they
-            # may be completely identical. In that case, we can save compute
-            # resources by removing the extra duplicated builds. We discard the
-            # new_build if it is equivalent to an existing one.
-            is_unique = True
-            for build in self.builds:
-                if build.is_equivalent_job(new_build):
-                    new_build = build
-                    is_unique = False
-                    break
+                # It is possible for tests to supply different build modes, but
+                # those builds may differ only under specific circumstances,
+                # such as coverage being enabled. If coverage is not enabled,
+                # then they may be completely identical. In that case, we can
+                # save compute resources by removing the extra duplicated
+                # builds. We discard the new_build if it is equivalent to an
+                # existing one.
+                is_unique = True
+                for build in self.builds:
+                    if build.is_equivalent_job(new_build):
+                        new_build = build
+                        is_unique = False
+                        break
 
-            if is_unique:
-                self.builds.append(new_build)
-            build_map[build_mode_obj] = new_build
+                if is_unique:
+                    self.builds.append(new_build)
+                build_map[build_mode_obj] = new_build
 
-        # Update all tests to use the updated (uniquified) build modes.
-        for test in self.run_list:
-            if test.build_mode.name != build_map[test.build_mode].name:
-                test.build_mode = Modes.find_mode(
-                    build_map[test.build_mode].name, self.build_modes)
+            # Update all tests to use the updated (uniquified) build modes.
+            for test in self.run_list:
+                if test.build_mode.name != build_map[test.build_mode].name:
+                    test.build_mode = Modes.find_mode(
+                        build_map[test.build_mode].name, self.build_modes)
 
-        self.runs = ([]
-                     if self.build_only else self._expand_run_list(build_map))
+            self.runs = ([] if self.build_only
+                         else self._expand_run_list(build_map))
 
-        self.deploy = self.runs if self.run_only else self.builds
+        self.deploy = self.builds + self.runs
 
         # Create cov_merge and cov_report objects
         if self.cov:
             self.cov_merge_deploy = CovMerge(self)
-            self.cov_report_deploy = CovReport(self)
-            self.cov_merge_deploy.sub.append(self.cov_report_deploy)
+            self.cov_report_deploy = CovReport(self.cov_merge_deploy, self)
 
         # Create initial set of directories before kicking off the regression.
         self._create_dirs()
@@ -566,6 +566,7 @@
             for item in self.cfgs:
                 if item.cov:
                     self.cov_deploys.append(item.cov_merge_deploy)
+                    self.cov_deploys.append(item.cov_report_deploy)
 
     # deploy additional commands as needed. We do this separated for coverage
     # since that needs to happen at the end.