[dvsim] Remove "status" from Deploy items

Before this patch, the scheduler code in dvsim had two notions of
the status of a job:

  - Deploy.status (stored in each job)
  - TargetStatus.counters (per-target counts of queued, running,
    passed, failed and killed jobs)

This patch removes the first of the two, to more cleanly split "run
some stuff and collect the results" (done by the scheduler) from "how
do I run this job?" (done by the Deploy object).
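
As a minimal sketch of the intended split (toy names such as FakeJob,
not the real classes): the job only knows how to start itself and
report how it went, while the caller owns the item-to-status map.

    class FakeJob:
        def dispatch_cmd(self):
            pass  # would start the subprocess here

        def poll(self):
            # 'D' while still running; 'P' or 'F' once the process exits
            return 'P'

    jobs = [FakeJob(), FakeJob()]
    item_to_status = {}
    for job in jobs:
        job.dispatch_cmd()
        item_to_status[job] = job.poll()
    print(item_to_status)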

There are some extra changes we have to make to get this to work.
Firstly, we move the SIGINT handling code into Scheduler.py: it
belongs around the code that actually runs the jobs, not at the top of
dvsim.py. The handler now just sets a threading.Event, so the "kill
some stuff" logic runs in the main thread rather than inside the
signal handler, which is probably a bit safer.
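
As a rough standalone sketch of that pattern (a toy, not the actual
Scheduler code): the handler only sets the event; the main loop
notices it and does the clean-up itself.

    import threading
    from signal import SIGINT, signal

    stop_now = threading.Event()

    def on_sigint(sig, frame):
        stop_now.set()  # do as little as possible in the handler

    old_handler = signal(SIGINT, on_sigint)
    try:
        while not stop_now.is_set():
            # poll / dispatch jobs would go here
            stop_now.wait(timeout=1)  # sleep(1) that wakes early on SIGINT
    finally:
        signal(SIGINT, old_handler)
    # back in the main thread: safe to kill running jobs and exit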

Secondly, we have to explicitly return the results of running jobs
from Scheduler.run() and thread them through FlowCfg.gen_results(). As
it turns out, most FlowCfg subclasses figure out success/failure by
loading generated hjson files, so only SimCfg.py needs real changes
(the other _gen_results() overrides just gain the new argument).
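
Roughly, the new flow looks like this (toy stand-ins such as run_jobs
and FakeCfg, not the real dvsim classes):

    def run_jobs(items):
        # stand-in for Scheduler.run(): pretend every job passes
        return {item: 'P' for item in items}

    class FakeCfg:
        def gen_results(self, results):
            passed = sum(1 for s in results.values() if s == 'P')
            print('{} of {} jobs passed'.format(passed, len(results)))

    results = run_jobs(['build', 'run_smoke', 'run_stress'])
    FakeCfg().gen_results(results)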

Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/util/dvsim/Deploy.py b/util/dvsim/Deploy.py
index a11e1a2..a513786 100644
--- a/util/dvsim/Deploy.py
+++ b/util/dvsim/Deploy.py
@@ -16,6 +16,11 @@
 from utils import VERBOSE, find_and_substitute_wildcards, run_cmd
 
 
+class DeployError(Exception):
+    def __init__(self, msg):
+        self.msg = msg
+
+
 class Deploy():
     """
     Abstraction for deploying builds and runs.
@@ -66,7 +71,6 @@
         # Process
         self.process = None
         self.log_fd = None
-        self.status = None
 
         # These are mandatory class attributes that need to be extracted and
         # set from the sim_cfg object. These are explicitly used to construct
@@ -268,13 +272,11 @@
                                             stderr=f,
                                             env=exports)
             self.log_fd = f
-            self.status = "D"
             Deploy.dispatch_counter += 1
         except IOError:
-            log.error('IO Error: See %s', self.log)
             if self.log_fd:
                 self.log_fd.close()
-            self.status = "K"
+            raise DeployError('IO Error: See {}'.format(self.log))
 
     def odir_limiter(self, odir, max_odirs=-1):
         '''Function to backup previously run output directory to maintain a
@@ -329,93 +331,120 @@
             log.error("Failed to delete old run directories!")
         return dirs
 
-    def set_status(self):
-        self.status = 'P'
-        if self.dry_run is False:
-            seen_fail_pattern = False
-            for fail_pattern in self.fail_patterns:
-                # Return error message with the following 4 lines.
-                grep_cmd = "grep -m 1 -A 4 -E \'" + fail_pattern + "\' " + self.log
-                (status, rslt) = subprocess.getstatusoutput(grep_cmd)
-                if rslt:
-                    msg = "```\n{}\n```\n".format(rslt)
-                    self.fail_msg += msg
-                    log.log(VERBOSE, msg)
-                    self.status = 'F'
-                    seen_fail_pattern = True
-                    break
+    def _test_passed(self):
+        '''Return True if the job passed, False otherwise
 
-            # If fail patterns were not encountered, but the job returned with non-zero exit code
-            # for whatever reason, then show the last 10 lines of the log as the failure message,
-            # which might help with the debug.
-            if self.process.returncode != 0 and not seen_fail_pattern:
-                msg = "Last 10 lines of the log:<br>\n"
+        This is called by poll() just after the job finishes.
+
+        '''
+        if self.dry_run:
+            return True
+
+        seen_fail_pattern = False
+        for fail_pattern in self.fail_patterns:
+            # Return error message with the following 4 lines.
+            grep_cmd = "grep -m 1 -A 4 -E \'" + fail_pattern + "\' " + self.log
+            (status, rslt) = subprocess.getstatusoutput(grep_cmd)
+            if rslt:
+                msg = "```\n{}\n```\n".format(rslt)
                 self.fail_msg += msg
                 log.log(VERBOSE, msg)
-                get_fail_msg_cmd = "tail -n 10 " + self.log
-                msg = run_cmd(get_fail_msg_cmd)
-                msg = "```\n{}\n```\n".format(msg)
+                seen_fail_pattern = True
+                break
+
+        if seen_fail_pattern:
+            return False
+
+        # If no fail patterns were seen, but the job returned with non-zero
+        # exit code for whatever reason, then show the last 10 lines of the log
+        # as the failure message, which might help with the debug.
+        if self.process.returncode != 0:
+            msg = "Last 10 lines of the log:<br>\n"
+            self.fail_msg += msg
+            log.log(VERBOSE, msg)
+            get_fail_msg_cmd = "tail -n 10 " + self.log
+            msg = run_cmd(get_fail_msg_cmd)
+            msg = "```\n{}\n```\n".format(msg)
+            self.fail_msg += msg
+            log.log(VERBOSE, msg)
+            return False
+
+        # If we get here, we've not seen anything explicitly wrong, but we
+        # might have "pass patterns": patterns that must occur in the log for
+        # the run to be considered successful.
+        for pass_pattern in self.pass_patterns:
+            grep_cmd = "grep -c -m 1 -E \'" + pass_pattern + "\' " + self.log
+            (status, rslt) = subprocess.getstatusoutput(grep_cmd)
+            if rslt == "0":
+                msg = "Pass pattern {!r} not found.<br>\n".format(pass_pattern)
                 self.fail_msg += msg
                 log.log(VERBOSE, msg)
-                self.status = "F"
+                return False
 
-            # Return if status is fail - no need to look for pass patterns.
-            if self.status == 'F':
-                return
+        return True
 
-            # If fail patterns were not found, ensure pass patterns indeed were.
-            for pass_pattern in self.pass_patterns:
-                grep_cmd = "grep -c -m 1 -E \'" + pass_pattern + "\' " + self.log
-                (status, rslt) = subprocess.getstatusoutput(grep_cmd)
-                if rslt == "0":
-                    msg = "Pass pattern \"{}\" not found.<br>\n".format(
-                        pass_pattern)
-                    self.fail_msg += msg
-                    log.log(VERBOSE, msg)
-                    self.status = 'F'
-                    break
+    def _link_odir(self, status):
+        old_link = self.sim_cfg.links['D'] + "/" + self.odir_ln
+        new_link = self.sim_cfg.links[status] + "/" + self.odir_ln
+        cmd = "ln -s " + self.odir + " " + new_link + "; "
+        cmd += "rm " + old_link
+        if os.system(cmd):
+            log.error("Cmd \"%s\" could not be run", cmd)
 
-    def link_odir(self):
-        if self.status == '.':
-            log.error("Method unexpectedly called!")
-        else:
-            old_link = self.sim_cfg.links['D'] + "/" + self.odir_ln
-            new_link = self.sim_cfg.links[self.status] + "/" + self.odir_ln
-            cmd = "ln -s " + self.odir + " " + new_link + "; "
-            cmd += "rm " + old_link
-            if os.system(cmd):
-                log.error("Cmd \"%s\" could not be run", cmd)
+    def _on_finish(self, status):
+        '''Called when the process finishes or is killed'''
+        assert status in ['P', 'F', 'K']
+        if status in ['P', 'F']:
+            self._link_odir(status)
 
-    def get_status(self):
-        if self.status != "D":
-            return
-        if self.process.poll() is not None:
-            self.log_fd.close()
-            self.set_status()
+    def poll(self):
+        '''Check status of the running process
 
-            log.debug("Item %s has completed execution: %s", self.name,
-                      self.status)
-            Deploy.dispatch_counter -= 1
-            self.link_odir()
-            del self.process
+        This returns 'D', 'P' or 'F'. If 'D', the job is still running. If 'P',
+        the job finished successfully. If 'F', the job finished with an error.
+
+        This function must only be called after running self.dispatch_cmd() and
+        must not be called again once it has returned 'P' or 'F'.
+
+        '''
+        assert self.process is not None
+        if self.process.poll() is None:
+            return 'D'
+        self.log_fd.close()
+
+        status = 'P' if self._test_passed() else 'F'
+
+        log.debug("Item %s has completed execution: %s", self.name, status)
+        Deploy.dispatch_counter -= 1
+        self._on_finish(status)
+
+        del self.process
+        self.process = None
+
+        return status
 
     def kill(self):
-        '''Kill running processes.
+        '''Kill the running process.
+
+        This must be called between dispatching and reaping the process (the
+        same window as poll()).
+
         '''
-        if self.status == "D" and self.process.poll() is None:
-            self.kill_remote_job()
+        assert self.process is not None
+        self.kill_remote_job()
 
-            # Try to kill the running process. Send SIGTERM first, wait a bit,
-            # and then send SIGKILL if it didn't work.
-            self.process.terminate()
-            try:
-                self.process.wait(timeout=2)
-            except subprocess.TimeoutExpired:
-                self.process.kill()
+        # Try to kill the running process. Send SIGTERM first, wait a bit,
+        # and then send SIGKILL if it didn't work.
+        self.process.terminate()
+        try:
+            self.process.wait(timeout=2)
+        except subprocess.TimeoutExpired:
+            self.process.kill()
 
-            if self.log_fd:
-                self.log_fd.close()
-            self.status = "K"
+        if self.log_fd:
+            self.log_fd.close()
+        self.process = None
+        self._on_finish('K')
 
     def kill_remote_job(self):
         '''
@@ -630,11 +659,9 @@
         # Set identifier.
         self.identifier = self.sim_cfg.name + ":" + self.run_dir_name
 
-    def get_status(self):
-        '''Override base class get_status implementation for additional post-status
-        actions.'''
-        super().get_status()
-        if self.status not in ["D", "P"]:
+    def _on_finish(self, status):
+        super()._on_finish(status)
+        if status != 'P':
             # Delete the coverage data if available.
             if os.path.exists(self.cov_db_test_dir):
                 log.log(VERBOSE, "Deleting coverage data of failing test:\n%s",
@@ -829,28 +856,31 @@
 
         CovReport.items.append(self)
 
-    def get_status(self):
-        super().get_status()
-        # Once passed, extract the cov results summary from the dashboard.
-        if self.status == "P":
-            results, self.cov_total, ex_msg = get_cov_summary_table(
-                self.cov_report_txt, self.sim_cfg.tool)
+    def _test_passed(self):
+        # Add an extra check to Deploy._test_passed where we extract the
+        # coverage results summary for the dashboard (and fail the job if
+        # something goes wrong).
+        if not super()._test_passed():
+            return False
 
-            if not ex_msg:
-                # Succeeded in obtaining the coverage data.
-                colalign = (("center", ) * len(results[0]))
-                self.cov_results = tabulate(results,
-                                            headers="firstrow",
-                                            tablefmt="pipe",
-                                            colalign=colalign)
-            else:
-                self.fail_msg += ex_msg
-                log.error(ex_msg)
-                self.status = "F"
+        results, self.cov_total, ex_msg = get_cov_summary_table(
+            self.cov_report_txt, self.sim_cfg.tool)
 
-        if self.status == "P":
-            # Delete the cov report - not needed.
-            os.system("rm -rf " + self.log)
+        if ex_msg:
+            self.fail_msg += ex_msg
+            log.error(ex_msg)
+            return False
+
+        # Succeeded in obtaining the coverage data.
+        colalign = (("center", ) * len(results[0]))
+        self.cov_results = tabulate(results,
+                                    headers="firstrow",
+                                    tablefmt="pipe",
+                                    colalign=colalign)
+
+        # Delete the cov report - not needed.
+        os.system("rm -rf " + self.log)
+        return True
 
 
 class CovAnalyze(Deploy):
diff --git a/util/dvsim/FlowCfg.py b/util/dvsim/FlowCfg.py
index d02d3f4..d954465 100644
--- a/util/dvsim/FlowCfg.py
+++ b/util/dvsim/FlowCfg.py
@@ -380,24 +380,34 @@
             self._create_deploy_objects()
 
     def deploy_objects(self):
-        '''Public facing API for deploying all available objects.'''
-        Scheduler.run(self.deploy)
+        '''Public facing API for deploying all available objects.
 
-    def _gen_results(self, fmt="md"):
+        Runs each job and returns a map from item to status.
+
+        '''
+        return Scheduler.run(self.deploy)
+
+    def _gen_results(self, results):
         '''
         The function is called after the regression has completed. It collates the
         status of all run targets and generates a dict. It parses the testplan and
         maps the generated result to the testplan entries to generate a final table
         (list). It also prints the full list of failures for debug / triage. The
         final result is in markdown format.
+
+        results should be a dictionary mapping deployed item to result.
+
         '''
         return
 
-    def gen_results(self):
+    def gen_results(self, results):
         '''Public facing API for _gen_results().
+
+        results should be a dictionary mapping deployed item to result.
+
         '''
         for item in self.cfgs:
-            result = item._gen_results()
+            result = item._gen_results(results)
             log.info("[results]: [%s]:\n%s\n", item.name, result)
             log.info("[scratch_path]: [%s] [%s]", item.name, item.scratch_path)
             self.errors_seen |= item.errors_seen
diff --git a/util/dvsim/FpvCfg.py b/util/dvsim/FpvCfg.py
index 1add68a..c191abf 100644
--- a/util/dvsim/FpvCfg.py
+++ b/util/dvsim/FpvCfg.py
@@ -183,7 +183,7 @@
 
         return self.results_summary_md
 
-    def _gen_results(self):
+    def _gen_results(self, results):
         # This function is called after the regression and looks for
         # results.hjson file with aggregated results from the FPV run.
         # The hjson file is required to follow this format:
diff --git a/util/dvsim/LintCfg.py b/util/dvsim/LintCfg.py
index 7854bb4..f1f460c 100644
--- a/util/dvsim/LintCfg.py
+++ b/util/dvsim/LintCfg.py
@@ -87,7 +87,7 @@
         # Return only the tables
         return self.results_summary_md
 
-    def _gen_results(self):
+    def _gen_results(self, results):
         # '''
         # The function is called after the regression has completed. It looks
         # for a regr_results.hjson file with aggregated results from the lint run.
diff --git a/util/dvsim/Scheduler.py b/util/dvsim/Scheduler.py
index af586c1..51c893c 100644
--- a/util/dvsim/Scheduler.py
+++ b/util/dvsim/Scheduler.py
@@ -5,50 +5,137 @@
 
 from collections import OrderedDict
 import logging as log
-import time
+from signal import SIGINT, signal
+import threading
 
 from utils import VERBOSE
-from Deploy import Deploy
+from Deploy import Deploy, DeployError
 from Timer import Timer
 
 
-class TargetStatus:
-    '''An object to track the status of a run for a given target'''
+class TargetScheduler:
+    '''A scheduler for the jobs of a given target'''
     def __init__(self):
-        self.counters = OrderedDict()
-        self.counters['Q'] = 0
-        self.counters['D'] = 0
-        self.counters['P'] = 0
-        self.counters['F'] = 0
-        self.counters['K'] = 0
-        self.counters['T'] = 0
+        # Sets of items, split up by their current state. The sets are disjoint
+        # and their union equals the keys of self.item_to_status.
+        self._queued = set()
+        self._running = set()
+        self._passed = set()
+        self._failed = set()
+        self._killed = set()
 
-        self.done = True
+        # A map from the Deploy objects tracked by this class to their current
+        # status. This status is 'Q', 'D', 'P', 'F' or 'K', corresponding to
+        # membership in the dicts above.
+        self.item_to_status = {}
 
-        self.by_item = OrderedDict()
+        # A flag set by check_status() when all jobs are done.
+        self._done = True
+
+    def check_status(self):
+        '''Return (was_done, is_done, has_started)'''
+        was_done = self._done
+        self._done = not (self._queued or self._running)
+        return (was_done,
+                self._done,
+                (self._running or self._passed or
+                 self._failed or self._killed))
+
+    def print_counters(self, tgt_name, hms):
+        total_cnt = len(self.item_to_status)
+        width = len(str(total_cnt))
+
+        field_fmt = '{{:0{}d}}'.format(width)
+        msg_fmt = ('[Q: {0}, D: {0}, P: {0}, F: {0}, K: {0}, T: {0}]'
+                   .format(field_fmt))
+        msg = msg_fmt.format(len(self._queued),
+                             len(self._running),
+                             len(self._passed),
+                             len(self._failed),
+                             len(self._killed),
+                             total_cnt)
+        log.info("[%s]: [%s]: %s", hms, tgt_name, msg)
 
     def add_item(self, item):
-        self.by_item[item] = 'Q'
-        self.counters['T'] += 1
-        self.counters['Q'] += 1
-        self.done = False
+        assert item not in self.item_to_status
+        self.item_to_status[item] = 'Q'
+        self._queued.add(item)
+        self._done = False
 
-    def update_item(self, item):
-        '''Update the tracked status of the item. Return true on change.'''
-        old_status = self.by_item[item]
-        if item.status == old_status:
-            return False
+    def _kill_item(self, item):
+        '''Kill a running item'''
+        self._running.remove(item)
+        item.kill()
+        self._killed.add(item)
+        self.item_to_status[item] = 'K'
 
-        self.by_item[item] = item.status
+    def dispatch(self, items):
+        '''Start (dispatch) each item in the list'''
+        for item in items:
+            assert item in self._queued
+            self._queued.remove(item)
+            self._running.add(item)
+            self.item_to_status[item] = 'D'
+            try:
+                item.dispatch_cmd()
+            except DeployError as err:
+                log.error('{}'.format(err))
+                self._kill_item(item)
 
-        self.counters[old_status] -= 1
-        self.counters[item.status] += 1
-        return True
+    def kill(self):
+        '''Kill any running items and cancel any that are waiting'''
 
-    def check_if_done(self):
-        '''Update done flag to match counters. Returns done flag.'''
-        self.done = (self.counters['Q'] == 0) and (self.counters['D'] == 0)
-        return self.done
+        # Cancel any waiting items. We take a copy of self._queued to avoid
+        # iterating over the set as we modify it.
+        for item in [item for item in self._queued]:
+            self.cancel(item)
+
+        # Kill any running items. Again, take a copy of the set to avoid
+        # modifying it while iterating over it.
+        for item in [item for item in self._running]:
+            self._kill_item(item)
+
+    def poll(self, hms):
+        '''Check for running items that have finished
+
+        Returns True if something changed.
+
+        '''
+        to_pass = []
+        to_fail = []
+
+        for item in self._running:
+            status = item.poll()
+            assert status in ['D', 'P', 'F']
+            if status == 'D':
+                # Still running
+                continue
+            elif status == 'P':
+                log.log(VERBOSE, "[%s]: [%s]: [status] [%s: P]",
+                        hms, item.target, item.identifier)
+                to_pass.append(item)
+            else:
+                log.error("[%s]: [%s]: [status] [%s: F]",
+                          hms, item.target, item.identifier)
+                to_fail.append(item)
+
+        for item in to_pass:
+            self._running.remove(item)
+            self._passed.add(item)
+            self.item_to_status[item] = 'P'
+        for item in to_fail:
+            self._running.remove(item)
+            self._failed.add(item)
+            self.item_to_status[item] = 'F'
+
+        return to_pass or to_fail
+
+    def cancel(self, item):
+        '''Cancel an item that is currently queued'''
+        assert item in self._queued
+        self._queued.remove(item)
+        self._killed.add(item)
+        self.item_to_status[item] = 'K'
 
 
 class Scheduler:
@@ -67,8 +154,8 @@
         self.dispatched_items = []
 
         # An ordered dictionary keyed by target ('build', 'run' or similar).
-        # The value for each target is a TargetStatus object.
-        self.status = OrderedDict()
+        # The value for each target is a TargetScheduler object.
+        self.schedulers = OrderedDict()
 
         for item in items:
             self.add_item(item)
@@ -77,39 +164,27 @@
         '''Add a queued item'''
         self.queued_items.append(item)
 
-        # Like setdefault, but doesn't construct a TargetStatus object
+        # Like setdefault, but doesn't construct a TargetScheduler object
         # unnecessarily.
-        tgt_status = self.status.get(item.target)
-        if tgt_status is None:
-            tgt_status = TargetStatus()
-            self.status[item.target] = tgt_status
+        tgt_scheduler = self.schedulers.get(item.target)
+        if tgt_scheduler is None:
+            tgt_scheduler = TargetScheduler()
+            self.schedulers[item.target] = tgt_scheduler
 
-        tgt_status.add_item(item)
+        tgt_scheduler.add_item(item)
 
-    def update_status(self):
-        '''Update status of dispatched items. Returns true on a change'''
+    def kill(self):
+        '''Kill any running items and cancel any that are waiting'''
+        for scheduler in self.schedulers.values():
+            scheduler.kill()
+
+    def poll(self):
+        '''Update status of running items. Returns true on a change'''
         status_changed = False
         hms = self.timer.hms()
-
-        # Get status of dispatched items
-        for item in self.dispatched_items:
-            if item.status == 'D':
-                item.get_status()
-
-            tgt_status = self.status[item.target]
-            if not tgt_status.update_item(item):
-                continue
-
-            status_changed = True
-            if item.status == "D":
-                continue
-
-            if item.status != "P":
-                log.error("[%s]: [%s]: [status] [%s: %s]",
-                          hms, item.target, item.identifier, item.status)
-            else:
-                log.log(VERBOSE, "[%s]: [%s]: [status] [%s: %s]",
-                        hms, item.target, item.identifier, item.status)
+        for scheduler in self.schedulers.values():
+            if scheduler.poll(hms):
+                status_changed = True
 
         return status_changed
 
@@ -124,11 +199,12 @@
         # We only dispatch things for one target at once.
         cur_tgt = None
         for item in self.dispatched_items:
-            if item.status == 'D':
+            if item.process is not None:
                 cur_tgt = item.target
                 break
 
         to_dispatch = []
+
         while len(to_dispatch) < num_slots and self.queued_items:
             next_item = self.queued_items[0]
 
@@ -146,34 +222,34 @@
             # earlier in the list than we do.
             has_failed_dep = False
             for dep in next_item.dependencies:
-                assert dep.status in ['P', 'F', 'K']
-                if dep.status in ['F', 'K']:
+                dep_status = self.schedulers[dep.target].item_to_status[dep]
+                assert dep_status in ['P', 'F', 'K']
+                if dep_status in ['F', 'K']:
                     has_failed_dep = True
                     break
 
             # If has_failed_dep then at least one of the dependencies has been
             # cancelled or has run and failed. Give up on this item too.
             if has_failed_dep:
-                next_item.status = 'K'
+                self.schedulers[cur_tgt].cancel(next_item)
                 continue
 
             to_dispatch.append(next_item)
 
+        if not to_dispatch:
+            return
+
+        assert cur_tgt is not None
+
         self.dispatched_items.extend(to_dispatch)
+        self.schedulers[cur_tgt].dispatch(to_dispatch)
 
-        tgt_names = OrderedDict()
-        for item in to_dispatch:
-            if item.status is None:
-                tgt_names.setdefault(item.target, []).append(item.identifier)
-                item.dispatch_cmd()
-
-        hms = self.timer.hms()
-        for target, names in tgt_names.items():
-            log.log(VERBOSE, "[%s]: [%s]: [dispatch]:\n%s",
-                    hms, target, ", ".join(names))
+        log.log(VERBOSE, "[%s]: [%s]: [dispatch]:\n%s",
+                self.timer.hms(), cur_tgt,
+                ", ".join(item.identifier for item in to_dispatch))
 
     def check_if_done(self, print_status):
-        '''Update the "done" flag for each TargetStatus object
+        '''Check whether we are finished.
 
         If print_status or we've reached a time interval then print current
         status for those that weren't known to be finished already.
@@ -186,30 +262,25 @@
 
         all_done = True
         printed_something = False
-        for target, tgt_status in self.status.items():
-            was_done = tgt_status.done
-            tgt_status.check_if_done()
-            is_done = tgt_status.done
-            all_queued = tgt_status.counters['Q'] == tgt_status.counters['T']
-
+        for target, scheduler in self.schedulers.items():
+            was_done, is_done, has_started = scheduler.check_status()
             all_done &= is_done
 
             should_print = (print_status and
                             not (was_done and is_done) and
-                            not (printed_something and all_queued))
+                            (has_started or not printed_something))
             if should_print:
-                stats = tgt_status.counters
-                width = "0{}d".format(len(str(stats["T"])))
-                msg = "["
-                for s in stats.keys():
-                    msg += s + ": {:{}}, ".format(stats[s], width)
-                msg = msg[:-2] + "]"
+                scheduler.print_counters(target, hms)
                 printed_something = True
-                log.info("[%s]: [%s]: %s", hms, target, msg)
+
         return all_done
 
     def run(self):
-        '''Run all items'''
+        '''Run all items
+
+        Returns a map from item to status.
+
+        '''
 
         # Print the legend just once (at the start of the first run)
         if Scheduler.print_legend:
@@ -217,16 +288,58 @@
                      "P: passed, F: failed, K: killed, T: total]")
             Scheduler.print_legend = False
 
-        first_time = True
-        while True:
-            changed = self.update_status()
-            self.dispatch()
-            if self.check_if_done(changed or first_time):
-                break
-            first_time = False
-            time.sleep(1)
+        # Catch one SIGINT and tell the scheduler to quit. On a second, die.
+        stop_now = threading.Event()
+        old_handler = None
+
+        def on_sigint(signal_received, frame):
+            log.info('Received SIGINT. Exiting gracefully. '
+                     'Send another to force immediate quit '
+                     '(but you may need to manually kill child processes)')
+
+            # Restore old handler to catch any second signal
+            assert old_handler is not None
+            signal(SIGINT, old_handler)
+
+            stop_now.set()
+
+        old_handler = signal(SIGINT, on_sigint)
+
+        try:
+            first_time = True
+            while True:
+                if stop_now.is_set():
+                    # We've had an interrupt. Kill any jobs that are running,
+                    # then exit.
+                    self.kill()
+                    exit(1)
+
+                changed = self.poll()
+                self.dispatch()
+                if self.check_if_done(changed or first_time):
+                    break
+                first_time = False
+
+                # This is essentially sleep(1) to wait a second between each
+                # polling loop. But we do it with a bounded wait on stop_now so
+                # that we jump back to the polling loop immediately on a
+                # signal.
+                stop_now.wait(timeout=1)
+        finally:
+            signal(SIGINT, old_handler)
+
+        # We got to the end without anything exploding. Extract and return
+        # results from the schedulers.
+        results = {}
+        for scheduler in self.schedulers.values():
+            results.update(scheduler.item_to_status)
+        return results
 
 
 def run(items):
-    '''Run the given items'''
-    Scheduler(items).run()
+    '''Run the given items.
+
+    Returns a map from item to status.
+
+    '''
+    return Scheduler(items).run()
diff --git a/util/dvsim/SimCfg.py b/util/dvsim/SimCfg.py
index 07b0ddc..77a36d7 100644
--- a/util/dvsim/SimCfg.py
+++ b/util/dvsim/SimCfg.py
@@ -52,24 +52,25 @@
     self.fail_msgs is a list of error messages, one per failing run.
 
     '''
-    def __init__(self, items):
+    def __init__(self, items, results):
         self.table = []
         self.fail_msgs = []
 
         self._name_to_row = {}
         for item in items:
-            self._add_item(item)
+            self._add_item(item, results)
 
-    def _add_item(self, item):
+    def _add_item(self, item, results):
         '''Recursively add a single item to the table of results'''
-        if item.status == "F":
+        status = results[item]
+        if status == "F":
             self.fail_msgs.append(item.fail_msg)
 
         # Runs get added to the table directly
         if item.target == "run":
-            self._add_run(item)
+            self._add_run(item, status)
 
-    def _add_run(self, item):
+    def _add_run(self, item, status):
         '''Add an entry to table for item'''
         row = self._name_to_row.get(item.name)
         if row is None:
@@ -77,7 +78,7 @@
             self.table.append(row)
             self._name_to_row[item.name] = row
 
-        if item.status == 'P':
+        if status == 'P':
             row.passing += 1
         row.total += 1
 
@@ -284,13 +285,6 @@
 
         return self.waves
 
-    def kill(self):
-        '''kill running processes and jobs gracefully
-        '''
-        super().kill()
-        for item in self.cov_deploys:
-            item.kill()
-
     # Purge the output directories. This operates on self.
     def _purge(self):
         if self.scratch_path:
@@ -574,11 +568,13 @@
         '''This is a public facing API, so we use "self.cfgs" instead of self.
         '''
         # Invoke the base class method to run the regression.
-        super().deploy_objects()
+        results = super().deploy_objects()
 
         # If coverage is enabled, then deploy the coverage tasks.
         if self.cov:
-            Scheduler.run(self.cov_deploys)
+            results.update(Scheduler.run(self.cov_deploys))
+
+        return results
 
     def _cov_analyze(self):
         '''Use the last regression coverage data to open up the GUI tool to
@@ -616,7 +612,7 @@
         for item in self.cfgs:
             item._cov_unr()
 
-    def _gen_results(self):
+    def _gen_results(self, run_results):
         '''
         The function is called after the regression has completed. It collates the
         status of all run targets and generates a dict. It parses the testplan and
@@ -630,7 +626,7 @@
         if self.cov:
             deployed_items.append(self.cov_merge_deploy)
 
-        results = Results(deployed_items)
+        results = Results(deployed_items, run_results)
 
         # Set a flag if anything failed
         if results.fail_msgs:
@@ -670,7 +666,8 @@
 
             # Append coverage results of coverage was enabled.
             if self.cov:
-                if self.cov_report_deploy.status == "P":
+                report_status = run_results[self.cov_report_deploy]
+                if report_status == "P":
                     results_str += "\n## Coverage Results\n"
                     # Link the dashboard page using "cov_report_page" value.
                     if hasattr(self, "cov_report_page"):
diff --git a/util/dvsim/dvsim.py b/util/dvsim/dvsim.py
index 869f23b..7f2fdea 100755
--- a/util/dvsim/dvsim.py
+++ b/util/dvsim/dvsim.py
@@ -28,7 +28,6 @@
 import subprocess
 import sys
 import textwrap
-from signal import SIGINT, signal
 
 import Deploy
 from Scheduler import Scheduler
@@ -186,14 +185,6 @@
     return proj_root_src, proj_root_dest
 
 
-def sigint_handler(signal_received, frame):
-    # Kill processes and background jobs.
-    log.debug('SIGINT or CTRL-C detected. Exiting gracefully')
-    cfg.kill()
-    log.info('Exit due to SIGINT or CTRL-C ')
-    exit(1)
-
-
 def copy_repo(src, dest, dry_run):
     '''Copy over the repo to a new location.
 
@@ -647,9 +638,6 @@
     global cfg
     cfg = make_cfg(args.cfg, args, proj_root)
 
-    # Handle Ctrl-C exit.
-    signal(SIGINT, sigint_handler)
-
     # List items available for run if --list switch is passed, and exit.
     if args.list is not None:
         cfg.print_list()
@@ -677,10 +665,10 @@
     if args.items != []:
         # Create deploy objects.
         cfg.create_deploy_objects()
-        cfg.deploy_objects()
+        results = cfg.deploy_objects()
 
         # Generate results.
-        cfg.gen_results()
+        cfg.gen_results(results)
 
         # Publish results
         if args.publish: