blob: deb8e1a363e7c8268ce0bddd323a28a8b55a87cc [file] [log] [blame]
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
r"""Efficiently bisect FPGA regressions with the bitstream cache.
This tool bisects while minimizing the number of calls to the slow command.
Under the hood, it actually runs up to two git bisects. The first bisect only
tests commits that have cached bitstreams. The optional second bisect narrows
down ambiguous results. (For more information on bisecting, see `man 1
git-bisect` or <https://git-scm.com/docs/git-bisect>.)
In the first phase, the tool classifies commits as good/bad/skip with the
user-provided `--fast-command`. In the second phase, it uses `--slow-command`.
These commands classify the current commit via their exit status. Exiting with 0
indicates that the commit is good. Exiting with 125 indicates the commit cannot
be tested. Exiting with any other status indicates that the commit is bad.
Run bitstream_bisect.py with Bazel like so:
./bazelisk.sh run //util/fpga:bitstream_bisect -- \
--good HEAD~30 \
--bad HEAD \
--fast-command "./bazelisk.sh test //sw/device/tests:uart_smoketest_fpga_cw310_rom" \
--slow-command "./bazelisk.sh test --define bitstream=vivado \
//sw/device/tests:uart_smoketest_fpga_cw310_rom"
"""
import abc
import enum
import os
import re
import shlex
import string
import subprocess
from typing import Dict, List, Optional, Set, Tuple
import rich
import rich.console
import typer
from rules.scripts.bitstreams_workspace import BitstreamCache
class CommitHash(str):
"""A string representation of a Git commit hash.
The given string must contain only hex digits. It is canonicalized to
lowercase, but not canonicalized relative to the current git repository. As
such, two CommitHash objects may fail to compare equal even though git would
say they refer to the same commit.
"""
def __new__(cls, s):
t = super().__new__(cls, s).lower()
if len(t) == 0 or len(set(t) - set(string.hexdigits)) > 0:
raise Exception("Invalid CommitHash input: '{}'".format(t))
return t
class CommitJudgment(enum.Enum):
GOOD = enum.auto()
BAD = enum.auto()
SKIP = enum.auto()
class BisectLog:
"""A container and parser for the results of `git bisect log`.
"""
class ParserBug(Exception):
pass
def __init__(self, log: str):
"""Parse `log` and initialize the object."""
lines = [s.rstrip() for s in log.splitlines()]
self.judgments = BisectLog._extract_judgments(lines)
self.candidates = BisectLog._extract_candidates(lines)
@staticmethod
def _extract_judgments(
lines: List[str]) -> List[Tuple[CommitHash, CommitJudgment]]:
out = []
for line in lines:
if line == '' or line.startswith("#"):
continue
if line == 'git bisect start':
continue
match = re.match("^git bisect (good|bad|skip) ([0-9a-f]+)$", line)
if match is None:
raise BisectLog.ParserBug(
"Failed to parse line: '{}'".format(line))
assert len(match.groups()) == 2
judgment, commit = match.groups()
if judgment == 'good':
out.append((CommitHash(commit), CommitJudgment.GOOD))
elif judgment == 'bad':
out.append((CommitHash(commit), CommitJudgment.BAD))
elif judgment == 'skip':
out.append((CommitHash(commit), CommitJudgment.SKIP))
else:
raise BisectLog.ParserBug("Unreachable")
return out
@staticmethod
def _extract_candidates(lines: List[str]) -> List[CommitHash]:
# Scan through the log to find a line indicating that there is a single
# first bad commit.
re_first_bad = re.compile(r"^# first bad commit: \[([0-9a-f]+)\] .*$")
for i, line in enumerate(lines):
match = re_first_bad.match(line)
if match is None:
continue
assert len(match.groups()) == 1
first_bad = match.group(1)
return [CommitHash(first_bad)]
# Scan through the log to find a sequence of possible first bad commits.
LOG_LINE_ONLY_SKIPPED = "# only skipped commits left to test"
candidate_lines = []
for i, line in reversed(list(enumerate(lines))):
# When only skipped commits remain, expect a sequence of "possible
# first bad commit" lines.
if line == LOG_LINE_ONLY_SKIPPED:
candidate_lines = lines[i + 1:]
break
re_possible_first_bad = re.compile(
r"^# possible first bad commit: \[([0-9a-f]+)\] .*$")
candidates = []
for line in candidate_lines:
match = re_possible_first_bad.match(line)
if match is None:
continue
assert len(match.groups()) == 1
commit = match.group(1)
candidates.append(CommitHash(commit))
return candidates
class GitControllerABC(abc.ABC):
"""An abstract Git interface that will never run the system `git`."""
@abc.abstractmethod
def _git(self, *args: str) -> Tuple[int, str, str]:
"""Runs git with `args`. Returns (returncode, stdout, stderr)."""
pass
@abc.abstractmethod
def _git_no_capture_output(self, *args: str) -> int:
"""Like `_git()`, but doesn't capture stdout/stderr."""
pass
def _git_checked(self, *args: str) -> Tuple[str, str]:
"""Like _git(), but raises an exception when the returncode != 0."""
returncode, stdout, stderr = self._git(*args)
if returncode != 0:
lines = [
"Git exited with {}".format(returncode),
"---stdout---",
stdout,
"---stderr---",
stderr,
]
raise Exception("\n".join(lines))
return (stdout, stderr)
def rev_parse(self, commitish: str) -> CommitHash:
stdout, _ = self._git_checked("rev-parse", commitish)
return CommitHash(stdout.rstrip())
def bisect_start(self):
self._git_checked("bisect", "start")
def bisect_reset(self):
self._git_checked("bisect", "reset")
def bisect_good(self, rev: CommitHash):
self._git_checked("bisect", "good", rev)
def bisect_bad(self, rev: CommitHash):
self._git_checked("bisect", "bad", rev)
def bisect_skip(self, *revs: CommitHash):
self._git_checked("bisect", "skip", *revs)
def bisect_run(self, *args: str) -> BisectLog:
"""Executes `git bisect run` without capturing stdout/stderr."""
# Git's exit status is non-zero when bisect results are ambiguous.
_ = self._git_no_capture_output("bisect", "run", *args)
return self.bisect_log()
def bisect_log(self) -> BisectLog:
stdout, _ = self._git_checked("bisect", "log")
return BisectLog(stdout)
def bisect_view(self) -> List[Tuple[CommitHash, str]]:
stdout, _ = self._git_checked("bisect", "view", "--oneline",
"--no-abbrev-commit")
out = []
for line in stdout.splitlines():
commit, tail = line.split(" ", 1)
out.append((CommitHash(commit), tail))
return out
class GitController(GitControllerABC):
"""A concrete git controller that actually executes git commands."""
def __init__(self, git_binary: str, verbose: bool):
self._git_binary = git_binary
self._verbose = verbose
self._console = rich.console.Console()
self._env: Dict[str, str] = dict(os.environ)
# Prevent `git bisect view` from launching the `gitk` GUI.
self._env.pop("DISPLAY", None)
def _git(self, *args: str) -> Tuple[int, str, str]:
command = [self._git_binary] + list(args)
proc = subprocess.run(command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=self._env,
encoding="utf-8")
if self._verbose or proc.returncode != 0:
print()
command_str = " ".join(command)
rich.print(
"Command [green]{}[/green] exited with status {}".format(
rich.markup.escape(command_str), proc.returncode))
rich.print(
rich.panel.Panel(rich.markup.escape(proc.stdout),
title="git stdout"))
rich.print(
rich.panel.Panel(rich.markup.escape(proc.stderr),
title="git stderr"))
return (proc.returncode, proc.stdout, proc.stderr)
def _git_no_capture_output(self, *args: str) -> int:
command = [self._git_binary] + list(args)
command_str = " ".join(command)
command_str = rich.markup.escape(command_str)
self._console.rule(
"Streaming output from [bold]{}".format(command_str))
proc = subprocess.run(["git"] + list(args), env=self._env)
self._console.rule("Git exited with status {}".format(proc.returncode))
self._console.print()
return proc.returncode
class BisectSession:
def __init__(self,
git: GitControllerABC,
cache_keys: Set[CommitHash],
visualize: bool = False):
self._git = git
self._cache_keys: Set[CommitHash] = cache_keys
self._console = rich.console.Console()
self._visualize = visualize
def run(self,
good_commit: CommitHash,
bad_commit: CommitHash,
fast_command: List[str],
slow_command: Optional[List[str]] = None) -> BisectLog:
"""Run an unattended bisect session in one or two phases.
This method is conceptually equivalent to `git bisect run`. Unlike plain
`git bisect`, it runs in two phases. The first phase only operates on
cached bitstreams. This will only get us so far, so it optionally can
fall back to a slower command, e.g. one that builds the bitstreams it
tests.
Args:
good_commit:
Commit where `fast_command` and `slow_command` are known to succeed.
bad_commit:
Commit where `fast_command` and `slow_command` are known to fail.
fast_command:
A list of strings that represent a git-bisect script and args. These
are passed to `git bisect run` in the first phase.
slow_command:
Just like `fast_command`, but for the second phase. If this arg is
None, we skip the second phase.
Returns:
Parsed output from `git bisect log`, run after the final bisect phase.
"""
self._git.bisect_start()
self._git.bisect_good(good_commit)
self._git.bisect_bad(bad_commit)
# Skip commits that aren't in the bitstream cache.
uncached_commits = [c for c, _ in self._git.bisect_view() if c not in self._cache_keys]
if uncached_commits != []:
self._git.bisect_skip(*uncached_commits)
self._maybe_visualize("Only testing commits with cached bitstreams")
bisect_log = self._git.bisect_run(*fast_command)
# Skip the second phase when results are unambiguous.
if len(bisect_log.candidates) == 1:
self._maybe_visualize("Final results")
self._git.bisect_reset()
return bisect_log
# Prepare a new git bisect session where we will run the slow script
# without the benefit of the bitstream cache. Replay all the "good" and
# "bad" commands, but none of the "skip" commands.
if slow_command is None:
self._git.bisect_reset()
return bisect_log
self._git.bisect_reset()
self._git.bisect_start()
# Replay only "good" and "bad" decisions from the previous phase. It's
# tempting to unconditionally skip cached commits, but it's better to
# simply reuse judgments from the previous phase. It's possible that the
# user-supplied `fast_command` chose to skip some cached commits for
# reasons outside the scope of this tool, and we now need to test them
# in the second phase.
for commit, judgment in bisect_log.judgments:
if judgment == CommitJudgment.GOOD:
self._git.bisect_good(commit)
elif judgment == CommitJudgment.BAD:
self._git.bisect_bad(commit)
self._maybe_visualize("Narrowing down ambiguous results")
bisect_log = self._git.bisect_run(*slow_command)
self._maybe_visualize("Final results")
self._git.bisect_reset()
return bisect_log
def _maybe_visualize(self, label: str):
"""Prints a table summarizing the current range of commits."""
if not self._visualize:
return
table = rich.table.Table(
title="Bitstream Bisect Status: [bold]{}".format(label))
table.add_column("Commit")
table.add_column("Description")
table.add_column("Cached")
table.add_column("Judgment")
bisect_log = self._git.bisect_log()
judgment_dict: Dict[CommitHash,
CommitJudgment] = dict(bisect_log.judgments)
for commit, description in self._git.bisect_view():
judgment = judgment_dict.get(commit)
table.add_row(commit, rich.markup.escape(description),
"Cached" if commit in self._cache_keys else "",
judgment.name if judgment else "")
self._console.print(table)
app = typer.Typer(pretty_exceptions_enable=False,
rich_markup_mode="rich",
add_completion=False)
@app.command(help=__doc__)
def main(
good: str = typer.Option(..., help="Commit-ish for known-good revision."),
bad: str = typer.Option(..., help="Commit-ish for known-bad revision."),
fast_command: str = typer.Option(
..., help="Command for testing commits with cached bitstreams."),
slow_command: Optional[str] = typer.Option(
None, help="Command that tests remaining commits in second phase."),
git_binary: str = typer.Option("/usr/bin/git", help="Path to Git binary."),
verbose: bool = typer.Option(
False, help="Log stdout/stderr of every git command."),
):
# Escape the Bazel sandbox. Without this step, git would not find the .git/
# directory. Note that this works nicely with git worktrees.
workspace_dir = os.environ['BUILD_WORKSPACE_DIRECTORY']
os.chdir(workspace_dir)
# Create a Git controller capable of invoking the system's `git` binary. Use
# it to canonicalize potentially-symbolic "commitish" strings so we don't
# misinterpret them when a different commit is checked out.
git = GitController(git_binary, verbose)
good_commit = git.rev_parse(good)
bad_commit = git.rev_parse(bad)
print("Canonicalized good revision:", good_commit)
print("Canonicalized bad revision: ", bad_commit)
# NOTE: This will generate network traffic!
# Fetch a listing of the current bitstream cache.
bitstream_cache = BitstreamCache.MakeWithDefaults()
bitstream_cache.GetBitstreamsAvailable(refresh=True)
bitstream_cache_keys = set(bitstream_cache.available.keys())
fast_command_pieces = shlex.split(fast_command)
slow_command_pieces = shlex.split(
slow_command) if slow_command is not None else None
session = BisectSession(git, bitstream_cache_keys, visualize=True)
session.run(good_commit,
bad_commit,
fast_command_pieces,
slow_command=slow_command_pieces)
return 0
if __name__ == '__main__':
app()