Start work on a new incremental LLVM integrate script. (#14318)
diff --git a/build_tools/scripts/integrate/auto_integrate.py b/build_tools/scripts/integrate/auto_integrate.py
new file mode 100644
index 0000000..f3db1b5
--- /dev/null
+++ b/build_tools/scripts/integrate/auto_integrate.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Prototype of an LLVM auto-integrate script.
+
+WARNING: This script is a WIP that is being developed while stellaraccident@
+does LLVM integrates. If this warning is still present without activity after
+about Aug-2023, then consider it defunct.
+
+This script attempts to define a human-augmented workflow for staying up to
+date with LLVM. Roughly, it encourages the following flow:
+
+1. The integrator runs `start` to create a new integrate branch, which resets
+ llvm-project to the next affecting change (currently defined as anything
+ touching MLIR, but this can be expanded).
+2. The integrator or automation periodically issues a `next` command, which
+ will advance LLVM to the next affecting commit (or an arbitrary future
+ commit).
+3. If the CI on the integrate branch signals a failure, then the integrator
+ should apply patches as appropriate to turn it green or manually decide
+ to advance anyway (i.e. around a breakage/revert).
+4. When convenient, land the integrate branch into main and start a new one.
+
+The integrate branch is expected to live and accumulate patches over several
+days and is pushed to main based on a human decision. Since it is just a branch,
+normal git commands can be used to navigate around trouble spots.
+
+Current verbs:
+
+* `start`: Starts a new integrate branch.
+* `next`: Advances the current branch to the next affecting LLVM commit.
+* `status`: Shows the status of the LLVM dependency, including
+ reverse-chronological commit summaries of the delta between where we are and
+ upstream main.
+
+Future enhancements:
+
+* Carried patches to LLVM.
+* On `next`, we should see if we need to merge from `main` and do so.
+* Consult special `llvm-patch/{commit}` branches for pre-integrate patches and
+ apply them when we have integrated the given patch.
+"""
+from typing import Optional, Tuple
+
+import argparse
+from datetime import date
+import sys
+import textwrap
+
+import iree_utils
+
+# Location of the llvm-project submodule; passed as `repo_dir` to every git
+# helper that must operate on LLVM rather than on the enclosing repository.
+# NOTE(review): presumably a filesystem path - confirm against
+# iree_utils.get_submodule_root.
+LLVM_REPO_DIR = iree_utils.get_submodule_root("llvm-project")
+# Paths inside llvm-project used as the `paths` filter when listing new
+# upstream commits, i.e. what counts as an "affecting" change.
+TRACK_PATHS = ("mlir",)
+
+
+class CurrentState:
+    """Snapshot of the llvm-project integrate, computed at construction.
+
+    Attributes are populated by querying git through `iree_utils`:
+
+    * `current_iree_branch`: result of `iree_utils.git_current_branch()`.
+    * `current_commit` / `current_summary`: current llvm-project commit.
+    * `merge_base_commit`: merge base of that commit and `upstream/main`.
+    * `is_clean`: True when the current commit is the merge base itself.
+    * `new_commits`: (commit, desc) pairs reachable from `upstream/main` but
+      not from the merge base, restricted to `TRACK_PATHS`.
+    """
+
+    def __init__(self, args):
+        self.args = args
+        self.current_iree_branch = iree_utils.git_current_branch()
+        head = iree_utils.git_current_commit(repo_dir=LLVM_REPO_DIR)
+        self.current_commit, self.current_summary = head
+        # The common commit between the llvm-project submodule and upstream.
+        self.merge_base_commit = iree_utils.git_merge_base(
+            self.current_commit, "upstream/main", repo_dir=LLVM_REPO_DIR
+        )
+        # Clean (True) means no carried patches: the submodule commit is
+        # itself an ancestor shared with upstream.
+        self.is_clean = self.merge_base_commit == self.current_commit
+        # (commit, desc) tuples that upstream is ahead by; the rest of this
+        # class treats the list as reverse chronological (newest first).
+        upstream_only = ("upstream/main", f"^{self.merge_base_commit}")
+        self.new_commits = iree_utils.git_log_range(
+            refs=upstream_only,
+            paths=TRACK_PATHS,
+            repo_dir=LLVM_REPO_DIR,
+        )
+
+    def find_next_commit(self) -> Tuple[str, str]:
+        """Picks the LLVM commit to advance to.
+
+        When `args.advance_to` is set, the entry of `new_commits` with exactly
+        that commit id is chosen; otherwise the last entry of `new_commits`.
+
+        Returns (commit, desc).
+
+        Raises ValueError if the requested commit is not in `new_commits` or
+        if there are no new commits at all.
+        """
+        requested = self.args.advance_to
+        if not requested:
+            if self.new_commits:
+                # Last entry of the list, same as the first one in reverse.
+                return self.new_commits[-1]
+            raise ValueError("No new commits")
+
+        for commit, desc in self.new_commits:
+            if commit == requested:
+                return commit, desc
+        raise ValueError(
+            f"Requested advance to commit {self.args.advance_to} not found"
+        )
+
+    def index_of_next_commit(self, needle_commit: str) -> int:
+        """Returns the 0-based position of `needle_commit`, or -1 if absent.
+
+        Positions are counted from the end of `new_commits`.
+        """
+        oldest_first = enumerate(reversed(self.new_commits))
+        return next(
+            (
+                position
+                for position, (candidate, _desc) in oldest_first
+                if candidate == needle_commit
+            ),
+            -1,
+        )
+
+
+def do_start(args):
+    """Implements the `start` verb: opens a new integrate branch and pushes it.
+
+    Fetches, then refuses to continue if llvm-project is not clean. When
+    there are no new commits it prints a message and returns without creating
+    anything. Otherwise it creates (and checks out) the branch from HEAD,
+    resets llvm-project to the chosen commit, commits and pushes to `origin`.
+
+    Raises RuntimeError if the current llvm-project commit is not clean.
+    """
+    fetch(args)
+    state = CurrentState(args)
+    if not state.is_clean:
+        raise RuntimeError("Current branch state is unclean. Not implemented yet.")
+    if not state.new_commits:
+        print("Up to date! Not starting.")
+        return
+
+    target_commit, target_desc = state.find_next_commit()
+    print("==> Starting new integrate")
+
+    # Fall back to a date-stamped branch name when none was given.
+    branch_name = (
+        args.branch_name or f"increment-llvm-{date.today().strftime('%Y%m%d')}"
+    )
+    print(f" Creating branch {branch_name} (override with --branch-name=)")
+    allow_reuse = args.reuse_branch
+    iree_utils.git_create_branch(
+        branch_name,
+        checkout=True,
+        ref="HEAD",
+        force=allow_reuse,
+    )
+
+    iree_utils.git_reset(target_commit, repo_dir=LLVM_REPO_DIR)
+    commit_message = "\n\n".join(
+        (
+            f"Start LLVM integrate ({len(state.new_commits)} commits behind)",
+            f"Advance LLVM to {target_commit}: {target_desc}",
+        )
+    )
+    iree_utils.git_create_commit(message=commit_message, add_all=True)
+
+    print("Pushing...")
+    iree_utils.git_push_branch("origin", branch_name, force=allow_reuse)
+
+
+def do_next(args):
+    """Implements the `next` verb: moves llvm-project forward one step.
+
+    Fetches, then resets llvm-project to the commit chosen by
+    `CurrentState.find_next_commit`, records that in a new commit and runs
+    `git push`. Prints a message and returns early when there are no new
+    commits.
+
+    Raises RuntimeError when run from the `main` branch or when the current
+    llvm-project commit is not clean.
+    """
+    fetch(args)
+    state = CurrentState(args)
+    on_main = state.current_iree_branch == "main"
+    if on_main:
+        raise RuntimeError("Cannot run auto_integrate next from main branch!")
+
+    # TODO: Check if a merge from main is needed and do it.
+    if not state.is_clean:
+        raise RuntimeError("Current branch state is unclean. Not implemented yet.")
+    if not state.new_commits:
+        print("Up to date! Not starting.")
+        return
+
+    target_commit, target_desc = state.find_next_commit()
+    # Shared "<index> of <total>" fragment used in both the console output
+    # and the commit message.
+    progress = (
+        f"{state.index_of_next_commit(target_commit)} of {len(state.new_commits)}"
+    )
+    print(f"==> Advancing to next LLVM commit ({progress}):")
+    print(f" {target_commit}: {target_desc}")
+
+    iree_utils.git_reset(target_commit, repo_dir=LLVM_REPO_DIR)
+    iree_utils.git_create_commit(
+        message=f"Advance LLVM to {target_commit}: {target_desc} ({progress})",
+        add_all=True,
+    )
+
+    print("Pushing...")
+    iree_utils.git_exec(["push"])
+
+
+def do_status(args):
+    """Implements the `status` verb: prints where llvm-project stands.
+
+    Fetches, then reports the current llvm-project commit summary, whether it
+    is clean or diverges from its merge base, and one line per new upstream
+    commit in the order stored in `CurrentState.new_commits`.
+    """
+    fetch(args)
+    state = CurrentState(args)
+    print(f"==> llvm-project is currently at {state.current_summary}:")
+
+    if not state.is_clean:
+        # TODO: Also get the merge base with --independent to get the carried
+        # patches.
+        cleanliness = (
+            f" : Current commit has diverging patches with base {state.merge_base_commit}"
+        )
+    else:
+        cleanliness = " : Current commit is clean (no patches)"
+    print(cleanliness)
+
+    # Summarize the commits upstream has that we do not.
+    pending = state.new_commits
+    print(
+        f"==> {len(pending)} affecting commits between upstream head and current:"
+    )
+    for commit, desc in pending:
+        print(f" {commit}: {desc}")
+
+
+def fetch(args):
+    """Ensures the remotes exist, then fetches all revisions the script uses.
+
+    Three fetches run in order: `origin` with no `repo_dir`, then `origin`
+    and `upstream` with `repo_dir` set to the llvm-project submodule.
+    """
+    print("==> Fetching origin and upstream revisions...")
+    setup_remotes(args)
+    fetch_plan = (
+        {"repository": "origin"},
+        {"repository": "origin", "repo_dir": LLVM_REPO_DIR},
+        {"repository": "upstream", "repo_dir": LLVM_REPO_DIR},
+    )
+    for fetch_kwargs in fetch_plan:
+        iree_utils.git_fetch(**fetch_kwargs)
+
+
+def setup_remotes(args):
+    """Registers the real LLVM repository as `upstream` in the submodule.
+
+    `args` is accepted for symmetry with the other helpers and is not used.
+    """
+    # We need to know what the real upstream repo is.
+    remote_name = "upstream"
+    remote_url = "https://github.com/llvm/llvm-project.git"
+    iree_utils.git_setup_remote(remote_name, remote_url, repo_dir=LLVM_REPO_DIR)
+
+
+def main(args):
+    """Runs the verb named by `args.sub_command`.
+
+    Raises ValueError for a sub-command other than `next`, `start` or
+    `status`.
+    """
+    command = args.sub_command
+    if command == "next":
+        do_next(args)
+        return
+    if command == "start":
+        do_start(args)
+        return
+    if command == "status":
+        do_status(args)
+        return
+    raise ValueError(f"Unrecognized sub-command {command}")
+
+
+def parse_arguments(argv):
+    """Parses the command line for the integrate script.
+
+    `argv` is the list of argument strings without the program name.
+
+    Returns the parsed namespace. `sub_command` names the selected verb
+    (`next`, `start` or `status`). `next` and `start` carry `advance_to`,
+    which is None unless a commit was given on the command line; `start`
+    also carries `branch_name` (None unless given) and `reuse_branch`.
+    """
+    parser = argparse.ArgumentParser(description="IREE LLVM-bump-inator")
+    subparsers = parser.add_subparsers(
+        help="sub-command help", required=True, dest="sub_command"
+    )
+
+    next_parser = subparsers.add_parser("next")
+    # nargs="?" is what makes the positional optional. Without it argparse
+    # requires the argument and never uses `default`, so a bare `next` (or
+    # `start`) would be rejected.
+    next_parser.add_argument(
+        "advance_to",
+        nargs="?",
+        default=None,
+        help="Advance to the given LLVM commit",
+    )
+
+    start_parser = subparsers.add_parser("start")
+    start_parser.add_argument(
+        "--branch-name", help="Integrate branch to create", default=None
+    )
+    start_parser.add_argument(
+        "advance_to",
+        nargs="?",
+        default=None,
+        help="Advance to the given LLVM commit",
+    )
+    start_parser.add_argument(
+        "--reuse-branch",
+        help="Allow re-use of an existing branch",
+        action="store_true",
+        default=False,
+    )
+
+    # `status` takes no arguments of its own.
+    subparsers.add_parser("status")
+
+    return parser.parse_args(argv)
+
+
+# Script entry point: parse the command line (without the program name) and
+# dispatch to the requested sub-command.
+if __name__ == "__main__":
+ main(parse_arguments(sys.argv[1:]))
diff --git a/build_tools/scripts/integrate/iree_utils.py b/build_tools/scripts/integrate/iree_utils.py
index 21e5454..0c662cb 100644
--- a/build_tools/scripts/integrate/iree_utils.py
+++ b/build_tools/scripts/integrate/iree_utils.py
@@ -4,6 +4,8 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+from typing import List, Tuple
+
import os
import re
import shlex
@@ -176,6 +178,30 @@
return parts[0], output
+def git_log_range(refs=(), *, repo_dir=None, paths=()) -> List[Tuple[str, str]]:
+    """Does a `git log ref1 ref2 -- paths`.
+
+    `refs` are passed to `git log` verbatim (so `^ref` exclusions work),
+    `paths` optionally restricts the log to commits touching those paths, and
+    `repo_dir` is forwarded to `git_exec`.
+
+    Returns a list of tuples of (commit, desc), in the order `git log` prints
+    them. `commit` is the full hash and `desc` has the form
+    "subject (author on commit date)".
+    """
+    args = ["log", "--pretty=format:%H %s (%an on %ci)", *refs]
+    if paths:
+        # "--" separates revisions from path filters.
+        args.append("--")
+        args.extend(paths)
+    output = git_exec(args, repo_dir=repo_dir, capture_output=True)
+    # Each line is "<hash> <rest>"; split on the first space only so the
+    # description keeps its own spaces. partition() also tolerates a line
+    # that has no space at all.
+    return [
+        (commit, desc)
+        for commit, _sep, desc in (
+            line.partition(" ") for line in output.splitlines()
+        )
+    ]
+
+
+def git_merge_base(ref1, ref2, *, repo_dir=None) -> str:
+    """Returns the stripped output of `git merge-base ref1 ref2`."""
+    command = ["merge-base", ref1, ref2]
+    raw_output = git_exec(
+        command, quiet=True, capture_output=True, repo_dir=repo_dir
+    )
+    return raw_output.strip()
+
+
def git_create_commit(*, message, add_all=False, repo_dir=None):
if add_all:
git_exec(["add", "-A"], repo_dir=repo_dir)
@@ -201,6 +227,15 @@
return [extract_branch(l) for l in lines]
+def git_current_branch(*, repo_dir=None):
+    """Returns the stripped output of `git rev-parse --abbrev-ref HEAD`."""
+    command = ["rev-parse", "--abbrev-ref", "HEAD"]
+    raw_output = git_exec(
+        command, repo_dir=repo_dir, quiet=True, capture_output=True
+    )
+    return raw_output.strip()
+
+
def git_exec(args, *, repo_dir=None, quiet=False, capture_output=False):
full_args = ["git"] + args
full_args_quoted = [shlex.quote(a) for a in full_args]