| #!/usr/bin/env python |
| """Exports a submodule head to a branch in one of our fork repos. |
| |
| If patches have been applied to a local submodule for which we |
| host a fork repository, this script will "export" those patches |
| to specially maintained branches in the fork such that the |
| head commit is available indefinitely and can be used safely |
| as a submodule commit. |
| |
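| This will create branches in the fork named like: |
| sm-{super_repo_dir_name}-{branch} |
| (e.g. sm-iree-{branch} when the super-repo checkout directory is |
| named `iree`; pass --submodule-branch to choose the name explicitly) |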
| |
| (i.e. all historical patch commits for `main` will be on |
| sm-iree-main or if they derive from a specific integrate or |
| branch on sm-iree-integrate_....) |
| |
| The forks have branch protection enabled for branches starting |
| with "sm-" so that they cannot be force pushed. |
| |
| This is all accomplished by creating special merge commits and |
| associating trees with them explicitly. Most folks don't need |
| to understand that: they just need to know that if you run |
| this script, the current commit at your submodule head will |
| be available "forever". |
| |
| Usage: |
| ./scripts/export_submodule_head.py third_party/llvm-project |
| ./scripts/export_submodule_head.py third_party/stablehlo |
| ./scripts/export_submodule_head.py third_party/torch-mlir |
| """ |
| from typing import Optional |
| |
| import argparse |
| from pathlib import Path |
| import re |
| import shlex |
| import subprocess |
| import sys |
| |
| |
def export_submodule_head(args, submodule_rel_path: str):
    """Make the submodule's current head reachable from a branch on `origin`.

    Steps, in order:
      1. Fetch `origin` in the submodule and compute the merge base of the
         submodule head with `origin/main`. If the head is its own merge
         base it is already upstream and nothing is pushed.
      2. Choose the target branch: `args.submodule_branch` if set, otherwise
         `sm-{super-repo directory name}-{super-repo current branch}`.
      3. If the remote branch does not exist, push the head to create it.
         If it already points at the head, or the head is an ancestor of
         it, do nothing.
      4. Otherwise create a "splice" commit that carries the head's tree and
         has both the head and the remote branch tip as parents, then push
         that commit to the remote branch. The local checkout and local
         branches are not modified.

    Args:
        args: Parsed command-line namespace. `repo_path` (path of the
            super-repo; its `.name` is used in the default branch name) and
            `submodule_branch` (optional branch-name override) are read.
        submodule_rel_path: Submodule path relative to `args.repo_path`.

    Returns:
        0 on every path that does not raise.

    Raises:
        subprocess.CalledProcessError: If an underlying git command fails
            (other than the expected "not an ancestor" probe result).
    """
    super_repo_path = args.repo_path
    super_repo_name = super_repo_path.name
    super_branch = git_current_branch(repo_dir=super_repo_path)
    print(f"Super-repo '{super_repo_name}' is on branch '{super_branch}'")
    submodule_path = super_repo_path / submodule_rel_path
    print(f"Operating on submodule {submodule_path}")
    check_origin_update_help(submodule_path)
    git_fetch(repository="origin", repo_dir=submodule_path)
    submodule_head, submodule_summary = git_current_commit(repo_dir=submodule_path)
    print(f"Submodule at {submodule_head}\n {submodule_summary}")
    submodule_merge_base = git_merge_base(
        submodule_head, "origin/main", repo_dir=submodule_path
    )
    if submodule_merge_base == submodule_head:
        print("Submodule commit is upstream. Nothing to do.")
        return 0

    submodule_branch = args.submodule_branch or f"sm-{super_repo_name}-{super_branch}"
    print(
        f"Submodule merge base {submodule_merge_base} diverges from upstream. Will persist on {submodule_branch}."
    )

    # Get the remote topic head (None if the branch does not exist yet).
    remote_topic_head = git_remote_head(
        "origin", f"refs/heads/{submodule_branch}", repo_dir=submodule_path
    )

    # Early exit if precisely at this commit.
    if remote_topic_head == submodule_head:
        print(f"Submodule branch {submodule_branch} is already at {submodule_head}")
        return 0

    # If the branch does not exist, just push to it and exit.
    if not remote_topic_head:
        print(f"Submodule branch {submodule_branch} does not exist. Pushing.")
        git_exec(
            ["push", "origin", f"{submodule_head}:refs/heads/{submodule_branch}"],
            repo_dir=submodule_path,
        )
        print("PLEASE IGNORE ANY NOTICE ABOUT CREATING A PR")
        return 0

    # Check if the submodule_head is an ancestor of the current remote_topic_head
    # and exit if so (it is already reachable).
    try:
        git_exec(
            ["merge-base", "--is-ancestor", submodule_head, remote_topic_head],
            repo_dir=submodule_path,
        )
    except subprocess.CalledProcessError as e:
        # If not an ancestor, returncode will be 1. On general error, it will be
        # something else.
        if e.returncode != 1:
            raise
    else:
        print(
            f"Commit {submodule_head} is reachable from remote branch {submodule_branch}. Doing nothing."
        )
        return 0

    # Create a splice commit that is based on the tree of the current submodule head
    # and has parents of the current submodule head and the remote topic head.
    # Note that the current branch is not touched, the commit is just created in the
    # ether. We can push it to the remote topic branch to complete the splice.
    print(f"Submodule head {submodule_head} is not on {submodule_branch}. Splicing.")
    splice_commit = git_exec(
        [
            "commit-tree",
            submodule_head + "^{tree}",
            "-p",
            submodule_head,
            "-p",
            remote_topic_head,
            "-m",
            f"Splice submodule rebase {submodule_head} onto {remote_topic_head}",
        ],
        repo_dir=submodule_path,
        capture_output=True,
    ).strip()
    print(f"Created splice commit {splice_commit}: pushing")
    git_exec(
        ["push", "origin", f"{splice_commit}:refs/heads/{submodule_branch}"],
        repo_dir=submodule_path,
    )
    return 0
| |
| |
def git_current_commit(*, repo_dir=None) -> tuple[str, str]:
    """Describe the commit currently checked out in `repo_dir`.

    Returns:
        A `(commit, summary)` pair. `summary` is the stripped one-line
        `git log` output in the form "<hash> <subject> (<author> on <date>)";
        `commit` is the text before its first space.
    """
    summary = git_exec(
        ["log", "-n", "1", "--pretty=format:%H %s (%an on %ci)"],
        repo_dir=repo_dir,
        quiet=True,
        capture_output=True,
    ).strip()
    # The hash is the leading field of the summary line.
    commit, _, _ = summary.partition(" ")
    return commit, summary
| |
| |
def git_current_branch(*, repo_dir=None):
    """Return the stripped output of `git rev-parse --abbrev-ref HEAD`.

    The command runs quietly (no EXEC echo) with `repo_dir` as its working
    directory.
    """
    raw_name = git_exec(
        ["rev-parse", "--abbrev-ref", "HEAD"],
        capture_output=True,
        quiet=True,
        repo_dir=repo_dir,
    )
    return raw_name.strip()
| |
| |
def check_origin_update_help(repo_dir):
    """Print an SSH push-URL hint when `origin` pushes over GitHub HTTPS.

    Args:
        repo_dir: Repository whose `origin` push URL is inspected.

    Returns:
        False if the push URL starts with "https://github.com/" (a hint with
        the equivalent `git@github.com:` URL is printed), True otherwise.
    """
    https_prefix = "https://github.com/"
    push_url = git_exec(
        ["remote", "get-url", "--push", "origin"],
        repo_dir=repo_dir,
        quiet=True,
        capture_output=True,
    ).strip()

    if not push_url.startswith(https_prefix):
        return True

    # Swap only the leading HTTPS prefix for the SSH form.
    new_url = "git@github.com:" + push_url[len(https_prefix):]
    print(
        "Your push URL is for GitHub HTTPS. Just in case if you are only set up "
        "to push with SSH, here is a one-liner to update it:"
    )
    print(f" (cd {repo_dir} && git remote set-url --push origin {new_url})")
    return False
| |
| |
def git_fetch(*, repository=None, ref=None, repo_dir=None):
    """Run `git fetch` in `repo_dir`.

    Args:
        repository: Appended to the command when truthy.
        ref: Appended to the command when not None.
        repo_dir: Working directory for the git command.
    """
    repository_part = [repository] if repository else []
    ref_part = [ref] if ref is not None else []
    git_exec(["fetch"] + repository_part + ref_part, repo_dir=repo_dir)
| |
| |
def git_merge_base(ref1, ref2, *, repo_dir=None) -> str:
    """Return the stripped output of `git merge-base ref1 ref2`.

    The command runs quietly (no EXEC echo) with `repo_dir` as its working
    directory.
    """
    raw_base = git_exec(
        ["merge-base", ref1, ref2],
        repo_dir=repo_dir,
        capture_output=True,
        quiet=True,
    )
    return raw_base.strip()
| |
| |
def git_remote_head(remote: str, head: str, repo_dir=None) -> Optional[str]:
    """Look up the commit a remote head (i.e. "refs/heads/main") points at.

    Args:
        remote: Remote name passed to `git ls-remote --heads`.
        head: Ref pattern passed to `git ls-remote --heads`.
        repo_dir: Working directory for the git command.

    Returns:
        The first whitespace-delimited field of the first output line, or
        None when the command prints no lines.
    """
    listing = git_exec(
        ["ls-remote", "--heads", remote, head],
        capture_output=True,
        repo_dir=repo_dir,
    )
    ref_lines = listing.strip().splitlines()
    if not ref_lines:
        return None

    # Only the first reported line is consulted.
    commit, *_ = re.split(r"\s+", ref_lines[0])
    return commit
| |
| |
def git_exec(args, *, repo_dir, quiet=False, capture_output=False):
    """Run `git` with the given argument list, using `repo_dir` as cwd.

    Args:
        args: List of git arguments (without the leading "git").
        repo_dir: Working directory for the subprocess.
        quiet: When False, a shell-quoted rendering of the command is printed
            before it runs.
        capture_output: When True, stdout is captured and returned.

    Returns:
        The command's stdout decoded as UTF-8 when `capture_output` is True,
        otherwise None.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    command = ["git"] + args
    rendered = " ".join(map(shlex.quote, command))
    if not quiet:
        print(f" ++ EXEC: (cd {repo_dir} && {rendered})")

    if not capture_output:
        subprocess.check_call(command, cwd=repo_dir)
        return None

    raw_stdout = subprocess.check_output(command, cwd=repo_dir)
    return raw_stdout.decode("utf-8")
| |
| |
def main(args):
    """Export the submodule named by `args.submodule`; return the result."""
    submodule_rel_path = args.submodule
    return export_submodule_head(args, submodule_rel_path)
| |
| |
def parse_arguments(argv):
    """Parse the exporter's command-line arguments.

    Args:
        argv: Argument strings, not including the program name.

    Returns:
        Namespace with `repo_path` (a `Path`; defaults to the directory three
        levels above this file), `submodule_branch` (None unless given) and
        the positional `submodule`.
    """
    script_path = Path(__file__).resolve()
    default_repo_path = script_path.parent.parent.parent

    cli = argparse.ArgumentParser(description="Submodule exporter")
    cli.add_argument("--repo-path", type=Path, default=default_repo_path)
    cli.add_argument("--submodule-branch")
    cli.add_argument("submodule")
    return cli.parse_args(argv)
| |
| |
if __name__ == "__main__":
    # Hand main()'s return code to the shell as the process exit status
    # instead of discarding it (a None result still exits with status 0).
    sys.exit(main(parse_arguments(sys.argv[1:])))