Add scripting to update mlir-hlo to match LLVM commit (#4348)

This updates the MLIR-HLO submodule in a similar manner to the TF
submodule, just with slightly less parsing because the LLVM commit
information is in its own file.
diff --git a/.github/workflows/update_llvm_dependent_submodules.yml b/.github/workflows/update_llvm_dependent_submodules.yml
index cefb441..e6bd183 100644
--- a/.github/workflows/update_llvm_dependent_submodules.yml
+++ b/.github/workflows/update_llvm_dependent_submodules.yml
@@ -42,6 +42,7 @@
           echo "LLVM_SHA=$(git submodule status third_party/llvm-project | awk '{print $1}' | cut -c -12)" >> $GITHUB_ENV
           echo "TF_SHA=$(git submodule status third_party/tensorflow | awk '{print $1}' | cut -c -12)" >> $GITHUB_ENV
           echo "LLVM_BAZEL_SHA=$(git submodule status third_party/llvm-bazel | awk '{print $1}' | cut -c -12)" >> $GITHUB_ENV
+          echo "MLIR_HLO_SHA=$(git submodule status third_party/mlir_hlo | awk '{print $1}' | cut -c -12)" >> $GITHUB_ENV
       - name: Creating Pull Request
         uses: peter-evans/create-pull-request@v3
         with:
@@ -56,6 +57,8 @@
               [${{ env.LLVM_BAZEL_SHA }}](https://github.com/google/llvm-bazel/commit/${{ env.LLVM_BAZEL_SHA }})
             - TensorFlow to
             [${{ env.TF_SHA }}](https://github.com/tensorflow/tensorflow/commit/${{ env.TF_SHA }})
+            - MLIR-HLO to
+            [${{ env.MLIR_HLO_SHA }}](https://github.com/tensorflow/mlir-hlo/commit/${MLIR_HLO_SHA?})
 
             `./scripts/git/update_to_llvm_syncpoint.py`
 
diff --git a/scripts/git/update_llvm_dependent_submodules.sh b/scripts/git/update_llvm_dependent_submodules.sh
index 1890412..2fcaee6 100755
--- a/scripts/git/update_llvm_dependent_submodules.sh
+++ b/scripts/git/update_llvm_dependent_submodules.sh
@@ -46,6 +46,7 @@
 LLVM_SHA="$(git submodule status third_party/llvm-project | awk '{print $1}' | cut -c -12)"
 LLVM_BAZEL_SHA="$(git submodule status third_party/llvm-bazel | awk '{print $1}' | cut -c -12)"
 TF_SHA="$(git submodule status third_party/tensorflow | awk '{print $1}' | cut -c -12)"
+MLIR_HLO_SHA="$(git submodule status third_party/mlir_hlo | awk '{print $1}' | cut -c -12)"
 
 TITLE="Synchronize submodules with LLVM at llvm/llvm-project@${LLVM_SHA?}"
 BODY="$(cat <<-EOF
@@ -55,6 +56,8 @@
   [${LLVM_BAZEL_SHA?}](https://github.com/google/llvm-bazel/commit/${LLVM_BAZEL_SHA?})
 - TensorFlow to
   [${TF_SHA?}](https://github.com/tensorflow/tensorflow/commit/${TF_SHA?})
+- MLIR-HLO to
+  [${MLIR_HLO_SHA?}](https://github.com/tensorflow/mlir-hlo/commit/${MLIR_HLO_SHA?})
 
 \`${CMD?}\`
 EOF
diff --git a/scripts/git/update_to_llvm_syncpoint.py b/scripts/git/update_to_llvm_syncpoint.py
index f95bb86..ce13689 100755
--- a/scripts/git/update_to_llvm_syncpoint.py
+++ b/scripts/git/update_to_llvm_syncpoint.py
@@ -16,18 +16,19 @@
 # pylint: disable=missing-docstring
 """Updates LLVM-dependent submodules based on the current LLVM commit.
 
-Updates the third_party/llvm-bazel and third_party/tensorflow submodules to
-commits that match the LLVM commit in third_party/llvm-project submodule. We
-have special conditions around these submodules since they are synced as part of
-the integration of LLVM into Google's source repository. See
+Updates the third_party/llvm-bazel, third_party/tensorflow, and
+third_party/mlir-hlo submodules to commits that match the LLVM commit in the
+third_party/llvm-project submodule. We have special conditions around these
+submodules since they are synced as part of the integration of LLVM into
+Google's source repository. See
 https://google.github.io/iree/developing-iree/repository-management#the-special-relationship-with-llvm-and-tensorflow.
 
 Typical usage:
   Syntax: ./scripts/git/update_to_llvm_syncpoint.py
 
   By default, this will update llvm-bazel to the tag corresponding to the
-  current LLVM commit and update TensorFlow to the most recent commit that has a
-  matching LLVM commit.
+  current LLVM commit and update TensorFlow and MLIR-HLO to the most recent
+  commit that has a matching LLVM commit.
 """
 
 import argparse
@@ -55,6 +56,11 @@
         "Update to the most recent commit with a matching version of LLVM",
 }
 
+TF_WORKSPACE_FILEPATH = "tensorflow/workspace.bzl"
+TF_WORKSPACE_LLVM_COMMIT_REGEXP = re.compile(
+    r"""\s*LLVM_COMMIT\s*=\s*"(.+)"\s*""", flags=re.MULTILINE)
+MLIR_HLO_LLVM_VERSION_FILEPATH = "build_tools/llvm_version.txt"
+
 
 def parse_arguments():
   parser = argparse.ArgumentParser()
@@ -86,6 +92,15 @@
                       help=("Update TensorFlow to this rev, or a named option:"
                             f" {COMMIT_OPTIONS}"),
                       default=LATEST_MATCHING_COMMIT_OPTION)
+  parser.add_argument("--mlir_hlo_path",
+                      help="Path to the tensorflow sources "
+                      "(default to third_party/tensorflow)",
+                      default=None)
+  parser.add_argument("--mlir_hlo_rev",
+                      "--mlir_hlo_commit",
+                      help=("Update mlir-hlo to this rev, or a named option:"
+                            f" {COMMIT_OPTIONS}"),
+                      default=LATEST_MATCHING_COMMIT_OPTION)
   parser.add_argument(
       "--validate",
       help="Validate that the selected commits all match the LLVM commit",
@@ -101,12 +116,14 @@
     args.repo = utils.find_git_toplevel()
 
   # Set some defaults.
-  if not args.tensorflow_path:
-    args.tensorflow_path = os.path.join(args.repo, "third_party", "tensorflow")
   if not args.llvm_path:
     args.llvm_path = os.path.join(args.repo, "third_party", "llvm-project")
   if not args.llvm_bazel_path:
     args.llvm_bazel_path = os.path.join(args.repo, "third_party", "llvm-bazel")
+  if not args.tensorflow_path:
+    args.tensorflow_path = os.path.join(args.repo, "third_party", "tensorflow")
+  if not args.mlir_hlo_path:
+    args.mlir_hlo_path = os.path.join(args.repo, "third_party", "mlir-hlo")
 
   return args
 
@@ -117,25 +134,17 @@
   print(f"  LLVM Path: {args.llvm_path}")
   print(f"  LLVM Bazel Path: {args.llvm_bazel_path}")
   print(f"  TensorFlow Path: {args.tensorflow_path}")
+  print(f"  MLIR-HLO Path: {args.tensorflow_path}")
 
   current_llvm_commit = parse_rev(args.llvm_path, "HEAD")
   current_llvm_bazel_commit = parse_rev(args.llvm_bazel_path, "HEAD")
   current_tf_commit = parse_rev(args.tensorflow_path, "HEAD")
+  current_mlir_hlo_commit = parse_rev(args.mlir_hlo_path, "HEAD")
   print("Current Commits:")
   print(f"  llvm = {current_llvm_commit}")
   print(f"  llvm_bazel = {current_llvm_bazel_commit}")
   print(f"  tensorflow = {current_tf_commit}")
-
-  # Update TensorFlow
-  new_tf_commit = find_new_tf_commit(args.tensorflow_path, current_llvm_commit,
-                                     args.tensorflow_rev)
-  print("\n*** Updating TensorFlow to", new_tf_commit, "***")
-  utils.execute(["git", "checkout", new_tf_commit], cwd=args.tensorflow_path)
-  stage_path(args.repo, args.tensorflow_path)
-
-  validate_tf_commit(current_llvm_commit,
-                     args.tensorflow_path,
-                     exit_on_failure=args.validate)
+  print(f"  mlir-hlo = {current_mlir_hlo_commit}")
 
   # Update LLVM-Bazel
   new_llvm_bazel_commit = find_new_llvm_bazel_commit(args.llvm_bazel_path,
@@ -150,6 +159,32 @@
                              args.llvm_bazel_path,
                              exit_on_failure=args.validate)
 
+  # Update TensorFlow
+  new_tf_commit = find_new_commit_from_version_file(args.tensorflow_path,
+                                                    TF_WORKSPACE_FILEPATH,
+                                                    current_llvm_commit,
+                                                    args.tensorflow_rev)
+  print("\n*** Updating TensorFlow to", new_tf_commit, "***")
+  utils.execute(["git", "checkout", new_tf_commit], cwd=args.tensorflow_path)
+  stage_path(args.repo, args.tensorflow_path)
+
+  validate_tf_commit(current_llvm_commit,
+                     args.tensorflow_path,
+                     exit_on_failure=args.validate)
+
+  # Update MLIR-HLO
+  new_mlir_hlo_commit = find_new_commit_from_version_file(
+      args.mlir_hlo_path, MLIR_HLO_LLVM_VERSION_FILEPATH, current_llvm_commit,
+      args.mlir_hlo_rev)
+  print("\n*** Updating MLIR-HLO to", new_mlir_hlo_commit, "***")
+  utils.execute(["git", "checkout", new_mlir_hlo_commit],
+                cwd=args.mlir_hlo_path)
+  stage_path(args.repo, args.mlir_hlo_path)
+
+  validate_mlir_hlo_commit(current_llvm_commit,
+                           args.mlir_hlo_path,
+                           exit_on_failure=args.validate)
+
   # Export SUBMODULE_VERSIONS.
   print()  # Add line break.
   submodule_versions.export_versions(args.repo)
@@ -201,24 +236,17 @@
       cwd=llvm_bazel_path).stdout.split()[0].lstrip("+-")
 
 
-def find_new_tf_commit(tensorflow_path, llvm_commit, tf_rev):
-  utils.execute(["git", "fetch"], cwd=tensorflow_path)
+def find_llvm_commit_changes_to_file(repo_path,
+                                     filepath,
+                                     llvm_commit,
+                                     branch="origin/master"):
+  """Finds commits where the occurrence of the given LLVM commit hash changes.
 
-  if tf_rev not in COMMIT_OPTIONS:
-    return parse_rev(tensorflow_path, tf_rev)
-
-  if tf_rev == KEEP_COMMIT_OPTION:
-    return parse_rev(tensorflow_path, "HEAD")
-
-  if tf_rev == REMOTE_HEAD_COMMIT_OPTION:
-    return parse_rev(tensorflow_path, "origin/master")
-
-  # Find commits where the number of occurrences of the given LLVM commit hash
-  # changes. In normal cases, there should be at most two commits that match
-  # this:
-  # 1. The commit that first introduced the new hash in the TF workspace file.
-  # 2. The commit that changed it to a new hash afterwards.
-  tf_integrate_commit_options = utils.execute(
+  In normal cases, there should be at most two commits that match this:
+    1. The commit that first introduced the new hash in the file.
+    2. The commit that changed it to a new hash afterwards.
+  """
+  commits = utils.execute(
       [
           "git",
           "log",
@@ -232,34 +260,57 @@
           # https://git-scm.com/docs/git-log#Documentation/git-log.txt--Sltstringgt
           "-S",
           llvm_commit,
-          # Search along the master branch
-          "origin/master",
-          # Only look in the TF workspace file where the llvm_commit is recorded
+          # Search along the appropriate branch
+          branch,
+          # Only look in the specified file where the llvm_commit is recorded
           "--",
-          "tensorflow/workspace.bzl"
+          filepath,
       ],
       capture_output=True,
-      cwd=tensorflow_path).stdout.split()
-  if len(tf_integrate_commit_options) > 2:
+      cwd=repo_path).stdout.split()
+
+  if len(commits) > 2:
     raise RuntimeError(
-        f"Expected one or two TF commits to involve LLVM commit {llvm_commit},"
-        f" but got {len(tf_integrate_commit_options)}")
+        f"Expected one or two commits in {repo_path} {branch} {filepath} to "
+        f" involve LLVM commit {llvm_commit}, but got {len(commits)}")
 
-  if not tf_integrate_commit_options:
+  if not commits:
     raise RuntimeError(
-        f"TF does not have any references to LLVM commit {llvm_commit}."
-        " Maybe TF export is behind?")
+        f"{repo_path} {branch} {filepath} does not have any references to LLVM"
+        f" commit {llvm_commit}. Maybe  export is behind?")
 
-  if tf_rev == INTEGRATE_COMMIT_OPTION:
-    return tf_integrate_commit_options[-1]
+  return commits
 
-  assert tf_rev == LATEST_MATCHING_COMMIT_OPTION
-  if len(tf_integrate_commit_options) == 1:
+
+def find_new_commit_from_version_file(repo_path,
+                                      version_filepath,
+                                      llvm_commit,
+                                      rev,
+                                      branch="origin/master"):
+  utils.execute(["git", "fetch"], cwd=repo_path)
+
+  if rev not in COMMIT_OPTIONS:
+    return parse_rev(repo_path, rev)
+
+  if rev == KEEP_COMMIT_OPTION:
+    return parse_rev(repo_path, "HEAD")
+
+  if rev == REMOTE_HEAD_COMMIT_OPTION:
+    return parse_rev(repo_path, branch)
+
+  commit_options = find_llvm_commit_changes_to_file(repo_path, version_filepath,
+                                                    llvm_commit)
+
+  if rev == INTEGRATE_COMMIT_OPTION:
+    return commit_options[-1]
+
+  assert rev == LATEST_MATCHING_COMMIT_OPTION
+  if len(commit_options) == 1:
     # There hasn't been a subsequent integrate, use remote head.
-    return parse_rev(tensorflow_path, "origin/master")
+    return parse_rev(repo_path, branch)
 
   # Use the commit one before the one that changed away from this LLVM version.
-  return parse_rev(tensorflow_path, f"{tf_integrate_commit_options[0]}^")
+  return parse_rev(repo_path, f"{commit_options[0]}^")
 
 
 def validate_tf_commit(llvm_commit, tensorflow_path, exit_on_failure=True):
@@ -277,20 +328,37 @@
   # TensorFlow keeps its commit in workspace.bzl on a line like:
   # LLVM_COMMIT = "..."
   # Yeah. This is how we do it.
-  workspace_path = os.path.join(tensorflow_path, "tensorflow", "workspace.bzl")
-  pattern_text = r"""\s*LLVM_COMMIT\s*=\s*"(.+)"\s*"""
-  pattern = re.compile(pattern_text, flags=re.MULTILINE)
+  workspace_path = os.path.join(tensorflow_path, TF_WORKSPACE_FILEPATH)
+
   for line in open(workspace_path, "r", encoding="UTF-8"):
-    m = re.match(pattern, line)
+    m = re.match(TF_WORKSPACE_LLVM_COMMIT_REGEXP, line)
     if m:
       return m.group(1)
 
   print(f"ERROR: Could not find LLVM commit in {workspace_path}.")
   print("Please file a bug)")
-  print("Expected pattern match for:", pattern_text)
+  print("Expected pattern match for:", TF_WORKSPACE_LLVM_COMMIT_REGEXP.pattern)
   sys.exit(1)
 
 
+def validate_mlir_hlo_commit(llvm_commit, mlir_hlo_path, exit_on_failure=True):
+  mlir_hlo_llvm_commit = find_mlir_hlo_llvm_commit(mlir_hlo_path)
+
+  matches = mlir_hlo_llvm_commit == llvm_commit
+  if not matches:
+    print("WARNING: LLVM commit in mlir-hlo does not match that in IREE"
+          f" ({mlir_hlo_llvm_commit} vs {llvm_commit})")
+    if exit_on_failure:
+      sys.exit(1)
+
+
+def find_mlir_hlo_llvm_commit(mlir_hlo_path):
+  llvm_version_file_path = os.path.join(mlir_hlo_path,
+                                        MLIR_HLO_LLVM_VERSION_FILEPATH)
+  with open(llvm_version_file_path, "r", encoding="UTF-8") as f:
+    return f.read().strip()
+
+
 def stage_path(repo_path, to_stage):
   # TODO(laurenzo): Move to utils.py.
   utils.execute(["git", "add", to_stage], cwd=repo_path)