Initial commit of package based CI and regression testing. (#14793)
This patch lays a lot of new track:
* New CI jobs that are rooted in building deployable packages and having
those be used by subsequent jobs (for testing).
* New variant of the building scripts that will eventually subsume the
old once we rebase package building on top of this workflow (like it is
done for downstreams).
* A new `regression_test` project (currently under experimental) that
has the seeds for being able to handle "big" tests and a wider variety
of combinations than we presently have in other places like `e2e`.
* An initial set of tests targeting both CPU and AMD/Vulkan for llama2
7b f16/i4 to commemorate the current work the team has been doing. The
tests are not yet super inspired, just verifying that it compiles and
does in fact run, but I will expand them in a followup once the CI can
guide me.
* A regression testing job for CPU. Will add one for AMD GPU shortly
once I finish setting up the runner.
The regression_test project should be suitable for the development
workflow too, but it needs a few more turns and some mileage on it. Consider
this a WIP that I'll be holding carefully for some days to get it ready
for general use.
diff --git a/build_tools/pkgci/README.md b/build_tools/pkgci/README.md
new file mode 100644
index 0000000..8121ba2
--- /dev/null
+++ b/build_tools/pkgci/README.md
@@ -0,0 +1,12 @@
+# PkgCI Scripts
+
+This directory contains scripts and configuration for the "new" CI, which
+is based on building packages and then flowing those to follow-on jobs.
+
+The traditional CI attempted to do all steps as various kinds of source
+builds at head, as opposed to a split package/test style of workflow. It
+can mostly
+be found in the `cmake` directory but is also scattered around.
+
+This directory generally corresponds to "pkgci_" prefixed workflows. Over
+time, as this CI flow takes over more of the CI pipeline, the traditional
+CI will be reduced to outlier jobs and policy checks.
diff --git a/build_tools/pkgci/build_linux_packages.sh b/build_tools/pkgci/build_linux_packages.sh
new file mode 100755
index 0000000..b82fefe
--- /dev/null
+++ b/build_tools/pkgci/build_linux_packages.sh
@@ -0,0 +1,255 @@
+#!/bin/bash
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# build_linux_packages.sh
+# One stop build of IREE Python packages for Linux. The Linux build is
+# complicated because it has to be done via a docker container that has
+# an LTS glibc version, all Python packages and other deps.
+# This script handles all of those details.
+#
+# Usage:
+# Build everything (all packages, all python versions):
+# ./build_tools/pkgci/build_linux_packages.sh
+#
+# Build specific Python versions and packages to custom directory:
+# override_python_versions="cp38-cp38 cp39-cp39" \
+# packages="iree-runtime" \
+# output_dir="/tmp/wheelhouse" \
+# ./build_tools/pkgci/build_linux_packages.sh
+#
+# Valid Python versions match a subdirectory under /opt/python in the docker
+# image. Typically:
+# cp38-cp38 cp39-cp39 cp310-cp310
+#
+# Valid packages:
+# iree-runtime
+# iree-compiler
+#
+# Note that this script is meant to be run on CI and it will pollute both the
+# output directory and in-tree build/ directories (under runtime/ and
+# compiler/) with docker created, root owned builds. Sorry - there is
+# no good way around it.
+#
+# It can be run on a workstation but recommend using a git worktree dedicated
+# to packaging to avoid stomping on development artifacts.
+set -eu -o errtrace
+
+# Function to find the directory the ".git" directory is in.
+# We do this instead of using git directly because `git` may complain about
+# operating in a directory owned by another user.
+function find_git_dir_parent() {
+ curr_dir="${PWD}"
+
+ # Loop until we reach the root directory
+ while [ "${curr_dir}" != "/" ]; do
+ # Check if there is a ".git" directory in the current directory
+ if [ -d "${curr_dir}/.git" ]; then
+ # Return the path to the directory containing the ".git" directory
+ echo "${curr_dir}"
+ return
+ fi
+
+ # Move up one directory
+ curr_dir="$(dirname "${curr_dir}")"
+ done
+
+ # If we reach the root directory and there is no ".git" directory, return an empty string
+ echo ""
+}
+
+this_dir="$(cd $(dirname $0) && pwd)"
+script_name="$(basename $0)"
+repo_root=$(cd "${this_dir}" && find_git_dir_parent)
+manylinux_docker_image="${manylinux_docker_image:-$(uname -m | awk '{print ($1 == "aarch64") ? "quay.io/pypa/manylinux_2_28_aarch64" : "ghcr.io/nod-ai/manylinux_x86_64:main" }')}"
+python_versions="${override_python_versions:-cp311-cp311}"
+output_dir="${output_dir:-${this_dir}/wheelhouse}"
+cache_dir="${cache_dir:-}"
+packages="${packages:-iree-runtime iree-compiler}"
+package_suffix="${package_suffix:-}"
+toolchain_suffix="${toolchain_suffix:-release}"
+
+function run_on_host() {
+ echo "Running on host"
+ echo "Launching docker image ${manylinux_docker_image}"
+
+ # Canonicalize paths.
+ mkdir -p "${output_dir}"
+ output_dir="$(cd "${output_dir}" && pwd)"
+ echo "Outputting to ${output_dir}"
+ extra_args=""
+ if ! [ -z "$cache_dir" ]; then
+ echo "Setting up host cache dir ${cache_dir}"
+ mkdir -p "${cache_dir}/ccache"
+ mkdir -p "${cache_dir}/pip"
+ extra_args="${extra_args} -v ${cache_dir}:${cache_dir} -e cache_dir=${cache_dir}"
+ fi
+ docker run --rm \
+ -v "${repo_root}:${repo_root}" \
+ -v "${output_dir}:${output_dir}" \
+ -e __MANYLINUX_BUILD_WHEELS_IN_DOCKER=1 \
+ -e "override_python_versions=${python_versions}" \
+ -e "packages=${packages}" \
+ -e "package_suffix=${package_suffix}" \
+ -e "output_dir=${output_dir}" \
+ -e "toolchain_suffix=${toolchain_suffix}" \
+ ${extra_args} \
+ "${manylinux_docker_image}" \
+ -- "${this_dir}/${script_name}"
+}
+
+function run_in_docker() {
+ echo "Running in docker"
+ echo "Marking git safe.directory"
+ git config --global --add safe.directory '*'
+
+ echo "Using python versions: ${python_versions}"
+ local orig_path="${PATH}"
+
+ # Configure toolchain.
+ export CMAKE_TOOLCHAIN_FILE="${this_dir}/linux_toolchain_${toolchain_suffix}.cmake"
+ echo "Using CMake toolchain ${CMAKE_TOOLCHAIN_FILE}"
+ if ! [ -f "$CMAKE_TOOLCHAIN_FILE" ]; then
+ echo "CMake toolchain not found (wrong toolchain_suffix?)"
+ exit 1
+ fi
+
+ # Configure caching.
+ if [ -z "$cache_dir" ]; then
+ echo "Cache directory not configured. No caching will take place."
+ else
+ mkdir -p "${cache_dir}"
+ cache_dir="$(cd ${cache_dir} && pwd)"
+ echo "Caching build artifacts to ${cache_dir}"
+ export CCACHE_DIR="${cache_dir}/ccache"
+ export CCACHE_MAXSIZE="2G"
+ export CMAKE_C_COMPILER_LAUNCHER=ccache
+ export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+ # Configure pip cache dir.
+ # We make it two levels down from within the container because pip likes
+ # to know that it is owned by the current user.
+ export PIP_CACHE_DIR="${cache_dir}/pip/in/container"
+ mkdir -p "${PIP_CACHE_DIR}"
+ chown -R "$(whoami)" "${cache_dir}/pip"
+ fi
+
+ # Build phase.
+ set -o xtrace
+ install_native_deps
+ for package in ${packages}; do
+ echo "******************** BUILDING PACKAGE ${package} ********************"
+ for python_version in ${python_versions}; do
+ python_dir="/opt/python/${python_version}"
+ if ! [ -x "${python_dir}/bin/python" ]; then
+ echo "ERROR: Could not find python: ${python_dir} (skipping)"
+ continue
+ fi
+ export PATH="${python_dir}/bin:${orig_path}"
+ echo ":::: Python version $(python --version)"
+ prepare_python
+ # replace dashes with underscores
+ package_suffix="${package_suffix//-/_}"
+ case "${package}" in
+ iree-runtime)
+ clean_wheels "iree_runtime${package_suffix}" "${python_version}"
+ build_iree_runtime
+ run_audit_wheel "iree_runtime${package_suffix}" "${python_version}"
+ ;;
+ iree-compiler)
+ clean_wheels "iree_compiler${package_suffix}" "${python_version}"
+ build_iree_compiler
+ run_audit_wheel "iree_compiler${package_suffix}" "${python_version}"
+ ;;
+ *)
+ echo "Unrecognized package '${package}'"
+ exit 1
+ ;;
+ esac
+ done
+ done
+
+ set +o xtrace
+ echo "******************** BUILD COMPLETE ********************"
+ echo "Generated binaries:"
+ ls -l "${output_dir}"
+ if ! [ -z "$cache_dir" ]; then
+ echo "ccache stats:"
+ ccache --show-stats
+ fi
+}
+
+function build_wheel() {
+ python -m pip wheel --disable-pip-version-check -v -w "${output_dir}" "${repo_root}/$@"
+}
+
+function build_iree_runtime() {
+ # We install the needed build deps below for the tools.
+ IREE_RUNTIME_BUILD_TRACY=ON IREE_RUNTIME_BUILD_TRACY_TOOLS=ON \
+ build_wheel runtime/
+}
+
+function build_iree_compiler() {
+ build_wheel compiler/
+}
+
+function run_audit_wheel() {
+ local wheel_basename="$1"
+ local python_version="$2"
+ # Force wildcard expansion here
+ generic_wheel="$(echo "${output_dir}/${wheel_basename}-"*"-${python_version}-linux_$(uname -m).whl")"
+ ls "${generic_wheel}"
+ echo ":::: Auditwheel ${generic_wheel}"
+ auditwheel repair -w "${output_dir}" "${generic_wheel}"
+ rm -v "${generic_wheel}"
+}
+
+function clean_wheels() {
+ local wheel_basename="$1"
+ local python_version="$2"
+ echo ":::: Clean wheels ${wheel_basename} ${python_version}"
+ rm -f -v "${output_dir}/${wheel_basename}-"*"-${python_version}-"*".whl"
+}
+
+function prepare_python() {
+ # The 0.17 series of patchelf can randomly corrupt executables. Fixes
+ # have landed but not yet been released. Consider removing this pin
+ # once 0.19 is released. We just override the system version with
+ # a pip side load.
+ pip install patchelf==0.16.1.0
+ hash -r
+ echo "patchelf version: $(patchelf --version) (0.17 is bad: https://github.com/NixOS/patchelf/issues/446)"
+}
+
+function install_native_deps() {
+ echo ":::: Install Native Deps"
+
+ # Get the output of uname -m
+ uname_m=$(uname -m)
+
+ # Check if the output is aarch64
+
+ if [[ "$uname_m" == "aarch64" ]]; then
+ echo "The architecture is aarch64 and we use manylinux 2_28 so install deps"
+ yum install -y epel-release
+ yum update -y
+ # Required for Tracy
+ yum install -y capstone-devel tbb-devel libzstd-devel
+ elif [[ "$uname_m" == "x86_64" ]]; then
+ # Check if the output is x86_64
+ echo "Running on an architecture which has deps in docker image."
+ else
+ echo "The architecture is unknown. Exiting"
+ exit 1
+ fi
+}
+
+
+# Trampoline to the docker container if running on the host.
+if [ -z "${__MANYLINUX_BUILD_WHEELS_IN_DOCKER-}" ]; then
+ run_on_host "$@"
+else
+ run_in_docker "$@"
+fi
diff --git a/build_tools/pkgci/linux_toolchain_release.cmake b/build_tools/pkgci/linux_toolchain_release.cmake
new file mode 100644
index 0000000..0678623
--- /dev/null
+++ b/build_tools/pkgci/linux_toolchain_release.cmake
@@ -0,0 +1,34 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# The CI builds with this custom toolchain on Linux by
+# setting the variable CMAKE_TOOLCHAIN_FILE.
+# It does several things:
+# * Enables thin archives to make debug symbol builds more efficient.
+# * Hardcodes to link with LLD and GDB indexes.
+# * Enables split dwarf debug builds.
+# * Hardcodes to build with clang.
+#
+# We have some other jobs which verify/build with different compiler
+# options, but those are handled with a one-off.
+
+message(STATUS "Enabling IREE Release toolchain")
+set(CMAKE_C_COMPILER "clang")
+set(CMAKE_CXX_COMPILER "clang++")
+
+set(CMAKE_C_ARCHIVE_APPEND "<CMAKE_AR> qT <TARGET> <LINK_FLAGS> <OBJECTS>")
+set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> qT <TARGET> <LINK_FLAGS> <OBJECTS>")
+set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> crT <TARGET> <LINK_FLAGS> <OBJECTS>")
+set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> crT <TARGET> <LINK_FLAGS> <OBJECTS>")
+
+set(CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld -Wl,--gdb-index")
+set(CMAKE_MODULE_LINKER_FLAGS_INIT "-fuse-ld=lld -Wl,--gdb-index")
+set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld -Wl,--gdb-index")
+
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -gsplit-dwarf -ggnu-pubnames")
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -gsplit-dwarf -ggnu-pubnames")
+set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -gsplit-dwarf -ggnu-pubnames")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -gsplit-dwarf -ggnu-pubnames")
diff --git a/build_tools/pkgci/linux_toolchain_release_asserts.cmake b/build_tools/pkgci/linux_toolchain_release_asserts.cmake
new file mode 100644
index 0000000..4179aa4
--- /dev/null
+++ b/build_tools/pkgci/linux_toolchain_release_asserts.cmake
@@ -0,0 +1,19 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+message(STATUS "Enabling release assertions")
+include(${CMAKE_CURRENT_LIST_DIR}/linux_toolchain_release.cmake)
+
+string(REPLACE "-DNDEBUG" "" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
+string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
+string(REPLACE "-DNDEBUG" "" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")
+string(REPLACE "-DNDEBUG" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
+
+message(STATUS "Release C/CXX flags:")
+message(STATUS " CMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}")
+message(STATUS " CMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}")
+message(STATUS " CMAKE_C_FLAGS_RELWITHDEBINFO=${CMAKE_C_FLAGS_RELWITHDEBINFO}")
+message(STATUS " CMAKE_CXX_FLAGS_RELWITHDEBINFO=${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
diff --git a/build_tools/pkgci/setup_venv.py b/build_tools/pkgci/setup_venv.py
new file mode 100755
index 0000000..193800a
--- /dev/null
+++ b/build_tools/pkgci/setup_venv.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Sets up a Python venv with compiler/runtime from a workflow run.
+
+There are two modes in which to use this script:
+
+* Within a workflow, an artifact action will typically be used to fetch
+ relevant package artifacts. Specify the fetch location with
+ `--artifact-path=`.
+
+* Locally, the `--fetch-gh-workflow=WORKFLOW_ID` can be used instead in order
+ to download and setup the venv in one step.
+
+You must have the `gh` command line tool installed and authenticated if you
+will be fetching artifacts.
+"""
+
+from typing import Optional, Dict, Tuple
+
+import argparse
+import functools
+from glob import glob
+import json
+import os
+import sys
+from pathlib import Path
+import platform
+import subprocess
+import sys
+import tempfile
+import zipfile
+
+
+@functools.lru_cache
+def list_gh_artifacts(run_id: str) -> Dict[str, str]:
+ print(f"Fetching artifacts for workflow run {run_id}")
+ base_path = f"/repos/openxla/iree"
+ output = subprocess.check_output(
+ [
+ "gh",
+ "api",
+ "-H",
+ "Accept: application/vnd.github+json",
+ "-H",
+ "X-GitHub-Api-Version: 2022-11-28",
+ f"{base_path}/actions/runs/{run_id}/artifacts",
+ ]
+ )
+ data = json.loads(output)
+ # Uncomment to debug:
+ # print(json.dumps(data, indent=2))
+ artifacts = {
+ rec["name"]: f"{base_path}/actions/artifacts/{rec['id']}/zip"
+ for rec in data["artifacts"]
+ }
+ print("Found artifacts:")
+ for k, v in artifacts.items():
+ print(f" {k}: {v}")
+ return artifacts
+
+
+def fetch_gh_artifact(api_path: str, file: Path):
+ print(f"Downloading artifact {api_path}")
+ contents = subprocess.check_output(
+ [
+ "gh",
+ "api",
+ "-H",
+ "Accept: application/vnd.github+json",
+ "-H",
+ "X-GitHub-Api-Version: 2022-11-28",
+ api_path,
+ ]
+ )
+ file.write_bytes(contents)
+
+
+def find_venv_python(venv_path: Path) -> Optional[Path]:
+ paths = [venv_path / "bin" / "python", venv_path / "Scripts" / "python.exe"]
+ for p in paths:
+ if p.exists():
+ return p
+ return None
+
+
+def parse_arguments(argv=None):
+ parser = argparse.ArgumentParser(description="Setup venv")
+ parser.add_argument("--artifact-path", help="Path in which to find/fetch artifacts")
+ parser.add_argument(
+ "--fetch-gh-workflow", help="Fetch artifacts from a GitHub workflow"
+ )
+ parser.add_argument(
+ "--compiler-variant",
+ default="",
+ help="Package variant to install for the compiler ('', 'asserts')",
+ )
+ parser.add_argument(
+ "--runtime-variant",
+ default="",
+ help="Package variant to install for the runtime ('', 'asserts')",
+ )
+ parser.add_argument(
+ "venv_dir", type=Path, help="Directory in which to create the venv"
+ )
+ args = parser.parse_args(argv)
+ return args
+
+
+def main(args):
+ # Make sure we have an artifact path if fetching.
+ if not args.artifact_path and args.fetch_gh_workflow:
+ with tempfile.TemporaryDirectory() as td:
+ args.artifact_path = td
+ return main(args)
+
+ # Find the regression suite project.
+ rs_dir = (
+ (Path(__file__).resolve().parent.parent.parent)
+ / "experimental"
+ / "regression_suite"
+ )
+ if not rs_dir.exists():
+ print(f"Could not find regression_suite project: {rs_dir}")
+ return 1
+
+ artifact_prefix = f"{platform.system().lower()}_{platform.machine()}"
+ wheels = []
+ for package_stem, variant in [
+ ("iree-compiler", args.compiler_variant),
+ ("iree-runtime", args.runtime_variant),
+ ]:
+ wheels.append(
+ find_wheel_for_variants(args, artifact_prefix, package_stem, variant)
+ )
+ print("Installing wheels:", wheels)
+
+ # Set up venv.
+ venv_path = args.venv_dir
+ python_exe = find_venv_python(venv_path)
+
+ if not python_exe:
+ print(f"Creating venv at {str(venv_path)}")
+ subprocess.check_call([sys.executable, "-m", "venv", str(venv_path)])
+ python_exe = find_venv_python(venv_path)
+ if not python_exe:
+ raise RuntimeError("Error creating venv")
+
+ # Install each of the built wheels without deps or consulting an index.
+ # This is because we absolutely don't want this falling back to anything
+ # but what we said.
+ for artifact_path, package_name in wheels:
+ cmd = [
+ str(python_exe),
+ "-m",
+ "pip",
+ "install",
+ "--no-deps",
+ "--no-index",
+ "-f",
+ str(artifact_path),
+ "--force-reinstall",
+ package_name,
+ ]
+ print(f"Running command: {' '.join([str(c) for c in cmd])}")
+ subprocess.check_call(cmd)
+
+ # Now install the regression suite project, which will bring in any
+ # deps.
+ cmd = [
+ str(python_exe),
+ "-m",
+ "pip",
+ "install",
+ "--force-reinstall",
+ "-e",
+ str(rs_dir) + os.sep,
+ ]
+ print(f"Running command: {' '.join(cmd)}")
+ subprocess.check_call(cmd)
+
+ return 0
+
+
+def find_wheel_for_variants(
+ args, artifact_prefix: str, package_stem: str, variant: str
+) -> Tuple[Path, str]:
+ artifact_path = Path(args.artifact_path)
+ package_suffix = "" if variant == "" else f"-{variant}"
+ package_name = f"{package_stem}{package_suffix}"
+
+ def has_package():
+ norm_package_name = package_name.replace("-", "_")
+ pattern = str(artifact_path / f"{norm_package_name}-*.whl")
+ files = glob(pattern)
+ return bool(files)
+
+ if has_package():
+ return (artifact_path, package_name)
+
+ if not args.fetch_gh_workflow:
+ raise RuntimeError(
+ f"Could not find package {package_name} to install from {artifact_path}"
+ )
+
+ # Fetch.
+ artifact_path.mkdir(parents=True, exist_ok=True)
+ artifact_suffix = "" if variant == "" else f"_{variant}"
+ artifact_name = f"{artifact_prefix}_release{artifact_suffix}_packages"
+ artifact_file = artifact_path / f"{artifact_name}.zip"
+ if not artifact_file.exists():
+ print(f"Package {package_name} not found. Fetching from {artifact_name}...")
+ artifacts = list_gh_artifacts(args.fetch_gh_workflow)
+ if artifact_name not in artifacts:
+ raise RuntimeError(
+ f"Could not find required artifact {artifact_name} in run {args.fetch_gh_workflow}"
+ )
+ fetch_gh_artifact(artifacts[artifact_name], artifact_file)
+ print(f"Extracting {artifact_file}")
+ with zipfile.ZipFile(artifact_file) as zip_ref:
+ zip_ref.extractall(artifact_path)
+
+ # Try again.
+ if not has_package():
+ raise RuntimeError(f"Could not find {package_name} in {artifact_path}")
+ return (artifact_path, package_name)
+
+
+if __name__ == "__main__":
+ sys.exit(main(parse_arguments()))