# .github/workflows/pkgci_regression_test.yml - 3p/openxla/iree - Git at Google

 # Copyright 2023 The IREE Authors
 #
 # Licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 # Tests IREE release packages against model test suites and SDXL benchmarks.
 # Can be called from another workflow or dispatched manually; both triggers
 # accept the same single input.
 name: PkgCI Regression Test
 on:
   workflow_call:
     inputs:
       artifact_run_id:
         description: >-
           Workflow run ID forwarded to setup_venv.py as --fetch-gh-workflow
           (the run to fetch packages from). May be left empty.
         type: string
         default: ""
   workflow_dispatch:
     inputs:
       artifact_run_id:
         description: >-
           Workflow run ID forwarded to setup_venv.py as --fetch-gh-workflow
           (the run to fetch packages from). May be left empty.
         type: string
         default: ""

 jobs:
   # Runs the "real weights" model tests from nod-ai/SHARK-TestSuite, once per
   # matrix entry, using that entry's models config file.
   test_models:
     name: "test_models :: ${{ matrix.name }}"
     runs-on: ${{ matrix.runs-on }}
     strategy:
       # Keep the other matrix entries running when one entry fails.
       fail-fast: false

       # Note: these jobs should use persistent runners with local caches.
       # Downloading test files (50GB+) without a cache can take 20+ minutes.
       matrix:
         include:
           # CPU
           - name: cpu_llvm_task
             models-config-file: models_cpu_llvm_task.json
             runs-on:
               - self-hosted  # must come first
               - persistent-cache
               - Linux
               - X64

           # AMD GPU
           - name: amdgpu_rocm_mi250_gfx90a
             models-config-file: models_gpu_rocm_gfx90a.json
             runs-on: nodai-amdgpu-mi250-x86-64
           - name: amdgpu_rocm_mi300_gfx942
             models-config-file: models_gpu_rocm_gfx942.json
             runs-on: nodai-amdgpu-mi300-x86-64
           - name: amdgpu_vulkan
             models-config-file: models_gpu_vulkan.json
             runs-on: nodai-amdgpu-w7900-x86-64

           # NVIDIA GPU
           # None at the moment. Could maybe use the persistent a100 runners:
           #   - self-hosted # must come first
           #   - runner-group=${{ needs.setup.outputs.runner-group }}
           #   - environment=${{ needs.setup.outputs.runner-env }}
           #   - a100
           #   - os-family=Linux
           # (note: would need to plumb the presubmit/postsubmit runner-group through to here too)
     env:
       # Directory the release package artifacts are downloaded into.
       PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
       IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
       # Config file for this matrix entry, relative to the IREE checkout.
       MODELS_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.models-config-file }}
       # Virtual environment set up by setup_venv.py and activated by later steps.
       VENV_DIR: ${{ github.workspace }}/venv
     steps:
       - name: Checking out IREE repository
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
           submodules: false
       - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.1.0
         with:
           # Must match the subset of versions built in pkgci_build_packages.
           python-version: "3.11"
       # Without a run-id, download-artifact only looks at artifacts uploaded
       # by the current workflow run, so skip it when an explicit run ID is
       # supplied; that ID is handed to setup_venv.py below.
       # NOTE(review): assumes setup_venv.py fetches the packages itself when
       # --fetch-gh-workflow is non-empty - confirm.
       - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
         if: ${{ inputs.artifact_run_id == '' }}
         with:
           name: linux_x86_64_release_packages
           path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
       - name: Setup venv
         env:
           # Pass the input through the environment instead of expanding it
           # inside the script, so its value is never parsed as shell syntax.
           ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
         run: |
           ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
             --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
             --fetch-gh-workflow="${ARTIFACT_RUN_ID}"

       # Out of tree tests
       - name: Check out external TestSuite repository
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
           repository: nod-ai/SHARK-TestSuite
           # Pinned commit of the external test suite.
           ref: f5615ab29da491c0047146258dfa3a0c40c735e5
           path: SHARK-TestSuite
           submodules: false
           lfs: true
       - name: Install external TestSuite Python requirements
         run: |
           source "${VENV_DIR}/bin/activate"
           python3 -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt
           pip install --no-compile --pre --upgrade -e SHARK-TestSuite/common_tools
       - name: Download remote files for real weight model tests
         run: |
           source "${VENV_DIR}/bin/activate"
           python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir iree_tests/pytorch/models
           python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir iree_tests/sharktank

       # Runs whenever the matrix entry names a config file and the run was
       # not cancelled, i.e. also after an earlier step has failed.
       - name: Run external tests - models with real weights
         if: "matrix.models-config-file != '' && !cancelled()"
         run: |
           source "${VENV_DIR}/bin/activate"
           pytest \
             SHARK-TestSuite/iree_tests/pytorch/models \
             SHARK-TestSuite/iree_tests/sharktank \
             -rA \
             -k real_weights \
             --no-skip-tests-missing-files \
             --capture=no \
             --log-cli-level=info \
             --timeout=240 \
             --durations=0 \
             --config-files="${MODELS_CONFIG_FILE_PATH}"

   # Runs the in-tree SDXL and SD3 model tests for each matrix entry and, on
   # the ROCm runners, the SDXL pipeline benchmark with per-device golden
   # values.
   test_regression_suite:
     name: "test_regression_suite :: ${{ matrix.name }}"
     runs-on: ${{ matrix.runs-on }}
     strategy:
       # Keep the other matrix entries running when one entry fails.
       fail-fast: false

       # Note: these jobs should use persistent runners with local caches.
       # Downloading test files (50GB+) without a cache can take 20+ minutes.
       matrix:
         include:
           # CPU
           # NOTE(review): models-config-file is not referenced by any step of
           # this job - confirm whether it can be dropped.
           - name: cpu_llvm_task
             models-config-file: models_cpu_llvm_task.json
             backend: cpu
             runs-on:
               - self-hosted  # must come first
               - persistent-cache
               - Linux
               - X64

           # AMD GPU
           - name: amdgpu_rocm_mi250_gfx90a
             rocm-chip: gfx90a
             backend: rocm
             runs-on: nodai-amdgpu-mi250-x86-64
           - name: amdgpu_rocm_mi300_gfx942
             rocm-chip: gfx942
             backend: rocm
             runs-on: nodai-amdgpu-mi300-x86-64
     env:
       # Directory the release package artifacts are downloaded into.
       PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
       IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
       # Virtual environment set up by setup_venv.py and activated by later steps.
       VENV_DIR: ${{ github.workspace }}/venv
       TEST_OUTPUT_ARTIFACTS: ${{ github.workspace }}/model_output_artifacts
     steps:
       - name: Checking out IREE repository
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
           submodules: false
       - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.1.0
         with:
           # Must match the subset of versions built in pkgci_build_packages.
           python-version: "3.11"
       # Without a run-id, download-artifact only looks at artifacts uploaded
       # by the current workflow run, so skip it when an explicit run ID is
       # supplied; that ID is handed to setup_venv.py below.
       # NOTE(review): assumes setup_venv.py fetches the packages itself when
       # --fetch-gh-workflow is non-empty - confirm.
       - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
         if: ${{ inputs.artifact_run_id == '' }}
         with:
           name: linux_x86_64_release_packages
           path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
       - name: Setup venv
         env:
           # Pass the input through the environment instead of expanding it
           # inside the script, so its value is never parsed as shell syntax.
           ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
         run: |
           ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
             --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
             --fetch-gh-workflow="${ARTIFACT_RUN_ID}"
           source "${VENV_DIR}/bin/activate"
           pip install -e experimental/regression_suite

       # TODO(#17344): regenerate .mlirbc files, test plat_rdna3_rocm on rocm
       # # In-tree tests
       # - name: Run experimental/regression_suite tests
       #   run: |
       #     source ${VENV_DIR}/bin/activate
       #     pytest \
       #       -rA -s -m "plat_host_cpu and presubmit" \
       #       experimental/regression_suite

       # The two model-test steps below select tests by backend name (-k) and
       # run unless the workflow was cancelled, even after an earlier failure.
       - name: "Running SDXL special model tests"
         if: "!cancelled()"
         run: |
           source "${VENV_DIR}/bin/activate"
           pytest ./experimental/regression_suite/shark-test-suite-models/sdxl \
             -k ${{ matrix.backend }} \
             -rpfE \
             --capture=no \
             --log-cli-level=info \
             --timeout=240 \
             --durations=0
         env:
           # Empty for matrix entries that do not define rocm-chip (cpu).
           ROCM_CHIP: ${{ matrix.rocm-chip }}

       - name: "Running SD3 special model tests"
         if: "!cancelled()"
         run: |
           source "${VENV_DIR}/bin/activate"
           pytest ./experimental/regression_suite/shark-test-suite-models/sd3 \
             -k ${{ matrix.backend }} \
             -rpfE \
             --capture=no \
             --log-cli-level=info \
             --timeout=240 \
             --durations=0
         env:
           # Empty for matrix entries that do not define rocm-chip (cpu).
           ROCM_CHIP: ${{ matrix.rocm-chip }}

       # Note: mi250 benchmark times are more lenient than mi300 (allowing about
       # 10% deviation from observed averages), since the mi250 runners we use
       # are more unstable and we care most about peak performance on mi300.
       - name: "Running SDXL rocm pipeline benchmark (mi250)"
         if: contains(matrix.name, 'rocm_mi250_gfx90a')
         run: |
           source "${VENV_DIR}/bin/activate"
           pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
             --goldentime-rocm-e2e-ms 1616.0 \
             --goldentime-rocm-unet-ms 419.0 \
             --goldentime-rocm-clip-ms 18.5 \
             --goldentime-rocm-vae-ms 337.0 \
             --goldendispatch-rocm-unet 1531 \
             --goldendispatch-rocm-clip 1139 \
             --goldendispatch-rocm-vae 246 \
             --goldensize-rocm-unet-bytes 2280000 \
             --goldensize-rocm-clip-bytes 860000 \
             --goldensize-rocm-vae-bytes 840000 \
             --rocm-chip gfx90a \
             --timeout=240 \
             --log-cli-level=info \
             --retries 7
           # Publish job_summary.md to the step summary, then remove it from
           # the workspace.
           echo "$(<job_summary.md )" >> "$GITHUB_STEP_SUMMARY"
           rm job_summary.md

       - name: "Running SDXL rocm pipeline benchmark (mi300)"
         if: contains(matrix.name, 'rocm_mi300_gfx942')
         run: |
           source "${VENV_DIR}/bin/activate"
           pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
             --goldentime-rocm-e2e-ms 372.0 \
             --goldentime-rocm-unet-ms 95.0 \
             --goldentime-rocm-clip-ms 15.5 \
             --goldentime-rocm-vae-ms 80.0 \
             --goldendispatch-rocm-unet 1531 \
             --goldendispatch-rocm-clip 1139 \
             --goldendispatch-rocm-vae 246 \
             --goldensize-rocm-unet-bytes 2270000 \
             --goldensize-rocm-clip-bytes 860000 \
             --goldensize-rocm-vae-bytes 840000 \
             --goldentime-rocm-punet-int8-fp16-ms 55 \
             --goldendispatch-rocm-punet-int8-fp16 1284 \
             --goldensize-rocm-punet-int8-fp16-bytes 2560000 \
             --goldentime-rocm-punet-int8-fp8-ms 59 \
             --goldendispatch-rocm-punet-int8-fp8 1564 \
             --goldensize-rocm-punet-int8-fp8-bytes 2800000 \
             --rocm-chip gfx942 \
             --log-cli-level=info \
             --timeout=240 \
             --retries 7
           # Publish job_summary.md to the step summary, then remove it from
           # the workspace, matching the mi250 step.
           echo "$(<job_summary.md )" >> "$GITHUB_STEP_SUMMARY"
           rm job_summary.md
 # Copyright 2023 The IREE Authors
 #
 # Licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 # Tests IREE release packages against model test suites and SDXL benchmarks.
 # Can be called from another workflow or dispatched manually; both triggers
 # accept the same single input.
 name: PkgCI Regression Test
 on:
   workflow_call:
     inputs:
       artifact_run_id:
         description: >-
           Workflow run ID forwarded to setup_venv.py as --fetch-gh-workflow
           (the run to fetch packages from). May be left empty.
         type: string
         default: ""
   workflow_dispatch:
     inputs:
       artifact_run_id:
         description: >-
           Workflow run ID forwarded to setup_venv.py as --fetch-gh-workflow
           (the run to fetch packages from). May be left empty.
         type: string
         default: ""

 jobs:
   # Runs the "real weights" model tests from nod-ai/SHARK-TestSuite, once per
   # matrix entry, using that entry's models config file.
   test_models:
     name: "test_models :: ${{ matrix.name }}"
     runs-on: ${{ matrix.runs-on }}
     strategy:
       # Keep the other matrix entries running when one entry fails.
       fail-fast: false

       # Note: these jobs should use persistent runners with local caches.
       # Downloading test files (50GB+) without a cache can take 20+ minutes.
       matrix:
         include:
           # CPU
           - name: cpu_llvm_task
             models-config-file: models_cpu_llvm_task.json
             runs-on:
               - self-hosted  # must come first
               - persistent-cache
               - Linux
               - X64

           # AMD GPU
           - name: amdgpu_rocm_mi250_gfx90a
             models-config-file: models_gpu_rocm_gfx90a.json
             runs-on: nodai-amdgpu-mi250-x86-64
           - name: amdgpu_rocm_mi300_gfx942
             models-config-file: models_gpu_rocm_gfx942.json
             runs-on: nodai-amdgpu-mi300-x86-64
           - name: amdgpu_vulkan
             models-config-file: models_gpu_vulkan.json
             runs-on: nodai-amdgpu-w7900-x86-64

           # NVIDIA GPU
           # None at the moment. Could maybe use the persistent a100 runners:
           #   - self-hosted # must come first
           #   - runner-group=${{ needs.setup.outputs.runner-group }}
           #   - environment=${{ needs.setup.outputs.runner-env }}
           #   - a100
           #   - os-family=Linux
           # (note: would need to plumb the presubmit/postsubmit runner-group through to here too)
     env:
       # Directory the release package artifacts are downloaded into.
       PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
       IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
       # Config file for this matrix entry, relative to the IREE checkout.
       MODELS_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.models-config-file }}
       # Virtual environment set up by setup_venv.py and activated by later steps.
       VENV_DIR: ${{ github.workspace }}/venv
     steps:
       - name: Checking out IREE repository
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
           submodules: false
       - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.1.0
         with:
           # Must match the subset of versions built in pkgci_build_packages.
           python-version: "3.11"
       # Without a run-id, download-artifact only looks at artifacts uploaded
       # by the current workflow run, so skip it when an explicit run ID is
       # supplied; that ID is handed to setup_venv.py below.
       # NOTE(review): assumes setup_venv.py fetches the packages itself when
       # --fetch-gh-workflow is non-empty - confirm.
       - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
         if: ${{ inputs.artifact_run_id == '' }}
         with:
           name: linux_x86_64_release_packages
           path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
       - name: Setup venv
         env:
           # Pass the input through the environment instead of expanding it
           # inside the script, so its value is never parsed as shell syntax.
           ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
         run: |
           ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
             --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
             --fetch-gh-workflow="${ARTIFACT_RUN_ID}"

       # Out of tree tests
       - name: Check out external TestSuite repository
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
           repository: nod-ai/SHARK-TestSuite
           # Pinned commit of the external test suite.
           ref: f5615ab29da491c0047146258dfa3a0c40c735e5
           path: SHARK-TestSuite
           submodules: false
           lfs: true
       - name: Install external TestSuite Python requirements
         run: |
           source "${VENV_DIR}/bin/activate"
           python3 -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt
           pip install --no-compile --pre --upgrade -e SHARK-TestSuite/common_tools
       - name: Download remote files for real weight model tests
         run: |
           source "${VENV_DIR}/bin/activate"
           python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir iree_tests/pytorch/models
           python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir iree_tests/sharktank

       # Runs whenever the matrix entry names a config file and the run was
       # not cancelled, i.e. also after an earlier step has failed.
       - name: Run external tests - models with real weights
         if: "matrix.models-config-file != '' && !cancelled()"
         run: |
           source "${VENV_DIR}/bin/activate"
           pytest \
             SHARK-TestSuite/iree_tests/pytorch/models \
             SHARK-TestSuite/iree_tests/sharktank \
             -rA \
             -k real_weights \
             --no-skip-tests-missing-files \
             --capture=no \
             --log-cli-level=info \
             --timeout=240 \
             --durations=0 \
             --config-files="${MODELS_CONFIG_FILE_PATH}"

   # Runs the in-tree SDXL and SD3 model tests for each matrix entry and, on
   # the ROCm runners, the SDXL pipeline benchmark with per-device golden
   # values.
   test_regression_suite:
     name: "test_regression_suite :: ${{ matrix.name }}"
     runs-on: ${{ matrix.runs-on }}
     strategy:
       # Keep the other matrix entries running when one entry fails.
       fail-fast: false

       # Note: these jobs should use persistent runners with local caches.
       # Downloading test files (50GB+) without a cache can take 20+ minutes.
       matrix:
         include:
           # CPU
           # NOTE(review): models-config-file is not referenced by any step of
           # this job - confirm whether it can be dropped.
           - name: cpu_llvm_task
             models-config-file: models_cpu_llvm_task.json
             backend: cpu
             runs-on:
               - self-hosted  # must come first
               - persistent-cache
               - Linux
               - X64

           # AMD GPU
           - name: amdgpu_rocm_mi250_gfx90a
             rocm-chip: gfx90a
             backend: rocm
             runs-on: nodai-amdgpu-mi250-x86-64
           - name: amdgpu_rocm_mi300_gfx942
             rocm-chip: gfx942
             backend: rocm
             runs-on: nodai-amdgpu-mi300-x86-64
     env:
       # Directory the release package artifacts are downloaded into.
       PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
       IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
       # Virtual environment set up by setup_venv.py and activated by later steps.
       VENV_DIR: ${{ github.workspace }}/venv
       TEST_OUTPUT_ARTIFACTS: ${{ github.workspace }}/model_output_artifacts
     steps:
       - name: Checking out IREE repository
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
           submodules: false
       - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.1.0
         with:
           # Must match the subset of versions built in pkgci_build_packages.
           python-version: "3.11"
       # Without a run-id, download-artifact only looks at artifacts uploaded
       # by the current workflow run, so skip it when an explicit run ID is
       # supplied; that ID is handed to setup_venv.py below.
       # NOTE(review): assumes setup_venv.py fetches the packages itself when
       # --fetch-gh-workflow is non-empty - confirm.
       - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
         if: ${{ inputs.artifact_run_id == '' }}
         with:
           name: linux_x86_64_release_packages
           path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
       - name: Setup venv
         env:
           # Pass the input through the environment instead of expanding it
           # inside the script, so its value is never parsed as shell syntax.
           ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
         run: |
           ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
             --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
             --fetch-gh-workflow="${ARTIFACT_RUN_ID}"
           source "${VENV_DIR}/bin/activate"
           pip install -e experimental/regression_suite

       # TODO(#17344): regenerate .mlirbc files, test plat_rdna3_rocm on rocm
       # # In-tree tests
       # - name: Run experimental/regression_suite tests
       #   run: |
       #     source ${VENV_DIR}/bin/activate
       #     pytest \
       #       -rA -s -m "plat_host_cpu and presubmit" \
       #       experimental/regression_suite

       # The two model-test steps below select tests by backend name (-k) and
       # run unless the workflow was cancelled, even after an earlier failure.
       - name: "Running SDXL special model tests"
         if: "!cancelled()"
         run: |
           source "${VENV_DIR}/bin/activate"
           pytest ./experimental/regression_suite/shark-test-suite-models/sdxl \
             -k ${{ matrix.backend }} \
             -rpfE \
             --capture=no \
             --log-cli-level=info \
             --timeout=240 \
             --durations=0
         env:
           # Empty for matrix entries that do not define rocm-chip (cpu).
           ROCM_CHIP: ${{ matrix.rocm-chip }}

       - name: "Running SD3 special model tests"
         if: "!cancelled()"
         run: |
           source "${VENV_DIR}/bin/activate"
           pytest ./experimental/regression_suite/shark-test-suite-models/sd3 \
             -k ${{ matrix.backend }} \
             -rpfE \
             --capture=no \
             --log-cli-level=info \
             --timeout=240 \
             --durations=0
         env:
           # Empty for matrix entries that do not define rocm-chip (cpu).
           ROCM_CHIP: ${{ matrix.rocm-chip }}

       # Note: mi250 benchmark times are more lenient than mi300 (allowing about
       # 10% deviation from observed averages), since the mi250 runners we use
       # are more unstable and we care most about peak performance on mi300.
       - name: "Running SDXL rocm pipeline benchmark (mi250)"
         if: contains(matrix.name, 'rocm_mi250_gfx90a')
         run: |
           source "${VENV_DIR}/bin/activate"
           pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
             --goldentime-rocm-e2e-ms 1616.0 \
             --goldentime-rocm-unet-ms 419.0 \
             --goldentime-rocm-clip-ms 18.5 \
             --goldentime-rocm-vae-ms 337.0 \
             --goldendispatch-rocm-unet 1531 \
             --goldendispatch-rocm-clip 1139 \
             --goldendispatch-rocm-vae 246 \
             --goldensize-rocm-unet-bytes 2280000 \
             --goldensize-rocm-clip-bytes 860000 \
             --goldensize-rocm-vae-bytes 840000 \
             --rocm-chip gfx90a \
             --timeout=240 \
             --log-cli-level=info \
             --retries 7
           # Publish job_summary.md to the step summary, then remove it from
           # the workspace.
           echo "$(<job_summary.md )" >> "$GITHUB_STEP_SUMMARY"
           rm job_summary.md

       - name: "Running SDXL rocm pipeline benchmark (mi300)"
         if: contains(matrix.name, 'rocm_mi300_gfx942')
         run: |
           source "${VENV_DIR}/bin/activate"
           pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
             --goldentime-rocm-e2e-ms 372.0 \
             --goldentime-rocm-unet-ms 95.0 \
             --goldentime-rocm-clip-ms 15.5 \
             --goldentime-rocm-vae-ms 80.0 \
             --goldendispatch-rocm-unet 1531 \
             --goldendispatch-rocm-clip 1139 \
             --goldendispatch-rocm-vae 246 \
             --goldensize-rocm-unet-bytes 2270000 \
             --goldensize-rocm-clip-bytes 860000 \
             --goldensize-rocm-vae-bytes 840000 \
             --goldentime-rocm-punet-int8-fp16-ms 55 \
             --goldendispatch-rocm-punet-int8-fp16 1284 \
             --goldensize-rocm-punet-int8-fp16-bytes 2560000 \
             --goldentime-rocm-punet-int8-fp8-ms 59 \
             --goldendispatch-rocm-punet-int8-fp8 1564 \
             --goldensize-rocm-punet-int8-fp8-bytes 2800000 \
             --rocm-chip gfx942 \
             --log-cli-level=info \
             --timeout=240 \
             --retries 7
           # Publish job_summary.md to the step summary, then remove it from
           # the workspace, matching the mi250 step.
           echo "$(<job_summary.md )" >> "$GITHUB_STEP_SUMMARY"
           rm job_summary.md