# .github/workflows/pkgci_regression_test.yml - 3p/openxla/iree - Git at Google

 # Copyright 2023 The IREE Authors
 #
 # Licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 # Workflow title as displayed in the GitHub Actions UI.
 name: PkgCI Regression Test

 # Two entry points, both exposing the same optional `artifact_run_id` string
 # input (empty by default). Its value is handed to setup_venv.py through the
 # --fetch-gh-workflow flag in the "Setup venv" step.
 on:
   # Manual trigger.
   workflow_dispatch:
     inputs:
       artifact_run_id:
         default: ''
         type: string
   # Reusable-workflow trigger for callers in other workflow files.
   workflow_call:
     inputs:
       artifact_run_id:
         default: ''
         type: string

 jobs:
   # One job instance per matrix entry; each instance is named after its entry
   # and runs on the runner label(s) that entry carries.
   test_regression_suite:
     name: 'test_regression_suite :: ${{ matrix.name }}'
     runs-on: ${{ matrix.runs-on }}
     strategy:
       # Let the remaining matrix entries finish when one of them fails.
       fail-fast: false

       # Note: these jobs should use persistent runners with local caches.
       # Downloading test files (50GB+) without a cache can take 20+ minutes.
       matrix:
         include:
           # CPU
           - name: cpu_llvm_task
             backend: cpu
             # NOTE(review): no step visible in this file reads
             # models-config-file; confirm it is still consumed somewhere.
             models-config-file: models_cpu_llvm_task.json
             # self-hosted must come first in the label list.
             runs-on: [self-hosted, persistent-cache, Linux, X64]

           # AMD GPU
           - name: amdgpu_rocm_mi250_gfx90a
             backend: rocm
             rocm-chip: gfx90a
             runs-on: nodai-amdgpu-mi250-x86-64
           - name: amdgpu_rocm_mi300_gfx942
             backend: rocm
             rocm-chip: gfx942
             runs-on: nodai-amdgpu-mi300-x86-64
     # Job-level environment available to every step of this job; all paths are
     # rooted in the workspace directory.
     env:
       # Virtual environment created by the "Setup venv" step and activated by
       # each test step.
       VENV_DIR: ${{ github.workspace }}/venv
       # Destination of the downloaded release packages artifact.
       PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
       # NOTE(review): the two variables below are not referenced by any step
       # visible in this file; presumably the test code reads them. Confirm.
       IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
       TEST_OUTPUT_ARTIFACTS: ${{ github.workspace }}/model_output_artifacts
     steps:
       # Check out this repository without its submodules. The action is pinned
       # to a full commit SHA; the trailing comment records the release tag.
       - name: Checking out IREE repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
           submodules: false
       # Provide Python 3.11 for the later steps. The version is quoted so it
       # stays a string instead of being read as a float.
       - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           # Must match the subset of versions built in pkgci_build_packages.
           python-version: "3.11"
       # Download the release packages uploaded earlier in the same workflow
       # run into PACKAGE_DOWNLOAD_DIR.
       #
       # Skipped when an explicit artifact_run_id is supplied: this workflow has
       # no build job of its own, so such a run has no artifact with this name
       # and the download would fail. The run id is instead passed to
       # setup_venv.py via --fetch-gh-workflow in the "Setup venv" step.
       # NOTE(review): assumes setup_venv.py downloads the packages itself when
       # --fetch-gh-workflow is non-empty. Confirm against the script.
       - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
         if: ${{ inputs.artifact_run_id == '' }}
         with:
           name: linux_x86_64_release_packages
           path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
       # Create the virtual environment with setup_venv.py, then install the
       # in-tree regression suite into it in editable mode.
       - name: Setup venv
         env:
           # Hand the workflow input to the script through the environment
           # instead of expanding it inside the shell text, so its content can
           # never be interpreted as shell syntax.
           ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
         run: |
           ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
             --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
             --fetch-gh-workflow="${ARTIFACT_RUN_ID}"
           source "${VENV_DIR}/bin/activate"
           pip install -e experimental/regression_suite

       # TODO(#17344): regenerate .mlirbc files, test plat_rdna3_rocm on rocm
       # # In-tree tests
       # - name: Run experimental/regression_suite tests
       #   run: |
       #     source ${VENV_DIR}/bin/activate
       #     pytest \
       #       -rA -s -m "plat_host_cpu and presubmit" \
       #       experimental/regression_suite

       # SDXL model tests, narrowed to the current matrix backend with pytest -k.
       # The step still runs after an earlier step has failed; only a cancelled
       # run skips it.
       - name: 'Running SDXL special model tests'
         if: ${{ !cancelled() }}
         env:
           # Empty for the CPU matrix entry, which defines no rocm-chip.
           ROCM_CHIP: ${{ matrix.rocm-chip }}
         run: |
           source ${VENV_DIR}/bin/activate
           pytest \
             -rpfE \
             --capture=no \
             --log-cli-level=info \
             --durations=0 \
             --timeout=600 \
             -k ${{ matrix.backend }} \
             ./experimental/regression_suite/shark-test-suite-models/sdxl

       # SD3 model tests, narrowed to the current matrix backend with pytest -k.
       # The step still runs after an earlier step has failed; only a cancelled
       # run skips it.
       - name: 'Running SD3 special model tests'
         if: ${{ !cancelled() }}
         env:
           # Empty for the CPU matrix entry, which defines no rocm-chip.
           ROCM_CHIP: ${{ matrix.rocm-chip }}
         run: |
           source ${VENV_DIR}/bin/activate
           pytest \
             -rpfE \
             --capture=no \
             --log-cli-level=info \
             --durations=0 \
             --timeout=600 \
             -k ${{ matrix.backend }} \
             ./experimental/regression_suite/shark-test-suite-models/sd3

       # The mi250 runners we use are less stable, and peak performance matters
       # most on mi300, so the mi250 golden times get a wider margin than the
       # mi300 ones: about 30% deviation from the observed averages.
       #
       # Runs only for the mi250 matrix entry. Once pytest has finished, the
       # contents of job_summary.md are appended to the step summary and the
       # file is deleted.
       - name: 'Running SDXL rocm pipeline benchmark (mi250)'
         if: ${{ contains(matrix.name, 'rocm_mi250_gfx90a') }}
         run: |
           source ${VENV_DIR}/bin/activate
           pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
             --rocm-chip gfx90a \
             --retries 7 \
             --timeout=600 \
             --log-cli-level=info \
             --goldentime-tolerance-multiplier 1.3 \
             --goldentime-rocm-e2e-ms 1100.0 \
             --goldentime-rocm-unet-ms 255.0 \
             --goldendispatch-rocm-unet 1602 \
             --goldensize-rocm-unet-bytes 2280000 \
             --goldentime-rocm-clip-ms 14.5 \
             --goldendispatch-rocm-clip 1139 \
             --goldensize-rocm-clip-bytes 860000 \
             --goldentime-rocm-vae-ms 310.0 \
             --goldendispatch-rocm-vae 246 \
             --goldensize-rocm-vae-bytes 840000
           echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY
           rm job_summary.md

       # Note: allowing 10% deviation from observed averages here to account for
       # different runner conditions.
       #
       # Runs only for the mi300 matrix entry. Once pytest has finished, the
       # contents of job_summary.md are appended to the step summary and the
       # file is removed, as the mi250 step does, so it does not linger in the
       # workspace of a persistent runner. `rm -f` keeps the step green when the
       # file is absent, which was also the outcome before the cleanup existed.
       - name: "Running SDXL rocm pipeline benchmark (mi300)"
         if: contains(matrix.name, 'rocm_mi300_gfx942')
         run: |
           source ${VENV_DIR}/bin/activate
           pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
             --goldentime-tolerance-multiplier 1.1 \
             --goldentime-rocm-e2e-ms 325.0 \
             --goldentime-rocm-unet-ms 80.0 \
             --goldentime-rocm-clip-ms 15.0 \
             --goldentime-rocm-vae-ms 75.0 \
             --goldendispatch-rocm-unet 1602 \
             --goldendispatch-rocm-clip 1139 \
             --goldendispatch-rocm-vae 246 \
             --goldensize-rocm-unet-bytes 2270000 \
             --goldensize-rocm-clip-bytes 860000 \
             --goldensize-rocm-vae-bytes 840000 \
             --goldentime-rocm-punet-int8-fp16-ms 50.0 \
             --goldentime-rocm-punet-int8-fp8-ms 52.0 \
             --goldendispatch-rocm-punet-int8-fp16 1424 \
             --goldendispatch-rocm-punet-int8-fp8 1704 \
             --goldensize-rocm-punet-int8-fp8-bytes 2800000 \
             --goldensize-rocm-punet-int8-fp16-bytes 2560000 \
             --rocm-chip gfx942 \
             --log-cli-level=info \
             --timeout=600 \
             --retries 7
           echo "$(<job_summary.md )" >> "$GITHUB_STEP_SUMMARY"
           rm -f job_summary.md
 # Copyright 2023 The IREE Authors
 #
 # Licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
---
 # NOTE(review): this is a second rendering of the PkgCI Regression Test
 # workflow. Its nesting has been restored (spaces instead of a single tab per
 # line, `|` block-scalar indicators unescaped) and it is kept as a separate
 # YAML document so its top-level keys do not collide with the document above.

 name: PkgCI Regression Test
 on:
   workflow_call:
     inputs:
       artifact_run_id:
         type: string
         default: ""
   workflow_dispatch:
     inputs:
       artifact_run_id:
         type: string
         default: ""

 jobs:
   test_regression_suite:
     name: "test_regression_suite :: ${{ matrix.name }}"
     runs-on: ${{ matrix.runs-on }}
     strategy:
       fail-fast: false

       # Note: these jobs should use persistent runners with local caches.
       # Downloading test files (50GB+) without a cache can take 20+ minutes.
       matrix:
         include:
           # CPU
           - name: cpu_llvm_task
             models-config-file: models_cpu_llvm_task.json
             backend: cpu
             runs-on:
               - self-hosted # must come first
               - persistent-cache
               - Linux
               - X64

           # AMD GPU
           - name: amdgpu_rocm_mi250_gfx90a
             rocm-chip: gfx90a
             backend: rocm
             runs-on: nodai-amdgpu-mi250-x86-64
           - name: amdgpu_rocm_mi300_gfx942
             rocm-chip: gfx942
             backend: rocm
             runs-on: nodai-amdgpu-mi300-x86-64
     env:
       PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
       IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
       VENV_DIR: ${{ github.workspace }}/venv
       TEST_OUTPUT_ARTIFACTS: ${{ github.workspace }}/model_output_artifacts
     steps:
       - name: Checking out IREE repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
           submodules: false
       - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           # Must match the subset of versions built in pkgci_build_packages.
           python-version: "3.11"
       - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
         with:
           name: linux_x86_64_release_packages
           path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
       - name: Setup venv
         run: |
           ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \
             --artifact-path=${PACKAGE_DOWNLOAD_DIR} \
             --fetch-gh-workflow=${{ inputs.artifact_run_id }}
           source ${VENV_DIR}/bin/activate
           pip install -e experimental/regression_suite

       # TODO(#17344): regenerate .mlirbc files, test plat_rdna3_rocm on rocm
       # # In-tree tests
       # - name: Run experimental/regression_suite tests
       #   run: |
       #     source ${VENV_DIR}/bin/activate
       #     pytest \
       #       -rA -s -m "plat_host_cpu and presubmit" \
       #       experimental/regression_suite

       - name: "Running SDXL special model tests"
         if: "!cancelled()"
         run: |
           source ${VENV_DIR}/bin/activate
           pytest ./experimental/regression_suite/shark-test-suite-models/sdxl \
             -k ${{ matrix.backend }} \
             -rpfE \
             --capture=no \
             --log-cli-level=info \
             --timeout=600 \
             --durations=0
         env:
           ROCM_CHIP: ${{ matrix.rocm-chip }}

       - name: "Running SD3 special model tests"
         if: "!cancelled()"
         run: |
           source ${VENV_DIR}/bin/activate
           pytest ./experimental/regression_suite/shark-test-suite-models/sd3 \
             -k ${{ matrix.backend }} \
             -rpfE \
             --capture=no \
             --log-cli-level=info \
             --timeout=600 \
             --durations=0
         env:
           ROCM_CHIP: ${{ matrix.rocm-chip }}

       # Note: mi250 benchmark times are more lenient than mi300 (allowing about
       # 30% deviation from observed averages), since the mi250 runners we use
       # are more unstable and we care most about peak performance on mi300.
       - name: "Running SDXL rocm pipeline benchmark (mi250)"
         if: contains(matrix.name, 'rocm_mi250_gfx90a')
         run: |
           source ${VENV_DIR}/bin/activate
           pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
             --goldentime-tolerance-multiplier 1.3 \
             --goldentime-rocm-e2e-ms 1100.0 \
             --goldentime-rocm-unet-ms 255.0 \
             --goldentime-rocm-clip-ms 14.5 \
             --goldentime-rocm-vae-ms 310.0 \
             --goldendispatch-rocm-unet 1602 \
             --goldendispatch-rocm-clip 1139 \
             --goldendispatch-rocm-vae 246 \
             --goldensize-rocm-unet-bytes 2280000 \
             --goldensize-rocm-clip-bytes 860000 \
             --goldensize-rocm-vae-bytes 840000 \
             --rocm-chip gfx90a \
             --timeout=600 \
             --log-cli-level=info \
             --retries 7
           echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY
           rm job_summary.md

       # Note: allowing 10% deviation from observed averages here to account for
       # different runner conditions.
       - name: "Running SDXL rocm pipeline benchmark (mi300)"
         if: contains(matrix.name, 'rocm_mi300_gfx942')
         run: |
           source ${VENV_DIR}/bin/activate
           pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
             --goldentime-tolerance-multiplier 1.1 \
             --goldentime-rocm-e2e-ms 325.0 \
             --goldentime-rocm-unet-ms 80.0 \
             --goldentime-rocm-clip-ms 15.0 \
             --goldentime-rocm-vae-ms 75.0 \
             --goldendispatch-rocm-unet 1602 \
             --goldendispatch-rocm-clip 1139 \
             --goldendispatch-rocm-vae 246 \
             --goldensize-rocm-unet-bytes 2270000 \
             --goldensize-rocm-clip-bytes 860000 \
             --goldensize-rocm-vae-bytes 840000 \
             --goldentime-rocm-punet-int8-fp16-ms 50.0 \
             --goldentime-rocm-punet-int8-fp8-ms 52.0 \
             --goldendispatch-rocm-punet-int8-fp16 1424 \
             --goldendispatch-rocm-punet-int8-fp8 1704 \
             --goldensize-rocm-punet-int8-fp8-bytes 2800000 \
             --goldensize-rocm-punet-int8-fp16-bytes 2560000 \
             --rocm-chip gfx942 \
             --log-cli-level=info \
             --timeout=600 \
             --retries 7
           echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY