# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
| |
# Workflow identity and triggers.
#
# Both entry points accept the same optional string input, `artifact_run_id`,
# which defaults to the empty string.
name: PkgCI Regression Test

on:
  # Entry point when invoked as a reusable workflow.
  workflow_call:
    inputs:
      artifact_run_id:
        type: string
        required: false
        default: ''

  # Entry point when started manually.
  workflow_dispatch:
    inputs:
      artifact_run_id:
        type: string
        required: false
        default: ''
| |
| jobs: |
# Runs the ONNX operator test suite from iree-org/iree-test-suites once per
# matrix entry, using the `linux_x86_64_release_packages` artifact installed
# into a virtual environment by build_tools/pkgci/setup_venv.py.
#
# On failure, the config file is regenerated from the pytest report log and
# uploaded as an artifact.
test_onnx:
  name: "test_onnx :: ${{ matrix.name }}"
  runs-on: ${{ matrix.runs-on }}
  strategy:
    fail-fast: false
    matrix:
      include:
        # CPU
        - name: cpu_llvm_sync
          config-file: onnx_ops_cpu_llvm_sync.json
          numprocesses: auto
          # NOTE(review): GitHub has retired the hosted ubuntu-20.04 image;
          # confirm this label still resolves to an available runner.
          runs-on: ubuntu-20.04

        # AMD GPU
        - name: amdgpu_rocm_rdna3
          numprocesses: 1
          config-file: onnx_ops_gpu_rocm_rdna3.json
          runs-on: nodai-amdgpu-w7900-x86-64
        - name: amdgpu_vulkan
          numprocesses: 4
          config-file: onnx_ops_gpu_vulkan.json
          runs-on: nodai-amdgpu-w7900-x86-64

        # NVIDIA GPU
        - name: nvidiagpu_cuda
          config-file: onnx_ops_gpu_cuda.json
          numprocesses: 4
          runs-on:
            - self-hosted  # must come first
            - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
            - environment=prod
            - gpu  # TODO(scotttodd): qualify further with vendor/model
            - os-family=Linux
        - name: nvidiagpu_vulkan
          config-file: onnx_ops_gpu_vulkan.json
          numprocesses: 4
          runs-on:
            - self-hosted  # must come first
            - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
            - environment=prod
            - gpu  # TODO(scotttodd): qualify further with vendor/model
            - os-family=Linux
  env:
    PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
    CONFIG_FILE_PATH: tests/external/iree-test-suites/onnx_ops/${{ matrix.config-file }}
    NUMPROCESSES: ${{ matrix.numprocesses }}
    LOG_FILE_PATH: /tmp/test_onnx_ops_${{ matrix.name }}_logs.json
    VENV_DIR: ${{ github.workspace }}/venv
  steps:
    - name: Checking out IREE repository
      uses: actions/checkout@v4.1.7
      with:
        submodules: false
    - uses: actions/setup-python@v5.1.0
      with:
        # Must match the subset of versions built in pkgci_build_packages.
        python-version: "3.11"
    # Only download from the current run when no explicit run id was given;
    # otherwise this run has no such artifact and the step would fail.
    # NOTE(review): assumes setup_venv.py fetches the packages itself when
    # --fetch-gh-workflow is non-empty; confirm against that script.
    - uses: actions/download-artifact@v4.1.7
      if: ${{ inputs.artifact_run_id == '' }}
      with:
        name: linux_x86_64_release_packages
        path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
    - name: Setup venv
      env:
        # Passed through the environment instead of being expanded into the
        # script text, so the input value is never parsed as shell code.
        ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
      run: |
        ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
          --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
          --fetch-gh-workflow="${ARTIFACT_RUN_ID}"

    - name: Checkout test suites repository
      uses: actions/checkout@v4.1.7
      with:
        repository: iree-org/iree-test-suites
        ref: 323c2df477544d789c56e4dfda9b047580b77cbf
        path: iree-test-suites
    - name: Install ONNX ops test suite requirements
      run: |
        source "${VENV_DIR}/bin/activate"
        python -m pip install -r iree-test-suites/onnx_ops/requirements.txt
    - name: Run ONNX ops test suite
      run: |
        source "${VENV_DIR}/bin/activate"
        pytest iree-test-suites/onnx_ops/ \
          -rpfE \
          --numprocesses "${NUMPROCESSES}" \
          --timeout=30 \
          --durations=20 \
          --config-files="${CONFIG_FILE_PATH}" \
          --report-log="${LOG_FILE_PATH}"
    - name: "Updating config file with latest XFAIL lists"
      if: failure()
      run: |
        source "${VENV_DIR}/bin/activate"
        python iree-test-suites/onnx_ops/update_config_xfails.py \
          --log-file="${LOG_FILE_PATH}" \
          --config-file="${CONFIG_FILE_PATH}"
        cat "${CONFIG_FILE_PATH}"
    - name: "Uploading new config file"
      if: failure()
      uses: actions/upload-artifact@v4.3.3
      with:
        # amdgpu_vulkan and nvidiagpu_vulkan share onnx_ops_gpu_vulkan.json.
        # upload-artifact v4 rejects a second upload under an existing name,
        # so the matrix entry name is included to keep names unique.
        name: ${{ matrix.name }}_${{ matrix.config-file }}
        path: ${{ env.CONFIG_FILE_PATH }}
| |
# Runs the real-weight model tests from nod-ai/SHARK-TestSuite once per matrix
# entry, using the `linux_x86_64_release_packages` artifact installed into a
# virtual environment by build_tools/pkgci/setup_venv.py.
test_models:
  name: "test_models :: ${{ matrix.name }}"
  runs-on: ${{ matrix.runs-on }}
  strategy:
    fail-fast: false

    # Note: these jobs should use persistent runners with local caches.
    # Downloading test files (50GB+) without a cache can take 20+ minutes.
    matrix:
      include:
        # CPU
        - name: cpu_llvm_task
          models-config-file: models_cpu_llvm_task.json
          runs-on:
            - self-hosted  # must come first
            - persistent-cache
            - Linux
            - X64

        # AMD GPU
        - name: amdgpu_rocm_mi250_gfx90a
          models-config-file: models_gpu_rocm_gfx90a.json
          runs-on: nodai-amdgpu-mi250-x86-64
        - name: amdgpu_rocm_mi300_gfx942
          models-config-file: models_gpu_rocm_gfx942.json
          runs-on: nodai-amdgpu-mi300-x86-64
        - name: amdgpu_vulkan
          models-config-file: models_gpu_vulkan.json
          runs-on: nodai-amdgpu-w7900-x86-64

        # NVIDIA GPU
        # None at the moment. Could maybe use the persistent a100 runners:
        #   - self-hosted  # must come first
        #   - runner-group=${{ needs.setup.outputs.runner-group }}
        #   - environment=${{ needs.setup.outputs.runner-env }}
        #   - a100
        #   - os-family=Linux
        # (note: would need to plumb the presubmit/postsubmit runner-group
        # through to here too)
  env:
    PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
    IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
    MODELS_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.models-config-file }}
    VENV_DIR: ${{ github.workspace }}/venv
  steps:
    - name: Checking out IREE repository
      uses: actions/checkout@v4.1.7
      with:
        submodules: false
    - uses: actions/setup-python@v5.1.0
      with:
        # Must match the subset of versions built in pkgci_build_packages.
        python-version: "3.11"
    # Only download from the current run when no explicit run id was given;
    # otherwise this run has no such artifact and the step would fail.
    # NOTE(review): assumes setup_venv.py fetches the packages itself when
    # --fetch-gh-workflow is non-empty; confirm against that script.
    - uses: actions/download-artifact@v4.1.7
      if: ${{ inputs.artifact_run_id == '' }}
      with:
        name: linux_x86_64_release_packages
        path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
    - name: Setup venv
      env:
        # Passed through the environment instead of being expanded into the
        # script text, so the input value is never parsed as shell code.
        ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
      run: |
        ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
          --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
          --fetch-gh-workflow="${ARTIFACT_RUN_ID}"

    # Out of tree tests
    - name: Check out external TestSuite repository
      uses: actions/checkout@v4.1.7
      with:
        repository: nod-ai/SHARK-TestSuite
        ref: f5615ab29da491c0047146258dfa3a0c40c735e5
        path: SHARK-TestSuite
        submodules: false
        lfs: true
    - name: Install external TestSuite Python requirements
      run: |
        source "${VENV_DIR}/bin/activate"
        python3 -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt
        pip install --no-compile --pre --upgrade -e SHARK-TestSuite/common_tools
    - name: Download remote files for real weight model tests
      run: |
        source "${VENV_DIR}/bin/activate"
        python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir iree_tests/pytorch/models
        python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir iree_tests/sharktank

    # `!cancelled()` lets this step run even if an earlier step failed.
    - name: Run external tests - models with real weights
      if: "matrix.models-config-file != '' && !cancelled()"
      run: |
        source "${VENV_DIR}/bin/activate"
        pytest \
          SHARK-TestSuite/iree_tests/pytorch/models \
          SHARK-TestSuite/iree_tests/sharktank \
          -rA \
          -k real_weights \
          --no-skip-tests-missing-files \
          --capture=no \
          --log-cli-level=info \
          --timeout=1200 \
          --durations=0 \
          --config-files="${MODELS_CONFIG_FILE_PATH}"
| |
# Runs the in-tree experimental/regression_suite SDXL and SD3 model tests per
# matrix entry, plus the SDXL ROCm pipeline benchmark on the mi250 and mi300
# entries. Packages come from the `linux_x86_64_release_packages` artifact,
# installed into a virtual environment by build_tools/pkgci/setup_venv.py.
test_regression_suite:
  name: "test_regression_suite :: ${{ matrix.name }}"
  runs-on: ${{ matrix.runs-on }}
  strategy:
    fail-fast: false

    # Note: these jobs should use persistent runners with local caches.
    # Downloading test files (50GB+) without a cache can take 20+ minutes.
    matrix:
      include:
        # CPU
        - name: cpu_llvm_task
          # NOTE(review): not referenced by any step visible in this job;
          # confirm whether it is still needed.
          models-config-file: models_cpu_llvm_task.json
          backend: cpu
          runs-on:
            - self-hosted  # must come first
            - persistent-cache
            - Linux
            - X64

        # AMD GPU
        - name: amdgpu_rocm_mi250_gfx90a
          rocm-chip: gfx90a
          backend: rocm
          runs-on: nodai-amdgpu-mi250-x86-64
        - name: amdgpu_rocm_mi300_gfx942
          rocm-chip: gfx942
          backend: rocm
          runs-on: nodai-amdgpu-mi300-x86-64
  env:
    PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
    IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
    VENV_DIR: ${{ github.workspace }}/venv
    TEST_OUTPUT_ARTIFACTS: ${{ github.workspace }}/model_output_artifacts
  steps:
    - name: Checking out IREE repository
      uses: actions/checkout@v4.1.7
      with:
        submodules: false
    - uses: actions/setup-python@v5.1.0
      with:
        # Must match the subset of versions built in pkgci_build_packages.
        python-version: "3.11"
    # Only download from the current run when no explicit run id was given;
    # otherwise this run has no such artifact and the step would fail.
    # NOTE(review): assumes setup_venv.py fetches the packages itself when
    # --fetch-gh-workflow is non-empty; confirm against that script.
    - uses: actions/download-artifact@v4.1.7
      if: ${{ inputs.artifact_run_id == '' }}
      with:
        name: linux_x86_64_release_packages
        path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
    - name: Setup venv
      env:
        # Passed through the environment instead of being expanded into the
        # script text, so the input value is never parsed as shell code.
        ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
      run: |
        ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
          --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
          --fetch-gh-workflow="${ARTIFACT_RUN_ID}"
        source "${VENV_DIR}/bin/activate"
        pip install -e experimental/regression_suite

    # TODO(#17344): regenerate .mlirbc files, test plat_rdna3_rocm on rocm
    # # In-tree tests
    # - name: Run experimental/regression_suite tests
    #   run: |
    #     source ${VENV_DIR}/bin/activate
    #     pytest \
    #       -rA -s -m "plat_host_cpu and presubmit" \
    #       experimental/regression_suite

    # `!cancelled()` lets the model test steps run even if an earlier step
    # failed. ROCM_CHIP is empty for entries without a `rocm-chip` key.
    - name: "Running SDXL special model tests"
      if: "!cancelled()"
      run: |
        source "${VENV_DIR}/bin/activate"
        pytest ./experimental/regression_suite/shark-test-suite-models/sdxl \
          -k ${{ matrix.backend }} \
          -rpfE \
          --capture=no \
          --log-cli-level=info \
          --timeout=1200 \
          --durations=0
      env:
        ROCM_CHIP: ${{ matrix.rocm-chip }}

    - name: "Running SD3 special model tests"
      if: "!cancelled()"
      run: |
        source "${VENV_DIR}/bin/activate"
        pytest ./experimental/regression_suite/shark-test-suite-models/sd3 \
          -k ${{ matrix.backend }} \
          -rpfE \
          --capture=no \
          --log-cli-level=info \
          --timeout=1200 \
          --durations=0
      env:
        ROCM_CHIP: ${{ matrix.rocm-chip }}

    # Note: mi250 benchmark times are more lenient than mi300 (allowing about
    # 10% deviation from observed averages), since the mi250 runners we use
    # are more unstable and we care most about peak performance on mi300.
    - name: "Running SDXL rocm pipeline benchmark (mi250)"
      if: contains(matrix.name, 'rocm_mi250_gfx90a')
      run: |
        source "${VENV_DIR}/bin/activate"
        pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
          --goldentime-rocm-e2e-ms 1616.0 \
          --goldentime-rocm-unet-ms 419.0 \
          --goldentime-rocm-clip-ms 18.5 \
          --goldentime-rocm-vae-ms 337.0 \
          --goldendispatch-rocm-unet 1551 \
          --goldendispatch-rocm-clip 1225 \
          --goldendispatch-rocm-vae 247 \
          --goldensize-rocm-unet-bytes 2280000 \
          --goldensize-rocm-clip-bytes 860000 \
          --goldensize-rocm-vae-bytes 840000 \
          --rocm-chip gfx90a \
          --log-cli-level=info \
          --retries 7
        # Append the generated summary to the job summary page.
        echo "$(<job_summary.md )" >> "$GITHUB_STEP_SUMMARY"
        rm job_summary.md

    - name: "Running SDXL rocm pipeline benchmark (mi300)"
      if: contains(matrix.name, 'rocm_mi300_gfx942')
      run: |
        source "${VENV_DIR}/bin/activate"
        pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
          --goldentime-rocm-e2e-ms 372.0 \
          --goldentime-rocm-unet-ms 95.0 \
          --goldentime-rocm-clip-ms 15.5 \
          --goldentime-rocm-vae-ms 80.0 \
          --goldendispatch-rocm-unet 1551 \
          --goldendispatch-rocm-clip 1225 \
          --goldendispatch-rocm-vae 247 \
          --goldensize-rocm-unet-bytes 2270000 \
          --goldensize-rocm-clip-bytes 860000 \
          --goldensize-rocm-vae-bytes 840000 \
          --rocm-chip gfx942 \
          --log-cli-level=info \
          --retries 7
        # Append the generated summary to the job summary page.
        echo "$(<job_summary.md )" >> "$GITHUB_STEP_SUMMARY"