| # Copyright 2023 The IREE Authors |
| # |
| # Licensed under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
# Regression tests that run against prebuilt release packages.
name: 'PkgCI Regression Test'

on:
  # Manual trigger.
  workflow_dispatch:
    inputs:
      # Forwarded to setup_venv.py as --fetch-gh-workflow; empty by default.
      artifact_run_id:
        default: ''
        type: string
  # Reusable-workflow trigger; accepts the same input as the manual trigger.
  workflow_call:
    inputs:
      artifact_run_id:
        default: ''
        type: string
| |
| jobs: |
# Runs the ONNX test suite from nod-ai/SHARK-TestSuite once per matrix entry,
# using the config file and pytest parallelism selected by that entry.
test_onnx:
  name: "test_onnx :: ${{ matrix.name }}"
  runs-on: ${{ matrix.runs-on }}
  strategy:
    fail-fast: false
    matrix:
      include:
        # CPU
        - name: cpu_llvm_sync
          config-file: onnx_cpu_llvm_sync.json
          numprocesses: auto
          runs-on: ubuntu-20.04

        # AMD GPU
        - name: amdgpu_rocm_rdna3
          numprocesses: 1
          config-file: onnx_gpu_rocm_rdna3.json
          runs-on: nodai-amdgpu-w7900-x86-64
        - name: amdgpu_vulkan
          numprocesses: 4
          config-file: onnx_gpu_vulkan.json
          runs-on: nodai-amdgpu-w7900-x86-64

        # NVIDIA GPU
        - name: nvidiagpu_cuda
          config-file: onnx_gpu_cuda.json
          numprocesses: 4
          runs-on:
            - self-hosted  # must come first
            - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
            - environment=prod
            - gpu  # TODO(scotttodd): qualify further with vendor/model
            - os-family=Linux
        - name: nvidiagpu_vulkan
          config-file: onnx_gpu_vulkan.json
          numprocesses: 4
          runs-on:
            - self-hosted  # must come first
            - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
            - environment=prod
            - gpu  # TODO(scotttodd): qualify further with vendor/model
            - os-family=Linux
  env:
    # Where the release packages artifact is downloaded to.
    PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
    IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts
    # Per-matrix-entry test configuration, passed to pytest via --config-files.
    CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.config-file }}
    # Passed to pytest via --numprocesses.
    NUMPROCESSES: ${{ matrix.numprocesses }}
    # pytest --report-log output; read back by update_config_xfails.py on failure.
    LOG_FILE_PATH: /tmp/iree_tests_onnx_${{ matrix.name }}_logs.json
    VENV_DIR: ${{ github.workspace }}/venv
  steps:
    - name: Checking out IREE repository
      uses: actions/checkout@v4.1.7
      with:
        submodules: false
    - uses: actions/setup-python@v5.1.0
      with:
        # Must match the subset of versions built in pkgci_build_packages.
        python-version: "3.11"
    - uses: actions/download-artifact@v4.1.7
      with:
        name: linux_x86_64_release_packages
        path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
    - name: Setup venv
      env:
        # Pass the workflow input through the environment instead of expanding
        # it inside the script text, so its value is never parsed as shell code.
        ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
      run: |
        ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
          --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
          --fetch-gh-workflow="${ARTIFACT_RUN_ID}"

    - name: Check out external TestSuite repository
      uses: actions/checkout@v4.1.7
      with:
        repository: nod-ai/SHARK-TestSuite
        ref: 3603a453b3777fac9af4506a3dc0b3d87587fd47
        path: SHARK-TestSuite
        submodules: false
        lfs: false
    - name: Install external TestSuite Python requirements
      run: |
        source ${VENV_DIR}/bin/activate
        python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt

    - name: Run external tests - ONNX test suite
      run: |
        source ${VENV_DIR}/bin/activate
        pytest SHARK-TestSuite/iree_tests/onnx/ \
          -rpfE \
          --numprocesses ${NUMPROCESSES} \
          --timeout=30 \
          --durations=20 \
          --no-skip-tests-missing-files \
          --config-files=${CONFIG_FILE_PATH} \
          --report-log=${LOG_FILE_PATH}
    # The next two steps run only when an earlier step in this job failed.
    # They rewrite the config file from the pytest report log, print it, and
    # upload it as an artifact.
    - name: "Updating config file with latest XFAIL lists"
      if: failure()
      run: |
        source ${VENV_DIR}/bin/activate
        python SHARK-TestSuite/iree_tests/update_config_xfails.py \
          --log-file=${LOG_FILE_PATH} \
          --config-file=${CONFIG_FILE_PATH}
        cat ${CONFIG_FILE_PATH}
    - name: "Uploading new config file"
      if: failure()
      uses: actions/upload-artifact@v4.3.3
      with:
        # Artifact names must be unique within a run for upload-artifact v4.
        # amdgpu_vulkan and nvidiagpu_vulkan share onnx_gpu_vulkan.json, so the
        # matrix entry name is included to keep their uploads from colliding.
        name: ${{ matrix.name }}_${{ matrix.config-file }}
        path: ${{ env.CONFIG_FILE_PATH }}
| |
# Runs the SHARK-TestSuite model tests that use real weights, once per matrix
# entry, followed by the SDXL ROCm pipeline benchmark on the mi250 and mi300
# entries. Each test step is skipped when its matrix config file is unset.
test_models:
  name: "test_models :: ${{ matrix.name }}"
  runs-on: ${{ matrix.runs-on }}
  strategy:
    fail-fast: false

    # Note: these jobs should use persistent runners with local caches.
    # Downloading test files (50GB+) without a cache can take 20+ minutes.
    matrix:
      include:
        # CPU
        - name: cpu_llvm_task
          models-config-file: models_cpu_llvm_task.json
          sdxl-unet-config-file: sdxl_scheduled_unet_cpu_llvm_task.json
          sdxl-vae-config-file: sdxl_vae_decode_cpu_llvm_task.json
          sdxl-clip-config-file: sdxl_prompt_encoder_cpu_llvm_task.json
          runs-on: nodai-amdgpu-w7900-x86-64

        # AMD GPU
        - name: amdgpu_rocm_mi250_gfx90a
          models-config-file: models_gpu_rocm_gfx90a.json
          models-extra-flags-config-file: models_gpu_rocm_gfx90a_additional_flags.json
          sdxl-unet-config-file: sdxl_scheduled_unet_gpu_rocm_gfx90a.json
          sdxl-vae-config-file: sdxl_vae_decode_gpu_rocm_gfx90a.json
          sdxl-clip-config-file: sdxl_prompt_encoder_gpu_rocm_gfx90a.json
          runs-on: nodai-amdgpu-mi250-x86-64
        - name: amdgpu_rocm_mi300_gfx942
          models-config-file: models_gpu_rocm_gfx942.json
          models-extra-flags-config-file: models_gpu_rocm_gfx942_additional_flags.json
          sdxl-unet-config-file: sdxl_scheduled_unet_gpu_rocm_gfx942.json
          sdxl-vae-config-file: sdxl_vae_decode_gpu_rocm_gfx942.json
          sdxl-clip-config-file: sdxl_prompt_encoder_gpu_rocm_gfx942.json
          runs-on: nodai-amdgpu-mi300-x86-64
        - name: amdgpu_vulkan
          models-config-file: models_gpu_vulkan.json
          runs-on: nodai-amdgpu-w7900-x86-64

        # NVIDIA GPU
        # None at the moment. Could maybe use the persistent a100 runners:
        #   - self-hosted # must come first
        #   - runner-group=${{ needs.setup.outputs.runner-group }}
        #   - environment=${{ needs.setup.outputs.runner-env }}
        #   - a100
        #   - os-family=Linux
        # (note: would need to plumb the presubmit/postsubmit runner-group through to here too)
  env:
    # Where the release packages artifact is downloaded to.
    PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
    IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts
    # NOTE(review): Actions passes `~` through literally in env values;
    # presumably the test suite expands it itself - confirm.
    IREE_TEST_FILES: ~/iree_tests_cache
    IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
    # Config file paths, one per test step below. A matrix entry that omits a
    # key leaves only the directory prefix here; the matching step is skipped
    # by its `if:` condition.
    MODELS_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.models-config-file }}
    MODELS_EXTRA_FLAGS_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.models-extra-flags-config-file }}
    SDXL_UNET_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-unet-config-file }}
    SDXL_CLIP_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-clip-config-file }}
    SDXL_VAE_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-vae-config-file }}
    VENV_DIR: ${{ github.workspace }}/venv
    # NOTE(review): hard-coded path under one user's home directory; presumably
    # it matches a Python install on the persistent runners - confirm.
    LD_LIBRARY_PATH: /home/esaimana/Python-3.11.9
  steps:
    # TODO(saienduri): Find alternative to this temporary step that manipulates permission of github actions
    # directory to be able to clean after every PR
    - name: Pre Checkout MI300 Step
      if: contains(matrix.name, 'gfx942')
      run: |
        sudo chmod -R 777 ~/actions-runner/_work
    - name: Checking out IREE repository
      uses: actions/checkout@v4.1.7
      with:
        submodules: false
    - uses: actions/setup-python@v5.1.0
      with:
        # Must match the subset of versions built in pkgci_build_packages.
        python-version: "3.11"
    - uses: actions/download-artifact@v4.1.7
      with:
        name: linux_x86_64_release_packages
        path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
    - name: Setup venv
      env:
        # Pass the workflow input through the environment instead of expanding
        # it inside the script text, so its value is never parsed as shell code.
        ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
      run: |
        ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
          --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
          --fetch-gh-workflow="${ARTIFACT_RUN_ID}"

    # TODO(#17344): regenerate .mlirbc files, test plat_rdna3_rocm on rocm
    # # In-tree tests
    # - name: Run experimental/regression_suite tests
    #   run: |
    #     source ${VENV_DIR}/bin/activate
    #     pytest \
    #       -rA -s -m "plat_host_cpu and presubmit" \
    #       experimental/regression_suite

    # Out of tree tests
    - name: Check out external TestSuite repository
      uses: actions/checkout@v4.1.7
      with:
        repository: nod-ai/SHARK-TestSuite
        ref: 3603a453b3777fac9af4506a3dc0b3d87587fd47
        path: SHARK-TestSuite
        submodules: false
        lfs: true
    - name: Install external TestSuite Python requirements
      run: |
        source ${VENV_DIR}/bin/activate
        python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt
    - name: Download remote files for real weight model tests
      run: |
        source ${VENV_DIR}/bin/activate
        python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir pytorch/models
        python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir sharktank

    # Each test step below uses `!cancelled()`, so it still runs after an
    # earlier step has failed; it is skipped only when its config file is unset
    # for this matrix entry or the run was cancelled.
    - name: Run external tests - models with real weights
      if: "matrix.models-config-file != '' && !cancelled()"
      run: |
        source ${VENV_DIR}/bin/activate
        pytest \
          SHARK-TestSuite/iree_tests/pytorch/models \
          SHARK-TestSuite/iree_tests/sharktank \
          -rpfE \
          -k real_weights \
          --no-skip-tests-missing-files \
          --capture=no \
          --log-cli-level=info \
          --timeout=1200 \
          --durations=0 \
          --config-files=${MODELS_CONFIG_FILE_PATH}

    - name: Run external tests - models with real weights and additional flags
      if: "matrix.models-extra-flags-config-file != '' && !cancelled()"
      run: |
        source ${VENV_DIR}/bin/activate
        pytest SHARK-TestSuite/iree_tests/pytorch/models \
          -rpfE \
          -k real_weights \
          --no-skip-tests-missing-files \
          --capture=no \
          --log-cli-level=info \
          --timeout=1200 \
          --durations=0 \
          --config-files=${MODELS_EXTRA_FLAGS_CONFIG_FILE_PATH}

    - name: "Run external tests - SDXL scheduled unet"
      if: "matrix.sdxl-unet-config-file != '' && !cancelled()"
      run: |
        source ${VENV_DIR}/bin/activate
        pytest SHARK-TestSuite/iree_tests/pytorch/models/sdxl-scheduled-unet-3-tank \
          -rpfE \
          -k real_weights \
          --no-skip-tests-missing-files \
          --capture=no \
          --log-cli-level=info \
          --timeout=1200 \
          --durations=0 \
          --config-files=${SDXL_UNET_CONFIG_FILE_PATH}

    - name: "Run external tests - SDXL prompt encoder"
      if: "matrix.sdxl-clip-config-file != '' && !cancelled()"
      run: |
        source ${VENV_DIR}/bin/activate
        pytest SHARK-TestSuite/iree_tests/pytorch/models/sdxl-prompt-encoder-tank \
          -rpfE \
          -k real_weights \
          --no-skip-tests-missing-files \
          --capture=no \
          --log-cli-level=info \
          --timeout=1200 \
          --durations=0 \
          --config-files=${SDXL_CLIP_CONFIG_FILE_PATH}

    - name: "Run external tests - SDXL vae decode"
      if: "matrix.sdxl-vae-config-file != '' && !cancelled()"
      run: |
        source ${VENV_DIR}/bin/activate
        pytest SHARK-TestSuite/iree_tests/pytorch/models/sdxl-vae-decode-tank \
          -rpfE \
          -k real_weights \
          --no-skip-tests-missing-files \
          --capture=no \
          --log-cli-level=info \
          --timeout=1200 \
          --durations=0 \
          --config-files=${SDXL_VAE_CONFIG_FILE_PATH}

    # The benchmark steps have no `!cancelled()` in their condition, so they
    # run only when every earlier step succeeded. After pytest, the contents of
    # job_summary.md are appended to the job's step summary.
    - name: "Running SDXL rocm pipeline benchmark (mi250)"
      if: contains(matrix.name, 'rocm_mi250_gfx90a')
      run: |
        source ${VENV_DIR}/bin/activate
        pytest SHARK-TestSuite/iree_tests/benchmarks/sdxl/benchmark_sdxl_rocm.py \
          --goldentime-rocm-e2e-ms 1336.0 \
          --goldentime-rocm-unet-ms 340.0 \
          --goldentime-rocm-clip-ms 17.5 \
          --goldentime-rocm-vae-ms 300.0 \
          --goldendispatch-rocm-unet 1714 \
          --goldendispatch-rocm-clip 1569 \
          --goldendispatch-rocm-vae 248 \
          --goldensize-rocm-unet-bytes 2073609 \
          --goldensize-rocm-clip-bytes 783720 \
          --goldensize-rocm-vae-bytes 764909 \
          --gpu-number 6 \
          --rocm-chip gfx90a \
          --log-cli-level=info \
          --retries 7
        echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY
        rm job_summary.md

    - name: "Running SDXL rocm pipeline benchmark (mi300)"
      if: contains(matrix.name, 'rocm_mi300_gfx942')
      run: |
        source ${VENV_DIR}/bin/activate
        pytest SHARK-TestSuite/iree_tests/benchmarks/sdxl/benchmark_sdxl_rocm.py \
          --goldentime-rocm-e2e-ms 320 \
          --goldentime-rocm-unet-ms 77 \
          --goldentime-rocm-clip-ms 15 \
          --goldentime-rocm-vae-ms 74 \
          --goldendispatch-rocm-unet 1714 \
          --goldendispatch-rocm-clip 1569 \
          --goldendispatch-rocm-vae 248 \
          --goldensize-rocm-unet-bytes 2054938 \
          --goldensize-rocm-clip-bytes 780328 \
          --goldensize-rocm-vae-bytes 758509 \
          --gpu-number 0 \
          --rocm-chip gfx942 \
          --log-cli-level=info \
          --retries 7
        echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY