blob: f00f158380d3abf35764690502d1b3f83ee63add [file] [log] [blame]
# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
name: PkgCI Regression Test (AMDGPU ROCm)

# Two entry points, both accepting the same optional `artifact_run_id` input.
# The value is forwarded to setup_venv.py as `--fetch-gh-workflow` by the
# "Setup venv" step; it defaults to the empty string.
on:
  # Reusable-workflow entry point (called from another workflow).
  workflow_call:
    inputs:
      artifact_run_id:
        type: string
        default: ""

  # Manual entry point (triggered by hand).
  workflow_dispatch:
    inputs:
      artifact_run_id:
        type: string
        default: ""
jobs:
  # TODO(#17370): re-enable when nodai-amdgpu-w7900-x86-64 runner is stable
  # linux_x86_64:
  #   name: Linux (x86_64)
  #   runs-on: nodai-amdgpu-w7900-x86-64
  #   env:
  #     PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
  #     IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts
  #     VENV_DIR: ${{ github.workspace }}/venv
  #   steps:
  #     - name: Checking out IREE repository
  #       uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
  #       with:
  #         submodules: false
  #     - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
  #       with:
  #         # Must match the subset of versions built in pkgci_build_packages.
  #         python-version: '3.11'
  #     - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2
  #       with:
  #         name: linux_x86_64_release_packages
  #         path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
  #     - name: Setup venv
  #       run: |
  #         ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \
  #           --artifact-path=${PACKAGE_DOWNLOAD_DIR} \
  #           --fetch-gh-workflow=${{ inputs.artifact_run_id }}
  #     # TODO(#17344): regenerate .mlirbc files
  #     # # In-tree tests
  #     # - name: Run experimental/regression_suite tests
  #     #   run: |
  #     #     source ${VENV_DIR}/bin/activate
  #     #     export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/hip/lib
  #     #     pytest \
  #     #       -rA -s -m "plat_rdna3_rocm and presubmit" \
  #     #       experimental/regression_suite
  #     # Out of tree tests
  #     # TODO(scotttodd): Increase parallelism when supported by the HIP HAL
  #     #   driver and/or test runner machine.
  #     - name: Checking out external TestSuite repository
  #       uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
  #       with:
  #         repository: nod-ai/SHARK-TestSuite
  #         ref: 337083616ae6f596c0206a9edd1c47e8afc0e400
  #         path: SHARK-TestSuite
  #         submodules: false
  #     - name: Installing external TestSuite Python requirements
  #       run: |
  #         source ${VENV_DIR}/bin/activate
  #         python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt
  #     - name: Run external tests - ONNX test suite
  #       run: |
  #         source ${VENV_DIR}/bin/activate
  #         export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/hip/lib
  #         pytest SHARK-TestSuite/iree_tests/onnx/ \
  #           -rpfE --timeout=30 --retries=2 --durations=20 \
  #           --config-files=build_tools/pkgci/external_test_suite/onnx_gpu_rocm_rdna3.json \
  #           --no-skip-tests-missing-files \
  #           --report-log=/tmp/iree_tests_onnx_gpu_rocm_rdna3_logs.json
  #     - name: "Updating config file with latest XFAIL lists"
  #       if: failure()
  #       run: |
  #         source ${VENV_DIR}/bin/activate
  #         python SHARK-TestSuite/iree_tests/update_config_xfails.py \
  #           --log-file=/tmp/iree_tests_onnx_gpu_rocm_rdna3_logs.json \
  #           --config-file=build_tools/pkgci/external_test_suite/onnx_gpu_rocm_rdna3.json
  #         cat build_tools/pkgci/external_test_suite/onnx_gpu_rocm_rdna3.json
  #     - name: "Uploading new config file"
  #       if: failure()
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: "onnx_gpu_rocm_rdna3.json"
  #         path: "build_tools/pkgci/external_test_suite/onnx_gpu_rocm_rdna3.json"

  # Model regression tests on the MI250 runner.
  #
  # Installs the release packages into a venv, checks out a pinned revision of
  # nod-ai/SHARK-TestSuite, downloads the remote files for the real-weight
  # model tests, then runs three pytest passes over pytorch/models (each with
  # its own gfx90a config file) followed by the SDXL ROCm benchmark script.
  linux_x86_64_rocm_models:
    name: MI250 - Models
    runs-on: nodai-amdgpu-mi250-x86-64
    env:
      PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
      IREERS_ARTIFACT_DIR: ${{ github.workspace }}/artifacts
      VENV_DIR: ${{ github.workspace }}/venv
      # NOTE(review): the runner does not expand `~` in env values, so this is
      # passed through literally; presumably the test suite scripts expand it
      # themselves - confirm.
      IREE_TEST_FILES: ~/iree_tests_cache
      IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
    steps:
      - name: Checking out IREE repository
        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3  # v3.5.0
        with:
          submodules: false
      - uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1  # v4.7.0
        with:
          # Must match the subset of versions built in pkgci_build_packages.
          python-version: '3.11'
      # Only pull packages from the current run when no explicit source run was
      # requested. With `artifact_run_id` set (e.g. a manual dispatch) this run
      # has no `linux_x86_64_release_packages` artifact of its own.
      # NOTE(review): relies on setup_venv.py fetching the packages itself via
      # --fetch-gh-workflow - confirm.
      - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a  # v3.0.2
        if: ${{ inputs.artifact_run_id == '' }}
        with:
          name: linux_x86_64_release_packages
          path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
      - name: Setup venv
        env:
          # Pass the workflow input through the environment rather than
          # interpolating it into the script text, so its value can never be
          # interpreted as shell syntax.
          ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
        run: |
          ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
            --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
            --fetch-gh-workflow="${ARTIFACT_RUN_ID}"

      # Out of tree tests
      - name: Check out external TestSuite repository
        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3  # v3.5.0
        with:
          repository: nod-ai/SHARK-TestSuite
          # Pinned revision of the external test suite.
          ref: 337083616ae6f596c0206a9edd1c47e8afc0e400
          path: SHARK-TestSuite
          submodules: false
          lfs: true
      - name: Install external TestSuite Python requirements
        run: |
          source "${VENV_DIR}/bin/activate"
          python -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt
      - name: Download remote files for real weight model tests
        run: |
          source "${VENV_DIR}/bin/activate"
          python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir pytorch/models

      # The three pytest steps below share the same flags and differ only in
      # the test directory and the --config-files argument.
      - name: Run external tests - Models with real weights
        run: |
          source "${VENV_DIR}/bin/activate"
          pytest SHARK-TestSuite/iree_tests/pytorch/models \
            -rpfE \
            -k real_weights \
            --no-skip-tests-missing-files \
            --capture=no \
            --timeout=1200 \
            --retries 2 \
            --retry-delay 5 \
            --durations=0 \
            --config-files=build_tools/pkgci/external_test_suite/gpu_rocm_models_gfx90a.json
      - name: Run external tests - Models with real weights and additional flags
        run: |
          source "${VENV_DIR}/bin/activate"
          pytest SHARK-TestSuite/iree_tests/pytorch/models \
            -rpfE \
            -k real_weights \
            --no-skip-tests-missing-files \
            --capture=no \
            --timeout=1200 \
            --retries 2 \
            --retry-delay 5 \
            --durations=0 \
            --config-files=build_tools/pkgci/external_test_suite/gpu_rocm_models_additional_flags_gfx90a.json
      - name: "Running real weight model tests scheduled unet"
        run: |
          source "${VENV_DIR}/bin/activate"
          pytest SHARK-TestSuite/iree_tests/pytorch/models/sdxl-scheduled-unet-3-tank \
            -rpfE \
            -k real_weights \
            --no-skip-tests-missing-files \
            --capture=no \
            --timeout=1200 \
            --retries 2 \
            --retry-delay 5 \
            --durations=0 \
            --config-files=build_tools/pkgci/external_test_suite/sdxl_scheduled_unet_gpu_rocm_gfx90a.json
      - name: "Running SDXL rocm pipeline benchmark"
        run: |
          source "${VENV_DIR}/bin/activate"
          bash SHARK-TestSuite/iree_tests/benchmarks/benchmark_sdxl_rocm.sh