Enable MI300 CI testing. (#17842)
This commit enables MI300 GPU CI jobs and model testing.
ci-exactly: build_all, test_amd_mi300, build_packages, regression_test
---------
Signed-off-by: saienduri <saimanas.enduri@amd.com>
Co-authored-by: Scott Todd <scott.todd0@gmail.com>
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8c69c43..74b3d90 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -430,6 +430,50 @@
run: |
./build_tools/cmake/ctest_all.sh ${BUILD_DIR}
+ test_amd_mi300:
+ needs: [setup, build_all]
+ if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_amd_mi300')
+ env:
+ BUILD_DIR: build-tests
+ INSTALL_DIR: ${{ needs.build_all.outputs.install-dir }}
+ INSTALL_DIR_ARCHIVE: ${{ needs.build_all.outputs.install-dir-archive }}
+ INSTALL_DIR_GCS_URL: ${{ needs.build_all.outputs.install-dir-gcs-url }}
+ IREE_CPU_DISABLE: 1
+ IREE_VULKAN_DISABLE: 1
+ IREE_CUDA_DISABLE: 1
+ IREE_HIP_DISABLE: 0
+ IREE_HIP_TEST_TARGET_CHIP: "gfx942"
+      LD_LIBRARY_PATH: /home/esaimana/Python-3.11.9 # TODO: runner-user-specific path; bake Python into the runner image instead
+ runs-on: nodai-amdgpu-mi300-x86-64
+ steps:
+ - name: Pre Checkout MI300 Step
+        # NOTE: this job has no strategy.matrix, so `matrix.name` is always empty
+        # and a `contains(matrix.name, 'gfx942')` guard would never fire; the job
+        # only runs on the MI300 runner, so run this step unconditionally.
+ run: |
+ sudo chmod -R 777 ~/actions-runner/_work
+ - name: "Checking out repository"
+ uses: actions/checkout@v4.1.7
+ - name: "Checking out runtime submodules"
+ run: ./build_tools/scripts/git/update_runtime_submodules.sh
+ - name: "Downloading install dir archive"
+ run: wget "${INSTALL_DIR_GCS_URL}" -O "${INSTALL_DIR_ARCHIVE}"
+ - name: "Extracting install directory"
+ run: tar -xf "${INSTALL_DIR_ARCHIVE}"
+ - name: "Building tests"
+ run: |
+ ./build_tools/pkgci/build_tests_using_package.sh ${INSTALL_DIR}
+ - name: "Running GPU tests"
+ env:
+ IREE_CTEST_LABEL_REGEX: ^requires-gpu|^driver=hip$
+ IREE_NVIDIA_SM80_TESTS_DISABLE: 1
+ IREE_MULTI_DEVICE_TESTS_DISABLE: 0
+ IREE_AMD_RDNA3_TESTS_DISABLE: 1
+ IREE_NVIDIA_GPU_TESTS_DISABLE: 0
+ IREE_CUDA_DISABLE: 1
+ IREE_CPU_DISABLE: 1
+ IREE_HIP_DISABLE: 0
+ run: |
+ ./build_tools/cmake/ctest_all.sh ${BUILD_DIR}
+
test_amd_w7900:
needs: [setup, build_all]
if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_amd_w7900')
@@ -939,6 +983,7 @@
- test_nvidia_gpu
- test_nvidia_a100
- test_amd_mi250
+ - test_amd_mi300
- test_amd_w7900
# Configurations
diff --git a/.github/workflows/pkgci_regression_test.yml b/.github/workflows/pkgci_regression_test.yml
index 99ee321..9050382 100644
--- a/.github/workflows/pkgci_regression_test.yml
+++ b/.github/workflows/pkgci_regression_test.yml
@@ -144,13 +144,20 @@
runs-on: nodai-amdgpu-w7900-x86-64
# AMD GPU
- - name: amdgpu_rocm_gfx90a
+ - name: amdgpu_rocm_mi250_gfx90a
models-config-file: models_gpu_rocm_gfx90a.json
models-extra-flags-config-file: models_gpu_rocm_gfx90a_additional_flags.json
sdxl-unet-config-file: sdxl_scheduled_unet_gpu_rocm_gfx90a.json
sdxl-vae-config-file: sdxl_vae_decode_gpu_rocm_gfx90a.json
sdxl-clip-config-file: sdxl_prompt_encoder_gpu_rocm_gfx90a.json
runs-on: nodai-amdgpu-mi250-x86-64
+ - name: amdgpu_rocm_mi300_gfx942
+ models-config-file: models_gpu_rocm_gfx942.json
+ models-extra-flags-config-file: models_gpu_rocm_gfx942_additional_flags.json
+ sdxl-unet-config-file: sdxl_scheduled_unet_gpu_rocm_gfx942.json
+ sdxl-vae-config-file: sdxl_vae_decode_gpu_rocm_gfx942.json
+ sdxl-clip-config-file: sdxl_prompt_encoder_gpu_rocm_gfx942.json
+ runs-on: nodai-amdgpu-mi300-x86-64
- name: amdgpu_vulkan
models-config-file: models_gpu_vulkan.json
runs-on: nodai-amdgpu-w7900-x86-64
@@ -174,7 +181,14 @@
SDXL_CLIP_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-clip-config-file }}
SDXL_VAE_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.sdxl-vae-config-file }}
VENV_DIR: ${{ github.workspace }}/venv
+      LD_LIBRARY_PATH: /home/esaimana/Python-3.11.9 # TODO: runner-user-specific path; bake Python into the runner image instead
steps:
+      # TODO(saienduri): Find an alternative to this temporary step, which changes
+      # permissions on the GitHub Actions work directory so it can be cleaned after every PR.
+ - name: Pre Checkout MI300 Step
+ if: contains(matrix.name, 'gfx942')
+ run: |
+ sudo chmod -R 777 ~/actions-runner/_work
- name: Checking out IREE repository
uses: actions/checkout@v4.1.7
with:
@@ -293,8 +307,8 @@
--durations=0 \
--config-files=${SDXL_VAE_CONFIG_FILE_PATH}
- - name: "Running SDXL rocm pipeline benchmark"
- if: contains(matrix.name, 'rocm')
+ - name: "Running SDXL rocm pipeline benchmark (mi250)"
+ if: contains(matrix.name, 'rocm_mi250_gfx90a')
run: |
source ${VENV_DIR}/bin/activate
pytest SHARK-TestSuite/iree_tests/benchmarks/sdxl/benchmark_sdxl_rocm.py \
@@ -313,3 +327,25 @@
--log-cli-level=info \
--retries 7
echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY
+ rm job_summary.md
+
+ - name: "Running SDXL rocm pipeline benchmark (mi300)"
+ if: contains(matrix.name, 'rocm_mi300_gfx942')
+ run: |
+ source ${VENV_DIR}/bin/activate
+ pytest SHARK-TestSuite/iree_tests/benchmarks/sdxl/benchmark_sdxl_rocm.py \
+ --goldentime-rocm-e2e-ms 320 \
+ --goldentime-rocm-unet-ms 77 \
+ --goldentime-rocm-clip-ms 15 \
+ --goldentime-rocm-vae-ms 74 \
+ --goldendispatch-rocm-unet 1714 \
+ --goldendispatch-rocm-clip 1569 \
+ --goldendispatch-rocm-vae 248 \
+ --goldensize-rocm-unet-bytes 2054938 \
+ --goldensize-rocm-clip-bytes 780328 \
+ --goldensize-rocm-vae-bytes 758509 \
+ --gpu-number 0 \
+ --rocm-chip gfx942 \
+ --log-cli-level=info \
+ --retries 7
+ echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY
diff --git a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json
new file mode 100644
index 0000000..5d451f0
--- /dev/null
+++ b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942.json
@@ -0,0 +1,28 @@
+{
+ "config_name": "gpu_rocm",
+ "iree_compile_flags": [
+ "--iree-hal-target-backends=rocm",
+ "--iree-rocm-target-chip=gfx942",
+ "--iree-input-demote-f64-to-f32"
+ ],
+ "iree_run_module_flags": [
+ "--device=hip"
+ ],
+ "skip_compile_tests": [
+ "pytorch/models/sdxl-scheduled-unet-3-tank",
+ "pytorch/models/sdxl-prompt-encoder-tank",
+ "pytorch/models/sdxl-vae-decode-tank"
+ ],
+ "skip_run_tests": [],
+ "expected_compile_failures": [
+ // TODO(#17344): need to regenerate .mlirbc
+ "pytorch/models/opt-125M",
+ "pytorch/models/resnet50",
+ "pytorch/models/sdxl-vae-decode-tank",
+
+ // error: 'builtin.module' op failed to run transform dialect passes
+ // (transform spec file is specific to SDXL?)
+ "sharktank/llama/open-llama-3b-v2-f16"
+ ],
+ "expected_run_failures": []
+}
diff --git a/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942_additional_flags.json b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942_additional_flags.json
new file mode 100644
index 0000000..28950d0
--- /dev/null
+++ b/build_tools/pkgci/external_test_suite/models_gpu_rocm_gfx942_additional_flags.json
@@ -0,0 +1,25 @@
+{
+ "config_name": "gpu_rocm",
+ "iree_compile_flags": [
+ "--iree-hal-target-backends=rocm",
+ "--iree-rocm-target-chip=gfx942",
+ "--iree-input-demote-f64-to-f32",
+ "--iree-opt-const-eval=false",
+ "--iree-codegen-transform-dialect-library=${IREE_TEST_PATH_EXTENSION}/attention_and_matmul_spec.mlir"
+ ],
+ "iree_run_module_flags": [
+ "--device=hip"
+ ],
+ "skip_compile_tests": [
+ "pytorch/models/sdxl-scheduled-unet-3-tank",
+ "pytorch/models/sdxl-prompt-encoder-tank",
+ "pytorch/models/sdxl-vae-decode-tank"
+ ],
+ "skip_run_tests": [],
+ "expected_compile_failures": [
+ // TODO(#17344): need to regenerate .mlirbc
+ "pytorch/models/opt-125M",
+ "pytorch/models/resnet50"
+ ],
+ "expected_run_failures": []
+}
diff --git a/build_tools/pkgci/external_test_suite/sdxl_prompt_encoder_gpu_rocm_gfx942.json b/build_tools/pkgci/external_test_suite/sdxl_prompt_encoder_gpu_rocm_gfx942.json
new file mode 100644
index 0000000..e3dbc9b
--- /dev/null
+++ b/build_tools/pkgci/external_test_suite/sdxl_prompt_encoder_gpu_rocm_gfx942.json
@@ -0,0 +1,36 @@
+{
+ "config_name": "gpu_rocm",
+ "iree_compile_flags": [
+ "--iree-hal-target-backends=rocm",
+ "--iree-rocm-target-chip=gfx942",
+ "--iree-input-type=torch",
+ "--iree-opt-const-eval=false",
+ "--iree-global-opt-propagate-transposes=true",
+ "--iree-opt-outer-dim-concat=true",
+ "--iree-rocm-waves-per-eu=2",
+ "--iree-llvmgpu-enable-prefetch",
+ "--iree-flow-enable-aggressive-fusion",
+ "--iree-global-opt-enable-fuse-horizontal-contractions=true",
+ "--iree-opt-aggressively-propagate-transposes=true",
+ "--iree-codegen-llvmgpu-use-vector-distribution=true",
+ "--iree-execution-model=async-external",
+ "--iree-preprocessing-pass-pipeline=builtin.module(iree-preprocessing-transpose-convolution-pipeline, util.func(iree-preprocessing-pad-to-intrinsics{pad-target-type=conv}))",
+ "--iree-scheduling-dump-statistics-format=json",
+ "--iree-scheduling-dump-statistics-file=compilation_info.json"
+ ],
+ "iree_run_module_flags": [
+ "--device=hip",
+ "--parameters=model=real_weights.irpa",
+ "--input=1x64xi64=@inference_input.0.bin",
+ "--input=1x64xi64=@inference_input.1.bin",
+ "--input=1x64xi64=@inference_input.2.bin",
+ "--input=1x64xi64=@inference_input.3.bin",
+ "--expected_output=2x64x2048xf16=@inference_output.0.bin",
+ "--expected_output=2x1280xf16=@inference_output.1.bin",
+ "--expected_f16_threshold=1.0f"
+ ],
+ "skip_compile_tests": [],
+ "skip_run_tests": [],
+ "expected_compile_failures": [],
+ "expected_run_failures": []
+}
diff --git a/build_tools/pkgci/external_test_suite/sdxl_scheduled_unet_gpu_rocm_gfx942.json b/build_tools/pkgci/external_test_suite/sdxl_scheduled_unet_gpu_rocm_gfx942.json
new file mode 100644
index 0000000..289e99b
--- /dev/null
+++ b/build_tools/pkgci/external_test_suite/sdxl_scheduled_unet_gpu_rocm_gfx942.json
@@ -0,0 +1,41 @@
+{
+ "config_name": "gpu_rocm",
+ "iree_compile_flags" : [
+ "--iree-hal-target-backends=rocm",
+ "--iree-rocm-target-chip=gfx942",
+ "--iree-opt-const-eval=false",
+ "--iree-codegen-transform-dialect-library=${IREE_TEST_PATH_EXTENSION}/attention_and_matmul_spec.mlir",
+ "--iree-global-opt-propagate-transposes=true",
+ "--iree-global-opt-enable-fuse-horizontal-contractions=true",
+ "--iree-flow-enable-aggressive-fusion=true",
+ "--iree-opt-aggressively-propagate-transposes=true",
+ "--iree-opt-outer-dim-concat=true",
+ "--iree-vm-target-truncate-unsupported-floats",
+ "--iree-llvmgpu-enable-prefetch=true",
+ "--iree-opt-data-tiling=false",
+ "--iree-codegen-gpu-native-math-precision=true",
+ "--iree-codegen-llvmgpu-use-vector-distribution",
+ "--iree-rocm-waves-per-eu=2",
+ "--iree-execution-model=async-external",
+ "--iree-preprocessing-pass-pipeline=builtin.module(iree-preprocessing-transpose-convolution-pipeline, util.func(iree-preprocessing-pad-to-intrinsics))",
+ "--iree-scheduling-dump-statistics-format=json",
+ "--iree-scheduling-dump-statistics-file=compilation_info.json"
+ ],
+ "iree_run_module_flags": [
+ "--device=hip",
+ "--parameters=model=real_weights.irpa",
+ "--module=sdxl_scheduled_unet_pipeline_fp16_rocm.vmfb",
+ "--input=1x4x128x128xf16=@inference_input.0.bin",
+ "--input=2x64x2048xf16=@inference_input.1.bin",
+ "--input=2x1280xf16=@inference_input.2.bin",
+ "--input=1xf16=@inference_input.3.bin",
+ "--expected_output=1x4x128x128xf16=@inference_output.0.bin",
+ "--expected_f16_threshold=0.7f"
+ ],
+ "skip_compile_tests": [],
+ "skip_run_tests": [],
+ "expected_compile_failures": [],
+ "expected_run_failures": [
+ "pytorch/models/sdxl-scheduled-unet-3-tank",
+ ]
+}
diff --git a/build_tools/pkgci/external_test_suite/sdxl_vae_decode_gpu_rocm_gfx942.json b/build_tools/pkgci/external_test_suite/sdxl_vae_decode_gpu_rocm_gfx942.json
new file mode 100644
index 0000000..1ea7251
--- /dev/null
+++ b/build_tools/pkgci/external_test_suite/sdxl_vae_decode_gpu_rocm_gfx942.json
@@ -0,0 +1,29 @@
+{
+ "config_name": "gpu_rocm",
+ "iree_compile_flags" : [
+ "--iree-hal-target-backends=rocm",
+ "--iree-rocm-target-chip=gfx942",
+ "--iree-opt-const-eval=false",
+ "--iree-global-opt-propagate-transposes=true",
+ "--iree-opt-outer-dim-concat=true",
+ "--iree-llvmgpu-enable-prefetch=true",
+ "--iree-rocm-waves-per-eu=2",
+ "--iree-flow-enable-aggressive-fusion",
+ "--iree-codegen-llvmgpu-use-vector-distribution=true",
+ "--iree-execution-model=async-external",
+ "--iree-preprocessing-pass-pipeline=builtin.module(iree-preprocessing-transpose-convolution-pipeline, util.func(iree-preprocessing-pad-to-intrinsics))",
+ "--iree-scheduling-dump-statistics-format=json",
+ "--iree-scheduling-dump-statistics-file=compilation_info.json"
+ ],
+ "iree_run_module_flags": [
+ "--device=hip",
+ "--parameters=model=real_weights.irpa",
+ "--input=1x4x128x128xf16=@inference_input.0.bin",
+ "--expected_output=1x3x1024x1024xf16=@inference_output.0.bin",
+ "--expected_f16_threshold=0.4f"
+ ],
+ "skip_compile_tests": [],
+ "skip_run_tests": [],
+ "expected_compile_failures": [],
+ "expected_run_failures": []
+}