Disable workflows still relying on GCP self-hosted runners. (#18526)
See https://github.com/iree-org/iree/issues/18238.
We've finished migrating most load-bearing workflows to a new cluster
of self-hosted runners. The following workflows still use GCP runners
and are disabled by this change:
* `build_test_all_bazel`: this may work on the new cluster with the
existing `gcr.io/iree-oss/base-bleeding-edge` dockerfile, but the job
uses remote cache storage on GCP, and I want to migrate the dockerfile
to https://github.com/iree-org/base-docker-images/. We need some time
to install dependencies, evaluate build times with/without a remote
cache, etc. (see the sketch after this list).
* `test_nvidia_t4`, `nvidiagpu_cuda`, `nvidiagpu_vulkan`: we'll try to
spin up some VMs in the new cluster / cloud project with similar GPUs.
That's a high priority for us, so maybe within a few weeks.
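
For reference, a re-enabled `build_test_all_bazel` job on the new cluster could keep the same entry point and just point at an image published from base-docker-images. This is only a rough sketch: the `ghcr.io/iree-org/base-bleeding-edge` image name is an assumption (nothing has been published there yet), and the cache-write toggle is shown only to make the with/without-cache timing comparison concrete.

```bash
# Sketch: run the Bazel build/test entry point against a hypothetical image
# published from https://github.com/iree-org/base-docker-images/.
# Setting IREE_WRITE_REMOTE_BAZEL_CACHE=0 skips writing to the GCP remote
# cache, which is useful when timing builds without it.
./build_tools/github_actions/docker_run.sh \
  --env "IREE_WRITE_REMOTE_BAZEL_CACHE=0" \
  ghcr.io/iree-org/base-bleeding-edge:latest \
  ./build_tools/bazel/build_test_all.sh
```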
Additionally, these workflows are still enabled, but we should find a
longer-term solution for them:
* `linux_arm64_clang`: this is still enabled in code for now. We can
disable
https://github.com/iree-org/iree/actions/workflows/ci_linux_arm64_clang.yml
from the UI (or via the GitHub CLI, see the sketch after this list).
* arm64 packages are also still enabled:
https://github.com/iree-org/iree/blob/cc891ba8e7da3a3ef1c8650a66af0aa53ceed06b/.github/workflows/build_package.yml#L46-L50
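
As an alternative to clicking through the Actions UI, the workflow can also be toggled with the GitHub CLI (assuming `gh` is installed and authenticated with write access to iree-org/iree):

```bash
# Disable the arm64 workflow without touching the YAML; it can be re-enabled
# later with `gh workflow enable`.
gh workflow disable ci_linux_arm64_clang.yml --repo iree-org/iree
```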
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 31cd87f..0a900e8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -56,28 +56,29 @@
# Jobs that build all of IREE "normally"
##############################################################################
- build_test_all_bazel:
- needs: setup
- if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'build_test_all_bazel')
- runs-on:
- - self-hosted # must come first
- - runner-group=${{ needs.setup.outputs.runner-group }}
- - environment=${{ needs.setup.outputs.runner-env }}
- - cpu
- - os-family=Linux
- steps:
- - name: "Checking out repository"
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- with:
- submodules: true
- - name: "Building and testing with Bazel"
- env:
- IREE_WRITE_REMOTE_BAZEL_CACHE: ${{ needs.setup.outputs.write-caches }}
- run: |
- ./build_tools/github_actions/docker_run.sh \
- --env "IREE_WRITE_REMOTE_BAZEL_CACHE=${IREE_WRITE_REMOTE_BAZEL_CACHE}" \
- gcr.io/iree-oss/base-bleeding-edge@sha256:cf2e78194e64fd0166f4141317366261d7a62432b72e9a324cb8c2ff4e1a515a \
- ./build_tools/bazel/build_test_all.sh
+ # TODO(#18238): migrate to new runner cluster
+ # build_test_all_bazel:
+ # needs: setup
+ # if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'build_test_all_bazel')
+ # runs-on:
+ # - self-hosted # must come first
+ # - runner-group=${{ needs.setup.outputs.runner-group }}
+ # - environment=${{ needs.setup.outputs.runner-env }}
+ # - cpu
+ # - os-family=Linux
+ # steps:
+ # - name: "Checking out repository"
+ # uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+ # with:
+ # submodules: true
+ # - name: "Building and testing with Bazel"
+ # env:
+ # IREE_WRITE_REMOTE_BAZEL_CACHE: ${{ needs.setup.outputs.write-caches }}
+ # run: |
+ # ./build_tools/github_actions/docker_run.sh \
+ # --env "IREE_WRITE_REMOTE_BAZEL_CACHE=${IREE_WRITE_REMOTE_BAZEL_CACHE}" \
+ # gcr.io/iree-oss/base-bleeding-edge@sha256:cf2e78194e64fd0166f4141317366261d7a62432b72e9a324cb8c2ff4e1a515a \
+ # ./build_tools/bazel/build_test_all.sh
############################### Configurations ###############################
# Jobs that build IREE in some non-default configuration
@@ -327,7 +328,7 @@
- setup
# Toolchains
- - build_test_all_bazel
+ # - build_test_all_bazel # Currently disabled.
# Accelerators
# - test_nvidia_a100
diff --git a/.github/workflows/ci_linux_arm64_clang.yml b/.github/workflows/ci_linux_arm64_clang.yml
index 5539a2c..3772279 100644
--- a/.github/workflows/ci_linux_arm64_clang.yml
+++ b/.github/workflows/ci_linux_arm64_clang.yml
@@ -24,8 +24,8 @@
setup:
uses: ./.github/workflows/setup.yml
- # TODO: Switch to GitHub-hosted arm64 runners when available?
- # TODO: Switch runs-on labels to use different (non-GCP) self-hosted runners?
+ # TODO(#18238): migrate to new runner cluster
+ # Or switch to GitHub-hosted arm64 runners when available?
linux_arm64_clang:
needs: setup
runs-on:
diff --git a/.github/workflows/pkgci.yml b/.github/workflows/pkgci.yml
index b96cc70..5c98cda 100644
--- a/.github/workflows/pkgci.yml
+++ b/.github/workflows/pkgci.yml
@@ -73,11 +73,12 @@
if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_amd_w7900')
uses: ./.github/workflows/pkgci_test_amd_w7900.yml
- test_nvidia_t4:
- name: Test NVIDIA T4
- needs: [setup, build_packages]
- if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_nvidia_t4')
- uses: ./.github/workflows/pkgci_test_nvidia_t4.yml
+ # TODO(#18238): migrate to new runner cluster
+ # test_nvidia_t4:
+ # name: Test NVIDIA T4
+ # needs: [setup, build_packages]
+ # if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_nvidia_t4')
+ # uses: ./.github/workflows/pkgci_test_nvidia_t4.yml
test_android:
name: Test Android
diff --git a/.github/workflows/pkgci_test_nvidia_t4.yml b/.github/workflows/pkgci_test_nvidia_t4.yml
index f8cbf0f..b59cb20 100644
--- a/.github/workflows/pkgci_test_nvidia_t4.yml
+++ b/.github/workflows/pkgci_test_nvidia_t4.yml
@@ -19,6 +19,7 @@
jobs:
test_t4:
+ # TODO(#18238): migrate to new runner cluster
runs-on:
- self-hosted # must come first
- runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
diff --git a/.github/workflows/pkgci_test_onnx.yml b/.github/workflows/pkgci_test_onnx.yml
index 46b0085..1d6b890 100644
--- a/.github/workflows/pkgci_test_onnx.yml
+++ b/.github/workflows/pkgci_test_onnx.yml
@@ -42,24 +42,25 @@
runs-on: nodai-amdgpu-w7900-x86-64
# NVIDIA GPU
- - name: nvidiagpu_cuda
- config-file: onnx_ops_gpu_cuda.json
- numprocesses: 4
- runs-on:
- - self-hosted # must come first
- - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
- - environment=prod
- - gpu # TODO(scotttodd): qualify further with vendor/model
- - os-family=Linux
- - name: nvidiagpu_vulkan
- config-file: onnx_ops_gpu_vulkan.json
- numprocesses: 4
- runs-on:
- - self-hosted # must come first
- - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
- - environment=prod
- - gpu # TODO(scotttodd): qualify further with vendor/model
- - os-family=Linux
+ # TODO(#18238): migrate to new runner cluster
+ # - name: nvidiagpu_cuda
+ # config-file: onnx_ops_gpu_cuda.json
+ # numprocesses: 4
+ # runs-on:
+ # - self-hosted # must come first
+ # - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
+ # - environment=prod
+ # - gpu # TODO(scotttodd): qualify further with vendor/model
+ # - os-family=Linux
+ # - name: nvidiagpu_vulkan
+ # config-file: onnx_ops_gpu_vulkan.json
+ # numprocesses: 4
+ # runs-on:
+ # - self-hosted # must come first
+ # - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
+ # - environment=prod
+ # - gpu # TODO(scotttodd): qualify further with vendor/model
+ # - os-family=Linux
env:
PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
CONFIG_FILE_PATH: tests/external/iree-test-suites/onnx_ops/${{ matrix.config-file }}