Move VMVX benchmarks to x86_64 runner (#15969)
This reduces the Pixel 6 benchmark run time from 1h 15min to 51min.
With sharding, the x86_64 benchmarks will take 37min:
https://github.com/openxla/iree/actions/runs/7279522200
benchmark-extra: android-cpu-dt-only,x86_64-dt-only
diff --git a/.github/workflows/benchmark_large.yml b/.github/workflows/benchmark_large.yml
index 7026923..de14b76 100644
--- a/.github/workflows/benchmark_large.yml
+++ b/.github/workflows/benchmark_large.yml
@@ -21,7 +21,7 @@
The reserved keyword `default` assigns a shard count to all target devices
that are not explicitly listed.
# Please keep this default value in sync with the jobs.build_e2e_test_artifacts.with.shard-count field below
- default: a2-highgpu-1g=1,c2-standard-16=2,default=1
+ default: a2-highgpu-1g=1,c2-standard-60=2,default=1
type: string
concurrency:
@@ -76,7 +76,7 @@
benchmark-presets: cuda-large,comp-stats-large,x86_64-large
build-default-benchmark-suites: 0
# Please keep the shard count default value in sync with on.workflow_dispatch.shard-count.default
- shard-count: ${{ inputs && inputs.shard_count || 'a2-highgpu-1g=1,c2-standard-16=2,default=1' }}
+ shard-count: ${{ inputs && inputs.shard_count || 'a2-highgpu-1g=1,c2-standard-60=2,default=1' }}
compilation_benchmarks:
needs: [setup, build_e2e_test_artifacts]
diff --git a/.github/workflows/build_e2e_test_artifacts.yml b/.github/workflows/build_e2e_test_artifacts.yml
index 6b8a79d..9715b47 100644
--- a/.github/workflows/build_e2e_test_artifacts.yml
+++ b/.github/workflows/build_e2e_test_artifacts.yml
@@ -111,6 +111,7 @@
--env "IREE_HOST_BIN_DIR=${HOST_BUILD_DIR}/install/bin" \
--env "IREE_BENCHMARK_PRESETS=${IREE_BENCHMARK_PRESETS}" \
--env "IREE_BUILD_DEFAULT_BENCHMARK_SUITES=${IREE_BUILD_DEFAULT_BENCHMARK_SUITES}" \
+ --env "IREE_SHARD_COUNT=${IREE_SHARD_COUNT}" \
gcr.io/iree-oss/frontends@sha256:b654dffe5b69d35f3182ffe1a41be98e3f32bc7843b6f10829a8eb2aa6a345ee \
build_tools/cmake/build_e2e_test_artifacts.sh \
"${BUILD_E2E_TEST_ARTIFACTS_DIR}"
diff --git a/build_tools/python/benchmark_suites/iree/benchmark_collections.py b/build_tools/python/benchmark_suites/iree/benchmark_collections.py
index e4754d9..f309843 100644
--- a/build_tools/python/benchmark_suites/iree/benchmark_collections.py
+++ b/build_tools/python/benchmark_suites/iree/benchmark_collections.py
@@ -106,7 +106,7 @@
adreno_benchmarks.Android_Adreno_Benchmarks(),
mali_benchmarks.Android_Mali_Benchmarks(),
vulkan_nvidia_benchmarks.Linux_Vulkan_NVIDIA_Benchmarks(),
- vmvx_benchmarks.Android_VMVX_Benchmarks(),
+ vmvx_benchmarks.VMVX_Benchmarks(),
]
all_run_configs: List[iree_definitions.E2EModelRunConfig] = []
for benchmark in benchmarks:
diff --git a/build_tools/python/benchmark_suites/iree/module_execution_configs.py b/build_tools/python/benchmark_suites/iree/module_execution_configs.py
index 3a0dc03..2d076b4 100644
--- a/build_tools/python/benchmark_suites/iree/module_execution_configs.py
+++ b/build_tools/python/benchmark_suites/iree/module_execution_configs.py
@@ -109,14 +109,3 @@
driver=iree_definitions.RuntimeDriver.LOCAL_TASK,
extra_flags=[f"--task_topology_group_count={thread_num}"],
)
-
-
-def get_vmvx_system_scheduling_local_task_config(thread_num: int):
- config_id = f"{unique_ids.IREE_MODULE_EXECUTION_CONFIG_VMVX_SYS_SCHED_LOCAL_TASK_BASE}-{thread_num}"
- return _with_caching_allocator(
- id=config_id,
- tags=[f"{thread_num}-thread", "full-inference", "system-scheduling"],
- loader=iree_definitions.RuntimeLoader.VMVX_MODULE,
- driver=iree_definitions.RuntimeDriver.LOCAL_TASK,
- extra_flags=[f"--task_topology_group_count={thread_num}"],
- )
diff --git a/build_tools/python/benchmark_suites/iree/vmvx_benchmarks.py b/build_tools/python/benchmark_suites/iree/vmvx_benchmarks.py
index 6638b80..6a5b7c1 100644
--- a/build_tools/python/benchmark_suites/iree/vmvx_benchmarks.py
+++ b/build_tools/python/benchmark_suites/iree/vmvx_benchmarks.py
@@ -5,7 +5,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Defines IREE VMVX benchmarks."""
-from typing import List, Tuple
+from typing import List
from benchmark_suites.iree import benchmark_presets, module_execution_configs, utils
from e2e_test_framework import unique_ids
@@ -14,8 +14,8 @@
from e2e_test_framework.models import tflite_models
-class Android_VMVX_Benchmarks(object):
- """Benchmarks VMVX on Android devices."""
+class VMVX_Benchmarks(object):
+ """Benchmarks with VMVX backend on different platforms."""
VMVX_CPU_TARGET = iree_definitions.CompileTarget(
target_backend=iree_definitions.TargetBackend.VMVX,
@@ -40,23 +40,20 @@
)
for model in [tflite_models.MOBILENET_V2, tflite_models.MOBILENET_V3SMALL]
]
- default_execution_configs = [
- module_execution_configs.get_vmvx_system_scheduling_local_task_config(
- thread_num=2
- )
+ local_task_exec_configs = [
+ module_execution_configs.get_vmvx_local_task_config(thread_num=8)
]
- big_cores_devices = (
+ cascadelake_devices = (
device_collections.DEFAULT_DEVICE_COLLECTION.query_device_specs(
- architecture=common_definitions.DeviceArchitecture.ARMV8_2_A_GENERIC,
- host_environment=common_definitions.HostEnvironment.ANDROID_ARMV8_2_A,
- tags=["big-cores"],
+ architecture=common_definitions.DeviceArchitecture.X86_64_CASCADELAKE,
+ host_environment=common_definitions.HostEnvironment.LINUX_X86_64,
)
)
run_configs = utils.generate_e2e_model_run_configs(
module_generation_configs=gen_configs,
- module_execution_configs=default_execution_configs,
- device_specs=big_cores_devices,
- presets=[benchmark_presets.ANDROID_CPU],
+ module_execution_configs=local_task_exec_configs,
+ device_specs=cascadelake_devices,
+ presets=[benchmark_presets.X86_64],
)
return run_configs
diff --git a/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake b/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake
index eec2b6b..1cbbde3 100644
--- a/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake
+++ b/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake
@@ -4289,8 +4289,6 @@
${PACKAGE_NAME}_iree-module-MobileBertSquad_fp32_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__experimental-flags_no-dt_
${PACKAGE_NAME}_iree-module-MobileBertSquad_int8_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__default-flags_dt-uk_
${PACKAGE_NAME}_iree-module-MobileBertSquad_int8_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__experimental-flags_no-dt_
- ${PACKAGE_NAME}_iree-module-MobileNetV2_fp32_tflite___vmvx-generic-vmvx-vmvx__experimental-flags_
- ${PACKAGE_NAME}_iree-module-MobileNetV3Small_fp32_tflite___vmvx-generic-vmvx-vmvx__experimental-flags_
${PACKAGE_NAME}_iree-module-Vit_int8_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__default-flags_dt-uk_
${PACKAGE_NAME}_iree-module-Vit_int8_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__experimental-flags_no-dt_
)
@@ -4643,10 +4641,12 @@
${PACKAGE_NAME}_iree-module-MobileBertSquad_int8_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__experimental-flags_no-dt_
${PACKAGE_NAME}_iree-module-MobileNetV1_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__default-flags_dt-uk_
${PACKAGE_NAME}_iree-module-MobileNetV1_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__experimental-flags_no-dt_
+ ${PACKAGE_NAME}_iree-module-MobileNetV2_fp32_tflite___vmvx-generic-vmvx-vmvx__experimental-flags_
${PACKAGE_NAME}_iree-module-MobileNetV2_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__default-flags_dt-uk_
${PACKAGE_NAME}_iree-module-MobileNetV2_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__experimental-flags_no-dt_
${PACKAGE_NAME}_iree-module-MobileNetV2_int8_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__default-flags_dt-uk_
${PACKAGE_NAME}_iree-module-MobileNetV2_int8_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__experimental-flags_no-dt_
+ ${PACKAGE_NAME}_iree-module-MobileNetV3Small_fp32_tflite___vmvx-generic-vmvx-vmvx__experimental-flags_
${PACKAGE_NAME}_iree-module-MobileNetV3Small_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__default-flags_dt-uk_
${PACKAGE_NAME}_iree-module-MobileNetV3Small_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__experimental-flags_no-dt_
${PACKAGE_NAME}_iree-module-MobileSSD_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__default-flags_dt-uk_