Move VMVX benchmarks to x86_64 runner (#15969)

This reduces the Pixel 6 benchmark run time from 1h15min to 51min.

With sharding, the x86_64 benchmarks will take 37min:
https://github.com/openxla/iree/actions/runs/7279522200

benchmark-extra: android-cpu-dt-only,x86_64-dt-only
diff --git a/.github/workflows/benchmark_large.yml b/.github/workflows/benchmark_large.yml
index 7026923..de14b76 100644
--- a/.github/workflows/benchmark_large.yml
+++ b/.github/workflows/benchmark_large.yml
@@ -21,7 +21,7 @@
           The reserved keyword `default` assigns a shard count to all target devices
           that are not explicitly listed.
         # Please keep this default value in sync with the jobs.build_e2e_test_artifacts.with.shard-count field below
-        default: a2-highgpu-1g=1,c2-standard-16=2,default=1
+        default: a2-highgpu-1g=1,c2-standard-60=2,default=1
         type: string
 
 concurrency:
@@ -76,7 +76,7 @@
       benchmark-presets: cuda-large,comp-stats-large,x86_64-large
       build-default-benchmark-suites: 0
       # Please keep the shard count default value in sync with on.workflow_dispatch.shard-count.default
-      shard-count: ${{ inputs && inputs.shard_count || 'a2-highgpu-1g=1,c2-standard-16=2,default=1' }}
+      shard-count: ${{ inputs && inputs.shard_count || 'a2-highgpu-1g=1,c2-standard-60=2,default=1' }}
 
   compilation_benchmarks:
     needs: [setup, build_e2e_test_artifacts]
diff --git a/.github/workflows/build_e2e_test_artifacts.yml b/.github/workflows/build_e2e_test_artifacts.yml
index 6b8a79d..9715b47 100644
--- a/.github/workflows/build_e2e_test_artifacts.yml
+++ b/.github/workflows/build_e2e_test_artifacts.yml
@@ -111,6 +111,7 @@
             --env "IREE_HOST_BIN_DIR=${HOST_BUILD_DIR}/install/bin" \
             --env "IREE_BENCHMARK_PRESETS=${IREE_BENCHMARK_PRESETS}" \
             --env "IREE_BUILD_DEFAULT_BENCHMARK_SUITES=${IREE_BUILD_DEFAULT_BENCHMARK_SUITES}" \
+            --env "IREE_SHARD_COUNT=${IREE_SHARD_COUNT}" \
             gcr.io/iree-oss/frontends@sha256:b654dffe5b69d35f3182ffe1a41be98e3f32bc7843b6f10829a8eb2aa6a345ee \
             build_tools/cmake/build_e2e_test_artifacts.sh \
             "${BUILD_E2E_TEST_ARTIFACTS_DIR}"
diff --git a/build_tools/python/benchmark_suites/iree/benchmark_collections.py b/build_tools/python/benchmark_suites/iree/benchmark_collections.py
index e4754d9..f309843 100644
--- a/build_tools/python/benchmark_suites/iree/benchmark_collections.py
+++ b/build_tools/python/benchmark_suites/iree/benchmark_collections.py
@@ -106,7 +106,7 @@
         adreno_benchmarks.Android_Adreno_Benchmarks(),
         mali_benchmarks.Android_Mali_Benchmarks(),
         vulkan_nvidia_benchmarks.Linux_Vulkan_NVIDIA_Benchmarks(),
-        vmvx_benchmarks.Android_VMVX_Benchmarks(),
+        vmvx_benchmarks.VMVX_Benchmarks(),
     ]
     all_run_configs: List[iree_definitions.E2EModelRunConfig] = []
     for benchmark in benchmarks:
diff --git a/build_tools/python/benchmark_suites/iree/module_execution_configs.py b/build_tools/python/benchmark_suites/iree/module_execution_configs.py
index 3a0dc03..2d076b4 100644
--- a/build_tools/python/benchmark_suites/iree/module_execution_configs.py
+++ b/build_tools/python/benchmark_suites/iree/module_execution_configs.py
@@ -109,14 +109,3 @@
         driver=iree_definitions.RuntimeDriver.LOCAL_TASK,
         extra_flags=[f"--task_topology_group_count={thread_num}"],
     )
-
-
-def get_vmvx_system_scheduling_local_task_config(thread_num: int):
-    config_id = f"{unique_ids.IREE_MODULE_EXECUTION_CONFIG_VMVX_SYS_SCHED_LOCAL_TASK_BASE}-{thread_num}"
-    return _with_caching_allocator(
-        id=config_id,
-        tags=[f"{thread_num}-thread", "full-inference", "system-scheduling"],
-        loader=iree_definitions.RuntimeLoader.VMVX_MODULE,
-        driver=iree_definitions.RuntimeDriver.LOCAL_TASK,
-        extra_flags=[f"--task_topology_group_count={thread_num}"],
-    )
diff --git a/build_tools/python/benchmark_suites/iree/vmvx_benchmarks.py b/build_tools/python/benchmark_suites/iree/vmvx_benchmarks.py
index 6638b80..6a5b7c1 100644
--- a/build_tools/python/benchmark_suites/iree/vmvx_benchmarks.py
+++ b/build_tools/python/benchmark_suites/iree/vmvx_benchmarks.py
@@ -5,7 +5,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 """Defines IREE VMVX benchmarks."""
 
-from typing import List, Tuple
+from typing import List
 
 from benchmark_suites.iree import benchmark_presets, module_execution_configs, utils
 from e2e_test_framework import unique_ids
@@ -14,8 +14,8 @@
 from e2e_test_framework.models import tflite_models
 
 
-class Android_VMVX_Benchmarks(object):
-    """Benchmarks VMVX on Android devices."""
+class VMVX_Benchmarks(object):
+    """Benchmarks with VMVX backend on different platforms."""
 
     VMVX_CPU_TARGET = iree_definitions.CompileTarget(
         target_backend=iree_definitions.TargetBackend.VMVX,
@@ -40,23 +40,20 @@
             )
             for model in [tflite_models.MOBILENET_V2, tflite_models.MOBILENET_V3SMALL]
         ]
-        default_execution_configs = [
-            module_execution_configs.get_vmvx_system_scheduling_local_task_config(
-                thread_num=2
-            )
+        local_task_exec_configs = [
+            module_execution_configs.get_vmvx_local_task_config(thread_num=8)
         ]
-        big_cores_devices = (
+        cascadelake_devices = (
             device_collections.DEFAULT_DEVICE_COLLECTION.query_device_specs(
-                architecture=common_definitions.DeviceArchitecture.ARMV8_2_A_GENERIC,
-                host_environment=common_definitions.HostEnvironment.ANDROID_ARMV8_2_A,
-                tags=["big-cores"],
+                architecture=common_definitions.DeviceArchitecture.X86_64_CASCADELAKE,
+                host_environment=common_definitions.HostEnvironment.LINUX_X86_64,
             )
         )
         run_configs = utils.generate_e2e_model_run_configs(
             module_generation_configs=gen_configs,
-            module_execution_configs=default_execution_configs,
-            device_specs=big_cores_devices,
-            presets=[benchmark_presets.ANDROID_CPU],
+            module_execution_configs=local_task_exec_configs,
+            device_specs=cascadelake_devices,
+            presets=[benchmark_presets.X86_64],
         )
 
         return run_configs
diff --git a/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake b/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake
index eec2b6b..1cbbde3 100644
--- a/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake
+++ b/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake
@@ -4289,8 +4289,6 @@
   ${PACKAGE_NAME}_iree-module-MobileBertSquad_fp32_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__experimental-flags_no-dt_
   ${PACKAGE_NAME}_iree-module-MobileBertSquad_int8_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__default-flags_dt-uk_
   ${PACKAGE_NAME}_iree-module-MobileBertSquad_int8_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__experimental-flags_no-dt_
-  ${PACKAGE_NAME}_iree-module-MobileNetV2_fp32_tflite___vmvx-generic-vmvx-vmvx__experimental-flags_
-  ${PACKAGE_NAME}_iree-module-MobileNetV3Small_fp32_tflite___vmvx-generic-vmvx-vmvx__experimental-flags_
   ${PACKAGE_NAME}_iree-module-Vit_int8_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__default-flags_dt-uk_
   ${PACKAGE_NAME}_iree-module-Vit_int8_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__experimental-flags_no-dt_
 )
@@ -4643,10 +4641,12 @@
   ${PACKAGE_NAME}_iree-module-MobileBertSquad_int8_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__experimental-flags_no-dt_
   ${PACKAGE_NAME}_iree-module-MobileNetV1_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__default-flags_dt-uk_
   ${PACKAGE_NAME}_iree-module-MobileNetV1_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__experimental-flags_no-dt_
+  ${PACKAGE_NAME}_iree-module-MobileNetV2_fp32_tflite___vmvx-generic-vmvx-vmvx__experimental-flags_
   ${PACKAGE_NAME}_iree-module-MobileNetV2_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__default-flags_dt-uk_
   ${PACKAGE_NAME}_iree-module-MobileNetV2_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__experimental-flags_no-dt_
   ${PACKAGE_NAME}_iree-module-MobileNetV2_int8_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__default-flags_dt-uk_
   ${PACKAGE_NAME}_iree-module-MobileNetV2_int8_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__experimental-flags_no-dt_
+  ${PACKAGE_NAME}_iree-module-MobileNetV3Small_fp32_tflite___vmvx-generic-vmvx-vmvx__experimental-flags_
   ${PACKAGE_NAME}_iree-module-MobileNetV3Small_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__default-flags_dt-uk_
   ${PACKAGE_NAME}_iree-module-MobileNetV3Small_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__experimental-flags_no-dt_
   ${PACKAGE_NAME}_iree-module-MobileSSD_fp32_tflite___x86_64-cascadelake-linux_gnu-llvm_cpu__default-flags_dt-uk_