Revert "Add sharding to benchmark execution" (#14499)

Reverts openxla/iree#14084

It looks like we forgot to handle the sharding in
`build_tools/benchmarks/run_benchmarks_on_android.py`.

See
https://github.com/openxla/iree/actions/runs/5673763887/job/15380463421#step:9:37

The fix should be simple, but I suggest we revert first so the Android
benchmarks can run again.
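
For reference, a minimal sketch of what the missing handling might look like in `run_benchmarks_on_android.py`, mirroring the shard selection that this revert removes from `run_benchmarks_on_linux.py` (see the diff below). The function name and exact imports here are assumptions for illustration; the eventual re-land may structure this differently.

```python
# Hypothetical sketch only -- mirrors the reverted shard handling from
# run_benchmarks_on_linux.py; the actual fix may differ.
import json
import typing

from e2e_test_framework import serialization
from e2e_test_framework.definitions import iree_definitions


def select_run_configs(args):
    benchmark_groups = json.loads(args.execution_benchmark_config.read_text())
    benchmark_group = benchmark_groups[args.target_device_name]

    if args.shard_index is None:
        # No shard index given: run benchmarks from all shards.
        packed_run_configs = [
            shard["run_configs"] for shard in benchmark_group["shards"]
        ]
    else:
        # Only run the benchmarks from the requested shard.
        shard = next(
            s for s in benchmark_group["shards"] if s["index"] == args.shard_index
        )
        packed_run_configs = [shard["run_configs"]]

    # Deserialize each shard's run configs and flatten them into one list.
    return [
        run_config
        for packed in packed_run_configs
        for run_config in serialization.unpack_and_deserialize(
            data=packed,
            root_type=typing.List[iree_definitions.E2EModelRunConfig],
        )
    ]
```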
diff --git a/.github/workflows/benchmark_execution.yml b/.github/workflows/benchmark_execution.yml
index 26e60f5..2ee1645 100644
--- a/.github/workflows/benchmark_execution.yml
+++ b/.github/workflows/benchmark_execution.yml
@@ -69,38 +69,10 @@
         id: generate
         run: |
           gcloud storage cp "${BENCHMARK_CONFIG_GCS_ARTIFACT}" "${BENCHMARK_CONFIG}"
-          # This jq command takes a benchmark config with the following structure:
-          #
-          #   { "target_device_name" : {
-          #      "host_environment" : { ... },
-          #      "shards" : [ { ... }, { ... } ]
-          #     },
-          #     "other_target_device_name" : { ... }
-          #
-          # and turns it into a flat list of benchmark jobs:
-          #
-          #   [
-          #     { "device_name" : "target_device_name",
-          #       "host_environment" : { ... },
-          #       "shard" : { index: 0, count: 2 }
-          #     },
-          #     { "device_name" : "target_device_name",
-          #       "host_environment" : { ... },
-          #       "shard" : { index: 1, count: 2 }
-          #     },
-          #     { "device_name" : "other_target_device_name",
-          #       "host_environment" : { ... },
-          #       "shard" : { index: 0, count: N }
-          #     },
-          #     ...
-          #   ]
-          echo benchmark-matrix="$(jq -c '[ . | to_entries[]
-            | .key as $device_name
-            | .value.host_environment as $host_environment
-            | (.value.shards | length) as $count
-            | .value.shards[]
-            | {$device_name, $host_environment, shard: {index, $count}}
-            ]' "${BENCHMARK_CONFIG}")" >> "${GITHUB_OUTPUT}"
+          echo benchmark-matrix=$(jq \
+              'to_entries | map({"device_name": .key, "host_environment": .value.host_environment})' \
+              "${BENCHMARK_CONFIG}") \
+            >> "${GITHUB_OUTPUT}"
 
   run_benchmarks:
     needs: [generate_matrix]
@@ -119,8 +91,6 @@
       - machine-type=${{ matrix.benchmark.device_name }}
     env:
       DEVICE_NAME: ${{ matrix.benchmark.device_name }}
-      SHARD_INDEX: ${{ matrix.benchmark.shard.index }}
-      SHARD_COUNT: ${{ matrix.benchmark.shard.count }}
       PLATFORM_ARCH: ${{ matrix.benchmark.host_environment.platform }}-${{ matrix.benchmark.host_environment.architecture }}
       E2E_TEST_ARTIFACTS_GCS_ARTIFACT_DIR: ${{ inputs.e2e-test-artifacts-gcs-artifact-dir }}
       E2E_TEST_ARTIFACTS_DIR: ${{ inputs.e2e-test-artifacts-dir }}
@@ -152,9 +122,8 @@
           mkdir -p "${E2E_TEST_ARTIFACTS_DIR}"
           jq -r \
             --arg DEVICE_NAME "${DEVICE_NAME}" \
-            --arg SHARD_INDEX "${SHARD_INDEX}" \
             --arg GCS_ARTIFACT_DIR "${E2E_TEST_ARTIFACTS_GCS_ARTIFACT_DIR}" \
-            '.[$DEVICE_NAME].shards[($SHARD_INDEX | tonumber)] | .module_dir_paths[] | "\($GCS_ARTIFACT_DIR)/\(.)"' \
+            '.[$DEVICE_NAME] | .module_dir_paths | map("\($GCS_ARTIFACT_DIR)/\(.)") | join("\n")' \
             "${BENCHMARK_CONFIG}" | \
             gcloud storage cp -r --read-paths-from-stdin "${E2E_TEST_ARTIFACTS_DIR}"
           echo "benchmark-config=${BENCHMARK_CONFIG}" >> "${GITHUB_OUTPUT}"
@@ -170,14 +139,6 @@
           echo "normal-benchmark-tools-dir=${BENCHMARK_TOOLS_DIR}/build/tools" >> "${GITHUB_OUTPUT}"
           echo "traced-benchmark-tools-dir=${BENCHMARK_TOOLS_DIR}/build-traced/tools" >> "${GITHUB_OUTPUT}"
           echo "tracy-capture-tool=${BENCHMARK_TOOLS_DIR}/build-traced/tracy-capture" >> "${GITHUB_OUTPUT}"
-      - name: "Determine Shard Suffix"
-        id: sharding
-        run: |
-          if (( SHARD_COUNT > 1 )); then
-            echo "suffix=$(printf -- "-%02d-of-%02d" "${SHARD_INDEX}" "${SHARD_COUNT}")" >> "${GITHUB_OUTPUT}"
-          else
-            echo "suffix=" >> "${GITHUB_OUTPUT}"
-          fi
       - name: "Running benchmarks"
         env:
           IREE_EXECUTION_BENCHMARK_CONFIG: ${{ steps.download-assets.outputs.benchmark-config }}
@@ -186,10 +147,9 @@
           IREE_TRACED_BENCHMARK_TOOLS_DIR: ${{ steps.unpack-tools.outputs.traced-benchmark-tools-dir }}
           IREE_TRACY_CAPTURE_TOOL: ${{ steps.unpack-tools.outputs.tracy-capture-tool }}
           IREE_TARGET_DEVICE_NAME: ${{ env.DEVICE_NAME }}
-          IREE_SHARD_INDEX: ${{ matrix.benchmark.shard.index }}
           IREE_E2E_TEST_ARTIFACTS_DIR: ${{ env.E2E_TEST_ARTIFACTS_DIR }}
-          IREE_BENCHMARK_RESULTS: ${{ env.BENCHMARK_RESULTS_DIR }}/benchmark-results-${{ matrix.benchmark.device_name }}${{ steps.sharding.outputs.suffix }}.json
-          IREE_BENCHMARK_TRACES: ${{ env.BENCHMARK_RESULTS_DIR }}/benchmark-traces-${{ matrix.benchmark.device_name }}${{ steps.sharding.outputs.suffix }}.tar.gz
+          IREE_BENCHMARK_RESULTS: ${{ env.BENCHMARK_RESULTS_DIR }}/benchmark-results-${{ matrix.benchmark.device_name }}.json
+          IREE_BENCHMARK_TRACES: ${{ env.BENCHMARK_RESULTS_DIR }}/benchmark-traces-${{ matrix.benchmark.device_name }}.tar.gz
         run: |
           mkdir -p ${BENCHMARK_RESULTS_DIR}
           ./build_tools/benchmarks/run_benchmarks.sh
diff --git a/.github/workflows/benchmark_large.yml b/.github/workflows/benchmark_large.yml
index 7026923..ad0a4f4 100644
--- a/.github/workflows/benchmark_large.yml
+++ b/.github/workflows/benchmark_large.yml
@@ -13,16 +13,6 @@
     # Scheduled to run at 09:00 UTC and 21:00 UTC.
     - cron: '0 09,21 * * *'
   workflow_dispatch:
-    inputs:
-      shard-count:
-        description: |
-          A device-name to integer mapping as a comma separated list.
-          Allows to assign a distinct shard count for each target device.
-          The reserved keyword `default` assigns a shard count to all target devices
-          that are not explicitly listed.
-        # Please keep this default value in sync with the jobs.build_e2e_test_artifacts.with.shard-count field below
-        default: a2-highgpu-1g=1,c2-standard-16=2,default=1
-        type: string
 
 concurrency:
   # A PR number if a pull request and otherwise the commit hash. This cancels
@@ -75,8 +65,6 @@
       build-dir-gcs-artifact: ${{ needs.build_all.outputs.build-dir-gcs-artifact }}
       benchmark-presets: cuda-large,comp-stats-large,x86_64-large
       build-default-benchmark-suites: 0
-      # Please keep the shard count default value in sync with on.workflow_dispatch.shard-count.default
-      shard-count: ${{ inputs && inputs.shard_count || 'a2-highgpu-1g=1,c2-standard-16=2,default=1' }}
 
   compilation_benchmarks:
     needs: [setup, build_e2e_test_artifacts]
diff --git a/.github/workflows/build_e2e_test_artifacts.yml b/.github/workflows/build_e2e_test_artifacts.yml
index 1dd93c5..d730a9a 100644
--- a/.github/workflows/build_e2e_test_artifacts.yml
+++ b/.github/workflows/build_e2e_test_artifacts.yml
@@ -39,14 +39,6 @@
           if set to 1.
         default: 1
         type: number
-      shard-count:
-        description: |
-          A device-name to integer mapping as a comma separated list.
-          Allows to assign a distinct shard count for each target device.
-          The reserved keyword `default` assigns a shard count to all target devices
-          that are not explicitly listed.
-        default: default=1
-        type: string
     outputs:
       e2e-test-artifacts-dir:
         description: |
@@ -105,7 +97,6 @@
           IREE_BENCHMARK_PRESETS: ${{ inputs.benchmark-presets }}
           IREE_BUILD_DEFAULT_BENCHMARK_SUITES: ${{ inputs.build-default-benchmark-suites }}
           BUILD_E2E_TEST_ARTIFACTS_DIR: build-e2e-test-artifacts
-          IREE_SHARD_COUNT: ${{ inputs.shard-count }}
         run: |
           build_tools/github_actions/docker_run.sh \
             --env "IREE_HOST_BIN_DIR=${HOST_BUILD_DIR}/install/bin" \
diff --git a/build_tools/benchmarks/benchmark_helper.py b/build_tools/benchmarks/benchmark_helper.py
index 44c6893..5a5d377 100755
--- a/build_tools/benchmarks/benchmark_helper.py
+++ b/build_tools/benchmarks/benchmark_helper.py
@@ -17,8 +17,7 @@
 import os
 import shlex
 import subprocess
-from typing import Dict, List, Optional, Sequence
-import functools
+from typing import List, Optional, Sequence
 
 from e2e_test_artifacts import model_artifacts, iree_artifacts
 from e2e_test_framework import serialization
@@ -115,29 +114,23 @@
     if execution_benchmark_config is not None:
         benchmark_groups = json.loads(execution_benchmark_config.read_text())
         for target_device, benchmark_group in benchmark_groups.items():
-            shard_count = len(benchmark_group["shards"])
-            for shard in benchmark_group["shards"]:
-                run_configs = serialization.unpack_and_deserialize(
-                    data=shard["run_configs"],
-                    root_type=List[iree_definitions.E2EModelRunConfig],
-                )
-                for run_config in run_configs:
-                    if (
-                        benchmark_id is not None
-                        and benchmark_id != run_config.composite_id
-                    ):
-                        continue
+            run_configs = serialization.unpack_and_deserialize(
+                data=benchmark_group["run_configs"],
+                root_type=List[iree_definitions.E2EModelRunConfig],
+            )
+            for run_config in run_configs:
+                if benchmark_id is not None and benchmark_id != run_config.composite_id:
+                    continue
 
-                    lines.append("################")
-                    lines.append("")
-                    lines.append(f"Execution Benchmark ID: {run_config.composite_id}")
-                    lines.append(f"Name: {run_config}")
-                    lines.append(f"Target Device: {target_device}")
-                    lines.append(f"Shard: {shard['index']} / {shard_count}")
-                    lines.append("")
-                    lines += _dump_cmds_from_run_config(
-                        run_config=run_config, root_path=e2e_test_artifacts_dir
-                    )
+                lines.append("################")
+                lines.append("")
+                lines.append(f"Execution Benchmark ID: {run_config.composite_id}")
+                lines.append(f"Name: {run_config}")
+                lines.append(f"Target Device: {target_device}")
+                lines.append("")
+                lines += _dump_cmds_from_run_config(
+                    run_config=run_config, root_path=e2e_test_artifacts_dir
+                )
 
     if compilation_benchmark_config is not None:
         benchmark_config = json.loads(compilation_benchmark_config.read_text())
@@ -161,78 +154,12 @@
     print(*lines, sep="\n")
 
 
-# Represents a benchmark results file with the data already loaded from a JSON file.
-class JSONBackedBenchmarkData:
-    def __init__(self, source_filepath: pathlib.PurePath, data: Dict):
-        if not isinstance(data, dict):
-            raise ValueError(
-                f"'{source_filepath}' seems not to be a valid benchmark-results-file (No JSON struct as root element)."
-            )
-        if "commit" not in data:
-            raise ValueError(
-                f"'{source_filepath}' seems not to be a valid benchmark-results-file ('commit' field not found)."
-            )
-        if "benchmarks" not in data:
-            raise ValueError(
-                f"'{source_filepath}' seems not to be a valid benchmark-results-file ('benchmarks' field not found)."
-            )
-
-        self.source_filepath: pathlib.PurePath = source_filepath
-        self.data: Dict = data
-
-    # Parses a JSON benchmark results file and makes some sanity checks
-    @staticmethod
-    def load_from_file(filepath: pathlib.Path):
-        try:
-            data = json.loads(filepath.read_bytes())
-        except json.JSONDecodeError as e:
-            raise ValueError(f"'{filepath}' seems not to be a valid JSON file: {e.msg}")
-        return JSONBackedBenchmarkData(filepath, data)
-
-    # A convenience wrapper around `loadFromFile` that accepts a sequence of paths and returns a sequence of JSONBackedBenchmarkData objects as a generator.
-    @staticmethod
-    def load_many_from_files(filepaths: Sequence[pathlib.Path]):
-        return (
-            JSONBackedBenchmarkData.load_from_file(filepath) for filepath in filepaths
-        )
-
-
-# Merges the benchmark results from `right` into `left` and returns the updated `left`
-def _merge_two_resultsets(
-    left: JSONBackedBenchmarkData, right: JSONBackedBenchmarkData
-) -> JSONBackedBenchmarkData:
-    if left.data["commit"] != right.data["commit"]:
-        raise ValueError(
-            f"'{right.source_filepath}' and the previous files are based on different commits ({left.data['commit']} != {right.data['commit']}). Merging not supported."
-        )
-    left.data["benchmarks"].extend(right.data["benchmarks"])
-    return left
-
-
-def merge_results(benchmark_results: Sequence[JSONBackedBenchmarkData]):
-    return functools.reduce(_merge_two_resultsets, benchmark_results)
-
-
-def _merge_results_handler(
-    benchmark_results_files: Sequence[pathlib.Path], **_unused_args
-):
-    print(
-        json.dumps(
-            merge_results(
-                JSONBackedBenchmarkData.load_many_from_files(benchmark_results_files)
-            )
-        )
-    )
-
-
 def _parse_arguments() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         description="Miscellaneous tool to help work with benchmark suite and benchmark CI."
     )
 
-    subparser = parser.add_subparsers(
-        required=True, title="operation", dest="operation"
-    )
+    subparser = parser.add_subparsers(required=True, title="operation")
     dump_cmds_parser = subparser.add_parser(
         "dump-cmds", help="Dump the commands to compile and run benchmarks manually."
     )
@@ -257,22 +184,9 @@
     )
     dump_cmds_parser.set_defaults(handler=_dump_cmds_handler)
 
-    merge_results_parser = subparser.add_parser(
-        "merge-results",
-        help="Merges the results from multiple benchmark results JSON files into a single JSON structure.",
-    )
-    merge_results_parser.add_argument(
-        "benchmark_results_files",
-        type=pathlib.Path,
-        nargs="+",
-        help="One or more benchmark results JSON file paths",
-    )
-    merge_results_parser.set_defaults(handler=_merge_results_handler)
-
     args = parser.parse_args()
     if (
-        args.operation == "dump-cmds"
-        and args.execution_benchmark_config is None
+        args.execution_benchmark_config is None
         and args.compilation_benchmark_config is None
     ):
         parser.error(
diff --git a/build_tools/benchmarks/benchmark_helper_test.py b/build_tools/benchmarks/benchmark_helper_test.py
deleted file mode 100644
index 9086d3c..0000000
--- a/build_tools/benchmarks/benchmark_helper_test.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2023 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-import json
-import unittest
-import benchmark_helper
-import tempfile
-import pathlib
-
-
-class BenchmarkHelperTest(unittest.TestCase):
-    def test_merge_results_simple(self):
-        first = benchmark_helper.JSONBackedBenchmarkData(
-            pathlib.Path("first.json"),
-            {
-                "commit": "123",
-                "benchmarks": [{"benchmark_id": "first1"}, {"benchmark_id": "first2"}],
-            },
-        )
-
-        second = benchmark_helper.JSONBackedBenchmarkData(
-            pathlib.Path("second.json"),
-            {
-                "commit": "123",
-                "benchmarks": [
-                    {"benchmark_id": "second1"},
-                    {"benchmark_id": "second2"},
-                ],
-            },
-        )
-
-        result = benchmark_helper.merge_results([first, second])
-
-        self.assertEqual(
-            result.data,
-            {
-                "commit": "123",
-                "benchmarks": [
-                    {"benchmark_id": "first1"},
-                    {"benchmark_id": "first2"},
-                    {"benchmark_id": "second1"},
-                    {"benchmark_id": "second2"},
-                ],
-            },
-        )
-
-    def test_merge_results_mismatching_commits(self):
-        first = benchmark_helper.JSONBackedBenchmarkData(
-            pathlib.Path("first.json"), {"commit": "123", "benchmarks": []}
-        )
-        second = benchmark_helper.JSONBackedBenchmarkData(
-            pathlib.Path("second.json"), {"commit": "456", "benchmarks": []}
-        )
-
-        with self.assertRaisesRegex(ValueError, "based on different commits"):
-            benchmark_helper.merge_results([first, second])
-
-    def test_create_json_backed_benchmark_data_success(self):
-        benchmark_helper.JSONBackedBenchmarkData(
-            pathlib.Path("first.json"), {"commit": "123", "benchmarks": []}
-        )
-
-    def test_create_json_backed_benchmark_data_with_missing_benchmark_list(self):
-        with self.assertRaisesRegex(ValueError, "'benchmarks' field not found"):
-            benchmark_helper.JSONBackedBenchmarkData(
-                pathlib.Path("second.json"), {"commit": "123"}
-            )
-
-    def test_load_from_file_success(self):
-        with tempfile.TemporaryDirectory() as dir:
-            filepath = pathlib.Path(dir) / "first.json"
-            contents = {"commit": "123", "benchmarks": []}
-            filepath.write_text(json.dumps(contents))
-
-            result = benchmark_helper.JSONBackedBenchmarkData.load_from_file(filepath)
-            self.assertEqual(result.data, contents)
-            self.assertEqual(result.source_filepath, filepath)
-
-    def test_load_from_file_invalid_json(self):
-        with tempfile.TemporaryDirectory() as dir:
-            filepath = pathlib.Path(dir) / "first.json"
-            filepath.write_text("bliblablub")
-
-            with self.assertRaisesRegex(
-                ValueError, "seems not to be a valid JSON file"
-            ):
-                benchmark_helper.JSONBackedBenchmarkData.load_from_file(filepath)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/build_tools/benchmarks/common/common_arguments.py b/build_tools/benchmarks/common/common_arguments.py
index 71e8a82..fb38be9 100644
--- a/build_tools/benchmarks/common/common_arguments.py
+++ b/build_tools/benchmarks/common/common_arguments.py
@@ -182,12 +182,6 @@
             required=True,
             help="Target device in benchmark config to run",
         )
-        self.add_argument(
-            "--shard_index",
-            type=int,
-            default=None,
-            help="Shard in benchmark config to run",
-        )
 
 
 def expand_and_check_file_paths(paths: Sequence[str]) -> List[pathlib.Path]:
diff --git a/build_tools/benchmarks/export_benchmark_config.py b/build_tools/benchmarks/export_benchmark_config.py
index a7dcba2..a93e346 100755
--- a/build_tools/benchmarks/export_benchmark_config.py
+++ b/build_tools/benchmarks/export_benchmark_config.py
@@ -87,7 +87,6 @@
 def _export_execution_handler(
     presets: Optional[Sequence[str]] = None,
     target_device_names: Optional[Sequence[str]] = None,
-    shard_count: Optional[Dict[str, int]] = None,
     **_unused_args,
 ):
     _, all_run_configs = benchmark_collections.generate_benchmarks()
@@ -100,9 +99,6 @@
         presets=None if presets is None else set(presets),
     )
 
-    shard_count = {} if shard_count is None else shard_count
-    default_shard_count = shard_count.get("default", 1)
-
     output_map = {}
     for device_name, run_configs in grouped_run_config_map.items():
         host_environments = set(
@@ -114,34 +110,15 @@
             )
         host_environment = host_environments.pop()
 
-        current_shard_count = int(shard_count.get(device_name, default_shard_count))
-        # This splits the `run_configs` list into `current_shard_count` sub-lists in a round-robin way.
-        # Example: current_shard_count = 3; run_configs = range(10); assert(sharded_run_configs == [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]
-        sharded_run_configs = [
-            run_configs[shard_idx::current_shard_count]
-            for shard_idx in range(current_shard_count)
-        ]
+        distinct_module_dir_paths = _get_distinct_module_dir_paths(
+            config.module_generation_config for config in run_configs
+        )
 
-        for index, shard in enumerate(sharded_run_configs):
-            distinct_module_dir_paths = _get_distinct_module_dir_paths(
-                config.module_generation_config for config in shard
-            )
-
-            serialized_run_configs = serialization.serialize_and_pack(shard)
-            output_map.setdefault(
-                device_name,
-                {
-                    "host_environment": dataclasses.asdict(host_environment),
-                    "shards": [],
-                },
-            )
-            output_map[device_name]["shards"].append(
-                {
-                    "index": index,
-                    "module_dir_paths": distinct_module_dir_paths,
-                    "run_configs": serialized_run_configs,
-                }
-            )
+        output_map[device_name] = {
+            "host_environment": dataclasses.asdict(host_environment),
+            "module_dir_paths": distinct_module_dir_paths,
+            "run_configs": serialization.serialize_and_pack(run_configs),
+        }
 
     return output_map
 
@@ -188,10 +165,6 @@
     return presets
 
 
-def _parse_shard_count(arg: str):
-    return dict(map(str.strip, el.split("=", 1)) for el in arg.split(","))
-
-
 def _parse_arguments():
     """Parses command-line options."""
 
@@ -255,12 +228,6 @@
             f"{','.join(benchmark_presets.ALL_EXECUTION_PRESETS)}"
         ),
     )
-    execution_parser.add_argument(
-        "--shard_count",
-        type=_parse_shard_count,
-        default={},
-        help="Accepts a comma-separated list of device-name to shard-count mappings. Use reserved keyword 'default' for setting a default shard count: c2-standard-16=3,default=2",
-    )
 
     compilation_parser = subparser.add_parser(
         "compilation",
diff --git a/build_tools/benchmarks/run_benchmarks.sh b/build_tools/benchmarks/run_benchmarks.sh
index 99b9939..6849259 100755
--- a/build_tools/benchmarks/run_benchmarks.sh
+++ b/build_tools/benchmarks/run_benchmarks.sh
@@ -12,15 +12,10 @@
 # the script to learn about the required setup.
 #
 # IREE_NORMAL_BENCHMARK_TOOLS_DIR needs to point to a directory contains IREE
-# benchmark tools. See benchmarks/README.md for more information.
-#
-# Command line arguments:
-# 1. The path of e2e test artifacts directory
-# 2. The path of IREE benchmark run config
-# 3. The target device name
-# 4. The shard index
-# 5. The path to write benchmark results
-# 6. The path to write benchmark traces
+# benchmark tools. See benchmarks/README.md for more information. The first
+# argument is the path of e2e test artifacts directory. The second argument is
+# the path of IREE benchmark run config. The third argument is the path to write
+# benchmark results.
 
 set -euo pipefail
 
@@ -31,9 +26,8 @@
 E2E_TEST_ARTIFACTS_DIR="${1:-${IREE_E2E_TEST_ARTIFACTS_DIR}}"
 EXECUTION_BENCHMARK_CONFIG="${2:-${IREE_EXECUTION_BENCHMARK_CONFIG}}"
 TARGET_DEVICE_NAME="${3:-${IREE_TARGET_DEVICE_NAME}}"
-SHARD_INDEX="${4:-${IREE_SHARD_INDEX}}"
-BENCHMARK_RESULTS="${5:-${IREE_BENCHMARK_RESULTS}}"
-BENCHMARK_TRACES="${6:-${IREE_BENCHMARK_TRACES}}"
+BENCHMARK_RESULTS="${4:-${IREE_BENCHMARK_RESULTS}}"
+BENCHMARK_TRACES="${5:-${IREE_BENCHMARK_TRACES}}"
 
 if [[ "${TARGET_DEVICE_NAME}" == "a2-highgpu-1g" ]]; then
   ${DOCKER_WRAPPER} \
@@ -48,7 +42,6 @@
         --e2e_test_artifacts_dir="${E2E_TEST_ARTIFACTS_DIR}" \
         --execution_benchmark_config="${EXECUTION_BENCHMARK_CONFIG}" \
         --target_device_name="${TARGET_DEVICE_NAME}" \
-        --shard_index="${SHARD_INDEX}" \
         --output="${BENCHMARK_RESULTS}" \
         --verbose
 elif [[ "${TARGET_DEVICE_NAME}" == "c2-standard-16" ]]; then
@@ -62,7 +55,6 @@
         --e2e_test_artifacts_dir="${E2E_TEST_ARTIFACTS_DIR}" \
         --execution_benchmark_config="${EXECUTION_BENCHMARK_CONFIG}" \
         --target_device_name="${TARGET_DEVICE_NAME}" \
-        --shard_index="${SHARD_INDEX}" \
         --output="${BENCHMARK_RESULTS}" \
         --device_model=GCP-c2-standard-16 \
         --cpu_uarch=CascadeLake \
@@ -76,7 +68,6 @@
     --e2e_test_artifacts_dir="${E2E_TEST_ARTIFACTS_DIR}" \
     --execution_benchmark_config="${EXECUTION_BENCHMARK_CONFIG}" \
     --target_device_name="${TARGET_DEVICE_NAME}" \
-    --shard_index="${SHARD_INDEX}" \
     --output="${BENCHMARK_RESULTS}" \
     --pin-cpu-freq \
     --pin-gpu-freq \
diff --git a/build_tools/benchmarks/run_benchmarks_on_linux.py b/build_tools/benchmarks/run_benchmarks_on_linux.py
index eae6fc2..6820e0f 100755
--- a/build_tools/benchmarks/run_benchmarks_on_linux.py
+++ b/build_tools/benchmarks/run_benchmarks_on_linux.py
@@ -142,52 +142,13 @@
     benchmark_config = BenchmarkConfig.build_from_args(args, commit)
 
     benchmark_groups = json.loads(args.execution_benchmark_config.read_text())
-
     benchmark_group = benchmark_groups.get(args.target_device_name)
     if benchmark_group is None:
-        raise ValueError(
-            "Target device '{}' not found in the benchmark config.".format(
-                args.target_device_name
-            )
-        )
-
-    if args.shard_index is None:
-        # In case no shard index was given we will run ALL benchmarks from ALL shards
-        packed_run_configs = [
-            shard["run_configs"] for shard in benchmark_group["shards"]
-        ]
-    else:
-        # Otherwise we will only run the benchmarks from the given shard
-        benchmark_shard = next(
-            (
-                shard
-                for shard in benchmark_group["shards"]
-                if shard["index"] == args.shard_index
-            ),
-            None,
-        )
-        if benchmark_shard is None:
-            raise ValueError(
-                "Given shard (index={}) not found in the benchmark config group. Available indexes: [{}].".format(
-                    args.shard_index,
-                    ", ".join(
-                        str(shard["index"]) for shard in benchmark_group["shards"]
-                    ),
-                )
-            )
-        packed_run_configs = [benchmark_shard["run_configs"]]
-
-    # When no `shard_index` is given we might have more than one shard to process.
-    # We do this by deserializing the `run_config` field from each shard separately
-    # and then merge the unpacked flat lists of `E2EModelRunConfig`.
-    run_configs = [
-        run_config
-        for packed_run_config in packed_run_configs
-        for run_config in serialization.unpack_and_deserialize(
-            data=packed_run_config,
-            root_type=typing.List[iree_definitions.E2EModelRunConfig],
-        )
-    ]
+        raise ValueError("Target device not found in the benchmark config.")
+    run_configs = serialization.unpack_and_deserialize(
+        data=benchmark_group["run_configs"],
+        root_type=typing.List[iree_definitions.E2EModelRunConfig],
+    )
     benchmark_suite = BenchmarkSuite.load_from_run_configs(
         run_configs=run_configs, root_benchmark_dir=benchmark_config.root_benchmark_dir
     )
diff --git a/build_tools/cmake/build_e2e_test_artifacts.sh b/build_tools/cmake/build_e2e_test_artifacts.sh
index 24d964c..f87ceb4 100755
--- a/build_tools/cmake/build_e2e_test_artifacts.sh
+++ b/build_tools/cmake/build_e2e_test_artifacts.sh
@@ -28,7 +28,6 @@
 IREE_HOST_BIN_DIR="$(realpath ${IREE_HOST_BIN_DIR})"
 BENCHMARK_PRESETS="${IREE_BENCHMARK_PRESETS:-}"
 BUILD_DEFAULT_BENCHMARK_SUITES="${IREE_BUILD_DEFAULT_BENCHMARK_SUITES:-1}"
-SHARD_COUNT="${IREE_SHARD_COUNT:-default=1}"
 
 source build_tools/cmake/setup_build.sh
 source build_tools/cmake/setup_tf_python.sh
@@ -102,8 +101,7 @@
 ./build_tools/benchmarks/export_benchmark_config.py \
   execution \
   --benchmark_presets="${EXECUTION_PRESETS}" \
-  --output="${EXECUTION_CONFIG}" \
-  --shard_count="${SHARD_COUNT}"
+  --output="${EXECUTION_CONFIG}"
 ./build_tools/benchmarks/benchmark_helper.py dump-cmds \
   --execution_benchmark_config="${EXECUTION_CONFIG}" \
   --compilation_benchmark_config="${COMPILATION_CONFIG}" \
diff --git a/docs/developers/developing_iree/benchmark_suites.md b/docs/developers/developing_iree/benchmark_suites.md
index 5f42425..386af88 100644
--- a/docs/developers/developing_iree/benchmark_suites.md
+++ b/docs/developers/developing_iree/benchmark_suites.md
@@ -305,9 +305,6 @@
 # Execution benchmark raw results
 gcloud storage cp "${EXECUTION_BENCHMARK_RESULTS_DIR_URL?}/benchmark-results-*.json" .
 
-# Optional: Merge raw results into a single file
-build_tools/benchmarks/benchmark_helper.py merge-results benchmark-results-*.json > benchmark_results.json
-
 # Execution benchmark traces
 gcloud storage cp "${EXECUTION_BENCHMARK_RESULTS_DIR_URL?}/benchmark-traces-*.tar.gz" .