Make config easier to use for local benchmark run (#12435)

diff --git a/.github/workflows/benchmark_execution.yml b/.github/workflows/benchmark_execution.yml
index 2b7bcaf..d317f5e 100644
--- a/.github/workflows/benchmark_execution.yml
+++ b/.github/workflows/benchmark_execution.yml

@@ -165,19 +165,15 @@
       - name: "Running benchmarks"
         id: run
         env:
-          BENCHMARK_CONFIG: ${{ steps.download-assets.outputs.benchmark-config }}
+          IREE_EXECUTION_BENCHMARK_CONFIG: ${{ steps.download-assets.outputs.benchmark-config }}
           IREE_DOCKER_WRAPPER: ./build_tools/github_actions/docker_run.sh
           IREE_NORMAL_BENCHMARK_TOOLS_DIR: ${{ steps.unpack-tools.outputs.normal-benchmark-tools-dir }}
           IREE_TRACED_BENCHMARK_TOOLS_DIR: ${{ steps.unpack-tools.outputs.traced-benchmark-tools-dir }}
-          IREE_DEVICE_NAME: ${{ env.DEVICE_NAME }}
+          IREE_TARGET_DEVICE_NAME: ${{ env.DEVICE_NAME }}
           IREE_E2E_TEST_ARTIFACTS_DIR: ${{ env.E2E_TEST_ARTIFACTS_DIR }}
-          IREE_RUN_CONFIG: run-config.json
           IREE_BENCHMARK_RESULTS: ${{ env.BENCHMARK_RESULTS_DIR }}/benchmark-results-${{ matrix.benchmark.device_name }}.json
         run: |
           mkdir -p ${BENCHMARK_RESULTS_DIR}
-          jq --arg DEVICE_NAME "${IREE_DEVICE_NAME}" \
-            '.[$DEVICE_NAME] | .run_configs' \
-            "${BENCHMARK_CONFIG}" > "${IREE_RUN_CONFIG}"
           ./build_tools/benchmarks/run_benchmarks.sh
           echo "benchmark-results=${IREE_BENCHMARK_RESULTS}" >> "${GITHUB_OUTPUT}"
       - name: "Uploading benchmark results"

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 55b9974..902ff37 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml

@@ -1036,12 +1036,10 @@
-          GENERATION_CONFIG: generation-config.json
           COMPILE_STATS_RESULTS: benchmark-results/compile-stats-results.json
         run: |
-          jq '.generation_configs' "${COMPILATION_CONFIG}" > "${GENERATION_CONFIG}"
           mkdir -p benchmark-results
           ./build_tools/benchmarks/collect_compilation_statistics.py alpha \
             --e2e_test_artifacts_dir="${E2E_TEST_ARTIFACTS_DIR}" \
             --build_log="${E2E_TEST_ARTIFACTS_BUILD_LOG}" \
-            --generation_config="${GENERATION_CONFIG}" \
+            --compilation_benchmark_config="${COMPILATION_CONFIG}" \
             --output="${COMPILE_STATS_RESULTS}"
           echo "compile-stats-results=${COMPILE_STATS_RESULTS}" >> "${GITHUB_OUTPUT}"
       - name: "Uploading benchmark results"

diff --git a/build_tools/benchmarks/collect_compilation_statistics.py b/build_tools/benchmarks/collect_compilation_statistics.py
index bc91dd2..3550065 100755
--- a/build_tools/benchmarks/collect_compilation_statistics.py
+++ b/build_tools/benchmarks/collect_compilation_statistics.py

@@ -133,11 +133,13 @@
   return module_path
 
 
-def get_module_map_from_generation_config(
-    serialized_gen_config: TextIO, e2e_test_artifacts_dir: pathlib.PurePath
+def get_module_map_from_compilation_benchmark_config(
+    compilation_benchmark_config_data: TextIO,
+    e2e_test_artifacts_dir: pathlib.PurePath
 ) -> Dict[CompilationInfo, pathlib.Path]:
+  benchmark_config = json.load(compilation_benchmark_config_data)
   gen_configs = serialization.unpack_and_deserialize(
-      data=json.load(serialized_gen_config),
+      data=benchmark_config["generation_configs"],
       root_type=List[iree_definitions.ModuleGenerationConfig])
   module_map = {}
   for gen_config in gen_configs:
@@ -203,8 +205,9 @@
 
 
 def _alpha_get_module_map_and_build_log(args: argparse.Namespace):
-  module_map = get_module_map_from_generation_config(
-      serialized_gen_config=args.generation_config.open("r"),
-      e2e_test_artifacts_dir=args.e2e_test_artifacts_dir)
+  with args.compilation_benchmark_config.open("r") as config_data:
+    module_map = get_module_map_from_compilation_benchmark_config(
+        compilation_benchmark_config_data=config_data,
+        e2e_test_artifacts_dir=args.e2e_test_artifacts_dir)
   return module_map, args.build_log
 
@@ -257,10 +260,10 @@
   alpha_parser.set_defaults(
       get_module_map_and_build_log=_alpha_get_module_map_and_build_log)
   alpha_parser.add_argument(
-      "--generation_config",
+      "--compilation_benchmark_config",
       type=_check_file_path,
       required=True,
-      help="Exported module generation config of e2e test artifacts.")
+      help="Exported compilation benchmark config of e2e test artifacts.")
   alpha_parser.add_argument("--build_log",
                             type=_check_file_path,
                             required=True,

diff --git a/build_tools/benchmarks/collect_compilation_statistics_test.py b/build_tools/benchmarks/collect_compilation_statistics_test.py
index e03e4f2..ac96bdc 100644
--- a/build_tools/benchmarks/collect_compilation_statistics_test.py
+++ b/build_tools/benchmarks/collect_compilation_statistics_test.py

@@ -92,7 +92,7 @@
 
     self.assertEqual(moduel_path, "/abcd-compile-stats.vmfb")
 
-  def test_get_module_map_from_generation_config(self):
+  def test_get_module_map_from_compilation_benchmark_config(self):
     model_a = common_definitions.Model(
         id="1234",
         name="tflite_m",
@@ -126,12 +126,14 @@
         imported_model=imported_model_a, compile_config=compile_config_a)
     gen_config_b = iree_definitions.ModuleGenerationConfig.with_flag_generation(
         imported_model=imported_model_a, compile_config=compile_config_b)
-    serialized_gen_config = json.dumps(
-        serialization.serialize_and_pack([gen_config_a, gen_config_b]))
+    benchmark_config = dict(generation_configs=serialization.serialize_and_pack(
+        [gen_config_a, gen_config_b]),
+                            module_dir_paths=["a", "b"])
     root_dir = pathlib.PurePath("artifacts_dir")
 
-    module_map = collect_compilation_statistics.get_module_map_from_generation_config(
-        serialized_gen_config=StringIO(serialized_gen_config),
+    module_map = collect_compilation_statistics.get_module_map_from_compilation_benchmark_config(
+        compilation_benchmark_config_data=StringIO(
+            json.dumps(benchmark_config)),
         e2e_test_artifacts_dir=root_dir)
 
     compile_info_a = common.benchmark_definition.CompilationInfo(

diff --git a/build_tools/benchmarks/common/benchmark_config.py b/build_tools/benchmarks/common/benchmark_config.py
index 2677857..2d2bc59 100644
--- a/build_tools/benchmarks/common/benchmark_config.py
+++ b/build_tools/benchmarks/common/benchmark_config.py

@@ -113,7 +113,7 @@
     else:
       # TODO(#11076): Remove legacy path.
       build_dir = args.build_dir.resolve()
-      if args.run_config is not None:
+      if args.execution_benchmark_config is not None:
         root_benchmark_dir = build_dir / E2E_TEST_ARTIFACTS_REL_PATH
       else:
         root_benchmark_dir = build_dir / BENCHMARK_SUITE_REL_PATH

diff --git a/build_tools/benchmarks/common/benchmark_config_test.py b/build_tools/benchmarks/common/benchmark_config_test.py
index 7bef77a..e432cf5 100644
--- a/build_tools/benchmarks/common/benchmark_config_test.py
+++ b/build_tools/benchmarks/common/benchmark_config_test.py

@@ -93,13 +93,15 @@
 
   def test_build_from_args_with_e2e_test_artifacts_dir(self):
     with tempfile.TemporaryDirectory() as e2e_test_artifacts_dir:
-      run_config = pathlib.Path(e2e_test_artifacts_dir) / "run_config.json"
-      run_config.touch()
+      exec_bench_config = pathlib.Path(
+          e2e_test_artifacts_dir) / "exec_bench_config.json"
+      exec_bench_config.touch()
       args = common_arguments.Parser().parse_args([
           f"--tmp_dir={self.tmp_dir}",
           f"--normal_benchmark_tool_dir={self.normal_tool_dir}",
           f"--e2e_test_artifacts_dir={e2e_test_artifacts_dir}",
-          f"--run_config={run_config}"
+          f"--execution_benchmark_config={exec_bench_config}",
+          f"--target_device_name=device_a",
       ])
 
       config = benchmark_config.BenchmarkConfig.build_from_args(
@@ -108,14 +110,16 @@
       self.assertEqual(config.root_benchmark_dir,
                        pathlib.Path(e2e_test_artifacts_dir))
 
-  def test_build_from_args_with_run_config_and_build_dir(self):
+  def test_build_from_args_with_execution_benchmark_config_and_build_dir(self):
     with tempfile.TemporaryDirectory() as e2e_test_artifacts_dir:
-      run_config = pathlib.Path(e2e_test_artifacts_dir) / "run_config.json"
-      run_config.touch()
+      exec_bench_config = pathlib.Path(
+          e2e_test_artifacts_dir) / "exec_bench_config.json"
+      exec_bench_config.touch()
       args = common_arguments.Parser().parse_args([
           f"--tmp_dir={self.tmp_dir}",
           f"--normal_benchmark_tool_dir={self.normal_tool_dir}",
-          f"--run_config={run_config}",
+          f"--execution_benchmark_config={exec_bench_config}",
+          f"--target_device_name=device_a",
           str(self.build_dir)
       ])
 

diff --git a/build_tools/benchmarks/common/common_arguments.py b/build_tools/benchmarks/common/common_arguments.py
index ea14265..b6fc8f0 100644
--- a/build_tools/benchmarks/common/common_arguments.py
+++ b/build_tools/benchmarks/common/common_arguments.py

@@ -59,7 +59,7 @@
         help=(
             "Path to the IREE e2e test artifacts directory. This will override "
             "<build-dir> and eventually replace it. For now must use with "
-            "--run_config"))
+            "--execution_benchmark_config"))
 
     self.add_argument(
         "--normal_benchmark_tool_dir",
@@ -155,17 +155,33 @@
         "for). In that case, no --benchmark_repetitions flag will be passed."
         " If not specified, a --benchmark_repetitions will be passed "
         "instead.")
-    self.add_argument("--run_config",
+    self.add_argument("--execution_benchmark_config",
                       type=_check_file_path,
                       default=None,
-                      help="JSON file of the run config")
+                      help="JSON config for the execution benchmarks")
+    self.add_argument("--target_device_name",
+                      type=str,
+                      default=None,
+                      help="Target device in benchmark config to run")
 
   def parse_args(
       self, arg_strs: Optional[Sequence[str]] = None) -> argparse.Namespace:
     args = super().parse_args(arg_strs)
 
-    if args.e2e_test_artifacts_dir is not None and args.run_config is None:
-      raise self.error("--e2e_test_artifacts_dir requires --run_config.")
+    # TODO(#11076): Remove these checks and make --execution_benchmark_config
+    # and --target_device_name required args.
+    use_new_benchmark_suite = (args.execution_benchmark_config is not None or
+                               args.target_device_name is not None)
+    if use_new_benchmark_suite:
+      if (args.execution_benchmark_config is None or
+          args.target_device_name is None):
+        self.error(
+            "--execution_benchmark_config and --target_device_name must be set together."
+        )
+    elif args.e2e_test_artifacts_dir is not None:
+      self.error(
+          "--e2e_test_artifacts_dir requires --execution_benchmark_config and --target_device_name."
+      )
 
     return args
 

diff --git a/build_tools/benchmarks/common/common_arguments_test.py b/build_tools/benchmarks/common/common_arguments_test.py
index 143a850..1d9a3a6 100644
--- a/build_tools/benchmarks/common/common_arguments_test.py
+++ b/build_tools/benchmarks/common/common_arguments_test.py

@@ -42,12 +42,19 @@
     with self.assertRaises(SystemExit):
       arg_parser.parse_args(["--trace_capture_tool=nonexistent", "."])
 
-  def test_parser_e2e_test_artifacts_dir_requires_run_config(self):
+  def test_parser_e2e_test_artifacts_dir_needs_execution_benchmark_config(self):
     arg_parser = common.common_arguments.Parser()
     with tempfile.TemporaryDirectory() as tempdir:
       with self.assertRaises(SystemExit):
         arg_parser.parse_args([f"--e2e_test_artifacts_dir={tempdir}"])
 
+  def test_parser_only_execution_benchmark_config_or_target_device_name(self):
+    arg_parser = common.common_arguments.Parser()
+    with self.assertRaises(SystemExit):
+      arg_parser.parse_args([f"--execution_benchmark_config={__file__}"])
+    with self.assertRaises(SystemExit):
+      arg_parser.parse_args(["--target_device_name=device_a"])
+
 
 if __name__ == "__main__":
   unittest.main()

diff --git a/build_tools/benchmarks/run_benchmarks.sh b/build_tools/benchmarks/run_benchmarks.sh
index 991973f..7b8694b 100755
--- a/build_tools/benchmarks/run_benchmarks.sh
+++ b/build_tools/benchmarks/run_benchmarks.sh

@@ -20,11 +20,11 @@
 set -euo pipefail
 
 DOCKER_WRAPPER="${IREE_DOCKER_WRAPPER:-./build_tools/docker/docker_run.sh}"
-DEVICE_NAME="${IREE_DEVICE_NAME}"
 NORMAL_BENCHMARK_TOOLS_DIR="${IREE_NORMAL_BENCHMARK_TOOLS_DIR}"
 E2E_TEST_ARTIFACTS_DIR="${1:-${IREE_E2E_TEST_ARTIFACTS_DIR}}"
-RUN_CONFIG="${2:-${IREE_RUN_CONFIG}}"
-BENCHMARK_RESULTS="${3:-${IREE_BENCHMARK_RESULTS}}"
+EXECUTION_BENCHMARK_CONFIG="${2:-${IREE_EXECUTION_BENCHMARK_CONFIG}}"
+TARGET_DEVICE_NAME="${3:-${IREE_TARGET_DEVICE_NAME}}"
+BENCHMARK_RESULTS="${4:-${IREE_BENCHMARK_RESULTS}}"
 
 if [[ "${DEVICE_NAME}" == "a2-highgpu-1g" ]]; then
   ${DOCKER_WRAPPER} \
@@ -34,7 +34,8 @@
       ./build_tools/benchmarks/run_benchmarks_on_linux.py \
         --normal_benchmark_tool_dir="${NORMAL_BENCHMARK_TOOLS_DIR}" \
         --e2e_test_artifacts_dir="${E2E_TEST_ARTIFACTS_DIR}" \
-        --run_config="${RUN_CONFIG}" \
+        --execution_benchmark_config="${EXECUTION_BENCHMARK_CONFIG}" \
+        --target_device_name="${TARGET_DEVICE_NAME}" \
         --output="${BENCHMARK_RESULTS}" \
         --verbose
 elif [[ "${DEVICE_NAME}" == "c2-standard-16" ]]; then
@@ -43,7 +44,8 @@
       ./build_tools/benchmarks/run_benchmarks_on_linux.py \
         --normal_benchmark_tool_dir="${NORMAL_BENCHMARK_TOOLS_DIR}" \
         --e2e_test_artifacts_dir="${E2E_TEST_ARTIFACTS_DIR}" \
-        --run_config="${RUN_CONFIG}" \
+        --execution_benchmark_config="${EXECUTION_BENCHMARK_CONFIG}" \
+        --target_device_name="${TARGET_DEVICE_NAME}" \
         --output="${BENCHMARK_RESULTS}" \
         --device_model=GCP-c2-standard-16 \
         --cpu_uarch=CascadeLake \

diff --git a/build_tools/benchmarks/run_benchmarks_on_android.py b/build_tools/benchmarks/run_benchmarks_on_android.py
index 0ff954d..3891da4 100755
--- a/build_tools/benchmarks/run_benchmarks_on_android.py
+++ b/build_tools/benchmarks/run_benchmarks_on_android.py

@@ -339,7 +339,7 @@
   if args.verbose:
     print(device_info)
 
-  if args.run_config is not None:
+  if args.execution_benchmark_config is not None:
     raise ValueError("Run config option isn't supported yet.")
 
   commit = get_git_commit_hash("HEAD")

diff --git a/build_tools/benchmarks/run_benchmarks_on_linux.py b/build_tools/benchmarks/run_benchmarks_on_linux.py
index e8a022c..d8b43ce 100755
--- a/build_tools/benchmarks/run_benchmarks_on_linux.py
+++ b/build_tools/benchmarks/run_benchmarks_on_linux.py

@@ -147,14 +147,17 @@
   commit = get_git_commit_hash("HEAD")
   benchmark_config = BenchmarkConfig.build_from_args(args, commit)
 
-  if args.run_config is None:
+  if args.execution_benchmark_config is None:
     # TODO(#11076): Remove legacy path.
     benchmark_suite = BenchmarkSuite.load_from_benchmark_suite_dir(
         benchmark_config.root_benchmark_dir)
   else:
-    run_config_data = json.loads(args.run_config.read_text())
+    benchmark_groups = json.loads(args.execution_benchmark_config.read_text())
+    benchmark_group = benchmark_groups.get(args.target_device_name)
+    if benchmark_group is None:
+      raise ValueError("Target device not found in the benchmark config.")
     run_configs = serialization.unpack_and_deserialize(
-        data=run_config_data,
+        data=benchmark_group["run_configs"],
         root_type=typing.List[iree_definitions.E2EModelRunConfig])
     benchmark_suite = BenchmarkSuite.load_from_run_configs(
         run_configs=run_configs)