#!/usr/bin/env python3
# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Exports JSON config for benchmarking and compilation statistics.
Export type: "execution" outputs:
[
<target device name>: {
host_environment: HostEnvironment,
module_dir_paths: [<paths of dependent module directories>],
run_configs: serialized [E2EModelRunConfig]
},
...
]
to be used in build_tools/benchmarks/run_benchmarks_on_*.py
Export type: "compilation" outputs:
{
module_dir_paths: [<paths of dependent module directories>],
generation_configs: serialized [ModuleGenerationConfig]
}
of generation configs defined for compilation statistics, to be used in
build_tools/benchmarks/collect_compilation_statistics.py
"""
import sys
import pathlib
# Add build_tools python dir to the search path.
sys.path.insert(0, str(pathlib.Path(__file__).parent.with_name("python")))
from typing import Dict, Iterable, List, Optional, Set, Sequence
import argparse
import collections
import dataclasses
import json
import textwrap
from benchmark_suites.iree import benchmark_collections, benchmark_presets
from e2e_test_artifacts import iree_artifacts
from e2e_test_framework import serialization
from e2e_test_framework.definitions import iree_definitions
def filter_and_group_run_configs(
run_configs: List[iree_definitions.E2EModelRunConfig],
target_device_names: Optional[Set[str]] = None,
presets: Optional[Set[str]] = None,
) -> Dict[str, List[iree_definitions.E2EModelRunConfig]]:
"""Filters run configs and groups by target device name.
Args:
run_configs: source e2e model run configs.
target_device_names: list of target device names, includes all if not set.
presets: set of presets, matches all if not set.
Returns:
A map of e2e model run configs keyed by target device name.
"""
grouped_run_config_map = collections.defaultdict(list)
for run_config in run_configs:
device_name = run_config.target_device_spec.device_name
if target_device_names is not None and device_name not in target_device_names:
continue
if presets is not None and not presets.intersection(run_config.presets):
continue
grouped_run_config_map[device_name].append(run_config)
return grouped_run_config_map
def _get_distinct_module_dir_paths(
module_generation_configs: Iterable[iree_definitions.ModuleGenerationConfig],
root_path: pathlib.PurePath = pathlib.PurePath(),
) -> List[str]:
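    """Returns the sorted, distinct module directory paths of the given
    generation configs under `root_path`."""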
module_dir_paths = (
str(iree_artifacts.get_module_dir_path(config, root_path=root_path))
for config in module_generation_configs
)
return sorted(set(module_dir_paths))
def _export_execution_handler(
presets: Optional[Sequence[str]] = None,
target_device_names: Optional[Sequence[str]] = None,
shard_count: Optional[Dict[str, int]] = None,
**_unused_args,
):
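    """Exports the execution benchmark config, grouped by target device and
    split into shards."""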
_, all_run_configs = benchmark_collections.generate_benchmarks()
target_device_name_set = (
None if target_device_names is None else set(target_device_names)
)
grouped_run_config_map = filter_and_group_run_configs(
all_run_configs,
target_device_names=target_device_name_set,
presets=None if presets is None else set(presets),
)
shard_count = {} if shard_count is None else shard_count
default_shard_count = shard_count.get("default", 1)
output_map = {}
for device_name, run_configs in grouped_run_config_map.items():
host_environments = set(
run_config.target_device_spec.host_environment for run_config in run_configs
)
        if len(host_environments) > 1:
            raise ValueError(
                f"Device specs of device '{device_name}' should have the same"
                " host environment."
            )
host_environment = host_environments.pop()
current_shard_count = int(shard_count.get(device_name, default_shard_count))
        # Split `run_configs` into `current_shard_count` sub-lists in a
        # round-robin fashion. Example: with current_shard_count = 3 and
        # run_configs = list(range(10)),
        # sharded_run_configs == [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]].
sharded_run_configs = [
run_configs[shard_idx::current_shard_count]
for shard_idx in range(current_shard_count)
]
for index, shard in enumerate(sharded_run_configs):
distinct_module_dir_paths = _get_distinct_module_dir_paths(
config.module_generation_config for config in shard
)
serialized_run_configs = serialization.serialize_and_pack(shard)
output_map.setdefault(
device_name,
{
"host_environment": dataclasses.asdict(host_environment),
"shards": [],
},
)
output_map[device_name]["shards"].append(
{
"index": index,
"module_dir_paths": distinct_module_dir_paths,
"run_configs": serialized_run_configs,
}
)
return output_map
def _export_compilation_handler(
presets: Optional[Sequence[str]] = None, **_unused_args
):
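    """Exports the compilation statistics benchmark config."""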
all_gen_configs, _ = benchmark_collections.generate_benchmarks()
if presets is None:
presets = benchmark_presets.ALL_COMPILATION_PRESETS
preset_set = set(presets)
compile_stats_gen_configs = [
gen_config
for gen_config in all_gen_configs
if preset_set.intersection(gen_config.presets)
]
distinct_module_dir_paths = _get_distinct_module_dir_paths(
compile_stats_gen_configs
)
return {
"module_dir_paths": distinct_module_dir_paths,
"generation_configs": serialization.serialize_and_pack(
compile_stats_gen_configs
),
}
def _parse_and_strip_list_argument(arg: str) -> List[str]:
    # Strip each part before filtering so whitespace-only parts are dropped too.
    stripped = (part.strip() for part in arg.split(","))
    return [part for part in stripped if part != ""]
def _parse_benchmark_presets(arg: str, available_presets: Sequence[str]) -> List[str]:
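    """Parses a comma-separated preset list and validates each preset against
    `available_presets`."""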
presets = []
for preset in _parse_and_strip_list_argument(arg):
if preset not in available_presets:
raise argparse.ArgumentTypeError(
f"Unrecognized benchmark preset: '{preset}'."
)
presets.append(preset)
return presets
def _parse_shard_count(arg: str) -> Dict[str, str]:
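    """Parses 'name=count' pairs, e.g. 'c2-standard-60=3,default=2' into
    {'c2-standard-60': '3', 'default': '2'}; values are converted to int where
    they are used."""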
return dict(map(str.strip, el.split("=", 1)) for el in arg.split(","))
def _parse_arguments():
"""Parses command-line options."""
    # Define common options in a parent parser so that they can be passed
    # *after* the subcommand. See https://stackoverflow.com/q/23296695
subparser_base = argparse.ArgumentParser(add_help=False)
subparser_base.add_argument(
"--output", type=pathlib.Path, help="Path to write the JSON output."
)
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent(
"""
Export type: "execution" outputs:
[
<target device name>: {
host_environment: HostEnvironment,
module_dir_paths: [<paths of dependent module directories>],
run_configs: serialized [E2EModelRunConfig]
},
...
]
to be used in build_tools/benchmarks/run_benchmarks_on_*.py
Export type: "compilation" outputs:
{
module_dir_paths: [<paths of dependent module directories>],
generation_configs: serialized [ModuleGenerationConfig]
}
of generation configs defined for compilation statistics, to be used in
build_tools/benchmarks/collect_compilation_statistics.py
"""
),
)
subparser = parser.add_subparsers(required=True, title="export type")
execution_parser = subparser.add_parser(
"execution",
parents=[subparser_base],
help="Export execution config to run benchmarks.",
)
execution_parser.set_defaults(handler=_export_execution_handler)
execution_parser.add_argument(
"--target_device_names",
type=_parse_and_strip_list_argument,
help=(
"Target device names, separated by comma, not specified means "
"including all devices."
),
)
execution_parser.add_argument(
"--presets",
"--benchmark_presets",
type=lambda arg: _parse_benchmark_presets(
arg, benchmark_presets.ALL_EXECUTION_PRESETS
),
help=(
"Presets that select a bundle of benchmarks, separated by comma, "
"multiple presets will be union. Available options: "
f"{','.join(benchmark_presets.ALL_EXECUTION_PRESETS)}"
),
)
execution_parser.add_argument(
"--shard_count",
type=_parse_shard_count,
default={},
help="Accepts a comma-separated list of device-name to shard-count mappings. Use reserved keyword 'default' for setting a default shard count: c2-standard-60=3,default=2",
)
compilation_parser = subparser.add_parser(
"compilation",
parents=[subparser_base],
help=(
"Export serialized list of module generation configs defined for "
"compilation statistics."
),
)
compilation_parser.set_defaults(handler=_export_compilation_handler)
compilation_parser.add_argument(
"--presets",
"--benchmark_presets",
type=lambda arg: _parse_benchmark_presets(
arg, benchmark_presets.ALL_COMPILATION_PRESETS
),
help=(
"Presets `comp-stats*` that select a bundle of compilation"
" benchmarks, separated by comma, multiple presets will be union."
" Available options: "
f"{','.join(benchmark_presets.ALL_COMPILATION_PRESETS)}"
),
)
return parser.parse_args()
def main(args: argparse.Namespace):
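    """Runs the selected export handler and writes the JSON to --output, or
    prints it to stdout if --output is not given."""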
output_obj = args.handler(**vars(args))
json_data = json.dumps(output_obj, indent=2)
if args.output is None:
print(json_data)
else:
args.output.write_text(json_data)
if __name__ == "__main__":
main(_parse_arguments())