blob: 44c6893cf4bedc03fc460c62402e88d45f03e51a [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Miscellaneous tool to help work with benchmark suite and benchmark CI."""
import pathlib
import sys
# Add build_tools python dir to the search path.
sys.path.insert(0, str(pathlib.Path(__file__).parent.with_name("python")))
import argparse
import json
import os
import shlex
import subprocess
from typing import Dict, List, Optional, Sequence
import functools
from e2e_test_artifacts import model_artifacts, iree_artifacts
from e2e_test_framework import serialization
from e2e_test_framework.definitions import iree_definitions
IREE_COMPILER_NAME = "iree-compile"
def _convert_to_cmd_string(cmds: Sequence[str]) -> str:
if os.name == "nt":
# list2cmdline is an undocumented method for Windows command lines. Python
# doesn't provide an official method for quoting Windows command lines and
# the correct implementation is slightly non-trivial. Use the undocumented
# method for now and can be rewritten with our own implementation later.
# See https://learn.microsoft.com/en-us/archive/blogs/twistylittlepassagesallalike/everyone-quotes-command-line-arguments-the-wrong-way
return subprocess.list2cmdline(cmds)
return " ".join(shlex.quote(cmd) for cmd in cmds)
def _dump_cmds_of_generation_config(
    gen_config: iree_definitions.ModuleGenerationConfig,
    root_path: pathlib.PurePath = pathlib.PurePath(),
):
    """Builds the printable compile and import command lines for one module
    generation config.

    Returns a list of output lines: the compile command, then the import
    command, each followed by a blank line.
    """
    imported_model = gen_config.imported_model
    imported_model_path = iree_artifacts.get_imported_model_path(
        imported_model=imported_model, root_path=root_path
    )
    module_dir_path = iree_artifacts.get_module_dir_path(
        module_generation_config=gen_config, root_path=root_path
    )
    module_path = module_dir_path / iree_artifacts.MODULE_FILENAME

    compile_args = [
        IREE_COMPILER_NAME,
        str(imported_model_path),
        "-o",
        str(module_path),
    ]
    compile_args.extend(
        gen_config.materialize_compile_flags(module_dir_path=module_dir_path)
    )
    compile_cmd_str = _convert_to_cmd_string(compile_args)

    if imported_model.import_config.tool == iree_definitions.ImportTool.NONE:
        # Nothing to import; the source model is consumed directly.
        import_cmd_str = "# (Source model is already in MLIR)"
    else:
        source_model_path = model_artifacts.get_model_path(
            model=imported_model.model, root_path=root_path
        )
        import_args = [
            imported_model.import_config.tool.value,
            str(source_model_path),
            "-o",
            str(imported_model_path),
        ]
        import_args.extend(
            imported_model.import_config.materialize_import_flags(
                model=imported_model.model
            )
        )
        import_cmd_str = _convert_to_cmd_string(import_args)

    # Insert a blank line after each command to help read with line wrap.
    return ["Compile Module:", compile_cmd_str, "", "Import Model:", import_cmd_str, ""]
def _dump_cmds_from_run_config(
    run_config: iree_definitions.E2EModelRunConfig,
    root_path: pathlib.PurePath = pathlib.PurePath(),
):
    """Builds the printable run command line for one run config, followed by
    the compile/import commands of its underlying generation config."""
    gen_config = run_config.module_generation_config
    module_dir = iree_artifacts.get_module_dir_path(
        module_generation_config=gen_config, root_path=root_path
    )
    module_path = module_dir / iree_artifacts.MODULE_FILENAME

    run_args = [run_config.tool.value, f"--module={module_path}"]
    run_args.extend(run_config.materialize_run_flags())

    # Insert a blank line after the command to help read with line wrap.
    output_lines = ["Run Module:", _convert_to_cmd_string(run_args), ""]
    output_lines.extend(
        _dump_cmds_of_generation_config(gen_config=gen_config, root_path=root_path)
    )
    return output_lines
def _dump_cmds_handler(
    e2e_test_artifacts_dir: pathlib.Path,
    execution_benchmark_config: Optional[pathlib.Path],
    compilation_benchmark_config: Optional[pathlib.Path],
    benchmark_id: Optional[str],
    **_unused_args,
):
    """Prints the manual import/compile/run commands for benchmarks.

    Reads the exported execution and/or compilation benchmark configs and dumps
    the commands for every benchmark, or only the one matching `benchmark_id`
    when it is given.
    """
    lines = []

    if execution_benchmark_config is not None:
        benchmark_groups = json.loads(execution_benchmark_config.read_text())
        for target_device, benchmark_group in benchmark_groups.items():
            shards = benchmark_group["shards"]
            shard_count = len(shards)
            for shard in shards:
                run_configs = serialization.unpack_and_deserialize(
                    data=shard["run_configs"],
                    root_type=List[iree_definitions.E2EModelRunConfig],
                )
                for run_config in run_configs:
                    # Skip benchmarks that don't match the requested id.
                    if (
                        benchmark_id is not None
                        and benchmark_id != run_config.composite_id
                    ):
                        continue
                    lines.extend(
                        [
                            "################",
                            "",
                            f"Execution Benchmark ID: {run_config.composite_id}",
                            f"Name: {run_config}",
                            f"Target Device: {target_device}",
                            f"Shard: {shard['index']} / {shard_count}",
                            "",
                        ]
                    )
                    lines.extend(
                        _dump_cmds_from_run_config(
                            run_config=run_config, root_path=e2e_test_artifacts_dir
                        )
                    )

    if compilation_benchmark_config is not None:
        benchmark_config = json.loads(compilation_benchmark_config.read_text())
        gen_configs = serialization.unpack_and_deserialize(
            data=benchmark_config["generation_configs"],
            root_type=List[iree_definitions.ModuleGenerationConfig],
        )
        for gen_config in gen_configs:
            # Skip benchmarks that don't match the requested id.
            if benchmark_id is not None and benchmark_id != gen_config.composite_id:
                continue
            lines.extend(
                [
                    "################",
                    "",
                    f"Compilation Benchmark ID: {gen_config.composite_id}",
                    f"Name: {gen_config}",
                    "",
                ]
            )
            lines.extend(
                _dump_cmds_of_generation_config(
                    gen_config=gen_config, root_path=e2e_test_artifacts_dir
                )
            )

    print(*lines, sep="\n")
class JSONBackedBenchmarkData:
    """Represents a benchmark results file with the data already loaded from a
    JSON file."""

    def __init__(self, source_filepath: pathlib.PurePath, data: Dict):
        """Wraps `data` read from `source_filepath` after sanity checks.

        Raises:
          ValueError: if `data` is not a JSON object (dict) or a required
            top-level field is missing.
        """
        if not isinstance(data, dict):
            raise ValueError(
                f"'{source_filepath}' seems not to be a valid benchmark-results-file (No JSON struct as root element)."
            )
        # Both fields are required for merging; check them up front so the
        # failure points at the offending file, not a later KeyError.
        for field in ("commit", "benchmarks"):
            if field not in data:
                raise ValueError(
                    f"'{source_filepath}' seems not to be a valid benchmark-results-file ('{field}' field not found)."
                )
        self.source_filepath: pathlib.PurePath = source_filepath
        self.data: Dict = data

    @staticmethod
    def load_from_file(filepath: pathlib.Path) -> "JSONBackedBenchmarkData":
        """Parses a JSON benchmark results file and makes some sanity checks.

        Raises:
          ValueError: if the file is not valid JSON or fails the sanity checks.
        """
        try:
            data = json.loads(filepath.read_bytes())
        except json.JSONDecodeError as e:
            # Chain the decode error so the position/context isn't lost.
            raise ValueError(
                f"'{filepath}' seems not to be a valid JSON file: {e.msg}"
            ) from e
        return JSONBackedBenchmarkData(filepath, data)

    @staticmethod
    def load_many_from_files(filepaths: Sequence[pathlib.Path]):
        """A convenience wrapper around `load_from_file` that accepts a sequence
        of paths and returns a generator of JSONBackedBenchmarkData objects."""
        return (
            JSONBackedBenchmarkData.load_from_file(filepath) for filepath in filepaths
        )
def _merge_two_resultsets(
    left: JSONBackedBenchmarkData, right: JSONBackedBenchmarkData
) -> JSONBackedBenchmarkData:
    """Merges the benchmark results from `right` into `left` in place and
    returns the updated `left`. Both result sets must share the same commit."""
    left_commit = left.data["commit"]
    right_commit = right.data["commit"]
    if left_commit != right_commit:
        raise ValueError(
            f"'{right.source_filepath}' and the previous files are based on different commits ({left_commit} != {right_commit}). Merging not supported."
        )
    left.data["benchmarks"].extend(right.data["benchmarks"])
    return left
def merge_results(benchmark_results: Sequence[JSONBackedBenchmarkData]):
    """Folds a sequence of result sets pairwise into a single merged one.

    Raises TypeError on an empty sequence (from functools.reduce).
    """
    merged = functools.reduce(_merge_two_resultsets, benchmark_results)
    return merged
def _merge_results_handler(
    benchmark_results_files: Sequence[pathlib.Path], **_unused_args
):
    """Merges multiple benchmark results files and prints the combined JSON.

    Fix: json.dumps cannot serialize a JSONBackedBenchmarkData object (it would
    raise TypeError), so serialize the wrapped `.data` dict instead.
    """
    merged = merge_results(
        JSONBackedBenchmarkData.load_many_from_files(benchmark_results_files)
    )
    print(json.dumps(merged.data))
def _parse_arguments() -> argparse.Namespace:
    """Builds the CLI and parses sys.argv into a namespace with a `handler`."""
    parser = argparse.ArgumentParser(
        description="Miscellaneous tool to help work with benchmark suite and benchmark CI."
    )
    subparsers = parser.add_subparsers(
        required=True, title="operation", dest="operation"
    )

    # Subcommand: dump the manual import/compile/run command lines.
    dump_cmds = subparsers.add_parser(
        "dump-cmds", help="Dump the commands to compile and run benchmarks manually."
    )
    dump_cmds.add_argument(
        "--e2e_test_artifacts_dir",
        type=pathlib.PurePath,
        default=pathlib.Path(),
        help="E2E test artifacts root path used in the outputs of artifact paths",
    )
    dump_cmds.add_argument(
        "--benchmark_id", type=str, help="Only dump the benchmark with this id"
    )
    dump_cmds.add_argument(
        "--execution_benchmark_config",
        type=pathlib.Path,
        help="Config file exported from export_benchmark_config.py execution",
    )
    dump_cmds.add_argument(
        "--compilation_benchmark_config",
        type=pathlib.Path,
        help="Config file exported from export_benchmark_config.py compilation",
    )
    dump_cmds.set_defaults(handler=_dump_cmds_handler)

    # Subcommand: merge multiple benchmark results files into one.
    merge_results_cmd = subparsers.add_parser(
        "merge-results",
        help="Merges the results from multiple benchmark results JSON files into a single JSON structure.",
    )
    merge_results_cmd.add_argument(
        "benchmark_results_files",
        type=pathlib.Path,
        nargs="+",
        help="One or more benchmark results JSON file paths",
    )
    merge_results_cmd.set_defaults(handler=_merge_results_handler)

    args = parser.parse_args()

    # dump-cmds needs at least one config file to have anything to dump.
    if (
        args.operation == "dump-cmds"
        and args.execution_benchmark_config is None
        and args.compilation_benchmark_config is None
    ):
        parser.error(
            "At least one of --execution_benchmark_config or "
            "--compilation_benchmark_config must be set."
        )

    return args
def main(args: argparse.Namespace):
    """Invokes the subcommand handler chosen during argument parsing, passing
    every parsed argument (including `handler` itself) as keyword arguments."""
    handler = args.handler
    handler(**vars(args))
# Script entry point: parse CLI arguments and dispatch to the chosen handler.
if __name__ == "__main__":
    main(_parse_arguments())