#!/usr/bin/env python3
# Copyright 2021 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Generates benchmark results as pull request comments.

This script is meant to be used by CI and uses pip package "markdown_strings".
"""

import sys
import pathlib

# Add build_tools python dir to the search path.
sys.path.insert(0, str(pathlib.Path(__file__).parent.with_name("python")))

import argparse
import collections
import dataclasses
import json
from typing import Any, Dict, Optional, Set, Tuple

import markdown_strings as md
import requests

from common import benchmark_definition, benchmark_presentation, common_arguments
from reporting import benchmark_comment

GITHUB_IREE_REPO_PREFIX = "https://github.com/openxla/iree"
IREE_DASHBOARD_URL = "https://perf.iree.dev/apis/v2"
IREE_PROJECT_ID = "IREE"
# The maximum number of trials when querying base commit benchmark results.
MAX_BASE_COMMIT_QUERY_COUNT = 10
# The max number of rows to show per table.
TABLE_SIZE_CUT = 3
THIS_DIRECTORY = pathlib.Path(__file__).resolve().parent


@dataclasses.dataclass(frozen=True)
class CommentDef(object):
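    """A comment type: its display title plus the unique id embedded in the
    comment body so an existing comment can be found and updated later."""
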
    title: str
    type_id: str


# Map from comment type to comment definition.
COMMENT_DEF_MAP = {
    "android-benchmark-summary": CommentDef(
        title="Abbreviated Android Benchmark Summary",
        type_id="bf8cdf94-a992-466d-b11c-778cbd805a22",
    ),
    "linux-benchmark-summary": CommentDef(
        title="Abbreviated Linux Benchmark Summary",
        type_id="37549014-3c67-4e74-8d88-8e929231abe3",
    ),
    "benchmark-summary": CommentDef(
        title="Abbreviated Benchmark Summary",
        type_id="5b42cbfe-26a0-4164-a51c-07f06762e2dc",
    ),
}


def get_git_total_commit_count(commit: str, verbose: bool = False) -> int:
    """Gets the total commit count in history ending with the given commit."""
    # TODO(#11703): Should use --first-parent here. See issue for the required
    # work.
    count = benchmark_definition.execute_cmd_and_get_stdout(
        ["git", "rev-list", "--count", commit], cwd=THIS_DIRECTORY, verbose=verbose
    )
    return int(count)


def get_from_dashboard(
    url: str, payload: Dict[str, Any], verbose: bool = False
) -> Dict[str, Dict[str, Any]]:
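    """Sends a GET request with a JSON payload to the dashboard and returns the
    decoded JSON response."""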
    headers = {"Content-type": "application/json"}
    data = json.dumps(payload)

    if verbose:
        print(f"API request payload: {data}")

    response = requests.get(url, data=data, headers=headers)
    code = response.status_code
    if code != 200:
        raise requests.RequestException(
            f"Failed to get from dashboard server with status code {code}"
        )

    data = response.json()
    if verbose:
        print(f"Queried base benchmark data: {data}")
    return data


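# Maps a benchmark series id to its sample data as returned by the dashboard.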
BenchmarkQueryResults = Dict[str, Dict[str, Any]]


def query_base_benchmark_results(
    commit: str, verbose: bool = False
) -> BenchmarkQueryResults:
    """Queries the benchmark results for the given commit."""
    build_id = get_git_total_commit_count(commit, verbose)
    payload = {"projectId": IREE_PROJECT_ID, "buildId": build_id}
    return get_from_dashboard(
        f"{IREE_DASHBOARD_URL}/getBuild", payload, verbose=verbose
    )


@dataclasses.dataclass(frozen=True)
class ComparableBenchmarkResults(object):
    commit_sha: str
    benchmark_results: BenchmarkQueryResults


def _find_comparable_benchmark_results(
    start_commit: str, required_benchmark_keys: Set[str], verbose: bool = False
) -> Optional[ComparableBenchmarkResults]:
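    """Walks back from start_commit along first-parent history, up to
    MAX_BASE_COMMIT_QUERY_COUNT commits, and returns the first commit whose
    dashboard results cover all required benchmark keys, or None if no such
    commit is found."""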
    cmds = [
        "git",
        "rev-list",
        "--first-parent",
        f"--max-count={MAX_BASE_COMMIT_QUERY_COUNT}",
        start_commit,
    ]
    output = benchmark_definition.execute_cmd_and_get_stdout(
        cmds, cwd=THIS_DIRECTORY, verbose=verbose
    )
    previous_commits = output.splitlines()
    # Starting from the newest commit, look for one that has base benchmark
    # results to diff against. Give up after MAX_BASE_COMMIT_QUERY_COUNT tries.
    for base_commit in previous_commits:
        base_benchmarks = query_base_benchmark_results(
            commit=base_commit, verbose=verbose
        )
        base_benchmark_keys = set(base_benchmarks.keys())
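        # Use this commit only if its results cover every required benchmark key.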
        if required_benchmark_keys <= base_benchmark_keys:
            return ComparableBenchmarkResults(
                commit_sha=base_commit, benchmark_results=base_benchmarks
            )

    return None


def _get_git_commit_hash(ref: str, verbose: bool = False) -> str:
    """Gets the commit hash for the given ref."""
    return benchmark_definition.execute_cmd_and_get_stdout(
        ["git", "rev-parse", ref], cwd=THIS_DIRECTORY, verbose=verbose
    )


def _get_git_merge_base_commit(
    pr_commit: str, target_branch: str, verbose: bool = False
) -> str:
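    """Returns the merge base of the target branch and the PR commit."""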
    return benchmark_definition.execute_cmd_and_get_stdout(
        args=["git", "merge-base", target_branch, pr_commit],
        cwd=THIS_DIRECTORY,
        verbose=verbose,
    )


def _get_experimental_dt_comparison_markdown(
    execution_benchmarks: Dict[str, benchmark_presentation.AggregateBenchmarkLatency],
) -> Optional[str]:
|  | """Get the comparison table to compare different data-tiling options.""" | 

    dt_tags = {"no-dt": "No-DT (baseline)", "dt-only": "DT-Only", "dt-uk": "DT-UK"}
    latency_map = collections.defaultdict(dict)
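    # Group benchmarks that differ only in their data-tiling tag under a common
    # key, so all variants of the same model/config end up in one table row.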
    for bench_id, latency in execution_benchmarks.items():
        dt_tag = next((tag for tag in dt_tags if tag in latency.name), None)
        if dt_tag is None:
            continue
        # See build_tools/python/e2e_test_framework/definitions/iree_definitions.py
        # for how benchmark names are constructed.
        # Format: model_name gen_tags exec_tags ...
        model, gen_tags, remaining = latency.name.split(" ", maxsplit=2)
        # Format: [compile targets][tags]
        compile_targets = gen_tags.split("][")[0] + "]"
        key_name = " ".join([model, compile_targets, remaining])
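        # Latency samples are in nanoseconds (see the "ns" check in main);
        # divide by 1e6 to display milliseconds.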
        latency_map[key_name][dt_tag] = (bench_id, latency.mean_time / 1e6)

    if len(latency_map) == 0:
        return None

    # Compute speedup vs. the baseline.
    table = {}
    for key_name, data in latency_map.items():
        baseline = data.get("no-dt")
        baseline = None if baseline is None else baseline[1]
        row = {}
        for dt_tag in dt_tags:
            pair = data.get(dt_tag)
            if pair is None:
                continue
            bench_id, mean_time = pair
            text = f"{mean_time:.03f}"
            if baseline is not None:
                text += f" ({(baseline / mean_time):.01f}X)"
            row[dt_tag] = (bench_id, text)
        table[key_name] = row

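    # md.table() takes column-major data: each inner list is one column, with
    # its header as the first element.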
    table_columns = [["Name"] + list(table.keys())]
    for dt_tag, dt_name in dt_tags.items():
        column = [dt_name]
        for key_name, data in table.items():
            pair = data.get(dt_tag)
            if pair is None:
                column.append("N/A")
                continue
            bench_id, text = pair
            column.append(benchmark_presentation.make_series_link(text, bench_id))
        table_columns.append(column)

    return md.table(table_columns)


def _get_benchmark_result_markdown(
    execution_benchmarks: Dict[str, benchmark_presentation.AggregateBenchmarkLatency],
    compilation_metrics: Dict[str, benchmark_presentation.CompilationMetrics],
    pr_url: str,
    build_url: str,
    comment_def: CommentDef,
    commit_info_md: str,
) -> Tuple[str, str]:
|  | """Gets the full/abbreviated markdown summary of all benchmarks in files.""" | 

    pr_info = md.link("Pull request", pr_url)
    build_info = md.link("Build", build_url)

    # Compose the full benchmark tables.
    full_table = [md.header("Full Benchmark Summary", 2)]
    full_table.append(md.unordered_list([commit_info_md, pr_info, build_info]))

    # Compose the abbreviated benchmark tables.
    abbr_table = [md.header(comment_def.title, 2)]
    abbr_table.append(commit_info_md)

    # The temporary table to help compare different data-tiling options.
    dt_cmp_table = _get_experimental_dt_comparison_markdown(
        execution_benchmarks=execution_benchmarks
    )
    if dt_cmp_table is not None:
        dt_cmp_header = md.header("Data-Tiling Comparison Table", 3)
        full_table += [dt_cmp_header, dt_cmp_table]
        abbr_table += [
            dt_cmp_header,
            "<details>",
            "<summary>Click to show</summary>",
            dt_cmp_table,
            "</details>",
        ]

    if len(execution_benchmarks) > 0:
        full_table.append(
            benchmark_presentation.categorize_benchmarks_into_tables(
                execution_benchmarks
            )
        )

        abbr_benchmarks_tables = (
            benchmark_presentation.categorize_benchmarks_into_tables(
                execution_benchmarks, TABLE_SIZE_CUT
            )
        )
        if len(abbr_benchmarks_tables) == 0:
            abbr_table.append("No improved or regressed benchmarks 🏖️")
        else:
            abbr_table.append(abbr_benchmarks_tables)

    # Compose the full compilation metrics tables.
    if len(compilation_metrics) > 0:
        full_table.append(
            benchmark_presentation.categorize_compilation_metrics_into_tables(
                compilation_metrics
            )
        )

        abbr_compilation_metrics_tables = (
            benchmark_presentation.categorize_compilation_metrics_into_tables(
                compilation_metrics, TABLE_SIZE_CUT
            )
        )
        if len(abbr_compilation_metrics_tables) == 0:
            abbr_table.append("No improved or regressed compilation metrics 🏖️")
        else:
            abbr_table.append(abbr_compilation_metrics_tables)

    abbr_table.append("For more information:")
    # The Gist URL isn't known until the Gist is actually created. Use a
    # placeholder for now and replace it later.
    full_result_info = md.link(
        "Full benchmark result tables", benchmark_comment.GIST_LINK_PLACEHORDER
    )
    abbr_table.append(md.unordered_list([full_result_info, build_info]))

    # Append the unique comment type id to help identify and update the existing
    # comment.
    abbr_table.append(f"<!--Comment type id: {comment_def.type_id}-->")

    return "\n\n".join(full_table), "\n\n".join(abbr_table)


def parse_arguments():
    """Parses command-line options."""

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--benchmark_files",
        metavar="<benchmark-json-files>",
        default=[],
        action="append",
        help=(
|  | "Paths to the JSON files containing benchmark results, " "accepts wildcards" | 
|  | ), | 
    )
    parser.add_argument(
        "--compile_stats_files",
        metavar="<compile-stats-json-files>",
        default=[],
        action="append",
        help=(
|  | "Paths to the JSON files containing compilation statistics, " | 
|  | "accepts wildcards" | 
        ),
    )
    parser.add_argument("--pr_number", required=True, type=int, help="PR number")
    parser.add_argument(
        "--pr_committish", type=str, default="HEAD", help="PR commit hash or ref"
    )
    parser.add_argument(
|  | "--pr_base_branch", type=str, default=None, help="Base branch to merge the PR." | 
    )
    parser.add_argument(
        "--comment_type",
        required=True,
        choices=COMMENT_DEF_MAP.keys(),
        help="Type of summary comment",
    )
    parser.add_argument(
        "--build_url",
        required=True,
        type=str,
        help="CI build page url to show in the report",
    )
    parser.add_argument("--output", type=pathlib.Path, default=None)
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Print internal information during execution",
    )

    return parser.parse_args()


def main(args):
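    """Aggregates benchmark results, fetches base results to compare against,
    and writes the comment data out as JSON."""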
    benchmark_files = common_arguments.expand_and_check_file_paths(args.benchmark_files)
    compile_stats_files = common_arguments.expand_and_check_file_paths(
        args.compile_stats_files
    )

    pr_commit = _get_git_commit_hash(ref=args.pr_committish, verbose=args.verbose)
    execution_benchmarks = benchmark_presentation.aggregate_all_benchmarks(
        benchmark_files=benchmark_files, expected_pr_commit=pr_commit
    )
    compilation_metrics = benchmark_presentation.collect_all_compilation_metrics(
        compile_stats_files=compile_stats_files, expected_pr_commit=pr_commit
    )

    if args.pr_base_branch is None:
        pr_base_commit = None
    else:
        pr_base_commit = _get_git_merge_base_commit(
            pr_commit=pr_commit, target_branch=args.pr_base_branch, verbose=args.verbose
        )

    if pr_base_commit is None:
        comparable_results = None
    else:
        required_benchmark_keys = set(execution_benchmarks.keys())
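        # The comparison also needs base values for the compilation metrics, so
        # include the series id of every metric as a required key.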
        for target_id in compilation_metrics:
            for mapper in benchmark_presentation.COMPILATION_METRICS_TO_TABLE_MAPPERS:
                required_benchmark_keys.add(mapper.get_series_id(target_id))

        comparable_results = _find_comparable_benchmark_results(
            start_commit=pr_base_commit,
            required_benchmark_keys=required_benchmark_keys,
            verbose=args.verbose,
        )

    if comparable_results is None:
        comparable_commit = None
    else:
        comparable_commit = comparable_results.commit_sha
        # Update the execution benchmarks with base numbers.
        for bench in execution_benchmarks:
            base_benchmark = comparable_results.benchmark_results[bench]
            if base_benchmark["sampleUnit"] != "ns":
                raise ValueError("Only nanoseconds are supported for latency samples.")
            execution_benchmarks[bench].base_mean_time = base_benchmark["sample"]

        # Update the compilation metrics with base numbers.
        for target_id, metrics in compilation_metrics.items():
            updated_metrics = metrics
            for mapper in benchmark_presentation.COMPILATION_METRICS_TO_TABLE_MAPPERS:
                base_benchmark = comparable_results.benchmark_results[
                    mapper.get_series_id(target_id)
                ]
                if base_benchmark["sampleUnit"] != mapper.get_unit():
|  | raise ValueError("Unit of the queried sample is mismatched.") | 
                updated_metrics = mapper.update_base_value(
                    updated_metrics, base_benchmark["sample"]
                )
            compilation_metrics[target_id] = updated_metrics

    pr_commit_link = md.link(pr_commit, f"{GITHUB_IREE_REPO_PREFIX}/commit/{pr_commit}")
    commit_info_md = f"@ commit {pr_commit_link}"
    if comparable_commit is not None:
        baseline_commit_link = md.link(
            comparable_commit, f"{GITHUB_IREE_REPO_PREFIX}/commit/{comparable_commit}"
        )
        commit_info_md += f" (vs. base {baseline_commit_link})"
    elif pr_base_commit is not None:
        commit_info_md += " (no previous benchmark results to compare)"

    comment_def = COMMENT_DEF_MAP[args.comment_type]
    full_md, abbr_md = _get_benchmark_result_markdown(
        execution_benchmarks=execution_benchmarks,
        compilation_metrics=compilation_metrics,
        pr_url=f"{GITHUB_IREE_REPO_PREFIX}/pull/{args.pr_number}",
        build_url=args.build_url,
        comment_def=comment_def,
        commit_info_md=commit_info_md,
    )

    comment_data = benchmark_comment.CommentData(
        type_id=comment_def.type_id,
        abbr_md=abbr_md,
        full_md=full_md,
        unverified_pr_number=args.pr_number,
    )
    comment_json_data = json.dumps(dataclasses.asdict(comment_data), indent=2)
    if args.output is None:
        print(comment_json_data)
    else:
        args.output.write_text(comment_json_data)


if __name__ == "__main__":
    main(parse_arguments())