# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
r"""Script to generate an HTML summary of SHARK Tank benchmarks.

Example usage:

python parse_shark_benchmarks.py \
  --cpu_shark_csv=icelake_shark_bench_results.csv \
  --cpu_iree_csv=icelake_iree_bench_results.csv \
  --cpu_baseline_csv=cpu_baseline.csv \
  --gpu_shark_csv=a100_shark_bench_results.csv \
  --gpu_iree_csv=a100_iree_bench_results.csv \
  --gpu_baseline_csv=a100_baseline.csv \
  --version_info=version_info.txt \
  --output_path=/tmp/summary.html
"""

import argparse
import pandas as pd
import pathlib
import sys

from datetime import date

# Add build_tools python dir to the search path.
sys.path.insert(0, str(pathlib.Path(__file__).parent.parent.parent / "python"))
from reporting.common import html_utils
# Column headers.
_MODEL = "model"
_DIALECT = "dialect"
_DATA_TYPE = "data_type"
_BASELINE = "baseline"
_DEVICE = "device"
_BASELINE_LATENCY = "baseline latency (ms)"
_IREE_LATENCY = "IREE latency (ms)"
_SHARK_LATENCY = "SHARK latency (ms)"
_IREE_VS_BASELINE = "IREE vs baseline"
_SHARK_VS_BASELINE = "SHARK vs baseline"
_IREE_VS_SHARK = "IREE vs SHARK"

_PERF_COLUMNS = [_IREE_VS_BASELINE, _SHARK_VS_BASELINE, _IREE_VS_SHARK]
_LATENCY_COLUMNS = [_BASELINE_LATENCY, _IREE_LATENCY, _SHARK_LATENCY]


def _generate_table(df_iree, df_shark, df_baseline, title):
  """Generates a table comparing latencies between IREE, SHARK and a baseline."""
  summary = pd.DataFrame(columns=[
      _MODEL, _BASELINE, _DATA_TYPE, _DIALECT, _DEVICE, _BASELINE_LATENCY,
      _IREE_LATENCY, _SHARK_LATENCY, _IREE_VS_BASELINE, _SHARK_VS_BASELINE,
      _IREE_VS_SHARK
  ])

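  # Group the IREE results by (model, dialect, data_type, device); each
  # combination contributes one row to the summary table.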
  models = df_iree.model.unique()
  for model in models:
    iree_results_per_model = df_iree.loc[df_iree.model == model]
    dialects = iree_results_per_model.dialect.unique()
    for dialect in dialects:
      iree_results_per_dialect = iree_results_per_model.loc[
          iree_results_per_model.dialect == dialect]
      data_types = iree_results_per_dialect.data_type.unique()
      for data_type in data_types:
        iree_results_per_datatype = iree_results_per_dialect.loc[
            iree_results_per_dialect.data_type == data_type]
        device_types = iree_results_per_datatype.device.unique()
        for device in device_types:
          iree_results = iree_results_per_datatype.loc[
              iree_results_per_datatype.device == device]
          if len(iree_results) != 3:
            print(f"Warning! Expected number of results to be 3. Got"
                  f" {len(iree_results)}")
            print(iree_results)
            continue

          baseline_results = df_baseline.loc[(df_baseline.model == model) &
                                             (df_baseline.dialect == dialect) &
                                             (df_baseline.data_type ==
                                              data_type) &
                                             (df_baseline.device == device)]

          if baseline_results.empty:
            # We use snapshots of latencies for the baseline. If it is a new
            # benchmark that is not included in the snapshot yet, emit a
            # warning.
            print(
                f"Warning: No baseline results found for {model}, {dialect},"
                f" {data_type}, {device}. Using IREE latency as baseline."
                f" Please update the baseline csv.")
            engine = iree_results.engine.iloc[0]
            baseline_latency = iree_results.loc[iree_results.engine == engine]
            baseline_latency = baseline_latency.iloc[0]["ms/iter"]
          else:
            engine = baseline_results.engine.iloc[0]
            baseline_latency = baseline_results.loc[baseline_results.engine ==
                                                    engine]
            baseline_latency = baseline_latency.iloc[0]["ms/iter"]

          iree_latency = iree_results.loc[iree_results.engine == "shark_iree_c"]
          iree_latency = iree_latency.iloc[0]["ms/iter"]
          iree_vs_baseline = html_utils.format_latency_comparison(
              iree_latency, baseline_latency)

          if df_shark is not None:
            shark_results = df_shark.loc[(df_shark.model == model) &
                                         (df_shark.dialect == dialect) &
                                         (df_shark.data_type == data_type) &
                                         (df_shark.device == device)]
            if shark_results.empty:
              print(
                  f"Warning: No SHARK results for {model}, {dialect}, {data_type}, {device}."
              )
              continue

            shark_latency = shark_results.loc[shark_results.engine ==
                                              "shark_iree_c"]
            shark_latency = shark_latency.iloc[0]["ms/iter"]
            shark_vs_baseline = html_utils.format_latency_comparison(
                shark_latency, baseline_latency)
            iree_vs_shark = html_utils.format_latency_comparison(
                iree_latency, shark_latency)
          else:
            # If there are no SHARK benchmarks available, use default values.
            # These columns will be hidden later.
            shark_latency = 0
            shark_vs_baseline = "<missing_comparison>"
            iree_vs_shark = "<missing_comparison>"

          summary.loc[len(summary)] = [
              model, engine, data_type, dialect, device,
              f"{baseline_latency:.1f}", f"{iree_latency:.1f}",
              f"{shark_latency:.1f}", iree_vs_baseline, shark_vs_baseline,
              iree_vs_shark
          ]

  summary = summary.round(2)

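  # Render the summary as a styled HTML table: hide the index, hide the SHARK
  # columns when no SHARK results were provided, and color-code the comparison
  # columns via html_utils.style_performance.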
  st = summary.style.set_table_styles(html_utils.get_table_css())
  st = st.hide(axis="index")
  if df_shark is None:
    st = st.hide_columns(
        subset=[_SHARK_LATENCY, _SHARK_VS_BASELINE, _IREE_VS_SHARK])
  st = st.set_caption(title)
  st = st.applymap(html_utils.style_performance, subset=_PERF_COLUMNS)
  st = st.set_properties(subset=[_MODEL],
                         **{
                             "width": "300px",
                             "text-align": "left",
                         })
  st = st.set_properties(subset=[_BASELINE],
                         **{
                             "width": "140px",
                             "text-align": "center",
                         })
  st = st.set_properties(subset=[_DIALECT, _DATA_TYPE, _DEVICE],
                         **{
                             "width": "100px",
                             "text-align": "center",
                         })
  st = st.set_properties(subset=_LATENCY_COLUMNS,
                         **{
                             "width": "100px",
                             "text-align": "right",
                         })
  st = st.set_properties(subset=_PERF_COLUMNS,
                         **{
                             "width": "150px",
                             "text-align": "right",
                             "color": "#ffffff"
                         })

  return st.to_html() + "<br/>"


def generate_table(iree_csv,
                   baseline_csv,
                   shark_csv=None,
                   shape_type="static",
                   device="cpu",
                   title="Benchmarks"):
  """Generates a table comparing latencies between IREE, SHARK and a baseline.

  Args:
    iree_csv: Path to the csv file containing IREE latencies.
    baseline_csv: Path to the csv file containing baseline latencies.
    shark_csv: Path to the csv file containing SHARK-Runtime latencies. This is
      optional.
    shape_type: Currently either `static` or `dynamic`.
    device: Device used to run the benchmarks.
    title: The title of the generated table.

  Returns:
    An HTML string containing the summarized report.
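
  Example (illustrative file names, mirroring the module docstring above):
    generate_table("icelake_iree_bench_results.csv",
                   "cpu_baseline.csv",
                   shark_csv="icelake_shark_bench_results.csv",
                   shape_type="static",
                   device="cpu",
                   title="Server Intel Ice Lake CPU (Static Shapes)")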
|  | """ | 
|  | shark_df = None | 
|  | if shark_csv is not None: | 
|  | shark_df = pd.read_csv(shark_csv) | 
|  | shark_df = shark_df.loc[(shark_df.shape_type == shape_type) & | 
|  | (shark_df.device == device)] | 
|  |  | 
|  | iree_df = pd.read_csv(iree_csv) | 
|  | iree_df = iree_df.loc[(iree_df.shape_type == shape_type) & | 
|  | (iree_df.device == device)] | 
|  |  | 
|  | baseline_df = pd.read_csv(baseline_csv) | 
|  | baseline_df = baseline_df.loc[(baseline_df.shape_type == shape_type) & | 
|  | (baseline_df.device == device)] | 
|  |  | 
|  | return _generate_table(iree_df, shark_df, baseline_df, title) | 


def main(args):
  """Summarizes benchmark results generated by the SHARK Tank."""
  version_html = f"<i>last updated: {date.today().isoformat()}</i><br/><br/>"
  version_html += "<i><b>Version Info</b></i><br/>"
  with open(args.version_info) as f:
    version_info = dict(l.strip().split("=", 1) for l in f)
  for key, value in version_info.items():
    version_html += f"<i>{key}: {value}</i><br/>"
  version_html += "<br/>"

  html = html_utils.generate_header_and_legend(version_html)

  # Generate Server CPU Static.
  if args.cpu_iree_csv is not None:
    html += generate_table(args.cpu_iree_csv,
                           args.cpu_baseline_csv,
                           shark_csv=args.cpu_shark_csv,
                           shape_type="static",
                           device="cpu",
                           title="Server Intel Ice Lake CPU (Static Shapes)")

  # Generate Server GPU Static.
  if args.gpu_iree_csv is not None:
    html += generate_table(args.gpu_iree_csv,
                           args.gpu_baseline_csv,
                           shark_csv=args.gpu_shark_csv,
                           shape_type="static",
                           device="cuda",
                           title="Server NVIDIA Tesla A100 GPU (Static Shapes)")

  # Generate Server CPU Dynamic.
  if args.cpu_iree_csv is not None:
    html += generate_table(args.cpu_iree_csv,
                           args.cpu_baseline_csv,
                           shark_csv=args.cpu_shark_csv,
                           shape_type="dynamic",
                           device="cpu",
                           title="Server Intel Ice Lake CPU (Dynamic Shapes)")

  # Generate Server GPU Dynamic.
  if args.gpu_iree_csv is not None:
    html += generate_table(
        args.gpu_iree_csv,
        args.gpu_baseline_csv,
        shark_csv=args.gpu_shark_csv,
        shape_type="dynamic",
        device="cuda",
        title="Server NVIDIA Tesla A100 GPU (Dynamic Shapes)")

  args.output_path.write_text(html)


def parse_args():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      "--cpu_shark_csv",
      type=str,
      default=None,
      help="The path to the csv file with CPU benchmarking results from the "
      "SHARK runtime.")
  parser.add_argument(
      "--cpu_iree_csv",
      type=str,
      default=None,
      help="The path to the csv file with CPU benchmarking results from IREE.")
  parser.add_argument(
      "--cpu_baseline_csv",
      type=str,
      default="data/icelake_baseline_2022-09-19.csv",
      help="The path to the csv file containing baseline CPU results.")
  parser.add_argument(
      "--gpu_shark_csv",
      type=str,
      default=None,
      help="The path to the csv file with GPU benchmarking results from the "
      "SHARK runtime.")
  parser.add_argument(
      "--gpu_iree_csv",
      type=str,
      default=None,
      help="The path to the csv file with GPU benchmarking results from IREE.")
  parser.add_argument(
      "--gpu_baseline_csv",
      type=str,
      default="data/a100_baseline_2022-09-19.csv",
      help="The path to the csv file containing baseline GPU results.")
  parser.add_argument(
      "--version_info",
      type=pathlib.Path,
      default=None,
      help="The path to a text file containing version information of the "
      "frameworks tested.")
  parser.add_argument(
      "--output_path",
      type=pathlib.Path,
      default="/tmp/summary.html",
      help="The path to the output html file that summarizes results.")
  return parser.parse_args()


if __name__ == "__main__":
  main(parse_args())