# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
r"""Script to generate an HTML summary of SHARK Tank benchmarks.

Example usage:

python parse_shark_benchmarks.py \
    --cpu_shark_csv=icelake_shark_bench_results.csv \
    --cpu_iree_csv=icelake_iree_bench_results.csv \
    --cpu_baseline_csv=cpu_baseline.csv \
    --gpu_shark_csv=a100_shark_bench_results.csv \
    --gpu_iree_csv=a100_iree_bench_results.csv \
    --gpu_baseline_csv=a100_baseline.csv \
    --version_info=version_info.txt \
    --output_path=/tmp/summary.html
"""

import argparse
import pathlib
import sys
from datetime import date

import pandas as pd

# Add build_tools python dir to the search path.
sys.path.insert(0, str(pathlib.Path(__file__).parent.parent.parent / "python"))

from reporting.common import html_utils
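# html_utils supplies the shared table CSS, the report header/legend, and the
# latency-comparison formatting helpers used when rendering the tables below.
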
# Column headers.
_MODEL = "model"
_DIALECT = "dialect"
_DATA_TYPE = "data_type"
_BASELINE = "baseline"
_DEVICE = "device"
_BASELINE_LATENCY = "baseline latency (ms)"
_IREE_LATENCY = "IREE latency (ms)"
_SHARK_LATENCY = "SHARK latency (ms)"
_IREE_VS_BASELINE = "IREE vs baseline"
_SHARK_VS_BASELINE = "SHARK vs baseline"
_IREE_VS_SHARK = "IREE vs SHARK"
_BASELINE_MEMORY = "Baseline Peak Device Memory (MB)"
_IREE_MEMORY = "IREE Peak Device Memory (MB)"
_SHARK_MEMORY = "SHARK Peak Device Memory (MB)"
_PERF_COLUMNS = [_IREE_VS_BASELINE, _SHARK_VS_BASELINE, _IREE_VS_SHARK]
_LATENCY_COLUMNS = [_BASELINE_LATENCY, _IREE_LATENCY, _SHARK_LATENCY]
_MEMORY_COLUMNS = [_BASELINE_MEMORY, _IREE_MEMORY, _SHARK_MEMORY]
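
# Note: each input CSV is expected to provide at least the columns referenced by
# the lookups in this script: `model`, `dialect`, `data_type`, `device`,
# `shape_type`, `engine`, `ms/iter`, and `device_memory_mb`.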


def _generate_table(df_iree, df_shark, df_baseline, title):
    """Generates a table comparing latencies between IREE, SHARK and a baseline."""
    summary = pd.DataFrame(
        columns=[
            _MODEL,
            _BASELINE,
            _DATA_TYPE,
            _DIALECT,
            _DEVICE,
            _BASELINE_LATENCY,
            _IREE_LATENCY,
            _SHARK_LATENCY,
            _IREE_VS_BASELINE,
            _SHARK_VS_BASELINE,
            _IREE_VS_SHARK,
            _BASELINE_MEMORY,
            _IREE_MEMORY,
            _SHARK_MEMORY,
        ]
    )
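
    # Walk the IREE results grouped by model, dialect, data type, and device;
    # each group is matched against the corresponding baseline and SHARK rows.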
    models = df_iree.model.unique()
    for model in models:
        iree_results_per_model = df_iree.loc[df_iree.model == model]
        dialects = iree_results_per_model.dialect.unique()
        for dialect in dialects:
            iree_results_per_dialect = iree_results_per_model.loc[
                iree_results_per_model.dialect == dialect
            ]
            data_types = iree_results_per_dialect.data_type.unique()
            for data_type in data_types:
                iree_results_per_datatype = iree_results_per_dialect.loc[
                    iree_results_per_dialect.data_type == data_type
                ]
                device_types = iree_results_per_datatype.device.unique()
                for device in device_types:
                    iree_results = iree_results_per_datatype.loc[
                        iree_results_per_datatype.device == device
                    ]
                    if len(iree_results) != 3:
                        print(
                            f"Warning! Expected number of results to be 3. Got"
                            f" {len(iree_results)}"
                        )
                        print(iree_results)
                        continue
                    baseline_results = df_baseline.loc[
                        (df_baseline.model == model)
                        & (df_baseline.dialect == dialect)
                        & (df_baseline.data_type == data_type)
                        & (df_baseline.device == device)
                    ]
                    if baseline_results.empty:
                        # We use snapshots of latencies for baseline. If it is a new
                        # benchmark that is not included in the snapshot yet, emit a
                        # warning.
                        print(
                            f"Warning: No baseline results found for {model}, {dialect},"
                            f" {data_type}, {device}. Using IREE version as baseline. Please"
                            f" update baseline csv."
                        )
                        engine = iree_results.engine.iloc[0]
                        baseline_df = iree_results.loc[iree_results.engine == engine]
                        baseline_latency = baseline_df.iloc[0]["ms/iter"]
                        baseline_device_mb = baseline_df.iloc[0]["device_memory_mb"]
                    else:
                        engine = baseline_results.engine.iloc[0]
                        baseline_df = baseline_results.loc[
                            baseline_results.engine == engine
                        ]
                        baseline_latency = baseline_df.iloc[0]["ms/iter"]
                        baseline_device_mb = baseline_df.iloc[0]["device_memory_mb"]
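
                    # IREE latencies are recorded under the `shark_iree_c` engine name.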
                    iree_df = iree_results.loc[iree_results.engine == "shark_iree_c"]
                    iree_latency = iree_df.iloc[0]["ms/iter"]
                    iree_device_mb = iree_df.iloc[0]["device_memory_mb"]
                    iree_vs_baseline = html_utils.format_latency_comparison(
                        iree_latency, baseline_latency
                    )

                    if df_shark is not None:
                        shark_results = df_shark.loc[
                            (df_shark.model == model)
                            & (df_shark.dialect == dialect)
                            & (df_shark.data_type == data_type)
                            & (df_shark.device == device)
                        ]
                        if shark_results.empty:
                            print(
                                f"Warning: No SHARK results for {model}, {dialect}, {data_type}, {device}."
                            )
                            continue
                        shark_df = shark_results.loc[
                            shark_results.engine == "shark_iree_c"
                        ]
                        shark_latency = shark_df.iloc[0]["ms/iter"]
                        shark_device_mb = shark_df.iloc[0]["device_memory_mb"]
                        shark_vs_baseline = html_utils.format_latency_comparison(
                            shark_latency, baseline_latency
                        )
                        iree_vs_shark = html_utils.format_latency_comparison(
                            iree_latency, shark_latency
                        )
                    else:
                        # If there are no SHARK benchmarks available, use default values.
                        # These columns will be hidden later.
                        shark_latency = 0
                        shark_device_mb = 0
                        shark_vs_baseline = "<missing_comparison>"
                        iree_vs_shark = "<missing_comparison>"

                    summary.loc[len(summary)] = [
                        model,
                        engine,
                        data_type,
                        dialect,
                        device,
                        f"{baseline_latency:.1f}",
                        f"{iree_latency:.1f}",
                        f"{shark_latency:.1f}",
                        iree_vs_baseline,
                        shark_vs_baseline,
                        iree_vs_shark,
                        f"{baseline_device_mb:.3f}",
                        f"{iree_device_mb:.3f}",
                        f"{shark_device_mb:.3f}",
                    ]
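
    # All rows collected; style the summary and emit it as an HTML table.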
    summary = summary.round(2)

    st = summary.style.set_table_styles(html_utils.get_table_css())
    st = st.hide(axis="index")
    if df_shark is None:
        st = st.hide_columns(
            subset=[_SHARK_LATENCY, _SHARK_VS_BASELINE, _IREE_VS_SHARK]
        )
    st = st.set_caption(title)
    st = st.applymap(html_utils.style_performance, subset=_PERF_COLUMNS)
    st = st.set_properties(
        subset=[_MODEL],
        **{
            "width": "300px",
            "text-align": "left",
        },
    )
    st = st.set_properties(
        subset=[_BASELINE],
        **{
            "width": "140",
            "text-align": "center",
        },
    )
    st = st.set_properties(
        subset=[_DIALECT, _DATA_TYPE, _DEVICE],
        **{
            "width": "100",
            "text-align": "center",
        },
    )
    st = st.set_properties(
        subset=_LATENCY_COLUMNS,
        **{
            "width": "100",
            "text-align": "right",
        },
    )
    st = st.set_properties(
        subset=_PERF_COLUMNS,
        **{"width": "150px", "text-align": "right", "color": "#ffffff"},
    )
    st = st.set_properties(
        subset=_MEMORY_COLUMNS,
        **{
            "width": "100",
            "text-align": "right",
        },
    )

    return st.to_html() + "<br/>"


def generate_table(
    iree_csv,
    baseline_csv,
    shark_csv=None,
    shape_type="static",
    device="cpu",
    title="Benchmarks",
):
    """Generates a table comparing latencies between IREE, SHARK and a baseline.

    Args:
      iree_csv: Path to the csv file containing IREE latencies.
      baseline_csv: Path to the csv file containing baseline latencies.
      shark_csv: Path to the csv file containing SHARK-Runtime latencies. This is
        optional.
      shape_type: Currently either `static` or `dynamic`.
      device: Device used to run the benchmarks.
      title: The title of the generated table.

    Returns:
      An HTML string containing the summarized report.
    """
    shark_df = None
    if shark_csv is not None:
        shark_df = pd.read_csv(shark_csv)
        shark_df = shark_df.loc[
            (shark_df.shape_type == shape_type) & (shark_df.device == device)
        ]

    iree_df = pd.read_csv(iree_csv)
    iree_df = iree_df.loc[
        (iree_df.shape_type == shape_type) & (iree_df.device == device)
    ]

    baseline_df = pd.read_csv(baseline_csv)
    baseline_df = baseline_df.loc[
        (baseline_df.shape_type == shape_type) & (baseline_df.device == device)
    ]

    return _generate_table(iree_df, shark_df, baseline_df, title)


def main(args):
    """Summarizes benchmark results generated by the SHARK Tank."""
    version_html = f"<i>last updated: {date.today().isoformat()}</i><br/><br/>"
    version_html += "<i><b>Version Info</b></i><br/>"
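
    # The version file is expected to contain one `key=value` pair per line,
    # e.g. a framework name and the version that was benchmarked.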
    with open(args.version_info) as f:
        version_info = dict(l.strip().split("=", 1) for l in f)
    for key, value in version_info.items():
        version_html += f"<i>{key}: {value}</i><br/>"
    version_html += "<br/>"

    html = html_utils.generate_header_and_legend(version_html)
    # Generate Server CPU Static.
    if args.cpu_iree_csv is not None:
        html += generate_table(
            args.cpu_iree_csv,
            args.cpu_baseline_csv,
            shark_csv=args.cpu_shark_csv,
            shape_type="static",
            device="cpu",
            title="Server Intel Ice Lake CPU (Static Shapes)",
        )

    # Generate Server GPU Static.
    if args.gpu_iree_csv is not None:
        html += generate_table(
            args.gpu_iree_csv,
            args.gpu_baseline_csv,
            shark_csv=args.gpu_shark_csv,
            shape_type="static",
            device="cuda",
            title="Server NVIDIA Tesla A100 GPU (Static Shapes)",
        )

    # Generate Server CPU Dynamic.
    if args.cpu_iree_csv is not None:
        html += generate_table(
            args.cpu_iree_csv,
            args.cpu_baseline_csv,
            shark_csv=args.cpu_shark_csv,
            shape_type="dynamic",
            device="cpu",
            title="Server Intel Ice Lake CPU (Dynamic Shapes)",
        )

    # Generate Server GPU Dynamic.
    if args.gpu_iree_csv is not None:
        html += generate_table(
            args.gpu_iree_csv,
            args.gpu_baseline_csv,
            shark_csv=args.gpu_shark_csv,
            shape_type="dynamic",
            device="cuda",
            title="Server NVIDIA Tesla A100 GPU (Dynamic Shapes)",
        )

    args.output_path.write_text(html)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--cpu_shark_csv",
        type=str,
        default=None,
        help="The path to the csv file with CPU benchmarking results from the "
        "SHARK runtime.",
    )
    parser.add_argument(
        "--cpu_iree_csv",
        type=str,
        default=None,
        help="The path to the csv file with CPU benchmarking results from IREE.",
    )
    parser.add_argument(
        "--cpu_baseline_csv",
        type=str,
        default="data/icelake_baseline_2022-09-19.csv",
        help="The path to the csv file containing baseline CPU results.",
    )
    parser.add_argument(
        "--gpu_shark_csv",
        type=str,
        default=None,
        help="The path to the csv file with GPU benchmarking results from the "
        "SHARK runtime.",
    )
    parser.add_argument(
        "--gpu_iree_csv",
        type=str,
        default=None,
        help="The path to the csv file with GPU benchmarking results from IREE.",
    )
    parser.add_argument(
        "--gpu_baseline_csv",
        type=str,
        default="data/a100_baseline_2022-09-19.csv",
        help="The path to the csv file containing baseline GPU results.",
    )
    parser.add_argument(
        "--version_info",
        type=pathlib.Path,
        default=None,
        help="The path to a text file containing version information of the "
        "frameworks tested.",
    )
    parser.add_argument(
        "--output_path",
        type=pathlib.Path,
        default="/tmp/summary.html",
        help="The path to the output html file that summarizes results.",
    )
    return parser.parse_args()


if __name__ == "__main__":
    main(parse_args())