| # Copyright 2022 The IREE Authors |
| # |
| # Licensed under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| r"""Script to generate a HTML summary comparing IREE and TFLite latencies and memory usage. |
| |
| Example usage: |
| |
| python parse_tflite_benchmarks.py \ |
| --iree_version=20220924.276 \ |
| --tflite_version=20220924.162 \ |
| --platform=server \ |
| --input_csv=server_results.csv \ |
| --output_path=/tmp/server_summary.html |
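
A hypothetical mobile run looks the same apart from the flag values (the
file names below are illustrative):

python parse_tflite_benchmarks.py \
  --iree_version=20220924.276 \
  --tflite_version=20220924.162 \
  --platform=mobile \
  --input_csv=mobile_results.csv \
  --output_path=/tmp/mobile_summary.html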
| |
| """ |
| |
| import argparse |
| import pandas as pd |
| import pathlib |
| import sys |
| |
| from datetime import date |
| |
| # Add build_tools python dir to the search path. |
| sys.path.insert(0, str(pathlib.Path(__file__).parent / ".." / ".." / "python")) |
| from reporting.common import html_utils |
| |
| # Supported platforms. |
| _PLATFORM_SERVER = "server" |
| _PLATFORM_MOBILE = "mobile" |
| |
| # A map of model name to data type. |
| _MODEL_TO_DATA_TYPE = { |
| "albert_lite_base_squadv1_1": "fp32", |
| "albert_lite_base_squadv1_1_fp16": "fp16", |
| "deeplabv3": "fp32", |
| "deeplabv3_fp16": "fp16", |
| "efficientnet_lite0_fp32_2": "fp32", |
| "efficientnet_lite0_fp32_2_fp16": "fp16", |
| "efficientnet_lite0_int8_2": "int8", |
| "inception_v4_299_fp32": "fp32", |
| "inception_v4_299_fp32_fp16": "fp16", |
| "inception_v4_299_uint8": "uint8", |
| "mobilebert-baseline-tf2-quant": "int8", |
| "mobilebert_float_384_gpu": "fp32", |
| "mobilebert_float_384_gpu_fp16": "fp16", |
| "mobilenet_v2_1.0_224": "fp32", |
| "mobilenet_v2_1.0_224_fp16": "fp16", |
| "mobilenet_v2_224_1.0_uint8": "uint8", |
| "person_detect": "int8", |
| "resnet_v2_101_1_default_1": "fp32", |
| "resnet_v2_101_1_default_1_fp16": "fp16", |
| "ssd_mobilenet_v2_static_1.0_int8": "int8", |
| "ssd_mobilenet_v2_fpnlite_fp32": "fp32", |
| "ssd_mobilenet_v2_fpnlite_fp32_fp16": "fp16", |
| "ssd_mobilenet_v2_fpnlite_uint8": "uint8", |
| } |
| |
| # Column headers. |
| _MODEL = "model" |
| _DATA_TYPE = "data type" |
| _RUNTIME = "runtime" |
| _LATENCY = "latency (ms)" |
| _TASKSET = "taskset" |
| _MEMORY = "vmhwm (KB)" |
| _THREADS = "threads" |
| _CONFIG = "config" |
| _DRIVER = "driver/delegate" |
| _TFLITE_CONFIG = "TFLite config" |
| _IREE_CONFIG = "IREE config" |
| _IREE_LATENCY = "IREE latency (ms)" |
| _TFLITE_LATENCY = "TFLite latency (ms)" |
_IREE_MEMORY = "IREE vmhwm (KB)"
_TFLITE_MEMORY = "TFLite vmhwm (KB)"
| |
| _IREE_VS_TFLITE_LATENCY = "IREE vs TFLite latency" |
| _IREE_VS_TFLITE_MEMORY = "IREE vs TFLite memory" |
| |
| _PERF_COLUMNS = [_IREE_VS_TFLITE_LATENCY, _IREE_VS_TFLITE_MEMORY] |
| _NUMBER_COLUMNS = [_IREE_LATENCY, _TFLITE_LATENCY, _IREE_MEMORY, _TFLITE_MEMORY] |
| _CONFIG_COLUMNS = [_TFLITE_CONFIG, _IREE_CONFIG] |
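
# A hypothetical input row, using the column names referenced above (the real
# header order is not guaranteed, and `taskset` is only present for mobile
# runs):
#
#   model,runtime,driver/delegate,taskset,threads,latency (ms),vmhwm (KB)
#   deeplabv3,tflite,cpu,f0,4,35.2,102400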
| |
| |
| def get_tflite_model_list(df): |
| """Retrieves the list of TFLite models, filtering out duplicates. |
| |
    The .csv file includes multiple entries for the same model, each under a
    different configuration (e.g. XNNPack enabled, XNNPack disabled).
| """ |
| df = df.loc[df.runtime == "tflite"] |
    # Remove rows where the model name ends with `noxnn`; these duplicate the
    # XNNPack-enabled entries.
| df = df[~df.model.str.endswith("noxnn")] |
| return df.model.unique() |
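
# A minimal sketch of `get_tflite_model_list` on hypothetical data:
#
#   df = pd.DataFrame({
#       "model": ["deeplabv3", "deeplabv3_noxnn", "deeplabv3"],
#       "runtime": ["tflite", "tflite", "iree"],
#   })
#   get_tflite_model_list(df)  # -> array(["deeplabv3"], dtype=object)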
| |
| |
| def get_fastest_result(model, df): |
| """Retrieves the lowest latency result from multiple configurations. |
| |
    Benchmarks are run under different configurations (e.g. number of threads,
    big core, LITTLE core, etc.). This method retrieves the fastest
    configuration whilst ensuring apples-to-apples comparisons (e.g. FP16
    results are not considered when the model is FP32).
| |
| Args: |
| model: The model name. |
| df: The dataframe to filter through. |
| |
| Returns: |
      A dataframe containing at most one row: the lowest-latency result.
| """ |
| df = df[df.model.str.startswith(model)] |
| if not model.endswith("fp16"): |
| df = df[~df[_MODEL].str.endswith("fp16")] |
| df = df[df[_LATENCY] != 0] |
| df = df[df[_LATENCY] == df[_LATENCY].min()] |
| return df.head(1) |
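
# A minimal sketch of `get_fastest_result` on hypothetical data: fp16 rows do
# not compete with an fp32 model, and zero-latency (failed) rows are ignored.
#
#   df = pd.DataFrame({
#       "model": ["deeplabv3", "deeplabv3", "deeplabv3_fp16"],
#       "latency (ms)": [0.0, 40.0, 20.0],
#   })
#   get_fastest_result("deeplabv3", df)  # -> the single 40.0 ms row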
| |
| |
| def get_tflite_config(model, df): |
| """Generates a configuration string from TFLite config variables.""" |
| config = [] |
| if _TASKSET in df.columns: |
| taskset = df.taskset.iloc[0] |
| config.append(f"taskset {taskset}") |
| threads = df.threads.iloc[0] |
| config.append(f"{threads} threads" if threads > 1 else f"{threads} thread") |
| config.append("no xnnpack" if model.endswith("noxnn") else "xnnpack") |
| return ", ".join(config) |
| |
| |
| def generate_tflite_summary(dataframe): |
| """Generates a dataframe containing the fastest TFLite result for each model.""" |
| summary = pd.DataFrame(columns=[_MODEL, _LATENCY, _MEMORY, _CONFIG]) |
| tflite_df = dataframe[dataframe.runtime == "tflite"] |
| model_list = get_tflite_model_list(dataframe) |
| for model in model_list: |
| df = get_fastest_result(model, tflite_df) |
| if df.empty: |
| print(f"Warning: TFLite results invalid for {model}.") |
| continue |
| latency = df[_LATENCY].iloc[0] |
| full_model_name = df.model.iloc[0] |
| memory = df[_MEMORY].iloc[0] |
| config = get_tflite_config(full_model_name, df) |
| summary.loc[len(summary)] = [model, latency, memory, config] |
| return summary |
| |
| |
| def get_iree_model_list(df): |
| """Retrieves the list of IREE models, filtering out duplicates. |
| |
    The .csv file includes multiple entries for the same model, each under a
    different configuration (e.g. mmt4d enabled, pad fusion enabled).
| """ |
| df = df.loc[df.runtime == "iree"] |
| df = df[~df.model.str.endswith("mmt4d")] |
| df = df[~df.model.str.endswith("padfuse")] |
| return df.model.unique() |
| |
| |
| def get_iree_config(model, df): |
| """Generates a configuration string from IREE config variables. |
| |
| The configuration is embedded in the model name. |
| """ |
| config = [] |
| if _TASKSET in df.columns: |
| taskset = df.taskset.iloc[0] |
| config.append(f"taskset {taskset}") |
| threads = df.threads.iloc[0] |
| config.append(f"{threads} threads" if threads > 1 else f"{threads} thread") |
| if model.endswith("im2col_mmt4d"): |
| config.append("im2col") |
| config.append("mmt4d") |
| elif model.endswith("mmt4d"): |
| config.append("mmt4d") |
| elif model.endswith("padfuse"): |
| config.append("fused pad") |
| return ", ".join(config) |
| |
| |
| def generate_iree_summary(dataframe): |
| """Generates a dataframe containing the fastest IREE result for each model.""" |
| summary = pd.DataFrame(columns=[_MODEL, _LATENCY, _MEMORY, _CONFIG]) |
| iree_df = dataframe[dataframe.runtime == "iree"] |
| model_list = get_iree_model_list(dataframe) |
| for model in model_list: |
| df = get_fastest_result(model, iree_df) |
| if df.empty: |
| print(f"Warning: IREE results invalid for {model}.") |
| continue |
| latency = df[_LATENCY].iloc[0] |
| full_model_name = df.model.iloc[0] |
| memory = df[_MEMORY].iloc[0] |
| config = get_iree_config(full_model_name, df) |
| summary.loc[len(summary)] = [model, latency, memory, config] |
| return summary |
| |
| |
| def get_common_html_style(df, title): |
| """Returns HTML style attributes common to both server and mobile.""" |
| st = df.style.set_table_styles(html_utils.get_table_css()) |
| st = st.hide(axis="index") |
| st = st.set_caption(title) |
| st = st.set_properties( |
| subset=[_MODEL], |
| **{ |
| "width": "300px", |
| "text-align": "left", |
| }, |
| ) |
| st = st.set_properties( |
| subset=[_DATA_TYPE], |
| **{ |
| "width": "100", |
| "text-align": "center", |
| }, |
| ) |
| st = st.set_properties( |
| subset=_NUMBER_COLUMNS, |
| **{ |
| "width": "100", |
| "text-align": "right", |
| }, |
| ) |
| st = st.set_properties( |
| subset=_PERF_COLUMNS, |
| **{"width": "150px", "text-align": "right", "color": "#ffffff"}, |
| ) |
| st = st.applymap(html_utils.style_latency, subset=[_IREE_VS_TFLITE_LATENCY]) |
| st = st.applymap(html_utils.style_memory, subset=[_IREE_VS_TFLITE_MEMORY]) |
| return st |
| |
| |
| def generate_summary(dataframe, title): |
| """Generates a table comparing latencies and memory usage between IREE and TFLite. |
| |
| For each model, retrieves the lowest latency configuration from both IREE and TFLite. |
| |
| Args: |
| dataframe: The raw data to summarize. |
| title: The title of the table. |
| |
| Returns: |
| An HTML string containing the summarized report. |
| """ |
| summary = pd.DataFrame( |
| columns=[ |
| _MODEL, |
| _DATA_TYPE, |
| _TFLITE_CONFIG, |
| _IREE_CONFIG, |
| _TFLITE_LATENCY, |
| _IREE_LATENCY, |
| _IREE_VS_TFLITE_LATENCY, |
| _TFLITE_MEMORY, |
| _IREE_MEMORY, |
| _IREE_VS_TFLITE_MEMORY, |
| ] |
| ) |
| |
| tflite_df = generate_tflite_summary(dataframe) |
| iree_df = generate_iree_summary(dataframe) |
| model_list = tflite_df[_MODEL].unique() |
| |
| for model in model_list: |
| tflite_results = tflite_df[tflite_df.model == model] |
| iree_results = iree_df[iree_df.model == model] |
| |
| if tflite_results.empty: |
| print(f"Warning: No TFLite results found for model {model}") |
| continue |
| if iree_results.empty: |
| print(f"Warning: No IREE results found for model {model}") |
| continue |
| |
| iree_latency = iree_results[_LATENCY].iloc[0] |
| tflite_latency = tflite_results[_LATENCY].iloc[0] |
| latency_comparison = html_utils.format_latency_comparison( |
| iree_latency, tflite_latency |
| ) |
| |
| iree_memory = iree_results[_MEMORY].iloc[0] |
| tflite_memory = tflite_results[_MEMORY].iloc[0] |
| memory_comparison = html_utils.format_memory_comparison( |
| iree_memory, tflite_memory |
| ) |
| |
| iree_config = iree_results.config.iloc[0] |
| tflite_config = tflite_results.config.iloc[0] |
| summary.loc[len(summary)] = [ |
| model, |
| _MODEL_TO_DATA_TYPE[model], |
| tflite_config, |
| iree_config, |
| f"{tflite_latency:.1f}", |
| f"{iree_latency:.1f}", |
| latency_comparison, |
| f"{tflite_memory:,.0f}", |
| f"{iree_memory:,.0f}", |
| memory_comparison, |
| ] |
| |
| summary = summary.round(2) |
| st = get_common_html_style(summary, title) |
| st = st.set_properties( |
| subset=_CONFIG_COLUMNS, |
| **{ |
| "width": "300px", |
| "text-align": "left", |
| }, |
| ) |
| return st.to_html().replace("\\n", "<br>") + "<br/>" |
| |
| |
| def generate_detail(dataframe, title, platform): |
| """Generates a table comparing latencies and memory usage between IREE and TFLite. |
| |
    The table generated is more detailed than `generate_summary`: it lists the
    latencies of all IREE configurations, using the fastest TFLite configuration
    as the baseline.
| |
| Args: |
| dataframe: The raw data to summarize. |
| title: The title of the table. |
| platform: Either `server` or `mobile`. |
| |
| Returns: |
| An HTML string containing the detailed report. |
| """ |
| summary = pd.DataFrame( |
| columns=[ |
| _MODEL, |
| _DATA_TYPE, |
| _TFLITE_CONFIG, |
| _IREE_CONFIG, |
| _TASKSET, |
| _THREADS, |
| _TFLITE_LATENCY, |
| _IREE_LATENCY, |
| _IREE_VS_TFLITE_LATENCY, |
| _TFLITE_MEMORY, |
| _IREE_MEMORY, |
| _IREE_VS_TFLITE_MEMORY, |
| ] |
| ) |
| |
| model_list = get_tflite_model_list(dataframe) |
| for model in model_list: |
| df = dataframe[dataframe.model.str.startswith(model)] |
        # If the model is not FP16, remove FP16 results from the dataframe to
        # maintain apples-to-apples comparisons.
| if not model.endswith("fp16"): |
| df = df[~df.model.str.endswith("fp16")] |
| |
| if _TASKSET in df.columns: |
| tasksets = df.taskset.unique() |
| else: |
| tasksets = ["none"] |
| |
| for taskset in tasksets: |
| per_taskset_df = df if taskset == "none" else df[df.taskset == taskset] |
| threads = per_taskset_df.threads.unique() |
| for thread in threads: |
| per_thread_df = per_taskset_df[per_taskset_df.threads == thread] |
| tflite_df = get_fastest_result( |
| model, per_thread_df[per_thread_df.runtime == "tflite"] |
| ) |
| if tflite_df.empty: |
| continue |
| |
| tflite_latency = tflite_df[_LATENCY].iloc[0] |
| tflite_memory = tflite_df[_MEMORY].iloc[0] |
| if tflite_latency == 0 or tflite_memory == 0: |
| continue |
| |
| full_model_name = tflite_df.model.iloc[0] |
                # For the TFLite config, we only want to know whether XNNPack
                # was used. The other configuration settings are covered in
                # other columns.
| tflite_config = ( |
| "no xnnpack" if full_model_name.endswith("noxnn") else "xnnpack" |
| ) |
| |
| iree_df = per_thread_df[per_thread_df.runtime == "iree"] |
| for _, row in iree_df.iterrows(): |
| iree_config = row[_DRIVER] |
| model_name = row[_MODEL] |
| if model_name.endswith("im2col_mmt4d"): |
| iree_config += ", im2col, mmt4d" |
| elif model_name.endswith("mmt4d"): |
| iree_config += ", mmt4d" |
| elif model_name.endswith("padfuse"): |
| iree_config += ", fused pad" |
| |
| iree_latency = row[_LATENCY] |
| latency_comparison = html_utils.format_latency_comparison( |
| iree_latency, tflite_latency |
| ) |
| iree_memory = row[_MEMORY] |
| memory_comparison = html_utils.format_memory_comparison( |
| iree_memory, tflite_memory |
| ) |
| |
| if iree_latency == 0 or iree_memory == 0: |
| continue |
| |
| summary.loc[len(summary)] = [ |
| model, |
| _MODEL_TO_DATA_TYPE[model], |
| tflite_config, |
| iree_config, |
| taskset, |
| thread, |
| f"{tflite_latency:.1f}", |
| f"{iree_latency:.1f}", |
| latency_comparison, |
| f"{tflite_memory:,.0f}", |
| f"{iree_memory:,.0f}", |
| memory_comparison, |
| ] |
| |
| summary = summary.round(2) |
| st = get_common_html_style(summary, title) |
| st = st.set_properties( |
| subset=[_TASKSET, _THREADS], |
| **{ |
| "width": "100", |
| "text-align": "center", |
| }, |
| ) |
| st = st.set_properties( |
| subset=[_TFLITE_CONFIG], |
| **{ |
| "width": "150px", |
| "text-align": "left", |
| }, |
| ) |
| st = st.set_properties( |
| subset=[_IREE_CONFIG], |
| **{ |
| "width": "300px", |
| "text-align": "left", |
| }, |
| ) |
| if platform != "mobile": |
| st.hide_columns(subset=[_TASKSET]) |
| |
| return st.to_html().replace("\\n", "<br>") + "<br/>" |
| |
| |
| def main(args): |
| """Summarizes IREE vs TFLite benchmark results.""" |
| if args.platform == _PLATFORM_SERVER: |
| cpu_drivers = ["cpu", "local-task"] |
| gpu_drivers = ["gpu", "cuda"] |
| else: |
| cpu_drivers = ["cpu", "local-task"] |
| gpu_drivers = ["gpu", "vulkan", "adreno"] |
| |
| version_html = ( |
| f"<i>IREE version: {args.iree_version}</i><br/>" |
| f"<i>TFlite version: {args.tflite_version}</i><br/>" |
| f"<i>last updated: {date.today().isoformat()}</i><br/><br/>" |
| ) |
| html = html_utils.generate_header_and_legend(version_html) |
| |
| df = pd.read_csv(args.input_csv) |
| |
| # Generate CPU Summary. |
| results = df[df[_DRIVER].isin(cpu_drivers)] |
| html += generate_summary(results, args.platform.capitalize() + " CPU Summary") |
| |
| # Generate GPU Summary. |
| results = df[df[_DRIVER].isin(gpu_drivers)] |
| html += generate_summary(results, args.platform.capitalize() + " GPU Summary") |
| |
| # Generate CPU Detailed View. |
| results = df[df[_DRIVER].isin(cpu_drivers)] |
| html += generate_detail( |
| results, args.platform.capitalize() + " CPU Detailed", args.platform |
| ) |
| |
| # Generate GPU Detailed View. |
| results = df[df[_DRIVER].isin(gpu_drivers)] |
| html += generate_detail( |
| results, args.platform.capitalize() + " GPU Detailed", args.platform |
| ) |
| |
| args.output_path.write_text(html) |
| |
| |
| def parse_args(): |
| parser = argparse.ArgumentParser() |
    parser.add_argument(
        "--iree_version",
        type=str,
        required=True,
        help="The IREE version.",
    )
    parser.add_argument(
        "--tflite_version",
        type=str,
        required=True,
        help="The TFLite version.",
    )
| parser.add_argument( |
| "--platform", |
| action="store", |
| type=str.lower, |
| help="The platform the models were benchmarked on. Either server or mobile.", |
| required=True, |
| choices=[_PLATFORM_SERVER, _PLATFORM_MOBILE], |
| ) |
    parser.add_argument(
        "--input_csv",
        type=str,
        required=True,
        help="The path to the csv file containing benchmark results for both IREE and TFLite.",
    )
| parser.add_argument( |
| "--output_path", |
| type=pathlib.Path, |
| default="/tmp/summary.html", |
| help="The path to the output html file that summarizes results.", |
| ) |
| return parser.parse_args() |
| |
| |
| if __name__ == "__main__": |
| main(parse_args()) |