| # Copyright 2022 The IREE Authors |
| # |
| # Licensed under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| """ Runs benchmarks and saves results to a .csv file |
| |
| Expects a directory structure of: |
<root-benchmark-dir>/
├── benchmark_model (TFLite benchmark binary)
├── iree-benchmark-module (IREE benchmark binary)
├── setup/
│   ├── set_adreno_gpu_scaling_policy.sh
│   ├── set_android_scaling_governor.sh
│   └── set_pixel6_gpu_scaling_policy.sh
├── test_data/
└── models/
    ├── tflite/*.tflite
    └── iree/
        └── <target>/*.vmfb e.g. llvm-cpu, vulkan, cuda.
| |
| """ |
| |
| import argparse |
| import os |
| |
| from common.benchmark_runner import * |
| from common.utils import * |
| from mobilebert_fp32_commands import * |
| from mobilebert_int8_commands import * |
| from simple_commands import * |
| |
| |
def benchmark_desktop_cpu(device_name: str,
                          command_factories: list[BenchmarkCommandFactory],
                          results_path: str):
  """Runs every desktop CPU benchmark at 1, 2, 4 and 8 threads.

  Args:
    device_name: Name of the device, recorded in each result row.
    command_factories: Factories producing the benchmark commands to run.
    results_path: Path of the .csv file result rows are appended to.
  """
  commands = [
      command for factory in command_factories
      for command in factory.generate_benchmark_commands("desktop", "cpu")
  ]

  for thread_count in [1, 2, 4, 8]:
    for command in commands:
      command.num_threads = thread_count
      row = [
          device_name, command.model_name, command.runtime, command.driver,
          thread_count
      ]
      row.extend(run_command(command))
      write_benchmark_result(row, results_path)
| |
| |
def benchmark_desktop_gpu(device_name: str,
                          command_factories: list[BenchmarkCommandFactory],
                          results_path: str):
  """Runs every desktop GPU benchmark and appends results to `results_path`.

  Args:
    device_name: Name of the device, recorded in each result row.
    command_factories: Factories producing the benchmark commands to run.
    results_path: Path of the .csv file result rows are appended to.
  """
  commands = [
      command for factory in command_factories
      for command in factory.generate_benchmark_commands("desktop", "gpu")
  ]
  for command in commands:
    # Thread count is whatever the factory configured; it is only recorded.
    row = [
        device_name, command.model_name, command.runtime, command.driver,
        command.num_threads
    ]
    row.extend(run_command(command))
    write_benchmark_result(row, results_path)
| |
| |
def benchmark_mobile_cpu(device_name: str,
                         command_factories: list[BenchmarkCommandFactory],
                         results_path: str):
  """Runs mobile CPU benchmarks over several taskset/thread configurations.

  Args:
    device_name: Name of the device, recorded in each result row.
    command_factories: Factories producing the benchmark commands to run.
    results_path: Path of the .csv file result rows are appended to.
  """
  benchmarks = []
  for factory in command_factories:
    benchmarks.extend(factory.generate_benchmark_commands("mobile", "cpu"))

  # (CPU affinity mask, thread count) pairs. The hex masks select which cores
  # the benchmark may run on — presumably big vs. little core clusters on the
  # target phone; confirm against the device's core topology.
  for taskset, num_threads in [("80", 1), ("C0", 2), ("F0", 4), ("0F", 4),
                               ("FF", 8)]:
    for benchmark in benchmarks:
      benchmark.taskset = taskset
      benchmark.num_threads = num_threads
      results_array = [
          device_name, benchmark.model_name, benchmark.runtime,
          benchmark.driver, taskset, num_threads
      ]
      results_array.extend(run_command(benchmark))
      write_benchmark_result(results_array, results_path)
| |
| |
def benchmark_mobile_gpu(device_name: str,
                         command_factories: list[BenchmarkCommandFactory],
                         results_path: str):
  """Runs every mobile GPU benchmark with a fixed taskset and one thread.

  Args:
    device_name: Name of the device, recorded in each result row.
    command_factories: Factories producing the benchmark commands to run.
    results_path: Path of the .csv file result rows are appended to.
  """
  commands = [
      command for factory in command_factories
      for command in factory.generate_benchmark_commands("mobile", "gpu")
  ]

  # GPU runs still use one host CPU thread, pinned via an affinity mask —
  # presumably to a single big core; confirm against the device topology.
  cpu_mask = "80"
  thread_count = 1
  for command in commands:
    command.taskset = cpu_mask
    command.num_threads = thread_count
    row = [
        device_name, command.model_name, command.runtime, command.driver,
        cpu_mask, thread_count
    ]
    row.extend(run_command(command))
    write_benchmark_result(row, results_path)
| |
| |
def _build_command_factories(base_dir: str) -> list:
  """Returns one benchmark-command factory per model to be benchmarked."""
  factories = [
      MobilebertFP32CommandFactory(base_dir, "mobilebert_float_384_gpu"),
      MobilebertInt8CommandFactory(base_dir),
      MobilebertFP32CommandFactory(base_dir, "albert_lite_base_squadv1_1"),
  ]
  # (model name, input shape) pairs for models driven by the generic factory.
  simple_models = [
      ("mobilenet_v2_1.0_224", "1x224x224x3xf32"),
      ("mobilenet_v2_224_1.0_uint8", "1x224x224x3xui8"),
      ("deeplabv3", "1x257x257x3xf32"),
      ("person_detect", "1x96x96x1xi8"),
      ("ssd_mobilenet_v2_static_1.0_int8", "1x320x320x3xi8"),
      ("resnet_v2_101_1_default_1", "1x299x299x3xf32"),
      ("ssd_mobilenet_v2_fpnlite_uint8", "1x320x320x3xui8"),
      ("ssd_mobilenet_v2_fpnlite_fp32", "1x320x320x3xf32"),
      ("efficientnet_lite0_int8_2", "1x224x224x3xui8"),
      ("efficientnet_lite0_fp32_2", "1x224x224x3xf32"),
      ("inception_v4_299_uint8", "1x299x299x3xui8"),
      ("inception_v4_299_fp32", "1x299x299x3xf32"),
  ]
  factories.extend(
      SimpleCommandFactory(base_dir, name, shape)
      for name, shape in simple_models)
  return factories


def main(args):
  """Runs the selected benchmarks and writes <output_dir>/results.csv.

  Args:
    args: Parsed command-line arguments (see `parse_args`).
  """
  command_factory = _build_command_factories(args.base_dir)

  results_path = os.path.join(args.output_dir, "results.csv")
  common_columns = "device,model,runtime,driver/delegate"
  metric_columns = "latency (ms),vmhwm (KB),vmrss (KB),rssfile (KB)"

  if args.mode == "desktop":
    with open(results_path, "w") as f:
      f.write(f"{common_columns},threads,{metric_columns}\n")

    if not args.disable_cpu:
      benchmark_desktop_cpu(args.device_name, command_factory, results_path)
    if not args.disable_gpu:
      benchmark_desktop_gpu(args.device_name, command_factory, results_path)
  else:
    assert (args.mode == "mobile")
    # Mobile results carry an extra `taskset` (CPU affinity mask) column.
    with open(results_path, "w") as f:
      f.write(f"{common_columns},taskset,threads,{metric_columns}\n")

    if not args.disable_cpu:
      benchmark_mobile_cpu(args.device_name, command_factory, results_path)
    if not args.disable_gpu:
      benchmark_mobile_gpu(args.device_name, command_factory, results_path)
| |
| |
def parse_args(argv=None):
  """Parses command-line flags for the benchmark runner.

  Args:
    argv: Optional list of argument strings; defaults to `sys.argv[1:]`.
      (Parameter added for testability; existing callers are unaffected.)

  Returns:
    An `argparse.Namespace` with the parsed options.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      "--device_name",
      type=str,
      default=None,
      help="The name of the device the benchmark is running on e.g. Pixel 6")
  parser.add_argument(
      "--base_dir",
      type=str,
      default=None,
      help="The directory where all benchmarking artifacts are located.")
  parser.add_argument("--output_dir",
                      type=str,
                      default=None,
                      help="The directory to save output artifacts into.")
  parser.add_argument(
      "--mode",
      type=str,
      choices=("desktop", "mobile"),
      default="desktop",
      help="The benchmarking mode to use. If mode is `mobile`, uses tasksets.")
  parser.add_argument("--disable_cpu",
                      action="store_true",
                      help="Disables running benchmarks on CPU.")
  parser.add_argument("--disable_gpu",
                      action="store_true",
                      help="Disables running benchmarks on GPU.")
  return parser.parse_args(argv)
| |
| |
# Script entry point: parse flags and run the benchmarks (skipped on import).
if __name__ == '__main__':
  main(parse_args())