|  | # Copyright 2022 The IREE Authors | 
|  | # | 
|  | # Licensed under the Apache License v2.0 with LLVM Exceptions. | 
|  | # See https://llvm.org/LICENSE.txt for license information. | 
|  | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | """ Runs benchmarks and saves results to a .csv file | 
|  |  | 
|  | Expects a directory structure of: | 
|  | <root-benchmark-dir>/ | 
|  | └── ./benchmark_model (TFLite benchmark binary) | 
|  | ./iree-benchmark-module (IREE benchmark binary) | 
|  | ├── setup/ | 
|  | ├── set_adreno_gpu_scaling_policy.sh | 
|  | ├── set_android_scaling_governor.sh | 
|  | └── set_pixel6_gpu_scaling_policy.sh | 
|  | ├── test_data/ | 
|  | └── models/ | 
|  | ├── tflite/*.tflite | 
|  | └── iree/ | 
|  | └── <target>/*.vmfb e.g. llvm-cpu, vulkan, cuda. | 
|  |  | 
|  | """ | 
|  |  | 
|  | import argparse | 
|  | import os | 
|  |  | 
|  | from common.benchmark_runner import * | 
|  | from common.utils import * | 
|  | from mobilebert_fp32_commands import * | 
|  | from mobilebert_int8_commands import * | 
|  | from simple_commands import * | 
|  |  | 
|  |  | 
def benchmark_desktop_cpu(
    device_name: str,
    command_factories: list[BenchmarkCommandFactory],
    results_path: str,
):
    """Runs the desktop CPU benchmarks and appends one CSV row per run.

    Every benchmark command is executed once per thread count in
    {1, 2, 4, 8}; each row records the device, model, runtime, driver,
    thread count, and the metrics produced by `run_command`.
    """
    commands = [
        command
        for factory in command_factories
        for command in factory.generate_benchmark_commands("desktop", "cpu")
    ]

    for thread_count in (1, 2, 4, 8):
        for command in commands:
            row = [
                device_name,
                command.model_name,
                command.runtime,
                command.driver,
                thread_count,
            ]
            command.num_threads = thread_count
            row.extend(run_command(command))
            write_benchmark_result(row, results_path)
|  |  | 
|  |  | 
def benchmark_desktop_gpu(
    device_name: str,
    command_factories: list[BenchmarkCommandFactory],
    results_path: str,
):
    """Runs the desktop GPU benchmarks and appends one CSV row per run.

    Unlike the CPU path, the thread count is taken from each command's own
    `num_threads` attribute rather than swept over a range.
    """
    commands = [
        command
        for factory in command_factories
        for command in factory.generate_benchmark_commands("desktop", "gpu")
    ]

    for command in commands:
        row = [
            device_name,
            command.model_name,
            command.runtime,
            command.driver,
            command.num_threads,
        ]
        row.extend(run_command(command))
        write_benchmark_result(row, results_path)
|  |  | 
|  |  | 
def benchmark_mobile_cpu(
    device_name: str,
    command_factories: list[BenchmarkCommandFactory],
    results_path: str,
):
    """Runs the mobile CPU benchmarks and appends one CSV row per run.

    Each benchmark command is executed under several (taskset, num_threads)
    configurations; each row records the device, model, runtime, driver,
    taskset mask, thread count, and the metrics produced by `run_command`.
    """
    benchmarks = []
    for factory in command_factories:
        benchmarks.extend(factory.generate_benchmark_commands("mobile", "cpu"))

    # Each pair is (taskset hex mask, thread count). NOTE(review): the masks
    # presumably select big/little core clusters on the target phone — confirm
    # against the specific device's core layout.
    configs = [("80", 1), ("C0", 2), ("F0", 4), ("0F", 4), ("FF", 8)]
    # Fix: unpack directly instead of `for _, tuple in enumerate(...)`, which
    # discarded the index and shadowed the builtin `tuple`.
    for taskset, num_threads in configs:
        for benchmark in benchmarks:
            results_array = [
                device_name,
                benchmark.model_name,
                benchmark.runtime,
                benchmark.driver,
                taskset,
                num_threads,
            ]
            benchmark.taskset = taskset
            benchmark.num_threads = num_threads
            results_array.extend(run_command(benchmark))
            write_benchmark_result(results_array, results_path)
|  |  | 
|  |  | 
def benchmark_mobile_gpu(
    device_name: str,
    command_factories: list[BenchmarkCommandFactory],
    results_path: str,
):
    """Runs the mobile GPU benchmarks and appends one CSV row per run.

    Every command is run with a single configuration: taskset mask "80"
    and one thread.
    """
    commands = []
    for factory in command_factories:
        commands += factory.generate_benchmark_commands("mobile", "gpu")

    taskset, num_threads = "80", 1
    for command in commands:
        command.taskset = taskset
        command.num_threads = num_threads
        row = [
            device_name,
            command.model_name,
            command.runtime,
            command.driver,
            taskset,
            num_threads,
        ]
        row.extend(run_command(command))
        write_benchmark_result(row, results_path)
|  |  | 
|  |  | 
def main(args):
    """Builds the benchmark command factories and runs the selected suites.

    Writes a fresh CSV header to <output_dir>/results.csv, then runs the CPU
    and/or GPU benchmarks for the chosen mode ("desktop" or "mobile"), each of
    which appends its result rows to that file.
    """
    # Create factories for all models to be benchmarked.
    command_factory = [
        MobilebertFP32CommandFactory(args.base_dir, "mobilebert_float_384_gpu"),
        MobilebertInt8CommandFactory(args.base_dir),
        MobilebertFP32CommandFactory(args.base_dir, "albert_lite_base_squadv1_1"),
        SimpleCommandFactory(args.base_dir, "mobilenet_v2_1.0_224", "1x224x224x3xf32"),
        SimpleCommandFactory(
            args.base_dir, "mobilenet_v2_224_1.0_uint8", "1x224x224x3xui8"
        ),
        SimpleCommandFactory(args.base_dir, "deeplabv3", "1x257x257x3xf32"),
        SimpleCommandFactory(args.base_dir, "person_detect", "1x96x96x1xi8"),
        SimpleCommandFactory(
            args.base_dir, "ssd_mobilenet_v2_static_1.0_int8", "1x320x320x3xi8"
        ),
        SimpleCommandFactory(
            args.base_dir, "resnet_v2_101_1_default_1", "1x299x299x3xf32"
        ),
        SimpleCommandFactory(
            args.base_dir, "ssd_mobilenet_v2_fpnlite_uint8", "1x320x320x3xui8"
        ),
        SimpleCommandFactory(
            args.base_dir, "ssd_mobilenet_v2_fpnlite_fp32", "1x320x320x3xf32"
        ),
        SimpleCommandFactory(
            args.base_dir, "efficientnet_lite0_int8_2", "1x224x224x3xui8"
        ),
        SimpleCommandFactory(
            args.base_dir, "efficientnet_lite0_fp32_2", "1x224x224x3xf32"
        ),
        SimpleCommandFactory(args.base_dir, "inception_v4_299_uint8", "1x299x299x3xui8"),
        SimpleCommandFactory(args.base_dir, "inception_v4_299_fp32", "1x299x299x3xf32"),
    ]

    # The two modes differ only in the CSV header (mobile adds a "taskset"
    # column) and in which benchmark functions are dispatched.
    if args.mode == "desktop":
        header = "device,model,runtime,driver/delegate,threads,latency (ms),vmhwm (KB),vmrss (KB),rssfile (KB)\n"
        run_cpu, run_gpu = benchmark_desktop_cpu, benchmark_desktop_gpu
    else:
        assert args.mode == "mobile"
        header = "device,model,runtime,driver/delegate,taskset,threads,latency (ms),vmhwm (KB),vmrss (KB),rssfile (KB)\n"
        run_cpu, run_gpu = benchmark_mobile_cpu, benchmark_mobile_gpu

    results_path = os.path.join(args.output_dir, "results.csv")
    with open(results_path, "w") as f:
        f.write(header)

    if not args.disable_cpu:
        run_cpu(args.device_name, command_factory, results_path)
    if not args.disable_gpu:
        run_gpu(args.device_name, command_factory, results_path)
|  |  | 
|  |  | 
def parse_args():
    """Defines and parses the command-line flags for this script."""
    parser = argparse.ArgumentParser()

    # Plain string options, all optional with no default value.
    string_options = (
        (
            "--device_name",
            "The name of the device the benchmark is running on e.g. Pixel 6",
        ),
        (
            "--base_dir",
            "The directory where all benchmarking artifacts are located.",
        ),
        (
            "--output_dir",
            "The directory to save output artifacts into.",
        ),
    )
    for flag, help_text in string_options:
        parser.add_argument(flag, type=str, default=None, help=help_text)

    parser.add_argument(
        "--mode",
        type=str,
        choices=("desktop", "mobile"),
        default="desktop",
        help="The benchmarking mode to use. If mode is `mobile`, uses tasksets.",
    )

    # Boolean switches, off by default.
    boolean_options = (
        ("--disable_cpu", "Disables running benchmarks on CPU."),
        ("--disable_gpu", "Disables running benchmarks on GPU."),
    )
    for flag, help_text in boolean_options:
        parser.add_argument(flag, action="store_true", help=help_text)

    return parser.parse_args()
|  |  | 
|  |  | 
# Script entry point: parse CLI flags, then run the selected benchmark suites.
if __name__ == "__main__":
    main(parse_args())