| #!/usr/bin/env python3 |
| # Copyright 2022 The IREE Authors |
| # |
| # Licensed under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| """Collect compilation statistics from benchmark suites. |
| |
| See docs/developers/developing_iree/benchmark_suites.md for how to build the |
| benchmark suites. |
| """ |
| |
| import pathlib |
| import sys |
| |
| # Add build_tools python dir to the search path. |
| sys.path.insert(0, str(pathlib.Path(__file__).parent.with_name("python"))) |
| |
| import argparse |
| import json |
| import os |
| import re |
| import zipfile |
| |
| from dataclasses import asdict, dataclass |
| from typing import BinaryIO, Dict, List, Optional, TextIO |
| |
| from common import benchmark_definition |
| from common.benchmark_definition import CompilationInfo, CompilationResults, CompilationStatistics, ModuleComponentSizes, get_git_commit_hash |
| from common import benchmark_config |
| from e2e_test_artifacts import iree_artifacts |
| from e2e_test_framework import serialization |
| from e2e_test_framework.definitions import iree_definitions |
| |
| BENCHMARK_FLAGFILE = "flagfile" |
| MODULE_DIR = "vmfb" |
| MODULE_FILE_EXTENSION = "vmfb" |
| NINJA_LOG_HEADER = "ninja log v5" |
| NINJA_BUILD_LOG = ".ninja_log" |
| COMPILATION_STATS_MODULE_SUFFIX = "compile-stats" |
| E2E_TEST_ARTIFACTS_REL_PATH = "e2e_test_artifacts" |
| |
| VM_COMPONENT_NAME = "module.fb" |
| CONST_COMPONENT_NAME = "_const.bin" |
| DISPATCH_COMPONENT_PATTERNS = [ |
| r".+_embedded_elf_.+\.so", |
| r".+_vulkan_spirv_fb\.fb", |
| r".+_cuda_nvptx_fb\.fb", |
| r".+_vmvx_bytecode_fb\.fb", |
| ] |
| |
| |
| @dataclass(frozen=True) |
| class ModuleInfo(object): |
| module_path: pathlib.Path |
| stream_stats_path: pathlib.Path |
| |
| |
| def match_module_cmake_target(module_path: pathlib.PurePath) -> Optional[str]: |
| if module_path.match(f"{E2E_TEST_ARTIFACTS_REL_PATH}/iree_*/" |
| f"{iree_artifacts.MODULE_FILENAME}"): |
| # <e2e test artifacts dir>/iree_<module dir>/<module filename> |
| path_parts = module_path.parts[-3:] |
| # Join to get the CMake target name. This is *not* a filesystem path, so we |
| # don't want \ separators on Windows that we would get with os.path.join(). |
| return '/'.join(path_parts) |
| |
| return None |
| |
| |
| def parse_compilation_time_from_ninja_log(log: TextIO) -> Dict[str, int]: |
| """Retrieve the compilation time (ms) from the Ninja build log. |
| |
| Returns: |
| Map of target name and compilation time in ms. |
| """ |
| |
| target_build_time_map = {} |
| header = log.readline() |
| if NINJA_LOG_HEADER not in header: |
| raise NotImplementedError(f"Unsupported ninja log version: {header}") |
| |
| for line in log: |
| start_time, end_time, _, target, _ = line.strip().split("\t") |
| cmake_target = match_module_cmake_target(pathlib.PurePath(target)) |
| if cmake_target is None: |
| continue |
| |
| start_time = int(start_time) |
| end_time = int(end_time) |
| target_build_time_map[cmake_target] = end_time - start_time |
| |
| return target_build_time_map |
| |
| |
| def get_module_component_info(module: BinaryIO, |
| module_file_bytes: int) -> ModuleComponentSizes: |
| with zipfile.ZipFile(module) as module_zipfile: |
| size_map = dict( |
| (info.filename, info.file_size) for info in module_zipfile.infolist()) |
| |
| identified_names = set() |
| if VM_COMPONENT_NAME in size_map: |
| vm_component_bytes = size_map[VM_COMPONENT_NAME] |
| identified_names.add(VM_COMPONENT_NAME) |
| else: |
| vm_component_bytes = 0 |
| |
| if CONST_COMPONENT_NAME in size_map: |
| const_component_bytes = size_map[CONST_COMPONENT_NAME] |
| identified_names.add(CONST_COMPONENT_NAME) |
| else: |
| const_component_bytes = 0 |
| |
| total_dispatch_component_bytes = 0 |
| for filename, size in size_map.items(): |
| for pattern in DISPATCH_COMPONENT_PATTERNS: |
| if re.match(pattern, filename): |
| total_dispatch_component_bytes += size |
| identified_names.add(filename) |
| break |
| |
| if identified_names != set(size_map.keys()): |
| raise RuntimeError( |
| f"Unrecognized components in the module: {size_map.keys()}.") |
| |
| return ModuleComponentSizes( |
| file_bytes=module_file_bytes, |
| vm_component_bytes=vm_component_bytes, |
| const_component_bytes=const_component_bytes, |
| total_dispatch_component_bytes=total_dispatch_component_bytes) |
| |
| |
| def get_module_map_from_compilation_benchmark_config( |
| compilation_benchmark_config_data: TextIO, |
| e2e_test_artifacts_dir: pathlib.PurePath |
| ) -> Dict[CompilationInfo, ModuleInfo]: |
| benchmark_config = json.load(compilation_benchmark_config_data) |
| gen_configs = serialization.unpack_and_deserialize( |
| data=benchmark_config["generation_configs"], |
| root_type=List[iree_definitions.ModuleGenerationConfig]) |
| module_map = {} |
| for gen_config in gen_configs: |
| model = gen_config.imported_model.model |
| compile_config = gen_config.compile_config |
| target_archs = [] |
| for compile_target in compile_config.compile_targets: |
| arch = compile_target.target_architecture |
| target_archs.append( |
| (f"{arch.type.value}-{arch.architecture}-{arch.microarchitecture}-" |
| f"{compile_target.target_abi.value}")) |
| compilation_info = CompilationInfo( |
| name=gen_config.name, |
| model_name=model.name, |
| model_tags=tuple(model.tags), |
| model_source=model.source_type.value, |
| target_arch=f"[{','.join(target_archs)}]", |
| compile_tags=tuple(compile_config.tags), |
| gen_config_id=gen_config.composite_id) |
| module_dir_path = pathlib.Path( |
| iree_artifacts.get_module_dir_path(module_generation_config=gen_config, |
| root_path=e2e_test_artifacts_dir)) |
| module_path = module_dir_path / iree_artifacts.MODULE_FILENAME |
| stream_stats_path = (module_dir_path / |
| iree_artifacts.SCHEDULING_STATS_FILENAME) |
| module_map[compilation_info] = ModuleInfo( |
| module_path=module_path, stream_stats_path=stream_stats_path) |
| |
| return module_map |
| |
| |
| def _check_dir_path(path_str: str) -> pathlib.Path: |
| path = pathlib.Path(path_str) |
| if not path.is_dir(): |
| raise argparse.ArgumentTypeError(f"{path} is not a directory.") |
| return path |
| |
| |
| def _check_file_path(path_str: str) -> pathlib.Path: |
| path = pathlib.Path(path_str) |
| if not path.is_file(): |
| raise argparse.ArgumentTypeError(f"{path} is not a file.") |
| return path |
| |
| |
| def _parse_arguments(): |
| """Returns an argument parser with common options.""" |
| |
| parser = argparse.ArgumentParser( |
| description="Collect compilation statistics from benchmark suites.") |
| parser.add_argument( |
| "--compilation_benchmark_config", |
| type=_check_file_path, |
| required=True, |
| help="Exported compilation benchmark config of e2e test artifacts.") |
| parser.add_argument("--build_log", |
| type=_check_file_path, |
| required=True, |
| help="Path to the ninja build log.") |
| parser.add_argument("--e2e_test_artifacts_dir", |
| type=_check_dir_path, |
| required=True, |
| help="Path to the e2e test artifacts directory.") |
| parser.add_argument("--output", |
| type=pathlib.Path, |
| help="Path to output JSON file.") |
| |
| return parser.parse_args() |
| |
| |
| def main(args: argparse.Namespace): |
| config_data = args.compilation_benchmark_config.open("r") |
| module_map = get_module_map_from_compilation_benchmark_config( |
| compilation_benchmark_config_data=config_data, |
| e2e_test_artifacts_dir=args.e2e_test_artifacts_dir) |
| build_log_path = args.build_log |
| |
| with build_log_path.open("r") as log_file: |
| target_build_time_map = parse_compilation_time_from_ninja_log(log_file) |
| |
| compilation_statistics_list = [] |
| for compilation_info, module_info in module_map.items(): |
| module_path = module_info.module_path |
| with module_path.open("rb") as module_file: |
| module_component_sizes = get_module_component_info( |
| module_file, |
| module_path.stat().st_size) |
| |
| cmake_target = match_module_cmake_target(module_path) |
| if cmake_target is None: |
| raise RuntimeError( |
| f"Module path isn't a module cmake target: {module_path}") |
| compilation_time_ms = target_build_time_map[cmake_target] |
| |
| stream_stats_json = json.loads(module_info.stream_stats_path.read_text()) |
| exec_stats_json = stream_stats_json["stream-aggregate"]["execution"] |
| ir_stats = benchmark_definition.IRStatistics( |
| stream_dispatch_count=exec_stats_json["dispatch-count"]) |
| |
| compilation_statistics = CompilationStatistics( |
| compilation_info=compilation_info, |
| module_component_sizes=module_component_sizes, |
| compilation_time_ms=compilation_time_ms, |
| ir_stats=ir_stats) |
| compilation_statistics_list.append(compilation_statistics) |
| |
| commit = get_git_commit_hash("HEAD") |
| compilation_results = CompilationResults( |
| commit=commit, compilation_statistics=compilation_statistics_list) |
| |
| json_output = json.dumps(asdict(compilation_results), indent=2) |
| if args.output is None: |
| print(json_output) |
| else: |
| args.output.write_text(json_output) |
| |
| |
| if __name__ == "__main__": |
| main(_parse_arguments()) |