blob: 19c80e9471d7f24798132eecf62c3bbee250e050 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Collect compilation statistics from benchmark suites.
See docs/developers/developing_iree/benchmark_suites.md for how to build the
benchmark suites.
"""
import pathlib
import sys
# Add build_tools python dir to the search path.
sys.path.insert(0, str(pathlib.Path(__file__).parent.with_name("python")))
import argparse
import json
import os
import re
import zipfile
from dataclasses import asdict, dataclass
from typing import BinaryIO, Dict, List, Optional, TextIO
from common import benchmark_definition
from common.benchmark_definition import CompilationInfo, CompilationResults, CompilationStatistics, ModuleComponentSizes, get_git_commit_hash
from common import benchmark_config
from e2e_test_artifacts import iree_artifacts
from e2e_test_framework import serialization
from e2e_test_framework.definitions import iree_definitions
# NOTE(review): BENCHMARK_FLAGFILE, MODULE_DIR, MODULE_FILE_EXTENSION,
# NINJA_BUILD_LOG and COMPILATION_STATS_MODULE_SUFFIX are not referenced by any
# function visible in this script; presumably kept for other tooling — confirm
# before removing.
BENCHMARK_FLAGFILE = "flagfile"
MODULE_DIR = "vmfb"
MODULE_FILE_EXTENSION = "vmfb"
# Substring that must appear in the first line of the Ninja build log; checked
# by parse_compilation_time_from_ninja_log.
NINJA_LOG_HEADER = "ninja log v5"
NINJA_BUILD_LOG = ".ninja_log"
COMPILATION_STATS_MODULE_SUFFIX = "compile-stats"
# Directory name used by match_module_cmake_target when matching module paths
# against the expected artifact layout.
E2E_TEST_ARTIFACTS_REL_PATH = "e2e_test_artifacts"
# Archive entry names that get_module_component_info looks up when it reads a
# module file as a zip archive.
VM_COMPONENT_NAME = "module.fb"
CONST_COMPONENT_NAME = "_const.bin"
# Regexes (applied with re.match) for archive entry names whose sizes are
# summed into the total dispatch component size.
DISPATCH_COMPONENT_PATTERNS = [
    r".+_embedded_elf_.+\.so",
    r".+_vulkan_spirv_fb\.fb",
    r".+_cuda_nvptx_fb\.fb",
    r".+_vmvx_bytecode_fb\.fb",
]
@dataclass(frozen=True)
class ModuleInfo:
    """Immutable pair of file locations describing one compiled module."""

    # File that main() opens in binary mode and inspects as a zip archive.
    module_path: pathlib.Path
    # JSON file from which main() reads the stream statistics.
    stream_stats_path: pathlib.Path
def match_module_cmake_target(module_path: pathlib.PurePath) -> Optional[str]:
    """Derive the CMake target name for a module path, if it looks like one.

    A path qualifies when it matches the layout
    `<E2E_TEST_ARTIFACTS_REL_PATH>/iree_*/<iree_artifacts.MODULE_FILENAME>`,
    i.e. <e2e test artifacts dir>/iree_<module dir>/<module filename>.

    Args:
      module_path: path to test.

    Returns:
      The last three components of the path joined with "/", or None when the
      path does not match the expected layout.
    """
    expected_layout = (f"{E2E_TEST_ARTIFACTS_REL_PATH}/iree_*/"
                       f"{iree_artifacts.MODULE_FILENAME}")
    if not module_path.match(expected_layout):
        return None

    # A CMake target name is *not* a filesystem path, so always join with "/"
    # instead of os.path.join(), which would produce "\" on Windows.
    return "/".join(module_path.parts[-3:])
def parse_compilation_time_from_ninja_log(log: TextIO) -> Dict[str, int]:
    """Extract per-module compilation times (ms) from a Ninja build log.

    The first line must contain NINJA_LOG_HEADER. Every following line is
    split on tabs into exactly five fields; the first two are the start and
    end times and the fourth is the built target. The third and fifth fields
    are ignored. Only targets recognized by match_module_cmake_target are
    recorded; if a target appears more than once, the last entry wins.

    Args:
      log: text stream positioned at the start of the Ninja build log.

    Returns:
      Map of CMake target name to compilation time in ms.

    Raises:
      NotImplementedError: if the header line lacks NINJA_LOG_HEADER.
      ValueError: if a line does not split into five tab-separated fields, or
        a recorded entry has non-integer times.
    """
    first_line = log.readline()
    if NINJA_LOG_HEADER not in first_line:
        raise NotImplementedError(
            f"Unsupported ninja log version: {first_line}")

    build_times_ms = {}
    for entry in log:
        fields = entry.strip().split("\t")
        start_field, end_field, _, output_path, _ = fields
        target_name = match_module_cmake_target(pathlib.PurePath(output_path))
        if target_name is not None:
            # Times are converted only for entries that are actually recorded.
            build_times_ms[target_name] = int(end_field) - int(start_field)
    return build_times_ms
def get_module_component_info(module: BinaryIO,
                              module_file_bytes: int) -> ModuleComponentSizes:
    """Break a module archive down into the sizes of its components.

    The module is read as a zip archive. Entries named VM_COMPONENT_NAME and
    CONST_COMPONENT_NAME are reported individually (0 when absent); entries
    whose names match any of DISPATCH_COMPONENT_PATTERNS are summed.

    Args:
      module: binary stream of the module file.
      module_file_bytes: size of the whole module file, passed through
        unchanged as `file_bytes`.

    Returns:
      A ModuleComponentSizes populated with the sizes described above.

    Raises:
      RuntimeError: if the archive contains an entry that is neither one of
        the fixed-name components nor a dispatch component.
    """
    with zipfile.ZipFile(module) as archive:
        size_map = {
            entry.filename: entry.file_size for entry in archive.infolist()
        }

    recognized = set()

    def _claim(component_name: str) -> int:
        # Size of a fixed-name component, or 0 when the archive lacks it.
        if component_name not in size_map:
            return 0
        recognized.add(component_name)
        return size_map[component_name]

    vm_component_bytes = _claim(VM_COMPONENT_NAME)
    const_component_bytes = _claim(CONST_COMPONENT_NAME)

    total_dispatch_component_bytes = 0
    for entry_name, entry_bytes in size_map.items():
        # Each entry is counted at most once, however many patterns it matches.
        if any(
                re.match(pattern, entry_name)
                for pattern in DISPATCH_COMPONENT_PATTERNS):
            total_dispatch_component_bytes += entry_bytes
            recognized.add(entry_name)

    if recognized != set(size_map.keys()):
        raise RuntimeError(
            f"Unrecognized components in the module: {size_map.keys()}.")

    return ModuleComponentSizes(
        file_bytes=module_file_bytes,
        vm_component_bytes=vm_component_bytes,
        const_component_bytes=const_component_bytes,
        total_dispatch_component_bytes=total_dispatch_component_bytes)
def get_module_map_from_compilation_benchmark_config(
    compilation_benchmark_config_data: TextIO,
    e2e_test_artifacts_dir: pathlib.PurePath
) -> Dict[CompilationInfo, ModuleInfo]:
    """Map each module generation config in the benchmark config to its files.

    Args:
      compilation_benchmark_config_data: text stream holding the JSON config;
        its "generation_configs" entry is deserialized into a list of
        iree_definitions.ModuleGenerationConfig.
      e2e_test_artifacts_dir: root directory under which module directories
        are resolved via iree_artifacts.get_module_dir_path.

    Returns:
      Map from the CompilationInfo describing a generation config to the
      ModuleInfo holding its module and scheduling-stats file paths.
    """
    config_json = json.load(compilation_benchmark_config_data)
    generation_configs = serialization.unpack_and_deserialize(
        data=config_json["generation_configs"],
        root_type=List[iree_definitions.ModuleGenerationConfig])

    def _describe_target(compile_target) -> str:
        # "<arch type>-<architecture>-<microarchitecture>-<abi>"
        arch = compile_target.target_architecture
        return (
            f"{arch.type.value}-{arch.architecture}-{arch.microarchitecture}-"
            f"{compile_target.target_abi.value}")

    module_map = {}
    for gen_config in generation_configs:
        model = gen_config.imported_model.model
        compile_config = gen_config.compile_config
        joined_archs = ",".join(
            _describe_target(compile_target)
            for compile_target in compile_config.compile_targets)

        info_key = CompilationInfo(
            name=gen_config.name,
            model_name=model.name,
            model_tags=tuple(model.tags),
            model_source=model.source_type.value,
            target_arch=f"[{joined_archs}]",
            compile_tags=tuple(compile_config.tags),
            gen_config_id=gen_config.composite_id)

        module_dir = pathlib.Path(
            iree_artifacts.get_module_dir_path(
                module_generation_config=gen_config,
                root_path=e2e_test_artifacts_dir))
        module_map[info_key] = ModuleInfo(
            module_path=module_dir / iree_artifacts.MODULE_FILENAME,
            stream_stats_path=(module_dir /
                               iree_artifacts.SCHEDULING_STATS_FILENAME))

    return module_map
def _check_dir_path(path_str: str) -> pathlib.Path:
    """Argparse `type=` converter that accepts only existing directories.

    Args:
      path_str: raw command-line value.

    Returns:
      The value as a pathlib.Path.

    Raises:
      argparse.ArgumentTypeError: if the path is not a directory.
    """
    candidate = pathlib.Path(path_str)
    if candidate.is_dir():
        return candidate
    raise argparse.ArgumentTypeError(f"{candidate} is not a directory.")
def _check_file_path(path_str: str) -> pathlib.Path:
    """Argparse `type=` converter that accepts only existing regular files.

    Args:
      path_str: raw command-line value.

    Returns:
      The value as a pathlib.Path.

    Raises:
      argparse.ArgumentTypeError: if the path is not a file.
    """
    candidate = pathlib.Path(path_str)
    if candidate.is_file():
        return candidate
    raise argparse.ArgumentTypeError(f"{candidate} is not a file.")
def _parse_arguments():
    """Parse the command-line flags of this script.

    Returns:
      The argparse.Namespace produced by parsing the process arguments. The
      three required flags are validated as existing files/directories;
      `--output` is optional and left as None when omitted.
    """
    parser = argparse.ArgumentParser(
        description="Collect compilation statistics from benchmark suites.")

    # (flag, validator, help text) for every required path-valued flag, in
    # the order they are registered.
    required_path_flags = (
        ("--compilation_benchmark_config", _check_file_path,
         "Exported compilation benchmark config of e2e test artifacts."),
        ("--build_log", _check_file_path, "Path to the ninja build log."),
        ("--e2e_test_artifacts_dir", _check_dir_path,
         "Path to the e2e test artifacts directory."),
    )
    for flag, validator, help_text in required_path_flags:
        parser.add_argument(flag,
                            type=validator,
                            required=True,
                            help=help_text)

    parser.add_argument("--output",
                        type=pathlib.Path,
                        help="Path to output JSON file.")
    return parser.parse_args()
def main(args: argparse.Namespace):
    """Collect compilation statistics for every module and emit them as JSON.

    For each module listed in the compilation benchmark config this gathers
    the component sizes of the module file, the compilation time recorded in
    the Ninja build log, and the dispatch count from the module's stream
    statistics file. The combined results, tagged with the git commit hash of
    HEAD, are dumped as indented JSON.

    Args:
      args: parsed command-line flags; reads `compilation_benchmark_config`,
        `e2e_test_artifacts_dir`, `build_log` and `output`. When `output` is
        None the JSON is printed to stdout, otherwise it is written to that
        path.

    Raises:
      RuntimeError: if a module path does not correspond to a module CMake
        target.
      KeyError: if a module's CMake target has no entry in the build log.
    """
    # Open the config through a context manager so the file handle is closed
    # as soon as it has been parsed instead of leaking until interpreter exit.
    with args.compilation_benchmark_config.open("r") as config_data:
        module_map = get_module_map_from_compilation_benchmark_config(
            compilation_benchmark_config_data=config_data,
            e2e_test_artifacts_dir=args.e2e_test_artifacts_dir)

    build_log_path = args.build_log
    with build_log_path.open("r") as log_file:
        target_build_time_map = parse_compilation_time_from_ninja_log(log_file)

    compilation_statistics_list = []
    for compilation_info, module_info in module_map.items():
        module_path = module_info.module_path
        with module_path.open("rb") as module_file:
            module_component_sizes = get_module_component_info(
                module_file,
                module_path.stat().st_size)

        cmake_target = match_module_cmake_target(module_path)
        if cmake_target is None:
            raise RuntimeError(
                f"Module path isn't a module cmake target: {module_path}")
        compilation_time_ms = target_build_time_map[cmake_target]

        stream_stats_json = json.loads(
            module_info.stream_stats_path.read_text())
        exec_stats_json = stream_stats_json["stream-aggregate"]["execution"]
        ir_stats = benchmark_definition.IRStatistics(
            stream_dispatch_count=exec_stats_json["dispatch-count"])

        compilation_statistics = CompilationStatistics(
            compilation_info=compilation_info,
            module_component_sizes=module_component_sizes,
            compilation_time_ms=compilation_time_ms,
            ir_stats=ir_stats)
        compilation_statistics_list.append(compilation_statistics)

    commit = get_git_commit_hash("HEAD")
    compilation_results = CompilationResults(
        commit=commit, compilation_statistics=compilation_statistics_list)

    json_output = json.dumps(asdict(compilation_results), indent=2)
    if args.output is None:
        print(json_output)
    else:
        args.output.write_text(json_output)
# Script entry point: parse the command-line flags, then collect and emit the
# compilation statistics.
if __name__ == "__main__":
    main(_parse_arguments())