# Copyright 2021 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import urllib.parse
import markdown_strings as md

from dataclasses import dataclass
from typing import Any, Dict, Optional, Sequence, Tuple

from .benchmark_definition import BenchmarkResults
from .benchmark_thresholds import BENCHMARK_THRESHOLDS, ThresholdUnit

PERFBOARD_SERIES_PREFIX = "https://perf.iree.dev/serie?IREE?"


@dataclass
class AggregateBenchmarkLatency:
  """An object for describing aggregate latency numbers for a benchmark."""
  mean_time: int
  median_time: int
  stddev_time: int
  # The average latency time for the base commit to compare against.
  base_mean_time: Optional[int] = None
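
# A hedged construction sketch (illustrative values only, not part of the real
# reporting flow): an AggregateBenchmarkLatency can be built directly when a
# baseline mean is already known.
#
#   latency = AggregateBenchmarkLatency(
#       mean_time=12, median_time=11, stddev_time=1, base_mean_time=10)
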

def aggregate_all_benchmarks(
    benchmark_files: Sequence[str],
    expected_pr_commit: Optional[str] = None,
    verbose: bool = False) -> Dict[str, AggregateBenchmarkLatency]:
  """Aggregates all benchmarks in the given files.

  Args:
  - benchmark_files: A list of JSON files, each of which can be decoded as a
    BenchmarkResults.
  - expected_pr_commit: An optional Git commit SHA to match against.

  Returns:
  - A dict of benchmark names to AggregateBenchmarkLatency numbers.
  """

  aggregate_results = {}

  for benchmark_file in benchmark_files:
    with open(benchmark_file) as f:
      content = f.read()
    file_results = BenchmarkResults.from_json_str(content)

    if (expected_pr_commit is not None) and \
        (file_results.commit != expected_pr_commit):
      raise ValueError("Inconsistent pull request commit")

    for benchmark_index in range(len(file_results.benchmarks)):
      benchmark_case = file_results.benchmarks[benchmark_index]

      # Make sure each benchmark has a unique name.
      name = str(benchmark_case.benchmark_info)
      if name in aggregate_results:
        raise ValueError(f"Duplicated benchmark: {name}")

      # Now scan all benchmark iterations and find the aggregate results.
      mean_time = file_results.get_aggregate_time(benchmark_index, "mean")
      median_time = file_results.get_aggregate_time(benchmark_index, "median")
      stddev_time = file_results.get_aggregate_time(benchmark_index, "stddev")

      aggregate_results[name] = AggregateBenchmarkLatency(
          mean_time, median_time, stddev_time)

  return aggregate_results
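
# A hedged usage sketch: aggregating two hypothetical result files produced for
# the same pull request commit. The file names and commit SHA below are
# illustrative only.
#
#   results = aggregate_all_benchmarks(
#       ["pixel-4-results.json", "galaxy-s20-results.json"],
#       expected_pr_commit="deadbeef")
#   for name, latency in results.items():
#     print(f"{name}: mean={latency.mean_time}ms")
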

def _make_benchmark_clickable(name: str) -> str:
  """Add link to the given benchmark name."""
  url = PERFBOARD_SERIES_PREFIX + urllib.parse.quote(name, safe="()[]@,")
  return md.link(name, url)
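
# A hedged illustration of the markdown this helper should produce, using the
# "serie?IREE?<name>" URL prefix above and an illustrative benchmark name
# (brackets and parentheses stay unescaped because of the `safe` set):
#
#   _make_benchmark_clickable("MobileNetV2 [fp32] (TFLite)")
#   # -> roughly "[MobileNetV2 [fp32] (TFLite)](https://perf.iree.dev/serie?IREE?MobileNetV2%20[fp32]%20(TFLite))"
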

def _add_header_and_get_markdown_table(names: Tuple[str, ...],
                                       means: Tuple[Any, ...],
                                       medians: Tuple[int, ...],
                                       stddevs: Tuple[int, ...],
                                       size_cut: Optional[int] = None) -> str:
  """Generates a markdown table with proper headers for benchmarks.

  Args:
  - size_cut: If not None, only show the top N results for each table.
  """
  total_size = len(names)
  if size_cut is not None:
    names = names[0:size_cut]
    means = means[0:size_cut]
    medians = medians[0:size_cut]
    stddevs = stddevs[0:size_cut]

  names = tuple([_make_benchmark_clickable(name) for name in names])
  names = ("Benchmark Name",) + names
  means = ("Average Latency (ms)",) + means
  medians = ("Median Latency (ms)",) + medians
  stddevs = ("Latency Standard Deviation (ms)",) + stddevs

  table_str = md.table([names, means, medians, stddevs])
  if size_cut is not None and size_cut < total_size:
    table_str += "\n\n"
    table_str += md.italics(
        f"[Top {size_cut} out of {total_size} benchmark results shown]")
  return table_str
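
# A hedged sketch of calling the table helper directly with made-up benchmark
# names and latencies; the exact markdown emitted depends on the
# markdown_strings package.
#
#   table = _add_header_and_get_markdown_table(
#       names=("model-a", "model-b"),
#       means=(10, 12),
#       medians=(9, 12),
#       stddevs=(1, 2),
#       size_cut=1)
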

def _sort_benchmarks_and_get_table(benchmarks: Dict[str,
                                                    AggregateBenchmarkLatency],
                                   size_cut: Optional[int] = None) -> str:
  """Sorts all benchmarks according to the improvement/regression ratio and
  returns a markdown table for them.

  Args:
  - size_cut: If not None, only show the top N results for each table.
  """
  sorted_benchmarks = []
  for k, v in benchmarks.items():
    ratio = abs(v.mean_time - v.base_mean_time) / v.base_mean_time
    sorted_benchmarks.append((k, (v.mean_time, v.base_mean_time, ratio),
                              v.median_time, v.stddev_time))
  # Sort according to ratio in the reverse order.
  sorted_benchmarks.sort(key=lambda benchmark: benchmark[1][2], reverse=True)

  # Split each field into its own tuple in preparation for the markdown table.
  names, means, medians, stddevs = zip(*sorted_benchmarks)

  # Turn the tuple of means into string representations.
  str_means = []
  for pr, base, ratio in means:
    direction = "↑" if pr > base else ("↓" if pr < base else "")
    str_means.append(f"{pr} (vs. {base}, {ratio:.2%}{direction})")
  str_means = tuple(str_means)

  return _add_header_and_get_markdown_table(names, str_means, medians, stddevs,
                                            size_cut)
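
# A hedged worked example of the comparison formatting above, with made-up
# numbers: a PR mean of 12ms against a base mean of 10ms gives
# ratio = abs(12 - 10) / 10 = 0.2, which renders as "12 (vs. 10, 20.00%↑)".
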

def categorize_benchmarks_into_tables(benchmarks: Dict[
    str, AggregateBenchmarkLatency],
                                      size_cut: Optional[int] = None) -> str:
  """Splits benchmarks into regressed/improved/similar/raw categories and
  returns their markdown tables.

  Args:
  - benchmarks: A dictionary of benchmark names to their aggregate info.
  - size_cut: If not None, only show the top N results for each table.
  """
  regressed, improved, similar, raw = {}, {}, {}, {}

  for name, results in benchmarks.items():
    # If there is no information about the base result, we cannot analyze.
    if results.base_mean_time is None:
      raw[name] = results
      continue

    similar_threshold = None
    for threshold in BENCHMARK_THRESHOLDS:
      if threshold.regex.match(name):
        similar_threshold = threshold
        break
    if similar_threshold is None:
      raise ValueError(f"No matching threshold setting for benchmark: {name}")

    current = results.mean_time
    base = results.base_mean_time
    if similar_threshold.unit == ThresholdUnit.PERCENTAGE:
      ratio = abs(current - base) / base * 100
    else:
      ratio = abs(current - base)

    if ratio <= similar_threshold.threshold:
      similar[name] = results
    elif current > base:
      regressed[name] = results
    else:
      improved[name] = results

  tables = []
  if regressed:
    tables.append(md.header("Regressed Benchmarks 🚩", 3))
    tables.append(_sort_benchmarks_and_get_table(regressed, size_cut))
  if improved:
    tables.append(md.header("Improved Benchmarks 🎉", 3))
    tables.append(_sort_benchmarks_and_get_table(improved, size_cut))
  # If we want to abbreviate, similar results won't be interesting.
  if similar and size_cut is None:
    tables.append(md.header("Similar Benchmarks", 3))
    tables.append(_sort_benchmarks_and_get_table(similar, size_cut))
  if raw:
    tables.append(md.header("Raw Benchmarks", 3))
    raw_list = [
        (k, v.mean_time, v.median_time, v.stddev_time) for k, v in raw.items()
    ]
    names, means, medians, stddevs = zip(*raw_list)
    tables.append(
        _add_header_and_get_markdown_table(names=names,
                                           means=means,
                                           medians=medians,
                                           stddevs=stddevs,
                                           size_cut=size_cut))
  return "\n\n".join(tables)
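
# A hedged end-to-end sketch: aggregate the current results, attach baseline
# means from some external source (the `base_results` dict here is
# hypothetical), and render the comment body for a pull request.
#
#   pr_results = aggregate_all_benchmarks(["results.json"],
#                                         expected_pr_commit="deadbeef")
#   for name, latency in pr_results.items():
#     latency.base_mean_time = base_results.get(name)
#   comment_body = categorize_benchmarks_into_tables(pr_results, size_cut=10)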