| # Copyright 2021 The IREE Authors | 
 | # | 
 | # Licensed under the Apache License v2.0 with LLVM Exceptions. | 
 | # See https://llvm.org/LICENSE.txt for license information. | 
 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
 |  | 
 | import urllib.parse | 
 | import markdown_strings as md | 
 |  | 
 | from dataclasses import dataclass | 
 | from typing import Any, Dict, Optional, Sequence, Tuple | 
 |  | 
 | from .benchmark_definition import BenchmarkResults | 
 | from .benchmark_thresholds import BENCHMARK_THRESHOLDS, ThresholdUnit | 
 |  | 
 | PERFBOARD_SERIES_PREFIX = "https://perf.iree.dev/serie?IREE?" | 
 |  | 
 |  | 
 | @dataclass | 
 | class AggregateBenchmarkLatency: | 
 |   """An object for describing aggregate latency numbers for a benchmark.""" | 
 |   mean_time: int | 
 |   median_time: int | 
 |   stddev_time: int | 
 |   # The average latency time for the base commit to compare against. | 
 |   base_mean_time: Optional[int] = None | 
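
# For illustration only (the numbers below are made up): a benchmark measured
# at a 12ms mean latency on the current commit, with a 10ms baseline to
# compare against, could be represented as
#   AggregateBenchmarkLatency(mean_time=12, median_time=12, stddev_time=1,
#                             base_mean_time=10)
# When there is no baseline, base_mean_time stays None and the benchmark is
# later reported in the "Raw Benchmarks" table.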
 |  | 
 |  | 
 | def aggregate_all_benchmarks( | 
 |     benchmark_files: Sequence[str], | 
 |     expected_pr_commit: Optional[str] = None, | 
 |     verbose: bool = False) -> Dict[str, AggregateBenchmarkLatency]: | 
 |   """Aggregates all benchmarks in the given files. | 
 |  | 
 |   Args: | 
  - benchmark_files: A list of JSON files, each of which can be decoded as a
    BenchmarkResults.
 |   - expected_pr_commit: An optional Git commit SHA to match against. | 
 |  | 
 |   Returns: | 
  - A dict mapping benchmark names to their AggregateBenchmarkLatency numbers.
 |   """ | 
 |  | 
 |   aggregate_results = {} | 
 |  | 
 |   for benchmark_file in benchmark_files: | 
 |     with open(benchmark_file) as f: | 
 |       content = f.read() | 
 |     file_results = BenchmarkResults.from_json_str(content) | 
 |  | 
    if (expected_pr_commit is not None and
        file_results.commit != expected_pr_commit):
      raise ValueError(
          f"Inconsistent pull request commit: expected '{expected_pr_commit}' "
          f"but got '{file_results.commit}'")
 |  | 
    for benchmark_index, benchmark_case in enumerate(file_results.benchmarks):
 |  | 
 |       # Make sure each benchmark has a unique name. | 
 |       name = str(benchmark_case.benchmark_info) | 
 |       if name in aggregate_results: | 
 |         raise ValueError(f"Duplicated benchmarks: {name}") | 
 |  | 
 |       # Now scan all benchmark iterations and find the aggregate results. | 
 |       mean_time = file_results.get_aggregate_time(benchmark_index, "mean") | 
 |       median_time = file_results.get_aggregate_time(benchmark_index, "median") | 
 |       stddev_time = file_results.get_aggregate_time(benchmark_index, "stddev") | 
 |  | 
 |       aggregate_results[name] = AggregateBenchmarkLatency( | 
 |           mean_time, median_time, stddev_time) | 
 |  | 
 |   return aggregate_results | 
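
# A minimal usage sketch (the file names and commit below are hypothetical):
#   results = aggregate_all_benchmarks(
#       ["benchmarks-pixel-4.json", "benchmarks-rpi-4.json"],
#       expected_pr_commit="abcd1234")
#   for name, latency in results.items():
#     print(name, latency.mean_time, latency.median_time, latency.stddev_time)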
 |  | 
 |  | 
 | def _make_benchmark_clickable(name: str) -> str: | 
 |   """Add link to the given benchmark name.""" | 
 |   url = PERFBOARD_SERIES_PREFIX + urllib.parse.quote(name, safe="()[]@,") | 
 |   return md.link(name, url) | 
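
# For example (the benchmark name is hypothetical), the call
#   _make_benchmark_clickable("MobileNetV2 (fp32) @ Pixel-4 (CPU)")
# should produce roughly
#   [MobileNetV2 (fp32) @ Pixel-4 (CPU)](https://perf.iree.dev/serie?IREE?MobileNetV2%20(fp32)%20@%20Pixel-4%20(CPU))
# since spaces are percent-encoded while "()[]@," are kept as-is.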
 |  | 
 |  | 
def _add_header_and_get_markdown_table(names: Tuple[str, ...],
                                       means: Tuple[Any, ...],
                                       medians: Tuple[int, ...],
                                       stddevs: Tuple[int, ...],
                                       size_cut: Optional[int] = None) -> str:
 |   """Generates a markdown table with proper headers for benchmarks. | 
 |  | 
 |   Args: | 
  - size_cut: If not None, only show the top `size_cut` results in the table.
 |   """ | 
 |   total_size = len(names) | 
 |   if size_cut is not None: | 
 |     names = names[0:size_cut] | 
 |     means = means[0:size_cut] | 
 |     medians = medians[0:size_cut] | 
 |     stddevs = stddevs[0:size_cut] | 
 |  | 
 |   names = tuple([_make_benchmark_clickable(name) for name in names]) | 
 |   names = ("Benchmark Name",) + names | 
 |   means = ("Average Latency (ms)",) + means | 
 |   medians = ("Median Latency (ms)",) + medians | 
 |   stddevs = ("Latency Standard Deviation (ms)",) + stddevs | 
 |  | 
 |   table_str = md.table([names, means, medians, stddevs]) | 
 |   if size_cut is not None and size_cut < total_size: | 
 |     table_str += "\n\n" | 
    table_str += md.italics(
        f"[Top {size_cut} out of {total_size} benchmark results shown]")
 |   return table_str | 
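
# A rough sketch of the generated table (values are made up, and the benchmark
# name would actually be rendered as a dashboard link):
#
#   | Benchmark Name | Average Latency (ms) | Median Latency (ms) | Latency Standard Deviation (ms) |
#   |----------------|----------------------|---------------------|---------------------------------|
#   | model-a        | 12                   | 12                  | 1                               |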
 |  | 
 |  | 
 | def _sort_benchmarks_and_get_table(benchmarks: Dict[str, | 
 |                                                     AggregateBenchmarkLatency], | 
                                   size_cut: Optional[int] = None) -> str:
 |   """Sorts all benchmarks according to the improvement/regression ratio and | 
 |   returns a markdown table for it. | 
 |  | 
 |   Args: | 
  - size_cut: If not None, only show the top `size_cut` results in the table.
 |   """ | 
 |   sorted_benchmarks = [] | 
 |   for k, v in benchmarks.items(): | 
 |     ratio = abs(v.mean_time - v.base_mean_time) / v.base_mean_time | 
 |     sorted_benchmarks.append((k, (v.mean_time, v.base_mean_time, ratio), | 
 |                               v.median_time, v.stddev_time)) | 
  # Sort by the improvement/regression ratio in descending order.
 |   sorted_benchmarks.sort(key=lambda benchmark: benchmark[1][2], reverse=True) | 
 |  | 
  # Split each field into its own tuple in preparation for the markdown table.
 |   names, means, medians, stddevs = zip(*sorted_benchmarks) | 
 |  | 
  # Turn each (pr, base, ratio) mean tuple into a string representation.
 |   str_means = [] | 
 |   for pr, base, ratio in means: | 
 |     direction = "↑" if pr > base else ("↓" if pr < base else "") | 
 |     str_means.append(f"{pr} (vs. {base}, {ratio:.2%}{direction})") | 
 |   str_means = tuple(str_means) | 
 |  | 
 |   return _add_header_and_get_markdown_table(names, str_means, medians, stddevs, | 
 |                                             size_cut) | 
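
# With a hypothetical entry whose mean_time is 12 and base_mean_time is 10,
# the "Average Latency (ms)" cell would read "12 (vs. 10, 20.00%↑)", and the
# rows are ordered by that percentage, largest change first.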
 |  | 
 |  | 
 | def categorize_benchmarks_into_tables(benchmarks: Dict[ | 
 |     str, AggregateBenchmarkLatency], | 
 |                                       size_cut: Optional[int] = None) -> str: | 
 |   """Splits benchmarks into regressed/improved/similar/raw categories and | 
 |   returns their markdown tables. | 
 |  | 
  Args:
  - benchmarks: A dictionary mapping benchmark names to their aggregate info.
  - size_cut: If not None, only show the top N results for each table.
  """
 |   regressed, improved, similar, raw = {}, {}, {}, {} | 
 |  | 
 |   for name, results in benchmarks.items(): | 
    # If there is no information about the base result, we cannot analyze it.
 |     if results.base_mean_time is None: | 
 |       raw[name] = results | 
 |       continue | 
 |  | 
 |     similar_threshold = None | 
 |     for threshold in BENCHMARK_THRESHOLDS: | 
 |       if threshold.regex.match(name): | 
 |         similar_threshold = threshold | 
 |         break | 
 |     if similar_threshold is None: | 
 |       raise ValueError(f"no matched threshold setting for benchmark: {name}") | 
 |  | 
 |     current = results.mean_time | 
 |     base = results.base_mean_time | 
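    # Thresholds are expressed either as a percentage of the base latency or
    # as an absolute latency delta; compute the deviation in the matching unit
    # before comparing it against the threshold value.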
 |     if similar_threshold.unit == ThresholdUnit.PERCENTAGE: | 
 |       ratio = abs(current - base) / base * 100 | 
 |     else: | 
 |       ratio = abs(current - base) | 
 |  | 
 |     if ratio <= similar_threshold.threshold: | 
 |       similar[name] = results | 
 |     elif current > base: | 
 |       regressed[name] = results | 
 |     else: | 
 |       improved[name] = results | 
 |  | 
 |   tables = [] | 
 |   if regressed: | 
 |     tables.append(md.header("Regressed Benchmarks 🚩", 3)) | 
 |     tables.append(_sort_benchmarks_and_get_table(regressed, size_cut)) | 
 |   if improved: | 
 |     tables.append(md.header("Improved Benchmarks 🎉", 3)) | 
 |     tables.append(_sort_benchmarks_and_get_table(improved, size_cut)) | 
  # If we abbreviate with size_cut, similar results are not interesting to show.
 |   if similar and size_cut is None: | 
 |     tables.append(md.header("Similar Benchmarks", 3)) | 
 |     tables.append(_sort_benchmarks_and_get_table(similar, size_cut)) | 
 |   if raw: | 
 |     tables.append(md.header("Raw Benchmarks", 3)) | 
 |     raw_list = [ | 
 |         (k, v.mean_time, v.median_time, v.stddev_time) for k, v in raw.items() | 
 |     ] | 
 |     names, means, medians, stddevs = zip(*raw_list) | 
 |     tables.append( | 
 |         _add_header_and_get_markdown_table(names=names, | 
 |                                            means=means, | 
 |                                            medians=medians, | 
 |                                            stddevs=stddevs, | 
 |                                            size_cut=size_cut)) | 
 |   return "\n\n".join(tables) |