| #!/usr/bin/env python3 |
| # Copyright 2021 The IREE Authors |
| # |
| # Licensed under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| """Upload benchmark results to IREE Benchmark Dashboards. |
| |
| This script is meant to be used by Buildkite for automation. |
| |
| Example usage: |
| # Export necessary environment variables: |
| export IREE_DASHBOARD_API_TOKEN=... |
| # Then run the script: |
| python3 upload_benchmarks.py /path/to/benchmark/json/file |
| """ |
| |
| import pathlib |
| import sys |
| |
| # Add build_tools python dir to the search path. |
| sys.path.insert(0, str(pathlib.Path(__file__).parent.with_name("python"))) |
| |
| import argparse |
| import json |
| import os |
| import requests |
| |
| from typing import Any, Dict, Optional, Union |
| |
| from common.common_arguments import expand_and_check_file_paths |
| from common import benchmark_definition, benchmark_presentation, benchmark_thresholds |
| |
| IREE_DASHBOARD_URL = "https://perf.iree.dev" |
| IREE_GITHUB_COMMIT_URL_PREFIX = "https://github.com/openxla/iree/commit" |
| IREE_PROJECT_ID = "IREE" |
| THIS_DIRECTORY = pathlib.Path(__file__).resolve().parent |
| |
| COMMON_DESCRIPTION = """ |
| <br> |
For the graph, the x axis is the Git commit index, and the y axis is the
measured metric. The unit for the numbers is shown in the "Unit" dropdown.
| <br> |
| See <a href="https://github.com/openxla/iree/tree/main/benchmarks/dashboard.md"> |
| https://github.com/openxla/iree/tree/main/benchmarks/dashboard.md |
| </a> for benchmark philosophy, specification, and definitions. |
| """ |
| |
# A non-exhaustive list of models and their source URLs.
# For models listed here, we can show a nicer description on the dashboard
# webpage.
| IREE_TF_MODEL_SOURCE_URL = { |
| "MobileBertSquad": "https://github.com/google-research/google-research/tree/master/mobilebert", |
| "MobileNetV2": "https://www.tensorflow.org/api_docs/python/tf/keras/applications/MobileNetV2", |
| "MobileNetV3Small": "https://www.tensorflow.org/api_docs/python/tf/keras/applications/MobileNetV3Small", |
| } |
| |
| IREE_TFLITE_MODEL_SOURCE_URL = { |
| "DeepLabV3": "https://tfhub.dev/tensorflow/lite-model/deeplabv3/1/default/1", |
| "MobileSSD": "https://www.tensorflow.org/lite/performance/gpu#demo_app_tutorials", |
| "PoseNet": "https://tfhub.dev/tensorflow/lite-model/posenet/mobilenet/float/075/1/default/1", |
| } |
| |
| |
| def get_model_description(model_name: str, model_source: str) -> Optional[str]: |
| """Gets the model description for the given benchmark.""" |
| url = None |
| if model_source == "TensorFlow": |
| url = IREE_TF_MODEL_SOURCE_URL.get(model_name) |
| elif model_source == "TFLite": |
| url = IREE_TFLITE_MODEL_SOURCE_URL.get(model_name) |
| if url is not None: |
| description = f'{model_name} from <a href="{url}">{url}</a>.' |
| return description |
| return None |
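

# For example, get_model_description("DeepLabV3", "TFLite") links the model
# name to its tfhub.dev source URL above; models not in the tables yield None
# so callers can fall back to a generic description.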
| |
| |
| def get_git_commit_hash(commit: str, verbose: bool = False) -> str: |
| """Gets the commit hash for the given commit.""" |
| return benchmark_definition.execute_cmd_and_get_stdout( |
| ["git", "rev-parse", commit], cwd=THIS_DIRECTORY, verbose=verbose |
| ) |
| |
| |
| def get_git_total_commit_count(commit: str, verbose: bool = False) -> int: |
| """Gets the total commit count in history ending with the given commit.""" |
| count = benchmark_definition.execute_cmd_and_get_stdout( |
| ["git", "rev-list", "--count", commit], cwd=THIS_DIRECTORY, verbose=verbose |
| ) |
| return int(count) |
| |
| |
| def get_git_commit_info(commit: str, verbose: bool = False) -> Dict[str, str]: |
| """Gets commit information dictionary for the given commit.""" |
| cmd = ["git", "show", "--format=%H:::%h:::%an:::%ae:::%s", "--no-patch", commit] |
| info = benchmark_definition.execute_cmd_and_get_stdout( |
| cmd, cwd=THIS_DIRECTORY, verbose=verbose |
| ) |
    # maxsplit keeps a subject line that itself contains ":::" intact.
    segments = info.split(":::", maxsplit=4)
| return { |
| "hash": segments[0], |
| "abbrevHash": segments[1], |
| "authorName": segments[2], |
| "authorEmail": segments[3], |
| "subject": segments[4], |
| } |
| |
| |
| def compose_series_payload( |
| project_id: str, |
| series_id: str, |
| series_unit: str, |
| series_name: Optional[str] = None, |
| series_description: Optional[str] = None, |
| average_range: Union[int, str] = "5%", |
| average_min_count: int = 3, |
| better_criterion: str = "smaller", |
| override: bool = False, |
| ) -> Dict[str, Any]: |
| """Composes the payload dictionary for a series.""" |
| payload = { |
| "projectId": project_id, |
| "serieId": series_id, |
| "serieUnit": series_unit, |
| "serieName": series_name, |
| "analyse": { |
| "benchmark": { |
| "range": average_range, |
| "required": average_min_count, |
| "trend": better_criterion, |
| } |
| }, |
| "override": override, |
| } |
| if series_description is not None: |
| payload["description"] = series_description |
| return payload |
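

# For illustration, a composed series payload has roughly this shape (field
# names follow the dashboard API used above; values here are hypothetical):
#
# {
#     "projectId": "IREE",
#     "serieId": "<series id>",
#     "serieUnit": "ns",
#     "serieName": "<display name>",
#     "analyse": {"benchmark": {"range": "5%", "required": 3, "trend": "smaller"}},
#     "override": False,
# }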
| |
| |
| def compose_build_payload( |
| project_id: str, |
| project_github_commit_url: str, |
| build_id: int, |
| commit: str, |
| override: bool = False, |
| ) -> Dict[str, Any]: |
| """Composes the payload dictionary for a build.""" |
| commit_info = get_git_commit_info(commit) |
| commit_info["url"] = f'{project_github_commit_url}/{commit_info["hash"]}' |
| return { |
| "projectId": project_id, |
| "build": { |
| "buildId": build_id, |
| "infos": commit_info, |
| }, |
| "override": override, |
| } |
| |
| |
| def compose_sample_payload( |
| project_id: str, |
| series_id: str, |
| build_id: int, |
| sample_unit: str, |
| sample_value: int, |
| override: bool = False, |
| ) -> Dict[str, Any]: |
| """Composes the payload dictionary for a sample.""" |
| return { |
| "projectId": project_id, |
| "serieId": series_id, |
| "sampleUnit": sample_unit, |
| "sample": {"buildId": build_id, "value": sample_value}, |
| "override": override, |
| } |
| |
| |
| def get_required_env_var(var: str) -> str: |
| """Gets the value for a required environment variable.""" |
| value = os.getenv(var) |
| if value is None: |
| raise RuntimeError(f'Missing environment variable "{var}"') |
| return value |
| |
| |
| def post_to_dashboard( |
| url: str, payload: Dict[str, Any], dry_run: bool = False, verbose: bool = False |
| ): |
    """Posts the given payload as JSON to the dashboard API endpoint."""
    data = json.dumps(payload)
| |
| if dry_run or verbose: |
| print(f"API request payload: {data}") |
| |
| if dry_run: |
| return |
| |
| api_token = get_required_env_var("IREE_DASHBOARD_API_TOKEN") |
| headers = { |
| "Content-type": "application/json", |
| "Authorization": f"Bearer {api_token}", |
| } |
| |
| response = requests.post(url, data=data, headers=headers) |
| code = response.status_code |
| if code != 200: |
| raise requests.RequestException( |
| f"Failed to post to dashboard server with {code} - {response.text}" |
| ) |
| |
| |
| def add_new_iree_series( |
| series_id: str, |
| series_unit: str, |
| series_name: str, |
| series_description: Optional[str] = None, |
| average_range: Optional[Union[str, int]] = None, |
| override: bool = False, |
| dry_run: bool = False, |
| verbose: bool = False, |
| ): |
| """Posts a new series to the dashboard.""" |
| if average_range is None: |
| raise ValueError(f"no matched threshold setting for benchmark: {series_id}") |
| |
| payload = compose_series_payload( |
| IREE_PROJECT_ID, |
| series_id, |
| series_unit, |
| series_name, |
| series_description, |
| average_range=average_range, |
| override=override, |
| ) |
| post_to_dashboard( |
| f"{IREE_DASHBOARD_URL}/apis/v2/addSerie", |
| payload, |
| dry_run=dry_run, |
| verbose=verbose, |
| ) |
| |
| |
| def add_new_iree_build( |
| build_id: int, |
| commit: str, |
| override: bool = False, |
| dry_run: bool = False, |
| verbose: bool = False, |
| ): |
| """Posts a new build to the dashboard.""" |
| payload = compose_build_payload( |
| IREE_PROJECT_ID, IREE_GITHUB_COMMIT_URL_PREFIX, build_id, commit, override |
| ) |
| post_to_dashboard( |
| f"{IREE_DASHBOARD_URL}/apis/addBuild", payload, dry_run=dry_run, verbose=verbose |
| ) |
| |
| |
| def add_new_sample( |
| series_id: str, |
| build_id: int, |
| sample_unit: str, |
| sample_value: int, |
| override: bool = False, |
| dry_run: bool = False, |
| verbose: bool = False, |
| ): |
| """Posts a new sample to the dashboard.""" |
| payload = compose_sample_payload( |
| IREE_PROJECT_ID, series_id, build_id, sample_unit, sample_value, override |
| ) |
| post_to_dashboard( |
| f"{IREE_DASHBOARD_URL}/apis/v2/addSample", |
| payload, |
| dry_run=dry_run, |
| verbose=verbose, |
| ) |
| |
| |
| def parse_arguments(): |
| """Parses command-line options.""" |
| |
| parser = argparse.ArgumentParser() |
| parser.add_argument( |
| "--benchmark_files", |
| metavar="<benchmark-json-files>", |
| default=[], |
| action="append", |
        help=(
            "Paths to the JSON files containing benchmark results; "
            "accepts wildcards"
        ),
| ) |
| parser.add_argument( |
| "--compile_stats_files", |
| metavar="<compile-stats-json-files>", |
| default=[], |
| action="append", |
        help=(
            "Paths to the JSON files containing compilation statistics; "
            "accepts wildcards"
        ),
| ) |
| parser.add_argument( |
| "--dry-run", |
| action="store_true", |
| help="Print the comment instead of posting to dashboard", |
| ) |
| parser.add_argument( |
| "--verbose", |
| action="store_true", |
| help="Print internal information during execution", |
| ) |
| args = parser.parse_args() |
| |
| return args |
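

# Example invocation (file paths are hypothetical; wildcards are expanded by
# expand_and_check_file_paths):
#   python3 upload_benchmarks.py \
#       --benchmark_files=benchmark-results-*.json \
#       --compile_stats_files=compile-stats-*.json \
#       --dry-run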
| |
| |
| def main(args): |
| benchmark_files = expand_and_check_file_paths(args.benchmark_files) |
| compile_stats_files = expand_and_check_file_paths(args.compile_stats_files) |
| |
    if benchmark_files:
| committish = benchmark_definition.BenchmarkResults.from_json_str( |
| benchmark_files[0].read_text() |
| ).commit |
    elif compile_stats_files:
| committish = benchmark_definition.CompilationResults.from_json_object( |
| json.loads(compile_stats_files[0].read_text()) |
| ).commit |
| else: |
| raise ValueError("No benchmark/compilation results.") |
| |
| # Register a new build for the current commit. |
| commit_hash = get_git_commit_hash(committish, verbose=args.verbose) |
| commit_count = get_git_total_commit_count(commit_hash, verbose=args.verbose) |
| |
| aggregate_results = benchmark_presentation.aggregate_all_benchmarks( |
| benchmark_files=benchmark_files, expected_pr_commit=commit_hash |
| ) |
| |
| all_compilation_metrics = benchmark_presentation.collect_all_compilation_metrics( |
| compile_stats_files=compile_stats_files, expected_pr_commit=commit_hash |
| ) |
| |
| # Allow override to support uploading data for the same build in |
| # different batches. |
| add_new_iree_build( |
| commit_count, |
| commit_hash, |
| override=True, |
| dry_run=args.dry_run, |
| verbose=args.verbose, |
| ) |
| |
| # Upload benchmark results to the dashboard. |
| for series_id, benchmark_latency in aggregate_results.items(): |
| series_name = benchmark_latency.name |
| benchmark_info = benchmark_latency.benchmark_info |
| description = get_model_description( |
| benchmark_info.model_name, benchmark_info.model_source |
| ) |
| if description is None: |
| description = "" |
| description += COMMON_DESCRIPTION |
| |
| threshold = next( |
| ( |
| threshold |
| for threshold in benchmark_thresholds.BENCHMARK_THRESHOLDS |
| if threshold.regex.match(series_name) |
| ), |
| None, |
| ) |
| average_range = threshold.get_threshold_str() if threshold is not None else None |
| |
| # Override by default to allow updates to the series. |
| add_new_iree_series( |
| series_id=series_id, |
| series_unit="ns", |
            series_name=series_name,
| series_description=description, |
| average_range=average_range, |
| override=True, |
| dry_run=args.dry_run, |
| verbose=args.verbose, |
| ) |
| add_new_sample( |
| series_id=series_id, |
| build_id=commit_count, |
| sample_unit="ns", |
| sample_value=benchmark_latency.mean_time, |
| dry_run=args.dry_run, |
| verbose=args.verbose, |
| ) |
| |
| for target_id, compile_metrics in all_compilation_metrics.items(): |
| description = get_model_description( |
| compile_metrics.compilation_info.model_name, |
| compile_metrics.compilation_info.model_source, |
| ) |
| if description is None: |
| description = "" |
| description += COMMON_DESCRIPTION |
| |
| for mapper in benchmark_presentation.COMPILATION_METRICS_TO_TABLE_MAPPERS: |
| sample_value, _ = mapper.get_current_and_base_value(compile_metrics) |
| series_unit = mapper.get_unit() |
| series_id = mapper.get_series_id(target_id) |
| series_name = mapper.get_series_name(compile_metrics.name) |
| |
| threshold = next( |
| ( |
| threshold |
| for threshold in mapper.get_metric_thresholds() |
| if threshold.regex.match(series_name) |
| ), |
| None, |
| ) |
| average_range = ( |
| threshold.get_threshold_str() if threshold is not None else None |
| ) |
| |
| # Override by default to allow updates to the series. |
| add_new_iree_series( |
| series_id=series_id, |
| series_unit=series_unit, |
| series_name=series_name, |
| series_description=description, |
| average_range=average_range, |
| override=True, |
| dry_run=args.dry_run, |
| verbose=args.verbose, |
| ) |
| add_new_sample( |
| series_id=series_id, |
| build_id=commit_count, |
| sample_unit=series_unit, |
| sample_value=sample_value, |
| dry_run=args.dry_run, |
| verbose=args.verbose, |
| ) |
| |
| |
| if __name__ == "__main__": |
| main(parse_arguments()) |