Introduce a noisy benchmark list to control threshold and comment (#7016)
These benchmarks seem to suffer from phone issues, so their results
fluctuate over large ranges. Introduce a filter list to reduce the noise
and avoid confusion while waiting for the phones to be fixed.
diff --git a/build_tools/android/common/noisy_benchmarks.py b/build_tools/android/common/noisy_benchmarks.py
new file mode 100644
index 0000000..7fcb354
--- /dev/null
+++ b/build_tools/android/common/noisy_benchmarks.py
@@ -0,0 +1,24 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""A list of noisy benchmarks and their average thresholds."""
+
+import re
+
+# A list of noisy benchmarks. Each one is a tuple that contains the following
+# fields:
+# - A regular expression to match against the benchmark identifier.
+# - A threshold for computing the benchmark value average. Benchmark sample
+# values from consecutive runs and within the given range will be considered
+# as similar (with some noise). They will be used to compute the moving
+# average. It is a string that is sent to Dana directly in the API call JSON
+# payload. Two formats are supported: a percentage or an absolute value, so
+# we use a string here. What value to set depends on the noise range of the
+# particular benchmark.
+NOISY_BENCHMARKS = [
+ (re.compile(r"^PoseNet.*GPU-Mali-G77"), "100%"),
+ (re.compile(r"^DeepLabV3.*GPU-Mali-G77"), "100%"),
+ (re.compile(r"^MobileSSD.*GPU-Mali-G77"), "100%"),
+]
diff --git a/build_tools/android/post_benchmarks_as_pr_comment.py b/build_tools/android/post_benchmarks_as_pr_comment.py
index e47736b..e5c1475 100755
--- a/build_tools/android/post_benchmarks_as_pr_comment.py
+++ b/build_tools/android/post_benchmarks_as_pr_comment.py
@@ -44,6 +44,7 @@
from typing import Any, Dict, Optional, Sequence, Tuple, Union
from common.benchmark_description import BenchmarkResults, get_output
+from common.noisy_benchmarks import NOISY_BENCHMARKS
ABBR_PR_COMMENT_TITLE = "Abbreviated Benchmark Summary"
GITHUB_GIST_API_PREFIX = "https://api.github.com/gists"
@@ -126,7 +127,8 @@
def aggregate_all_benchmarks(
- benchmark_files: Sequence[str]) -> Dict[str, AggregateBenchmarkLatency]:
+ benchmark_files: Sequence[str],
+ verbose: bool = False) -> Dict[str, AggregateBenchmarkLatency]:
"""Aggregates all benchmarks in the given files.
Args:
@@ -156,6 +158,12 @@
if name in aggregate_results:
raise ValueError(f"Duplicated benchmarks: {name}")
+ # Filter noisy benchmarks out.
+ if any([regex.match(name) is not None for regex, _ in NOISY_BENCHMARKS]):
+ if verbose:
+ print(f"Skipping noisy benchmark '{name}'")
+ continue
+
# Now scan all benchmark iterations and find the aggregate results.
mean_time = file_results.get_aggregate_time(benchmark_index, "mean")
median_time = file_results.get_aggregate_time(benchmark_index, "median")
@@ -305,7 +313,7 @@
query_base: bool,
verbose: bool = False) -> Tuple[str, str]:
"""Gets the full/abbreviated markdown summary of all benchmarks in files."""
- all_benchmarks = aggregate_all_benchmarks(benchmark_files)
+ all_benchmarks = aggregate_all_benchmarks(benchmark_files, verbose=verbose)
build_url = get_required_env_var("BUILDKITE_BUILD_URL")
pr_number = get_required_env_var("BUILDKITE_PULL_REQUEST")
diff --git a/build_tools/android/upload_benchmarks_to_dashboard.py b/build_tools/android/upload_benchmarks_to_dashboard.py
index 093ff02..63c136f 100755
--- a/build_tools/android/upload_benchmarks_to_dashboard.py
+++ b/build_tools/android/upload_benchmarks_to_dashboard.py
@@ -28,6 +28,7 @@
from common.benchmark_description import (BenchmarkInfo, BenchmarkResults,
get_output)
+from common.noisy_benchmarks import NOISY_BENCHMARKS
IREE_GITHUB_COMMIT_URL_PREFIX = 'https://github.com/google/iree/commit'
IREE_PROJECT_ID = 'IREE'
@@ -210,9 +211,18 @@
verbose: bool = False):
"""Posts a new series to the dashboard."""
url = get_required_env_var('IREE_DASHBOARD_URL')
+
+ average_range = '5%'
+ # Adjust average threshold for noisy benchmarks.
+ for regex, threshold in NOISY_BENCHMARKS:
+ if regex.match(series_id):
+ average_range = threshold
+ break
+
payload = compose_series_payload(IREE_PROJECT_ID,
series_id,
series_description,
+ average_range=average_range,
override=override)
post_to_dashboard(f'{url}/apis/addSerie',
payload,