build_tools/benchmarks/common/benchmark_thresholds.py - 3p/openxla/iree - Git at Google

 # Copyright 2021 The IREE Authors
 #
 # Licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 """A list of benchmarks and their similarity thresholds."""

 import re

 from dataclasses import dataclass
 from enum import Enum


 class ThresholdUnit(Enum):
   """Units in which a benchmark similarity threshold can be expressed."""

   # The threshold number is a percentage.
   PERCENTAGE = "%"
   # The threshold number is an absolute value in nanoseconds.
   VALUE_NS = "ns"


 @dataclass
 class BenchmarkThreshold:
   """Pairs a benchmark-name pattern with the similarity threshold to apply."""

   # Compiled pattern that is matched against the benchmark identifier.
   regex: re.Pattern
   # Range used when computing the benchmark value average: sample values from
   # consecutive runs that fall within this range are treated as similar (i.e.
   # noise) and are used to compute the moving average. How the number is
   # interpreted depends on `unit`; choose it according to the noise range of
   # the particular benchmark.
   threshold: int
   # Unit in which `threshold` is expressed.
   unit: ThresholdUnit

   def get_threshold_str(self):
     """Returns the threshold, suffixed with "%" when it is a percentage.

     Returns:
       The str "<threshold>%" when `unit` is ThresholdUnit.PERCENTAGE.
       Otherwise the `threshold` value itself is returned unchanged, i.e. it
       is NOT converted to a str.
     """
     # NOTE(review): despite the method name, non-percentage thresholds come
     # back as the raw value. Presumably callers rely on that distinction;
     # confirm before changing it.
     is_percentage = self.unit == ThresholdUnit.PERCENTAGE
     return f"{self.threshold}%" if is_percentage else self.threshold


 # Similarity thresholds for benchmarks, keyed by a regex on the benchmark name.
 # The list is ordered: when several regexes match one benchmark, the first
 # matching entry is the one that is used. Keep specific patterns above the
 # catch-all defaults at the bottom.
 BENCHMARK_THRESHOLDS = [
     # Fluctuating benchmarks on ARM64 CPUs.
     BenchmarkThreshold(
         regex=re.compile(r"^DeepLabV3.*big-core.*LLVM-CPU.* @ Pixel"),
         threshold=20,
         unit=ThresholdUnit.PERCENTAGE,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^MobileBertSquad.*big-core.*LLVM-CPU-Sync @ Pixel-4"),
         threshold=20,
         unit=ThresholdUnit.PERCENTAGE,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^MobileNetV2.*LLVM-CPU.* @ Pixel"),
         threshold=15,
         unit=ThresholdUnit.PERCENTAGE,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^MobileNetV3Small.*LLVM-CPU.* @ Pixel"),
         threshold=25,
         unit=ThresholdUnit.PERCENTAGE,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^MobileSSD.*little-core.*LLVM-CPU.* @ Pixel-6"),
         threshold=20,
         unit=ThresholdUnit.PERCENTAGE,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^PoseNet.*big-core.*LLVM-CPU.* @ Pixel"),
         threshold=15,
         unit=ThresholdUnit.PERCENTAGE,
     ),

     # Benchmarks that complete <= 10ms on X86_64 CPUs; using percentage is not
     # suitable anymore, so absolute nanosecond values are used instead.
     BenchmarkThreshold(
         regex=re.compile(r"^DeepLabV3_fp32.*x86_64"),
         threshold=1_000_000,  # 1 ms
         unit=ThresholdUnit.VALUE_NS,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^EfficientNet_int8.*x86_64"),
         threshold=1_000_000,  # 1 ms
         unit=ThresholdUnit.VALUE_NS,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^MobileNetV1_fp32.*x86_64"),
         threshold=1_000_000,  # 1 ms
         unit=ThresholdUnit.VALUE_NS,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^MobileNetV2_fp32.*x86_64"),
         threshold=2_000_000,  # 2 ms
         unit=ThresholdUnit.VALUE_NS,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^MobileNetV3Small_fp32.*x86_64"),
         threshold=1_000_000,  # 1 ms
         unit=ThresholdUnit.VALUE_NS,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^PersonDetect_int8.*x86_64"),
         threshold=500_000,  # 0.5 ms
         unit=ThresholdUnit.VALUE_NS,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^PoseNet_fp32.*x86_64"),
         threshold=1_000_000,  # 1 ms
         unit=ThresholdUnit.VALUE_NS,
     ),

     # Fluctuating benchmarks on mobile GPUs.
     BenchmarkThreshold(
         regex=re.compile(r"^MobileBertSquad.*int8.*full-inference.*GPU-Mali"),
         threshold=10,
         unit=ThresholdUnit.PERCENTAGE,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^MobileBertSquad.*fp16.*full-inference.*GPU-Mali"),
         threshold=10,
         unit=ThresholdUnit.PERCENTAGE,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^MobileNetV3Small.*full-inference.*GPU-Mali"),
         threshold=2_000_000,  # 2 ms
         unit=ThresholdUnit.VALUE_NS,
     ),

     # Benchmarks that complete <= 10ms on GPUs; using percentage is not
     # suitable anymore, so absolute nanosecond values are used instead.
     BenchmarkThreshold(
         regex=re.compile(r"^DeepLabV3.*GPU-Mali"),
         threshold=1_000_000,  # 1 ms
         unit=ThresholdUnit.VALUE_NS,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^PersonDetect.*int8.*GPU-Mali"),
         threshold=200_000,  # 0.2 ms
         unit=ThresholdUnit.VALUE_NS,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^EfficientNet.*int8.*GPU-Mali"),
         threshold=1_500_000,  # 1.5 ms
         unit=ThresholdUnit.VALUE_NS,
     ),
     BenchmarkThreshold(
         regex=re.compile(r"^MobileNet.*GPU"),
         threshold=1_000_000,  # 1 ms
         unit=ThresholdUnit.VALUE_NS,
     ),

     # Default threshold for all ARM64/X86_64 benchmarks: 10%.
     BenchmarkThreshold(
         regex=re.compile(r".*CPU-ARM.*"),
         threshold=10,
         unit=ThresholdUnit.PERCENTAGE,
     ),
     BenchmarkThreshold(
         regex=re.compile(r".*x86_64.*"),
         threshold=10,
         unit=ThresholdUnit.PERCENTAGE,
     ),
     # Default threshold for all benchmarks: 5%. Must stay last, since it
     # matches every name.
     BenchmarkThreshold(
         regex=re.compile(r".*"),
         threshold=5,
         unit=ThresholdUnit.PERCENTAGE,
     ),
 ]

 COMPILATION_TIME_THRESHOLDS = [
     # TODO(#11922): Compilation time measurement is very unstable right now.
     # Use a large threshold until we make it stable.
     BenchmarkThreshold(
         regex=re.compile(r".*"),
         threshold=100,
         unit=ThresholdUnit.PERCENTAGE,
     ),
 ]

 TOTAL_DISPATCH_SIZE_THRESHOLDS = [
     # Default threshold: 5%.
     BenchmarkThreshold(
         regex=re.compile(r".*"),
         threshold=5,
         unit=ThresholdUnit.PERCENTAGE,
     ),
 ]

 TOTAL_ARTIFACT_SIZE_THRESHOLDS = [
     # Default threshold: 5%.
     BenchmarkThreshold(
         regex=re.compile(r".*"),
         threshold=5,
         unit=ThresholdUnit.PERCENTAGE,
     ),
 ]

 STREAM_IR_DISPATCH_COUNT_THRESHOLDS = [
     # Default threshold: 0%.
     # Any change on dispatch count should be reported.
     BenchmarkThreshold(
         regex=re.compile(r".*"),
         threshold=0,
         unit=ThresholdUnit.PERCENTAGE,
     ),
 ]
	# Copyright 2021 The IREE Authors
	#
	# Licensed under the Apache License v2.0 with LLVM Exceptions.
	# See https://llvm.org/LICENSE.txt for license information.
	# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	"""A list of benchmarks and their similarity thresholds."""

	import re

	from dataclasses import dataclass
	from enum import Enum


	class ThresholdUnit(Enum):
	  """Units in which a benchmark similarity threshold can be expressed."""

	  PERCENTAGE = "%"  # Percentage
	  VALUE_NS = "ns"  # Absolute value in nanoseconds


	@dataclass
	class BenchmarkThreshold:
	  """Similarity threshold for benchmarks matching a regular expression."""

	  # A regular expression to match against the benchmark identifier.
	  regex: re.Pattern
	  # A threshold for computing the benchmark value average. Benchmark sample
	  # values from consecutive runs and within the given range will be considered
	  # as similar (with some noise). They will be used to compute the moving
	  # average. The number will be interpreted according to the given unit.
	  # What value to set depends on the noise range of the particular benchmark.
	  threshold: int
	  # Unit in which `threshold` is expressed.
	  unit: ThresholdUnit

	  def get_threshold_str(self):
	    """Returns the threshold, suffixed with "%" when it is a percentage.

	    Returns:
	      The str "<threshold>%" when `unit` is ThresholdUnit.PERCENTAGE.
	      Otherwise the `threshold` value itself is returned unchanged, i.e. it
	      is not converted to a str.
	    """
	    if self.unit == ThresholdUnit.PERCENTAGE:
	      return f"{self.threshold}%"
	    # NOTE(review): the raw value (not a str) is returned here; presumably
	    # callers depend on that. Confirm before changing it.
	    return self.threshold


	# A list of benchmarks and their similarity thresholds.
	# Order matters here: if multiple regexes match a single benchmark, the first
	# match is used.
	# Absolute thresholds are in nanoseconds (10**6 ns == 1 ms).
	BENCHMARK_THRESHOLDS = [
	    # Fluctuating benchmarks on ARM64 CPUs.
	    BenchmarkThreshold(re.compile(r"^DeepLabV3.*big-core.*LLVM-CPU.* @ Pixel"),
	                       20, ThresholdUnit.PERCENTAGE),
	    BenchmarkThreshold(
	        re.compile(r"^MobileBertSquad.*big-core.*LLVM-CPU-Sync @ Pixel-4"), 20,
	        ThresholdUnit.PERCENTAGE),
	    BenchmarkThreshold(re.compile(r"^MobileNetV2.*LLVM-CPU.* @ Pixel"), 15,
	                       ThresholdUnit.PERCENTAGE),
	    BenchmarkThreshold(re.compile(r"^MobileNetV3Small.*LLVM-CPU.* @ Pixel"), 25,
	                       ThresholdUnit.PERCENTAGE),
	    BenchmarkThreshold(
	        re.compile(r"^MobileSSD.*little-core.*LLVM-CPU.* @ Pixel-6"), 20,
	        ThresholdUnit.PERCENTAGE),
	    BenchmarkThreshold(re.compile(r"^PoseNet.*big-core.*LLVM-CPU.* @ Pixel"),
	                       15, ThresholdUnit.PERCENTAGE),

	    # Benchmarks that complete <= 10ms on X86_64 CPUs; using percentage is not
	    # suitable anymore.
	    BenchmarkThreshold(re.compile(r"^DeepLabV3_fp32.*x86_64"), 1 * 10**6,
	                       ThresholdUnit.VALUE_NS),
	    BenchmarkThreshold(re.compile(r"^EfficientNet_int8.*x86_64"), 1 * 10**6,
	                       ThresholdUnit.VALUE_NS),
	    BenchmarkThreshold(re.compile(r"^MobileNetV1_fp32.*x86_64"), 1 * 10**6,
	                       ThresholdUnit.VALUE_NS),
	    BenchmarkThreshold(re.compile(r"^MobileNetV2_fp32.*x86_64"), 2 * 10**6,
	                       ThresholdUnit.VALUE_NS),
	    BenchmarkThreshold(re.compile(r"^MobileNetV3Small_fp32.*x86_64"), 1 * 10**6,
	                       ThresholdUnit.VALUE_NS),
	    BenchmarkThreshold(re.compile(r"^PersonDetect_int8.*x86_64"), 5 * 10**5,
	                       ThresholdUnit.VALUE_NS),
	    BenchmarkThreshold(re.compile(r"^PoseNet_fp32.*x86_64"), 1 * 10**6,
	                       ThresholdUnit.VALUE_NS),

	    # Fluctuating benchmarks on mobile GPUs.
	    BenchmarkThreshold(
	        re.compile(r"^MobileBertSquad.*int8.*full-inference.*GPU-Mali"), 10,
	        ThresholdUnit.PERCENTAGE),
	    BenchmarkThreshold(
	        re.compile(r"^MobileBertSquad.*fp16.*full-inference.*GPU-Mali"), 10,
	        ThresholdUnit.PERCENTAGE),
	    BenchmarkThreshold(
	        re.compile(r"^MobileNetV3Small.*full-inference.*GPU-Mali"), 2 * 10**6,
	        ThresholdUnit.VALUE_NS),

	    # Benchmarks that complete <= 10ms on GPUs; using percentage is not
	    # suitable anymore.
	    BenchmarkThreshold(re.compile(r"^DeepLabV3.*GPU-Mali"), 1 * 10**6,
	                       ThresholdUnit.VALUE_NS),
	    BenchmarkThreshold(re.compile(r"^PersonDetect.*int8.*GPU-Mali"), 2 * 10**5,
	                       ThresholdUnit.VALUE_NS),
	    BenchmarkThreshold(re.compile(r"^EfficientNet.*int8.*GPU-Mali"), 15 * 10**5,
	                       ThresholdUnit.VALUE_NS),
	    BenchmarkThreshold(re.compile(r"^MobileNet.*GPU"), 1 * 10**6,
	                       ThresholdUnit.VALUE_NS),

	    # Default threshold for all ARM64/X86_64 benchmarks: 10%.
	    BenchmarkThreshold(re.compile(r".*CPU-ARM.*"), 10,
	                       ThresholdUnit.PERCENTAGE),
	    BenchmarkThreshold(re.compile(r".*x86_64.*"), 10, ThresholdUnit.PERCENTAGE),
	    # Default threshold for all benchmarks: 5%.
	    BenchmarkThreshold(re.compile(r".*"), 5, ThresholdUnit.PERCENTAGE),
	]

	COMPILATION_TIME_THRESHOLDS = [
	    # TODO(#11922): Compilation time measurement is very unstable right now.
	    # Use a large threshold until we make it stable.
	    BenchmarkThreshold(
	        regex=re.compile(r".*"),
	        threshold=100,
	        unit=ThresholdUnit.PERCENTAGE,
	    ),
	]

	TOTAL_DISPATCH_SIZE_THRESHOLDS = [
	    # Default threshold: 5%.
	    BenchmarkThreshold(
	        regex=re.compile(r".*"),
	        threshold=5,
	        unit=ThresholdUnit.PERCENTAGE,
	    ),
	]

	TOTAL_ARTIFACT_SIZE_THRESHOLDS = [
	    # Default threshold: 5%.
	    BenchmarkThreshold(
	        regex=re.compile(r".*"),
	        threshold=5,
	        unit=ThresholdUnit.PERCENTAGE,
	    ),
	]

	STREAM_IR_DISPATCH_COUNT_THRESHOLDS = [
	    # Default threshold: 0%.
	    # Any change on dispatch count should be reported.
	    BenchmarkThreshold(
	        regex=re.compile(r".*"),
	        threshold=0,
	        unit=ThresholdUnit.PERCENTAGE,
	    ),
	]