Add default-flags configurations for all TFLite benchmarks (#7580)
Following up on https://github.com/google/iree/pull/7553, this adds
default flag configurations for the remaining TFLite models. Now that
we've got timestamps in the Buildkite runs, we can see what this does
to artifact transfer time via the presubmit:
https://buildkite.com/iree/iree-benchmark/builds/1447
I'm contrasting with the run at the merge base on main:
https://buildkite.com/iree/iree-benchmark/builds/1445
For this PR's run, downloads took 1:35 and 4:12 for the two RPIs,
compared to 2:06 and 3:30 before. That looks like it's basically in
the noise. We could do more detailed analysis, but that doesn't seem
worth it to me. Just spot-checking a few runs, it seems the RPI
connected to the Pixels consistently downloads faster than the ones
connected to the Samsungs.
As for the benchmarks themselves, the runs on the RPIs took
around 34-35 minutes for each phone, 6-8 minutes longer than
before.
Overall run latency, excluding waiting for agents, increased
from 43:12 to 49:50. I think this is acceptable given our
current limitations.
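
For quick reference, here is a condensed sketch of the convention the
diff below applies (abridged to a single module and mode for
illustration; the actual suites list several of each):

  # Shared per-architecture translation flags live in one variable.
  set(ANDROID_CPU_TRANSLATION_FLAGS
    "--iree-input-type=tosa"
    "--iree-llvm-target-triple=aarch64-none-linux-android29")

  # "default-flags" suites use only those target-architecture flags.
  iree_benchmark_suite(
    MODULES "${DEEPLABV3_FP32_MODULE}"
    BENCHMARK_MODES "1-thread,little-core,full-inference,default-flags"
    TARGET_BACKEND "dylib-llvm-aot"
    TARGET_ARCHITECTURE "CPU-ARM64-v8A"
    TRANSLATION_FLAGS ${ANDROID_CPU_TRANSLATION_FLAGS}
    DRIVER "dylib"
    RUNTIME_FLAGS "--task_topology_group_count=1"
  )

  # "experimental-flags" suites layer the non-default flags under test
  # on top of the same baseline.
  iree_benchmark_suite(
    MODULES "${DEEPLABV3_FP32_MODULE}"
    BENCHMARK_MODES "1-thread,little-core,full-inference,experimental-flags"
    TARGET_BACKEND "dylib-llvm-aot"
    TARGET_ARCHITECTURE "CPU-ARM64-v8A"
    TRANSLATION_FLAGS
      ${ANDROID_CPU_TRANSLATION_FLAGS}
      "--iree-flow-inline-constants-max-byte-length=2048"
      "--iree-llvm-loop-unrolling=true"
    DRIVER "dylib"
    RUNTIME_FLAGS "--task_topology_group_count=1"
  )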
diff --git a/benchmarks/TFLite/CMakeLists.txt b/benchmarks/TFLite/CMakeLists.txt
index 1799a2c..3af45c6 100644
--- a/benchmarks/TFLite/CMakeLists.txt
+++ b/benchmarks/TFLite/CMakeLists.txt
@@ -60,14 +60,20 @@
################################################################################
# #
-# Common benchmark configurations #
+# Default benchmark configurations #
# #
-# Each suite benchmarks a list of modules with some specific configuration, #
-# typically involving different translation/runtime flags and targeting #
-# different IREE drivers and hardware architectures. #
+# Each suite benchmarks a list of modules with configurations specifying a #
+# target architecture and runtime characteristics (e.g. threads/cores). These #
+# benchmarks only configure IREE translation and runtime flags for the target #
+# architecture and do *not* include any non-default flags. No non-default #
+# flags should be added here. #
# #
################################################################################
+set(ANDROID_CPU_TRANSLATION_FLAGS
+ "--iree-input-type=tosa"
+ "--iree-llvm-target-triple=aarch64-none-linux-android29")
+
# CPU, Dylib-Sync, big/little-core, full-inference
iree_benchmark_suite(
MODULES
@@ -76,25 +82,19 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "big-core,full-inference"
- "little-core,full-inference"
+ "big-core,full-inference,default-flags"
+ "little-core,full-inference,default-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
- "--iree-flow-inline-constants-max-byte-length=2048"
- "--iree-llvm-loop-unrolling=true"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
DRIVER
"dylib-sync"
)
-# CPU, Dylib, 1-thread, big/little-core, full-inference. These benchmarks only
-# configure IREE translation and runtime flags for the target architecture and
-# runtime characteristics and do *not* include any non-default flags. No
-# non-default flags should be added here.
+# CPU, Dylib, 1-thread, big/little-core, full-inference.
iree_benchmark_suite(
MODULES
"${DEEPLABV3_FP32_MODULE}"
@@ -109,14 +109,152 @@
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
DRIVER
"dylib"
RUNTIME_FLAGS
"--task_topology_group_count=1"
)
+# CPU, Dylib, 2 through 4-thread, little-core, full-inference
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "2-thread,little-core,full-inference,default-flags"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
+ DRIVER
+ "dylib"
+ RUNTIME_FLAGS
+ "--task_topology_group_count=2"
+)
+
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "3-thread,little-core,full-inference,default-flags"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
+ DRIVER
+ "dylib"
+ RUNTIME_FLAGS
+ "--task_topology_group_count=3"
+)
+
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "4-thread,little-core,full-inference,default-flags"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
+ DRIVER
+ "dylib"
+ RUNTIME_FLAGS
+ "--task_topology_group_count=4"
+)
+
+# GPU, Vulkan, Adreno, full-inference
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "full-inference,default-flags"
+ TARGET_BACKEND
+ "vulkan-spirv"
+ TARGET_ARCHITECTURE
+ "GPU-Adreno"
+ TRANSLATION_FLAGS
+ "--iree-input-type=tosa"
+ "--iree-vulkan-target-triple=adreno-unknown-android11"
+ DRIVER
+ "vulkan"
+)
+
+# GPU, Vulkan, Mali, full-inference
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "full-inference,default-flags"
+ TARGET_BACKEND
+ "vulkan-spirv"
+ TARGET_ARCHITECTURE
+ "GPU-Mali-Valhall"
+ TRANSLATION_FLAGS
+ "--iree-input-type=tosa"
+ "--iree-vulkan-target-triple=valhall-unknown-android11"
+ DRIVER
+ "vulkan"
+)
+
+################################################################################
+
+################################################################################
+# #
+# Specialized benchmark configurations #
+# #
+# Each suite benchmarks one or more modules with configurations that can vary #
+# on model or architecture characteristics. These are intended for providing #
+# continuous benchmarks of experimental features that cannot be turned on by #
+# default yet. It is primarily intended for whoever is actively investigating #
+# optimizations for a feature exemplified in a specific model or architecture. #
+# Due to our current benchmark setup, there can only be one experimental #
+# configuration per model and other benchmark mode. #
+# #
+################################################################################
+
+# CPU, Dylib-Sync, big/little-core, full-inference
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "big-core,full-inference,experimental-flags"
+ "little-core,full-inference,experimental-flags"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
+ "--iree-flow-inline-constants-max-byte-length=2048"
+ "--iree-llvm-loop-unrolling=true"
+ DRIVER
+ "dylib-sync"
+)
+
# CPU, Dylib, 1-thread, big/little-core, full-inference
iree_benchmark_suite(
MODULES
@@ -125,15 +263,14 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "1-thread,big-core,full-inference"
- "1-thread,little-core,full-inference"
+ "1-thread,big-core,full-inference,experimental-flags"
+ "1-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
"--iree-flow-inline-constants-max-byte-length=2048"
"--iree-llvm-loop-unrolling=true"
DRIVER
@@ -150,14 +287,13 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "2-thread,little-core,full-inference"
+ "2-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
"--iree-flow-inline-constants-max-byte-length=2048"
"--iree-llvm-loop-unrolling=true"
DRIVER
@@ -173,14 +309,13 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "3-thread,little-core,full-inference"
+ "3-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
"--iree-flow-inline-constants-max-byte-length=2048"
"--iree-llvm-loop-unrolling=true"
DRIVER
@@ -196,14 +331,13 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "4-thread,little-core,full-inference"
+ "4-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
"--iree-flow-inline-constants-max-byte-length=2048"
"--iree-llvm-loop-unrolling=true"
DRIVER
@@ -220,7 +354,7 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "full-inference"
+ "full-inference,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -242,7 +376,7 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "full-inference"
+ "full-inference,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -264,7 +398,7 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "kernel-execution"
+ "kernel-execution,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
diff --git a/benchmarks/TensorFlow/CMakeLists.txt b/benchmarks/TensorFlow/CMakeLists.txt
index 11ce458..b5ffbb5 100644
--- a/benchmarks/TensorFlow/CMakeLists.txt
+++ b/benchmarks/TensorFlow/CMakeLists.txt
@@ -89,7 +89,7 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "3-thread,little-core,full-inference"
+ "3-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"vmvx"
TARGET_ARCHITECTURE
@@ -110,8 +110,8 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "big-core,full-inference"
- "little-core,full-inference"
+ "big-core,full-inference,experimental-flags"
+ "little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -132,8 +132,8 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "1-thread,big-core,full-inference"
- "1-thread,little-core,full-inference"
+ "1-thread,big-core,full-inference,experimental-flags"
+ "1-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -156,8 +156,8 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "3-thread,big-core,full-inference"
- "3-thread,little-core,full-inference"
+ "3-thread,big-core,full-inference,experimental-flags"
+ "3-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -181,7 +181,7 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "full-inference"
+ "full-inference,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -202,7 +202,7 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "kernel-execution"
+ "kernel-execution,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -227,7 +227,7 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "full-inference"
+ "full-inference,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -248,7 +248,7 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "kernel-execution"
+ "kernel-execution,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -271,7 +271,7 @@
"${MOBILEBERT_FP16_MODULE}"
BENCHMARK_MODES
- "kernel-execution"
+ "kernel-execution,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -295,7 +295,7 @@
"${MOBILEBERT_FP16_MODULE}"
BENCHMARK_MODES
- "full-inference"
+ "full-inference,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -325,8 +325,8 @@
"${MOBILEBERT_FP32_MODULE}"
BENCHMARK_MODES
- "big-core,full-inference"
- "little-core,full-inference"
+ "big-core,full-inference,experimental-flags"
+ "little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -346,8 +346,8 @@
"${MOBILEBERT_FP32_MODULE}"
BENCHMARK_MODES
- "1-thread,big-core,full-inference"
- "1-thread,little-core,full-inference"
+ "1-thread,big-core,full-inference,experimental-flags"
+ "1-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -369,8 +369,8 @@
"${MOBILEBERT_FP32_MODULE}"
BENCHMARK_MODES
- "3-thread,big-core,full-inference"
- "3-thread,little-core,full-inference"
+ "3-thread,big-core,full-inference,experimental-flags"
+ "3-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
diff --git a/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml b/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml
index a6e8e35..56d17e1 100644
--- a/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml
+++ b/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml
@@ -75,6 +75,13 @@
agents:
- "queue=report"
+ # TODO(antiagainst): Remove this block step when the migration is complete.
+ - block: "Upload to Dashboard"
+ prompt: "Uploads are paused while we rename series in Dana. Don't unblock unless you know what you're doing."
+ key: "block-upload-to-dashboard"
+ branches: "main"
+
+
- label: "Push benchmark results to dashboard"
commands:
- "git clean -f"