Add default-flags configurations for all TFLite benchmarks (#7580)
Following up on https://github.com/google/iree/pull/7553, this adds
default flag configurations for the remaining TFLite models. Now that
we've got timestamps in the Buildkite runs, we can see what this does
to artifact transfer time via the presubmit:
https://buildkite.com/iree/iree-benchmark/builds/1447
I'm contrasting with the run at the merge base on main:
https://buildkite.com/iree/iree-benchmark/builds/1445
For this PR's run, downloads took 1:35 and 4:12 for the two RPIs,
compared to 2:06 and 3:30 before. That looks like it's basically in
the noise. We could do more detailed analysis, but that doesn't seem
worth it to me. Just spot-checking a few runs, it seems the RPI
connected to the Pixels consistently downloads faster than the ones
connected to the Samsungs.
As for the benchmarks themselves, the runs on the RPIs took
around 34-35 minutes for each phone, 6-8 minutes longer than
before.
Overall run latency, excluding waiting for agents, increased
from 43:12 to 49:50. I think this is acceptable given our
current limitations.
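
For quick reference, here is a condensed sketch of the convention the
diff below applies (abridged to a single module and mode for
illustration; the actual suites list several of each):

  # Shared per-architecture translation flags live in one variable.
  set(ANDROID_CPU_TRANSLATION_FLAGS
    "--iree-input-type=tosa"
    "--iree-llvm-target-triple=aarch64-none-linux-android29")

  # "default-flags" suites use only those target-architecture flags.
  iree_benchmark_suite(
    MODULES "${DEEPLABV3_FP32_MODULE}"
    BENCHMARK_MODES "1-thread,little-core,full-inference,default-flags"
    TARGET_BACKEND "dylib-llvm-aot"
    TARGET_ARCHITECTURE "CPU-ARM64-v8A"
    TRANSLATION_FLAGS ${ANDROID_CPU_TRANSLATION_FLAGS}
    DRIVER "dylib"
    RUNTIME_FLAGS "--task_topology_group_count=1"
  )

  # "experimental-flags" suites layer the non-default flags under test
  # on top of the same baseline.
  iree_benchmark_suite(
    MODULES "${DEEPLABV3_FP32_MODULE}"
    BENCHMARK_MODES "1-thread,little-core,full-inference,experimental-flags"
    TARGET_BACKEND "dylib-llvm-aot"
    TARGET_ARCHITECTURE "CPU-ARM64-v8A"
    TRANSLATION_FLAGS
      ${ANDROID_CPU_TRANSLATION_FLAGS}
      "--iree-flow-inline-constants-max-byte-length=2048"
      "--iree-llvm-loop-unrolling=true"
    DRIVER "dylib"
    RUNTIME_FLAGS "--task_topology_group_count=1"
  )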
diff --git a/benchmarks/TFLite/CMakeLists.txt b/benchmarks/TFLite/CMakeLists.txt
index 1799a2c..3af45c6 100644
--- a/benchmarks/TFLite/CMakeLists.txt
+++ b/benchmarks/TFLite/CMakeLists.txt
@@ -60,14 +60,20 @@
################################################################################
# #
-# Common benchmark configurations #
+# Default benchmark configurations #
# #
-# Each suite benchmarks a list of modules with some specific configuration, #
-# typically involving different translation/runtime flags and targeting #
-# different IREE drivers and hardware architectures. #
+# Each suite benchmarks a list of modules with configurations specifying a #
+# target architecture and runtime characteristics (e.g. threads/cores). These #
+# benchmarks only configure IREE translation and runtime flags for the target #
+# architecture and do *not* include any non-default flags. No non-default #
+# flags should be added here. #
# #
################################################################################
+set(ANDROID_CPU_TRANSLATION_FLAGS
+ "--iree-input-type=tosa"
+ "--iree-llvm-target-triple=aarch64-none-linux-android29")
+
# CPU, Dylib-Sync, big/little-core, full-inference
iree_benchmark_suite(
MODULES
@@ -76,25 +82,19 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "big-core,full-inference"
- "little-core,full-inference"
+ "big-core,full-inference,default-flags"
+ "little-core,full-inference,default-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
- "--iree-flow-inline-constants-max-byte-length=2048"
- "--iree-llvm-loop-unrolling=true"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
DRIVER
"dylib-sync"
)
-# CPU, Dylib, 1-thread, big/little-core, full-inference. These benchmarks only
-# configure IREE translation and runtime flags for the target architecture and
-# runtime characteristics and do *not* include any non-default flags. No
-# non-default flags should be added here.
+# CPU, Dylib, 1-thread, big/little-core, full-inference.
iree_benchmark_suite(
MODULES
"${DEEPLABV3_FP32_MODULE}"
@@ -109,14 +109,152 @@
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
DRIVER
"dylib"
RUNTIME_FLAGS
"--task_topology_group_count=1"
)
+# CPU, Dylib, 2 through 4-thread, little-core, full-inference
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "2-thread,little-core,full-inference,default-flags"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
+ DRIVER
+ "dylib"
+ RUNTIME_FLAGS
+ "--task_topology_group_count=2"
+)
+
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "3-thread,little-core,full-inference,default-flags"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
+ DRIVER
+ "dylib"
+ RUNTIME_FLAGS
+ "--task_topology_group_count=3"
+)
+
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "4-thread,little-core,full-inference,default-flags"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
+ DRIVER
+ "dylib"
+ RUNTIME_FLAGS
+ "--task_topology_group_count=4"
+)
+
+# GPU, Vulkan, Adreno, full-inference
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "full-inference,default-flags"
+ TARGET_BACKEND
+ "vulkan-spirv"
+ TARGET_ARCHITECTURE
+ "GPU-Adreno"
+ TRANSLATION_FLAGS
+ "--iree-input-type=tosa"
+ "--iree-vulkan-target-triple=adreno-unknown-android11"
+ DRIVER
+ "vulkan"
+)
+
+# GPU, Vulkan, Mali, full-inference
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "full-inference,default-flags"
+ TARGET_BACKEND
+ "vulkan-spirv"
+ TARGET_ARCHITECTURE
+ "GPU-Mali-Valhall"
+ TRANSLATION_FLAGS
+ "--iree-input-type=tosa"
+ "--iree-vulkan-target-triple=valhall-unknown-android11"
+ DRIVER
+ "vulkan"
+)
+
+################################################################################
+
+################################################################################
+# #
+# Specialized benchmark configurations #
+# #
+# Each suite benchmarks one or more modules with configurations that can vary #
+# on model or architecture characteristics. These are intended for providing #
+# continuous benchmarks of experimental features that cannot be turned on by #
+# default yet. It is primarily intended for whoever is actively investigating #
+# optimizations for a feature exemplified in a specific model or architecture. #
+# Due to our current benchmark setup, there can only be one experimental #
+# configuration per model and other benchmark mode. #
+# #
+################################################################################
+
+# CPU, Dylib-Sync, big/little-core, full-inference
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+
+ BENCHMARK_MODES
+ "big-core,full-inference,experimental-flags"
+ "little-core,full-inference,experimental-flags"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
+ "--iree-flow-inline-constants-max-byte-length=2048"
+ "--iree-llvm-loop-unrolling=true"
+ DRIVER
+ "dylib-sync"
+)
+
# CPU, Dylib, 1-thread, big/little-core, full-inference
iree_benchmark_suite(
MODULES
@@ -125,15 +263,14 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "1-thread,big-core,full-inference"
- "1-thread,little-core,full-inference"
+ "1-thread,big-core,full-inference,experimental-flags"
+ "1-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
"--iree-flow-inline-constants-max-byte-length=2048"
"--iree-llvm-loop-unrolling=true"
DRIVER
@@ -150,14 +287,13 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "2-thread,little-core,full-inference"
+ "2-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
"--iree-flow-inline-constants-max-byte-length=2048"
"--iree-llvm-loop-unrolling=true"
DRIVER
@@ -173,14 +309,13 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "3-thread,little-core,full-inference"
+ "3-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
"--iree-flow-inline-constants-max-byte-length=2048"
"--iree-llvm-loop-unrolling=true"
DRIVER
@@ -196,14 +331,13 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "4-thread,little-core,full-inference"
+ "4-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
TRANSLATION_FLAGS
- "--iree-input-type=tosa"
- "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ ${ANDROID_CPU_TRANSLATION_FLAGS}
"--iree-flow-inline-constants-max-byte-length=2048"
"--iree-llvm-loop-unrolling=true"
DRIVER
@@ -220,7 +354,7 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "full-inference"
+ "full-inference,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -242,7 +376,7 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "full-inference"
+ "full-inference,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -264,7 +398,7 @@
"${POSENET_FP32_MODULE}"
BENCHMARK_MODES
- "kernel-execution"
+ "kernel-execution,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
diff --git a/benchmarks/TensorFlow/CMakeLists.txt b/benchmarks/TensorFlow/CMakeLists.txt
index 11ce458..b5ffbb5 100644
--- a/benchmarks/TensorFlow/CMakeLists.txt
+++ b/benchmarks/TensorFlow/CMakeLists.txt
@@ -89,7 +89,7 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "3-thread,little-core,full-inference"
+ "3-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"vmvx"
TARGET_ARCHITECTURE
@@ -110,8 +110,8 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "big-core,full-inference"
- "little-core,full-inference"
+ "big-core,full-inference,experimental-flags"
+ "little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -132,8 +132,8 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "1-thread,big-core,full-inference"
- "1-thread,little-core,full-inference"
+ "1-thread,big-core,full-inference,experimental-flags"
+ "1-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -156,8 +156,8 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "3-thread,big-core,full-inference"
- "3-thread,little-core,full-inference"
+ "3-thread,big-core,full-inference,experimental-flags"
+ "3-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -181,7 +181,7 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "full-inference"
+ "full-inference,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -202,7 +202,7 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "kernel-execution"
+ "kernel-execution,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -227,7 +227,7 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "full-inference"
+ "full-inference,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -248,7 +248,7 @@
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
- "kernel-execution"
+ "kernel-execution,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -271,7 +271,7 @@
"${MOBILEBERT_FP16_MODULE}"
BENCHMARK_MODES
- "kernel-execution"
+ "kernel-execution,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -295,7 +295,7 @@
"${MOBILEBERT_FP16_MODULE}"
BENCHMARK_MODES
- "full-inference"
+ "full-inference,experimental-flags"
TARGET_BACKEND
"vulkan-spirv"
TARGET_ARCHITECTURE
@@ -325,8 +325,8 @@
"${MOBILEBERT_FP32_MODULE}"
BENCHMARK_MODES
- "big-core,full-inference"
- "little-core,full-inference"
+ "big-core,full-inference,experimental-flags"
+ "little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -346,8 +346,8 @@
"${MOBILEBERT_FP32_MODULE}"
BENCHMARK_MODES
- "1-thread,big-core,full-inference"
- "1-thread,little-core,full-inference"
+ "1-thread,big-core,full-inference,experimental-flags"
+ "1-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
@@ -369,8 +369,8 @@
"${MOBILEBERT_FP32_MODULE}"
BENCHMARK_MODES
- "3-thread,big-core,full-inference"
- "3-thread,little-core,full-inference"
+ "3-thread,big-core,full-inference,experimental-flags"
+ "3-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"dylib-llvm-aot"
TARGET_ARCHITECTURE
diff --git a/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml b/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml
index a6e8e35..56d17e1 100644
--- a/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml
+++ b/build_tools/buildkite/cmake/android/arm64-v8a/benchmark2.yml
@@ -75,6 +75,13 @@
agents:
- "queue=report"
+ # TODO(antiagainst): Remove this block step when the migration is complete.
+ - block: "Upload to Dashboard"
+ prompt: "Uploads are paused while we rename series in Dana. Don't unblock unless you know what you're doing."
+ key: "block-upload-to-dashboard"
+ branches: "main"
+
+
- label: "Push benchmark results to dashboard"
commands:
- "git clean -f"