Add multi-threaded benchmarking of TFLite models (#7545)

This benchmarks on 2-4 little cores. I did not include any benchmarks with multiple big cores, as I understand this to not be a particularly realistic scenario.

commit: dee536c79c5c11635867e5722b0b362b5d8c2683 [log] [tgz]
author: Geoffrey Martin-Noble <gcmn@google.com> Thu Nov 04 15:23:03 2021 -0700
committer: GitHub <noreply@github.com> Thu Nov 04 15:23:03 2021 -0700
tree: 33be84022019b487879649247fd0f6d21d2f507b
parent: a5ffc733e61ae0b9507949ad123949cbcb16dd35 [diff]
diff --git a/benchmarks/TFLite/CMakeLists.txt b/benchmarks/TFLite/CMakeLists.txt
index 6576533..be03631 100644
--- a/benchmarks/TFLite/CMakeLists.txt
+++ b/benchmarks/TFLite/CMakeLists.txt

@@ -116,6 +116,76 @@
     "--task_topology_group_count=1"
 )
 
+# CPU, Dylib, 2 through 4-thread, little-core, full-inference
+iree_benchmark_suite(
+  MODULES
+    "${DEEPLABV3_FP32_MODULE}"
+    "${MOBILESSD_FP32_MODULE}"
+    "${POSENET_FP32_MODULE}"
+
+  BENCHMARK_MODES
+    "2-thread,little-core,full-inference"
+  TARGET_BACKEND
+    "dylib-llvm-aot"
+  TARGET_ARCHITECTURE
+    "CPU-ARM64-v8A"
+  TRANSLATION_FLAGS
+    "--iree-input-type=tosa"
+    "--iree-llvm-target-triple=aarch64-none-linux-android29"
+    "--iree-flow-inline-constants-max-byte-length=2048"
+    "--iree-llvm-loop-unrolling=true"
+  DRIVER
+    "dylib"
+  RUNTIME_FLAGS
+    "--task_topology_group_count=2"
+)
+
+iree_benchmark_suite(
+  MODULES
+    "${DEEPLABV3_FP32_MODULE}"
+    "${MOBILESSD_FP32_MODULE}"
+    "${POSENET_FP32_MODULE}"
+
+  BENCHMARK_MODES
+    "3-thread,little-core,full-inference"
+  TARGET_BACKEND
+    "dylib-llvm-aot"
+  TARGET_ARCHITECTURE
+    "CPU-ARM64-v8A"
+  TRANSLATION_FLAGS
+    "--iree-input-type=tosa"
+    "--iree-llvm-target-triple=aarch64-none-linux-android29"
+    "--iree-flow-inline-constants-max-byte-length=2048"
+    "--iree-llvm-loop-unrolling=true"
+  DRIVER
+    "dylib"
+  RUNTIME_FLAGS
+    "--task_topology_group_count=3"
+)
+
+iree_benchmark_suite(
+  MODULES
+    "${DEEPLABV3_FP32_MODULE}"
+    "${MOBILESSD_FP32_MODULE}"
+    "${POSENET_FP32_MODULE}"
+
+  BENCHMARK_MODES
+    "4-thread,little-core,full-inference"
+  TARGET_BACKEND
+    "dylib-llvm-aot"
+  TARGET_ARCHITECTURE
+    "CPU-ARM64-v8A"
+  TRANSLATION_FLAGS
+    "--iree-input-type=tosa"
+    "--iree-llvm-target-triple=aarch64-none-linux-android29"
+    "--iree-flow-inline-constants-max-byte-length=2048"
+    "--iree-llvm-loop-unrolling=true"
+  DRIVER
+    "dylib"
+  RUNTIME_FLAGS
+    "--task_topology_group_count=4"
+)
+
 # GPU, Vulkan, Adreno, full-inference
 iree_benchmark_suite(
   MODULES
commit	dee536c79c5c11635867e5722b0b362b5d8c2683	[log] [tgz]
author	Geoffrey Martin-Noble <gcmn@google.com>	Thu Nov 04 15:23:03 2021 -0700
committer	GitHub <noreply@github.com>	Thu Nov 04 15:23:03 2021 -0700
tree	33be84022019b487879649247fd0f6d21d2f507b
parent	a5ffc733e61ae0b9507949ad123949cbcb16dd35 [diff]