Add multi-threaded benchmarking of TFLite models (#7545)
This adds benchmarks that run on 2, 3, and 4 little cores. I did not
include any benchmarks with multiple big cores because, as I understand
it, that is not a particularly realistic scenario.
diff --git a/benchmarks/TFLite/CMakeLists.txt b/benchmarks/TFLite/CMakeLists.txt
index 6576533..be03631 100644
--- a/benchmarks/TFLite/CMakeLists.txt
+++ b/benchmarks/TFLite/CMakeLists.txt
@@ -116,6 +116,76 @@
"--task_topology_group_count=1"
)
+# CPU, Dylib, 2 through 4-thread, little-core, full-inference
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+ # 2-thread variant: only BENCHMARK_MODES and RUNTIME_FLAGS differ from the 3- and 4-thread suites.
+ BENCHMARK_MODES
+ "2-thread,little-core,full-inference"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ "--iree-input-type=tosa"
+ "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ "--iree-flow-inline-constants-max-byte-length=2048"
+ "--iree-llvm-loop-unrolling=true"
+ DRIVER
+ "dylib"
+ RUNTIME_FLAGS
+ "--task_topology_group_count=2"
+)
+
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+ # 3-thread variant: only BENCHMARK_MODES and RUNTIME_FLAGS differ from the 2- and 4-thread suites.
+ BENCHMARK_MODES
+ "3-thread,little-core,full-inference"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ "--iree-input-type=tosa"
+ "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ "--iree-flow-inline-constants-max-byte-length=2048"
+ "--iree-llvm-loop-unrolling=true"
+ DRIVER
+ "dylib"
+ RUNTIME_FLAGS
+ "--task_topology_group_count=3"
+)
+
+iree_benchmark_suite(
+ MODULES
+ "${DEEPLABV3_FP32_MODULE}"
+ "${MOBILESSD_FP32_MODULE}"
+ "${POSENET_FP32_MODULE}"
+ # 4-thread variant: only BENCHMARK_MODES and RUNTIME_FLAGS differ from the 2- and 3-thread suites.
+ BENCHMARK_MODES
+ "4-thread,little-core,full-inference"
+ TARGET_BACKEND
+ "dylib-llvm-aot"
+ TARGET_ARCHITECTURE
+ "CPU-ARM64-v8A"
+ TRANSLATION_FLAGS
+ "--iree-input-type=tosa"
+ "--iree-llvm-target-triple=aarch64-none-linux-android29"
+ "--iree-flow-inline-constants-max-byte-length=2048"
+ "--iree-llvm-loop-unrolling=true"
+ DRIVER
+ "dylib"
+ RUNTIME_FLAGS
+ "--task_topology_group_count=4"
+)
+
# GPU, Vulkan, Adreno, full-inference
iree_benchmark_suite(
MODULES