Measure MobileBERT fp16 full inference time for Mali GPUs (#7403)
We have configurations for both fp32 kernel-execution and full-inference, but
for fp16 we only have kernel-execution. This adds the missing fp16
full-inference configuration for Mali GPUs.
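For context, the TRANSLATION_FLAGS added below correspond roughly to this
one-off compiler invocation (a sketch only; it assumes the iree-translate
driver of this era, and the .mlir/.vmfb file names are placeholders):

  iree-translate \
    --iree-mlir-to-vm-bytecode-module \
    --iree-hal-target-backends=vulkan-spirv \
    --iree-input-type=mhlo \
    --iree-flow-demote-f32-to-f16 \
    --iree-vulkan-target-triple=valhall-unknown-android11 \
    --iree-flow-inline-constants-max-byte-length=16 \
    --iree-enable-fusion-with-reduction-ops \
    mobilebert.mlir -o mobilebert-fp16-vulkan.vmfb

Note that the fp16 artifact comes from compile-time demotion of the fp32
model (--iree-flow-demote-f32-to-f16) rather than from a separate fp16
checkpoint.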
diff --git a/benchmarks/TensorFlow/CMakeLists.txt b/benchmarks/TensorFlow/CMakeLists.txt
index 6f95f86..48c79b3 100644
--- a/benchmarks/TensorFlow/CMakeLists.txt
+++ b/benchmarks/TensorFlow/CMakeLists.txt
@@ -270,6 +270,27 @@
"--batch_size=32"
)
+# GPU, Vulkan, Mali, full-inference
+iree_mlir_benchmark_suite(
+  MODULES
+    ${MOBILEBERT_FP16_MODULE}
+
+  BENCHMARK_MODES
+    "full-inference"
+  TARGET_BACKEND
+    "vulkan-spirv"
+  TARGET_ARCHITECTURE
+    "GPU-Mali-Valhall"
+  TRANSLATION_FLAGS
+    "--iree-input-type=mhlo"
+    "--iree-flow-demote-f32-to-f16"
+    "--iree-vulkan-target-triple=valhall-unknown-android11"
+    "--iree-flow-inline-constants-max-byte-length=16"
+    "--iree-enable-fusion-with-reduction-ops"
+  DRIVER
+    "vulkan"
+)
+
 ################################################################################
 #                                                                              #
 #                       Special benchmark configurations                       #
 #                                                                              #
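Once built, the resulting module can be exercised on an Android device the
same way the harness does; a rough sketch, assuming the iree-benchmark-module
tool of this era (the module path, entry function, and input shapes below are
illustrative, not taken from this change):

  adb shell /data/local/tmp/iree-benchmark-module \
    --driver=vulkan \
    --module_file=/data/local/tmp/mobilebert-fp16-vulkan.vmfb \
    --entry_function=main \
    --function_input=1x384xi32 \
    --function_input=1x384xi32 \
    --function_input=1x384xi32

In full-inference mode this measures end-to-end latency per invocation; the
existing kernel-execution configurations instead pass --batch_size=32 to the
benchmark tool, presumably batching dispatches so that the measurement
focuses on GPU kernel time.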