Measure MobileBERT fp16 full inference time for Mali GPUs (#7403)

We have MobileBERT benchmark configurations for both kernel-execution and
full-inference with fp32, but for fp16 we only have kernel-execution. This
adds the missing fp16 full-inference configuration targeting Mali GPUs.
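For reference, the TRANSLATION_FLAGS in the new configuration below roughly
correspond to an iree-translate invocation like the following (a sketch only;
the input/output file names are hypothetical and the exact tool spelling may
differ across IREE versions):

  iree-translate \
    -iree-mlir-to-vm-bytecode-module \
    -iree-hal-target-backends=vulkan-spirv \
    --iree-input-type=mhlo \
    --iree-flow-demote-f32-to-f16 \
    --iree-vulkan-target-triple=valhall-unknown-android11 \
    --iree-flow-inline-constants-max-byte-length=16 \
    --iree-enable-fusion-with-reduction-ops \
    mobilebert-fp16.mlir -o mobilebert-fp16-vulkan.vmfb

The --iree-flow-demote-f32-to-f16 flag is what turns the imported fp32
MobileBERT model into the fp16 variant being measured here.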
diff --git a/benchmarks/TensorFlow/CMakeLists.txt b/benchmarks/TensorFlow/CMakeLists.txt
index 6f95f86..48c79b3 100644
--- a/benchmarks/TensorFlow/CMakeLists.txt
+++ b/benchmarks/TensorFlow/CMakeLists.txt
@@ -270,6 +270,27 @@
     "--batch_size=32"
 )
 
+# GPU, Vulkan, Mali, full-inference
+iree_mlir_benchmark_suite(
+  MODULES
+    ${MOBILEBERT_FP16_MODULE}
+
+  BENCHMARK_MODES
+    "full-inference"
+  TARGET_BACKEND
+    "vulkan-spirv"
+  TARGET_ARCHITECTURE
+    "GPU-Mali-Valhall"
+  TRANSLATION_FLAGS
+    "--iree-input-type=mhlo"
+    "--iree-flow-demote-f32-to-f16"
+    "--iree-vulkan-target-triple=valhall-unknown-android11"
+    "--iree-flow-inline-constants-max-byte-length=16"
+    "--iree-enable-fusion-with-reduction-ops"
+  DRIVER
+    "vulkan"
+)
+
 ################################################################################
 #                                                                              #
 # Special benchmark configurations                                             #