# blob: 14344f3dbe00a6ef70c809bb6148df3b3c6479cd [file] [log] [blame]
# Copyright 2021 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
################################################################################
# #
# Benchmark models #
# #
# Each module specification should be a list that contains the following #
# fields: MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION, #
# FUNCTION_INPUTS. See iree_mlir_benchmark_suite definition for details about #
# these fields. #
# #
################################################################################
set(MOBILEBERT_FP16_MODULE
  # Module specification for MobileBertSquad tagged "fp16". The five entries
  # below are positional; keep them in this order.

  # MODULE_NAME
  "MobileBertSquad"

  # MODULE_TAGS
  "fp16"

  # MLIR_SOURCE: intentionally the same archive as the fp32 module, to save
  # download time. Consequently, users of this module need to pass
  # "--iree-flow-demote-f32-to-f16".
  "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-9c0042d19.tar.gz"

  # ENTRY_FUNCTION
  "serving_default"

  # FUNCTION_INPUTS: identical to the fp32 module, since the conversion done
  # by "--iree-flow-demote-f32-to-f16" won't change the original input
  # signature.
  "1x384xi32,1x384xi32,1x384xi32"
)
set(MOBILEBERT_FP32_MODULE
  # Module specification for MobileBertSquad tagged "fp32". The five entries
  # below are positional; keep them in this order.

  # MODULE_NAME
  "MobileBertSquad"

  # MODULE_TAGS
  "fp32"

  # MLIR_SOURCE
  "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-9c0042d19.tar.gz"

  # ENTRY_FUNCTION
  "serving_default"

  # FUNCTION_INPUTS
  "1x384xi32,1x384xi32,1x384xi32"
)
set(MOBILENET_V2_MODULE
  # Module specification for MobileNetV2. The five entries below are
  # positional; keep them in this order.

  # MODULE_NAME
  "MobileNetV2"

  # MODULE_TAGS (comma-separated inside a single list element)
  "fp32,imagenet"

  # MLIR_SOURCE
  "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-9c0042d19.tar.gz"

  # ENTRY_FUNCTION
  "call"

  # FUNCTION_INPUTS
  "1x224x224x3xf32"
)
set(MOBILENET_V3SMALL_MODULE
  # Module specification for MobileNetV3Small. The five entries below are
  # positional; keep them in this order.

  # MODULE_NAME
  "MobileNetV3Small"

  # MODULE_TAGS (comma-separated inside a single list element)
  "fp32,imagenet"

  # MLIR_SOURCE
  "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-9c0042d19.tar.gz"

  # ENTRY_FUNCTION
  "call"

  # FUNCTION_INPUTS
  "1x224x224x3xf32"
)
################################################################################
# #
# Common benchmark configurations #
# #
# Each suite benchmarks a list of modules with some specific configuration, #
# typically involving different translation/runtime flags and targeting #
# different IREE drivers and hardware architectures. #
# #
################################################################################
# CPU, VMVX, 3-thread, little-core, full-inference
iree_mlir_benchmark_suite(
  # MobileNet modules compiled for and run with VMVX on ARM64 CPUs.
  MODULES
    ${MOBILENET_V2_MODULE}
    ${MOBILENET_V3SMALL_MODULE}

  BENCHMARK_MODES
    "3-thread,little-core,full-inference"

  TARGET_BACKEND "vmvx"
  TARGET_ARCHITECTURE "CPU-ARM64-v8A"

  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-flow-inline-constants-max-byte-length=2048"

  DRIVER "vmvx"

  # The group count of 3 lines up with the "3-thread" mode above; presumably
  # the two must be changed together -- confirm before editing one alone.
  RUNTIME_FLAGS
    "--task_topology_group_count=3"
)
# CPU, Dylib-Sync, big/little-core, full-inference
iree_mlir_benchmark_suite(
  # MobileNet modules compiled with dylib-llvm-aot for ARM64 and run with the
  # dylib-sync driver. No RUNTIME_FLAGS are passed for this configuration.
  MODULES
    ${MOBILENET_V2_MODULE}
    ${MOBILENET_V3SMALL_MODULE}

  BENCHMARK_MODES
    "big-core,full-inference"
    "little-core,full-inference"

  TARGET_BACKEND "dylib-llvm-aot"
  TARGET_ARCHITECTURE "CPU-ARM64-v8A"

  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-llvm-target-triple=aarch64-none-linux-android29"
    "--iree-flow-inline-constants-max-byte-length=2048"
    # TODO(GH-5857): Enable this after fixing segfault.
    # "--iree-flow-dispatch-formation-enable-operand-fusion"
    "--iree-llvm-loop-unrolling=true"
    "--iree-llvm-link-embedded=true"

  DRIVER "dylib-sync"
)
# CPU, Dylib, 1-thread, big/little-core, full-inference
iree_mlir_benchmark_suite(
  # MobileNet modules compiled with dylib-llvm-aot for ARM64 and run with the
  # dylib driver in the 1-thread modes.
  MODULES
    ${MOBILENET_V2_MODULE}
    ${MOBILENET_V3SMALL_MODULE}

  BENCHMARK_MODES
    "1-thread,big-core,full-inference"
    "1-thread,little-core,full-inference"

  TARGET_BACKEND "dylib-llvm-aot"
  TARGET_ARCHITECTURE "CPU-ARM64-v8A"

  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-llvm-target-triple=aarch64-none-linux-android29"
    "--iree-flow-inline-constants-max-byte-length=2048"
    # TODO(GH-5857): Enable this after fixing segfault.
    # "--iree-flow-dispatch-formation-enable-operand-fusion"
    "--iree-llvm-loop-unrolling=true"

  DRIVER "dylib"

  # The group count of 1 lines up with the "1-thread" modes above; presumably
  # the two must be changed together -- confirm before editing one alone.
  RUNTIME_FLAGS
    "--task_topology_group_count=1"
)
# CPU, Dylib, 3-thread, big/little-core, full-inference
iree_mlir_benchmark_suite(
  # MobileNet modules compiled with dylib-llvm-aot for ARM64 and run with the
  # dylib driver in the 3-thread modes.
  MODULES
    ${MOBILENET_V2_MODULE}
    ${MOBILENET_V3SMALL_MODULE}

  BENCHMARK_MODES
    "3-thread,big-core,full-inference"
    "3-thread,little-core,full-inference"

  TARGET_BACKEND "dylib-llvm-aot"
  TARGET_ARCHITECTURE "CPU-ARM64-v8A"

  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-llvm-target-triple=aarch64-none-linux-android29"
    "--iree-flow-inline-constants-max-byte-length=2048"
    # TODO(GH-5857): Enable this after fixing segfault.
    # "--iree-flow-dispatch-formation-enable-operand-fusion"
    "--iree-llvm-loop-unrolling=true"

  DRIVER "dylib"

  # The group count of 3 lines up with the "3-thread" modes above; presumably
  # the two must be changed together -- confirm before editing one alone.
  RUNTIME_FLAGS
    "--task_topology_group_count=3"
)
# GPU, Vulkan, Adreno, full-inference
iree_mlir_benchmark_suite(
  # MobileBERT (fp32) and MobileNet modules compiled to vulkan-spirv for
  # Adreno GPUs, full-inference mode. No RUNTIME_FLAGS are passed here.
  MODULES
    ${MOBILEBERT_FP32_MODULE}
    ${MOBILENET_V2_MODULE}
    ${MOBILENET_V3SMALL_MODULE}

  BENCHMARK_MODES
    "full-inference"

  TARGET_BACKEND "vulkan-spirv"
  TARGET_ARCHITECTURE "GPU-Adreno"

  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-vulkan-target-triple=adreno-unknown-android11"
    "--iree-flow-inline-constants-max-byte-length=2048"
    "--iree-flow-dispatch-formation-enable-operand-fusion"
    "--iree-enable-fusion-with-reduction-ops"

  DRIVER "vulkan"
)
# GPU, Vulkan, Adreno, kernel-execution
iree_mlir_benchmark_suite(
  # MobileNet modules compiled to vulkan-spirv for Adreno GPUs,
  # kernel-execution mode.
  MODULES
    ${MOBILENET_V2_MODULE}
    ${MOBILENET_V3SMALL_MODULE}

  BENCHMARK_MODES
    "kernel-execution"

  TARGET_BACKEND "vulkan-spirv"
  TARGET_ARCHITECTURE "GPU-Adreno"

  # The dispatch repeat count (16) equals --batch_size in RUNTIME_FLAGS below;
  # presumably the two have to agree -- confirm before changing one alone.
  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-vulkan-target-triple=adreno-unknown-android11"
    "--iree-flow-inline-constants-max-byte-length=2048"
    "--iree-flow-dispatch-formation-enable-operand-fusion"
    "--iree-enable-fusion-with-reduction-ops"
    "--iree-hal-benchmark-dispatch-repeat-count=16"

  DRIVER "vulkan"

  RUNTIME_FLAGS
    "--batch_size=16"
)
# GPU, Vulkan, Mali, full-inference
iree_mlir_benchmark_suite(
  # MobileBERT (fp32) and MobileNet modules compiled to vulkan-spirv for
  # Mali Valhall GPUs, full-inference mode. No RUNTIME_FLAGS are passed here.
  MODULES
    ${MOBILEBERT_FP32_MODULE}
    ${MOBILENET_V2_MODULE}
    ${MOBILENET_V3SMALL_MODULE}

  BENCHMARK_MODES
    "full-inference"

  TARGET_BACKEND "vulkan-spirv"
  TARGET_ARCHITECTURE "GPU-Mali-Valhall"

  # Note the inline-constant byte limit is 16 for Mali, whereas the Adreno
  # and CPU suites in this file use 2048.
  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-vulkan-target-triple=valhall-unknown-android11"
    "--iree-flow-inline-constants-max-byte-length=16"
    "--iree-flow-dispatch-formation-enable-operand-fusion"
    "--iree-enable-fusion-with-reduction-ops"

  DRIVER "vulkan"
)
# GPU, Vulkan, Mali, kernel-execution
iree_mlir_benchmark_suite(
  # MobileNet modules compiled to vulkan-spirv for Mali Valhall GPUs,
  # kernel-execution mode.
  MODULES
    ${MOBILENET_V2_MODULE}
    ${MOBILENET_V3SMALL_MODULE}

  BENCHMARK_MODES
    "kernel-execution"

  TARGET_BACKEND "vulkan-spirv"
  TARGET_ARCHITECTURE "GPU-Mali-Valhall"

  # The dispatch repeat count (32) equals --batch_size in RUNTIME_FLAGS below;
  # presumably the two have to agree -- confirm before changing one alone.
  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-vulkan-target-triple=valhall-unknown-android11"
    "--iree-flow-inline-constants-max-byte-length=16"
    "--iree-flow-dispatch-formation-enable-operand-fusion"
    "--iree-enable-fusion-with-reduction-ops"
    "--iree-hal-benchmark-dispatch-repeat-count=32"

  DRIVER "vulkan"

  RUNTIME_FLAGS
    "--batch_size=32"
)
# GPU, Vulkan, Mali, fp16 (MobileBERT), kernel-execution
iree_mlir_benchmark_suite(
  # MobileBERT fp16 module compiled to vulkan-spirv for Mali Valhall GPUs,
  # kernel-execution mode.
  MODULES
    ${MOBILEBERT_FP16_MODULE}

  BENCHMARK_MODES
    "kernel-execution"

  TARGET_BACKEND "vulkan-spirv"
  TARGET_ARCHITECTURE "GPU-Mali-Valhall"

  # "--iree-flow-demote-f32-to-f16" is needed because the fp16 module reuses
  # the fp32 MLIR source. The dispatch repeat count (32) equals --batch_size
  # in RUNTIME_FLAGS below; presumably the two have to agree -- confirm
  # before changing one alone.
  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-flow-demote-f32-to-f16"
    "--iree-vulkan-target-triple=valhall-unknown-android11"
    "--iree-flow-inline-constants-max-byte-length=16"
    "--iree-flow-dispatch-formation-enable-operand-fusion"
    "--iree-enable-fusion-with-reduction-ops"
    "--iree-hal-benchmark-dispatch-repeat-count=32"

  DRIVER "vulkan"

  RUNTIME_FLAGS
    "--batch_size=32"
)
################################################################################
# #
# Special benchmark configurations #
# #
# These are configurations that can only be enabled for some specific model. #
# However, THIS SHOULD REALLY BE TEMPORARY; we should strive for uniformity. #
# #
################################################################################
# CPU, Dylib-Sync, big/little-core, full-inference
iree_mlir_benchmark_suite(
  # MobileBERT (fp32) compiled with dylib-llvm-aot for ARM64 and run with the
  # dylib-sync driver. No RUNTIME_FLAGS are passed for this configuration.
  MODULES
    ${MOBILEBERT_FP32_MODULE}

  BENCHMARK_MODES
    "big-core,full-inference"
    "little-core,full-inference"

  TARGET_BACKEND "dylib-llvm-aot"
  TARGET_ARCHITECTURE "CPU-ARM64-v8A"

  # TODO: Merge this rule once we can use the same flags as the common one.
  # Unlike the common Dylib-Sync suite, "--iree-llvm-loop-unrolling=true" is
  # not passed here.
  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-llvm-target-triple=aarch64-none-linux-android29"
    "--iree-flow-inline-constants-max-byte-length=2048"
    "--iree-llvm-link-embedded=true"

  DRIVER "dylib-sync"
)
# CPU, Dylib, 1-thread, big/little-core, full-inference
iree_mlir_benchmark_suite(
  # MobileBERT (fp32) compiled with dylib-llvm-aot for ARM64 and run with the
  # dylib driver in the 1-thread modes.
  MODULES
    ${MOBILEBERT_FP32_MODULE}

  BENCHMARK_MODES
    "1-thread,big-core,full-inference"
    "1-thread,little-core,full-inference"

  TARGET_BACKEND "dylib-llvm-aot"
  TARGET_ARCHITECTURE "CPU-ARM64-v8A"

  # TODO: Merge this rule once we can use the same flags as the common one.
  # Unlike the common Dylib 1-thread suite, "--iree-llvm-loop-unrolling=true"
  # is not passed here.
  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-llvm-target-triple=aarch64-none-linux-android29"
    "--iree-flow-inline-constants-max-byte-length=2048"

  DRIVER "dylib"

  # The group count of 1 lines up with the "1-thread" modes above; presumably
  # the two must be changed together -- confirm before editing one alone.
  RUNTIME_FLAGS
    "--task_topology_group_count=1"
)
# CPU, Dylib, 3-thread, big/little-core, full-inference
iree_mlir_benchmark_suite(
  # MobileBERT (fp32) compiled with dylib-llvm-aot for ARM64 and run with the
  # dylib driver in the 3-thread modes.
  MODULES
    ${MOBILEBERT_FP32_MODULE}

  BENCHMARK_MODES
    "3-thread,big-core,full-inference"
    "3-thread,little-core,full-inference"

  TARGET_BACKEND "dylib-llvm-aot"
  TARGET_ARCHITECTURE "CPU-ARM64-v8A"

  # TODO: Merge this rule once we can use the same flags as the common one.
  # Unlike the common Dylib 3-thread suite, "--iree-llvm-loop-unrolling=true"
  # is not passed here.
  TRANSLATION_FLAGS
    "--iree-input-type=mhlo"
    "--iree-llvm-target-triple=aarch64-none-linux-android29"
    "--iree-flow-inline-constants-max-byte-length=2048"

  DRIVER "dylib"

  # The group count of 3 lines up with the "3-thread" modes above; presumably
  # the two must be changed together -- confirm before editing one alone.
  RUNTIME_FLAGS
    "--task_topology_group_count=3"
)