blob: bae325be558ab1ee2ebd1a1304147cef86c4b177 [file] [log] [blame]
# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
################################################################################
# #
# Default benchmark configurations #
# #
# Each suite benchmarks a list of modules with configurations specifying a #
# target architecture and runtime characteristics (e.g. threads/cores). These #
# benchmarks only configure IREE compilation and runtime flags for the target #
# architecture and do *not* include any non-default flags. No non-default #
# flags should be added here. #
# #
################################################################################
set(ANDROID_CPU_COMPILATION_FLAGS
"--iree-input-type=tosa"
"--iree-llvm-target-triple=aarch64-none-linux-android29")
# CPU, LLVM, local-sync, big/little-core, full-inference
iree_benchmark_suite(
GROUP_NAME
"android-arm64-v8a"
MODULES
"${DEEPLABV3_FP32_MODULE}"
"${MOBILESSD_FP32_MODULE}"
"${POSENET_FP32_MODULE}"
"${MOBILEBERT_FP32_MODULE}"
"${MOBILEBERT_INT8_MODULE}"
"${MOBILENET_V2_MODULE}"
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
"big-core,full-inference,default-flags"
"little-core,full-inference,default-flags"
TARGET_BACKEND
"llvm-cpu"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
COMPILATION_FLAGS
${ANDROID_CPU_COMPILATION_FLAGS}
BENCHMARK_TOOL
iree-benchmark-module
CONFIG
"iree-llvm-cpu-sync"
DRIVER
"local-sync"
)
# CPU, LLVM, local-task, 1 through 4 threads, big/little-core, full-inference.
iree_benchmark_suite(
GROUP_NAME
"android-arm64-v8a"
MODULES
"${DEEPLABV3_FP32_MODULE}"
"${MOBILESSD_FP32_MODULE}"
"${POSENET_FP32_MODULE}"
"${MOBILEBERT_FP32_MODULE}"
"${MOBILEBERT_INT8_MODULE}"
"${MOBILENET_V2_MODULE}"
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
"1-thread,big-core,full-inference,default-flags"
"1-thread,little-core,full-inference,default-flags"
TARGET_BACKEND
"llvm-cpu"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
COMPILATION_FLAGS
${ANDROID_CPU_COMPILATION_FLAGS}
BENCHMARK_TOOL
iree-benchmark-module
CONFIG
"iree-llvm-cpu"
DRIVER
"local-task"
RUNTIME_FLAGS
"--task_topology_group_count=1"
)
# TODO(#7792): Re-enable these when we are able to run different benchmarks
# depending on use-case (presubmit, postsubmit, nightly, etc.)
# iree_benchmark_suite(
# GROUP_NAME
# "android-arm64-v8a"
#
# MODULES
# "${DEEPLABV3_FP32_MODULE}"
# "${MOBILESSD_FP32_MODULE}"
# "${POSENET_FP32_MODULE}"
# "${MOBILEBERT_FP32_MODULE}"
# "${MOBILENET_V2_MODULE}"
# "${MOBILENET_V3SMALL_MODULE}"
# BENCHMARK_MODES
# "2-thread,big-core,full-inference,default-flags"
# "2-thread,little-core,full-inference,default-flags"
# TARGET_BACKEND
# "llvm-cpu"
# TARGET_ARCHITECTURE
# "CPU-ARM64-v8A"
# COMPILATION_FLAGS
# ${ANDROID_CPU_COMPILATION_FLAGS}
# BENCHMARK_TOOL
# iree-benchmark-module
# CONFIG
# "iree-llvm-cpu"
# DRIVER
# "local-task"
# RUNTIME_FLAGS
# "--task_topology_group_count=2"
# )
# iree_benchmark_suite(
# GROUP_NAME
# "android-arm64-v8a"
#
# MODULES
# "${DEEPLABV3_FP32_MODULE}"
# "${MOBILESSD_FP32_MODULE}"
# "${POSENET_FP32_MODULE}"
# "${MOBILEBERT_FP32_MODULE}"
# "${MOBILENET_V2_MODULE}"
# "${MOBILENET_V3SMALL_MODULE}"
# BENCHMARK_MODES
# "3-thread,big-core,full-inference,default-flags"
# "3-thread,little-core,full-inference,default-flags"
# TARGET_BACKEND
# "llvm-cpu"
# TARGET_ARCHITECTURE
# "CPU-ARM64-v8A"
# COMPILATION_FLAGS
# ${ANDROID_CPU_COMPILATION_FLAGS}
# BENCHMARK_TOOL
# iree-benchmark-module
# CONFIG
# "iree-llvm-cpu"
# DRIVER
# "local-task"
# RUNTIME_FLAGS
# "--task_topology_group_count=3"
# )
iree_benchmark_suite(
GROUP_NAME
"android-arm64-v8a"
MODULES
"${DEEPLABV3_FP32_MODULE}"
"${MOBILESSD_FP32_MODULE}"
"${POSENET_FP32_MODULE}"
"${MOBILEBERT_FP32_MODULE}"
"${MOBILEBERT_INT8_MODULE}"
"${MOBILENET_V2_MODULE}"
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
"4-thread,big-core,full-inference,default-flags"
"4-thread,little-core,full-inference,default-flags"
TARGET_BACKEND
"llvm-cpu"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
COMPILATION_FLAGS
${ANDROID_CPU_COMPILATION_FLAGS}
BENCHMARK_TOOL
iree-benchmark-module
CONFIG
"iree-llvm-cpu"
DRIVER
"local-task"
RUNTIME_FLAGS
"--task_topology_group_count=4"
)
################################################################################
################################################################################
# #
# Specialized benchmark configurations #
# #
# Each suite benchmarks one or more module with configurations that can vary #
# on model or architecture characteristics. These are intended for providing #
# continuous benchmarks of experimental features that cannot be turned on by #
# default yet. It is primarily intended for whoever is actively investigating #
# optimizations for a feature exemplified in a specific model or architecture. #
# Due to our current benchmark setup, there can only be one experimental #
# configuration per model and other benchmark mode. #
# #
################################################################################
# CPU, LLVM, local-sync, big/little-core, full-inference
# NOTE: this is not enabling any SIMD extension beyond baseline Aarch64.
# At the moment we use that for fp32 models. We would change that when new
# devices support relevant fp32 SIMD extensions beyond that (e.g. +f32mm).
iree_benchmark_suite(
GROUP_NAME
"android-arm64-v8a"
MODULES
"${DEEPLABV3_FP32_MODULE}"
"${MOBILESSD_FP32_MODULE}"
"${POSENET_FP32_MODULE}"
"${MOBILEBERT_FP32_MODULE}"
"${MOBILENET_V2_MODULE}"
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
"big-core,full-inference,experimental-flags"
"little-core,full-inference,experimental-flags"
TARGET_BACKEND
"llvm-cpu"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
COMPILATION_FLAGS
${ANDROID_CPU_COMPILATION_FLAGS}
"--iree-flow-mmt4d-target-options=arch=aarch64"
BENCHMARK_TOOL
iree-benchmark-module
CONFIG
"iree-llvm-cpu-sync"
DRIVER
"local-sync"
)
# CPU, LLVM, local-sync, big/little-core, full-inference, +dotprod
# NOTE: +dotprod is only relevant to int8, not fp32.
# TODO: add a +i8mm variant, supported by new devices already. No rush: our i8mm
# kernel is currently naive, not ready for benchmarking.
iree_benchmark_suite(
GROUP_NAME
"android-arm64-v8a"
MODULES
"${MOBILEBERT_INT8_MODULE}"
BENCHMARK_MODES
"big-core,full-inference,experimental-flags"
"little-core,full-inference,experimental-flags"
TARGET_BACKEND
"llvm-cpu"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
COMPILATION_FLAGS
${ANDROID_CPU_COMPILATION_FLAGS}
"--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod"
"--iree-llvm-target-cpu-features=+dotprod"
BENCHMARK_TOOL
iree-benchmark-module
CONFIG
"iree-llvm-cpu-sync"
DRIVER
"local-sync"
)
# TODO(#7792): Consider re-enabling little-core experimental-flags if we start
# optimizing for little cores or we can just run them occasionally
# CPU, LLVM, local-task, 1 through 4 threads, big/little-core, full-inference.
# NOTE: this is not enabling any SIMD extension beyond baseline Aarch64.
# At the moment we use that for fp32 models. We would change that when new
# devices support relevant fp32 SIMD extensions beyond that (e.g. f32mm).
iree_benchmark_suite(
GROUP_NAME
"android-arm64-v8a"
MODULES
"${DEEPLABV3_FP32_MODULE}"
"${MOBILESSD_FP32_MODULE}"
"${POSENET_FP32_MODULE}"
"${MOBILEBERT_FP32_MODULE}"
"${MOBILENET_V2_MODULE}"
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
"1-thread,big-core,full-inference,experimental-flags"
# "1-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"llvm-cpu"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
COMPILATION_FLAGS
${ANDROID_CPU_COMPILATION_FLAGS}
"--iree-flow-mmt4d-target-options=arch=aarch64"
BENCHMARK_TOOL
iree-benchmark-module
CONFIG
"iree-llvm-cpu"
DRIVER
"local-task"
RUNTIME_FLAGS
"--task_topology_group_count=1"
)
# CPU, LLVM, local-task, 1 through 4 threads, big/little-core, full-inference, +dotprod
# NOTE: +dotprod is only relevant to int8, not fp32.
# TODO: add a +i8mm variant, supported by new devices already. No rush: our i8mm
# kernel is currently naive, not ready for benchmarking.
iree_benchmark_suite(
GROUP_NAME
"android-arm64-v8a"
MODULES
"${MOBILEBERT_INT8_MODULE}"
BENCHMARK_MODES
"1-thread,big-core,full-inference,experimental-flags"
# "1-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"llvm-cpu"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
COMPILATION_FLAGS
${ANDROID_CPU_COMPILATION_FLAGS}
"--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod"
"--iree-llvm-target-cpu-features=+dotprod"
BENCHMARK_TOOL
iree-benchmark-module
CONFIG
"iree-llvm-cpu"
DRIVER
"local-task"
RUNTIME_FLAGS
"--task_topology_group_count=1"
)
# TODO(#7792): Re-enable these when we are able to run different benchmarks
# depending on use-case (presubmit, postsubmit, nightly, etc.)
# iree_benchmark_suite(
# GROUP_NAME
# "android-arm64-v8a"
#
# MODULES
# "${DEEPLABV3_FP32_MODULE}"
# "${MOBILESSD_FP32_MODULE}"
# "${POSENET_FP32_MODULE}"
# "${MOBILEBERT_FP32_MODULE}"
# "${MOBILENET_V2_MODULE}"
# "${MOBILENET_V3SMALL_MODULE}"
# BENCHMARK_MODES
# "2-thread,big-core,full-inference,experimental-flags"
# "2-thread,little-core,full-inference,experimental-flags"
# TARGET_BACKEND
# "llvm-cpu"
# TARGET_ARCHITECTURE
# "CPU-ARM64-v8A"
# COMPILATION_FLAGS
# ${ANDROID_CPU_COMPILATION_FLAGS}
# "--iree-flow-mmt4d-target-options=arch=aarch64"
# BENCHMARK_TOOL
# iree-benchmark-module
# CONFIG
# "iree-llvm-cpu"
# DRIVER
# "local-task"
# RUNTIME_FLAGS
# "--task_topology_group_count=2"
# )
# iree_benchmark_suite(
# GROUP_NAME
# "android-arm64-v8a"
#
# MODULES
# "${DEEPLABV3_FP32_MODULE}"
# "${MOBILESSD_FP32_MODULE}"
# "${POSENET_FP32_MODULE}"
# "${MOBILEBERT_FP32_MODULE}"
# "${MOBILENET_V2_MODULE}"
# "${MOBILENET_V3SMALL_MODULE}"
# BENCHMARK_MODES
# "3-thread,big-core,full-inference,experimental-flags"
# "3-thread,little-core,full-inference,experimental-flags"
# TARGET_BACKEND
# "llvm-cpu"
# TARGET_ARCHITECTURE
# "CPU-ARM64-v8A"
# COMPILATION_FLAGS
# ${ANDROID_CPU_COMPILATION_FLAGS}
# "--iree-flow-mmt4d-target-options=arch=aarch64"
# BENCHMARK_TOOL
# iree-benchmark-module
# CONFIG
# "iree-llvm-cpu"
# DRIVER
# "local-task"
# RUNTIME_FLAGS
# "--task_topology_group_count=3"
# )
# CPU, LLVM, local-task, 1 through 4 threads, big/little-core, full-inference.
# NOTE: this is not enabling any SIMD extension beyond baseline Aarch64.
# At the moment we use that for fp32 models. We would change that when new
# devices support relevant fp32 SIMD extensions beyond that (e.g. f32mm).
iree_benchmark_suite(
GROUP_NAME
"android-arm64-v8a"
MODULES
"${DEEPLABV3_FP32_MODULE}"
"${MOBILESSD_FP32_MODULE}"
"${POSENET_FP32_MODULE}"
"${MOBILEBERT_FP32_MODULE}"
"${MOBILENET_V2_MODULE}"
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
"4-thread,big-core,full-inference,experimental-flags"
# "4-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"llvm-cpu"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
COMPILATION_FLAGS
${ANDROID_CPU_COMPILATION_FLAGS}
"--iree-flow-mmt4d-target-options=arch=aarch64"
BENCHMARK_TOOL
iree-benchmark-module
CONFIG
"iree-llvm-cpu"
DRIVER
"local-task"
RUNTIME_FLAGS
"--task_topology_group_count=4"
)
# CPU, LLVM, local-sync, big/little-core, full-inference, +dotprod
# NOTE: +dotprod is only relevant to int8, not fp32.
# TODO: add a +i8mm variant, supported by new devices already. No rush: our i8mm
# kernel is currently naive, not ready for benchmarking.
iree_benchmark_suite(
GROUP_NAME
"android-arm64-v8a"
MODULES
"${MOBILEBERT_INT8_MODULE}"
BENCHMARK_MODES
"4-thread,big-core,full-inference,experimental-flags"
# "4-thread,little-core,full-inference,experimental-flags"
TARGET_BACKEND
"llvm-cpu"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
COMPILATION_FLAGS
${ANDROID_CPU_COMPILATION_FLAGS}
"--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod"
"--iree-llvm-target-cpu-features=+dotprod"
BENCHMARK_TOOL
iree-benchmark-module
CONFIG
"iree-llvm-cpu"
DRIVER
"local-task"
RUNTIME_FLAGS
"--task_topology_group_count=4"
)
# CPU, VMVX, 4-thread, big-core, full-inference
# VMVX is slow and we're not optimizing perf yet. Leaving in a single max-thread
# benchmark because it's useful to keep an eye on and helps disambiguate where a
# performance change may be coming from (e.g. if it's in vmvx as well, it's
# probably not a codegen issue).
iree_benchmark_suite(
GROUP_NAME
"android-arm64-v8a"
MODULES
"${MOBILENET_V2_MODULE}"
"${MOBILENET_V3SMALL_MODULE}"
BENCHMARK_MODES
"4-thread,big-core,full-inference,experimental-flags"
TARGET_BACKEND
"vmvx"
TARGET_ARCHITECTURE
"CPU-ARM64-v8A"
COMPILATION_FLAGS
"--iree-input-type=tosa"
BENCHMARK_TOOL
iree-benchmark-module
CONFIG
"iree-vmvx"
DRIVER
"local-task"
RUNTIME_FLAGS
"--task_topology_group_count=4"
)
################################################################################