Adjust module specification in `iree_mlir_benchmark_suite` (#6128)
Previously we listed all fields separately, which meant we had to keep
the order consistent across several different variables. It also made
each call to iree_mlir_benchmark_suite somewhat lengthy. This commit
aggregates all the bits for a module into one CMake variable and
deduplicates fields in iree_mlir_benchmark_suite calls.
Also moved the benchmark directory out of `iree/test/` and into a more
appropriate `iree/benchmark` location.
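
For illustration, a minimal sketch of the new calling convention, using
the MobileNetV2 specification from this change (other arguments such as
DRIVER, TARGET_BACKEND, TARGET_ARCHITECTURE, and TRANSLATION_FLAGS are
elided here):

    set(MOBILENET_V2_MODULE
      "MobileNetV2"      # MODULE_NAME
      "fp32,imagenet"    # MODULE_TAGS
      "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-b0c5c584.tar.gz" # MLIR_SOURCE
      "call"             # ENTRY_FUNCTION
      "1x224x224x3xf32"  # FUNCTION_INPUTS
    )

    iree_mlir_benchmark_suite(
      MODULES
        ${MOBILENET_V2_MODULE}
      BENCHMARK_MODES
        "3-thread,little-core,full-inference"
    )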
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c495613..0edea98 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -446,6 +446,11 @@
add_subdirectory(iree/testing)
add_subdirectory(iree/test)
add_subdirectory(iree/vm)
+
+if(${IREE_BUILD_BENCHMARKS})
+ add_subdirectory(iree/benchmark)
+endif()
+
if(${IREE_BUILD_EXPERIMENTAL_ROCM})
add_subdirectory(build_tools/third_party/rocm EXCLUDE_FROM_ALL)
add_subdirectory(experimental/rocm)
diff --git a/build_tools/cmake/iree_mlir_benchmark_suite.cmake b/build_tools/cmake/iree_mlir_benchmark_suite.cmake
index f905453..8028809 100644
--- a/build_tools/cmake/iree_mlir_benchmark_suite.cmake
+++ b/build_tools/cmake/iree_mlir_benchmark_suite.cmake
@@ -4,36 +4,28 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# iree_check_lists_have_same_size()
-#
-# Note that the caller should pass in the list variables themselves to
-# LIST1 and LIST2, not the list variables' values.
-function(iree_check_lists_have_same_size LIST1 LIST2)
- list(LENGTH "${LIST1}" _LIST1_COUNT)
- list(LENGTH "${LIST2}" _LIST2_COUNT)
- if(NOT _LIST1_COUNT EQUAL _LIST2_COUNT)
- message(SEND_ERROR "${LIST1} count ${_LIST1_COUNT} does not "
- "match ${LIST2} count ${_LIST2_COUNT}"
- )
- endif()
-endfunction()
-
# iree_mlir_benchmark_suite()
#
# Generates benchmark suites for MLIR input modules. The generated artifacts
-# will be executed with `iree-benchmark-module`.
+# will be placed in the "<binary-root>/benchmark_suites/<category>" directory,
+# where "<category>" is the name of the immediate directory containing the
+# CMakeLists.txt. The generated artifacts are expected to be executed with
+# `iree-benchmark-module`.
#
# Parameters:
-# MODULE_NAMES: A list of input module names.
-# MODULE_TAGS: A list of tags for each input module.
-# MODULE_SOURCES: The initial generating source for each input module.
-# MLIR_SOURCES: The input file for each input module. It can be a file in
-# checked in the repository; it can also be a URL for downloading from.
-# the web. When it's a URL, the file should be a a direct .mlir file
-# or a tarball containing a .mlir file; for both cases, the .mlir file
-# should have a name matching the one in MODULE_NAMES.
-# ENTRY_FUNCTIONS: The entry function name for each input module.
-# FUNCTION_INPUTS: A list of entry function inputs for each input module.
+# MODULES: A list of model specifications. Due to CMake's lack of nested list
+# support, all models' specifications are put in the same list, where each
+# model takes five consecutive elements providing the following fields:
+# - MODULE_NAME: The input module's name.
+# - MODULE_TAGS: A list of comma-separated tags for the input module.
+# - MLIR_SOURCE: The input file for the module. It can be a file checked
+# in the repository; it can also be a URL for downloading from the
+# web. When it's a URL, the file should be a direct .mlir file or a
+# tarball containing a .mlir file; in both cases, the .mlir file
+# should have a name matching MODULE_NAME.
+# - ENTRY_FUNCTION: The entry function name for the input module.
+# - FUNCTION_INPUTS: A list of comma-separated entry function inputs for
+# the input module.
# BENCHMARK_MODES: A list of strings, where each one is a comma-separated
# list of benchmark mode tags.
# TARGET_BACKEND: The compiler target backend.
@@ -50,17 +42,12 @@
#
# 1)
#
-# MODULE_NAMES, MODULE_TAGS, MODULE_SOURCES, MLIR_SOURCES, ENTRY_FUNCTIONS,
-# and FUNCTION_INPUTS together provide good flexiblity for specifying the MLIR
-# input module and its metadata. For example, we can generate modules with
-# idential name from different sources (TensorFlow, TFLite, PyTorch, etc.),
-# and we can transform the same input module differently for benchmarking
-# different aspects like fp32 vs fp16.
-#
-# Note that the above parameters are all lists and they should have the name
-# number of elements. This enables us to use the same CMake function call to
-# generate benchmarks for many models and share the specification of
-# translation/runtime configurations.
+# MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION, and FUNCTION_INPUTS
+# together provide good flexibility for specifying the MLIR input module and
+# its metadata. For example, we can generate modules with identical names from
+# different sources (TensorFlow, TFLite, PyTorch, etc.), and we can transform
+# the same input module differently for benchmarking different aspects like
+# fp32 vs fp16.
#
# 2)
#
@@ -81,31 +68,45 @@
_RULE
""
"DRIVER;TARGET_BACKEND;TARGET_ARCHITECTURE"
- "BENCHMARK_MODES;ENTRY_FUNCTIONS;FUNCTION_INPUTS;MLIR_SOURCES;MODULE_NAMES;MODULE_SOURCES;MODULE_TAGS;TRANSLATION_FLAGS;RUNTIME_FLAGS"
+ "BENCHMARK_MODES;MODULES;TRANSLATION_FLAGS;RUNTIME_FLAGS"
)
- iree_check_lists_have_same_size(_RULE_MODULE_NAMES _RULE_MODULE_TAGS)
- iree_check_lists_have_same_size(_RULE_MODULE_NAMES _RULE_MODULE_SOURCES)
- iree_check_lists_have_same_size(_RULE_MODULE_NAMES _RULE_MLIR_SOURCES)
- iree_check_lists_have_same_size(_RULE_MODULE_NAMES _RULE_ENTRY_FUNCTIONS)
- iree_check_lists_have_same_size(_RULE_MODULE_NAMES _RULE_FUNCTION_INPUTS)
+ # The names of the fields in each module's specification.
+ set(_FIELD_NAMES "_MODULE_NAME" "_MODULE_TAGS"
+ "_MLIR_SOURCE" "_ENTRY_FUNCTION" "_FUNCTION_INPUTS")
+ list(LENGTH _FIELD_NAMES _FIELD_COUNT)
+ math(EXPR _MAX_FIELD_INDEX "${_FIELD_COUNT} - 1")
- # Loop over all modules and their sources to create targets.
- list(LENGTH _RULE_MODULE_NAMES _MODULE_NAMES_COUNT)
- math(EXPR _MAX_INDEX "${_MODULE_NAMES_COUNT} - 1")
- foreach(_INDEX RANGE 0 "${_MAX_INDEX}")
+ # Make sure we have a multiple of five elements.
+ list(LENGTH _RULE_MODULES _MODULE_TOTAL_ELEMENT_COUNT)
+ math(EXPR _MODULE_COUNT
+ "${_MODULE_TOTAL_ELEMENT_COUNT} / ${_FIELD_COUNT}")
+ math(EXPR _MODULE_ELEMENT_REMAINDER
+ "${_MODULE_TOTAL_ELEMENT_COUNT} % ${_FIELD_COUNT}")
+ if(NOT ${_MODULE_ELEMENT_REMAINDER} EQUAL 0)
+ message(SEND_ERROR "MODULES expected to have some multiple of six "
+ "elements; some module has missing/redundant fields.")
+ endif()
+
+ # Loop over all modules to create targets.
+ math(EXPR _MAX_MODULE_INDEX "${_MODULE_COUNT} - 1")
+ foreach(_MODULE_INDEX RANGE 0 "${_MAX_MODULE_INDEX}")
+ # Loop over all elements for the current module and assign them to the
+ # corresponding field names for later use.
+ foreach(_FIELD_INDEX RANGE 0 "${_MAX_FIELD_INDEX}")
+ list(GET _FIELD_NAMES ${_FIELD_INDEX} _FIELD_NAME)
+ math(EXPR _INDEX "${_MODULE_INDEX} * ${_FIELD_COUNT} + ${_FIELD_INDEX}")
+ list(GET _RULE_MODULES ${_INDEX} ${_FIELD_NAME})
+ endforeach()
+
+ # Use the name of the immediate directory as the category.
+ get_filename_component(_CATEGORY "${CMAKE_CURRENT_SOURCE_DIR}" NAME)
+
# Generate all benchmarks into the root build directory. This helps with
# discovering and executing them on devices.
- list(GET _RULE_MODULE_SOURCES ${_INDEX} _MODULE_SOURCE)
- set(_ROOT_ARTIFACTS_DIR "${IREE_BINARY_DIR}/benchmark_suites/${_MODULE_SOURCE}")
+ set(_ROOT_ARTIFACTS_DIR "${IREE_BINARY_DIR}/benchmark_suites/${_CATEGORY}")
set(_VMFB_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/vmfb")
- list(GET _RULE_MODULE_NAMES ${_INDEX} _MODULE_NAME)
- list(GET _RULE_MODULE_TAGS ${_INDEX} _MODULE_TAGS)
- list(GET _RULE_MLIR_SOURCES ${_INDEX} _MLIR_SOURCE)
- list(GET _RULE_ENTRY_FUNCTIONS ${_INDEX} _ENTRY_FUNCTION)
- list(GET _RULE_FUNCTION_INPUTS ${_INDEX} _FUNCTION_INPUTS)
-
# The source file used to generate benchmark artifacts.
set(_SOURCE_FILE "${_MLIR_SOURCE}")
# The CMake target's name if we need to download from the web.
@@ -240,5 +241,5 @@
# Mark dependency so that we have one target to drive them all.
add_dependencies(iree-benchmark-suites "${_FLAGFILE_GEN_TARGET_NAME}")
endforeach(_BENCHMARK_MODE IN LISTS _RULE_BENCHMARK_MODES)
- endforeach(_INDEX RANGE 0 "${_MAX_INDEX}")
+ endforeach(_MODULE_INDEX RANGE 0 "${_MAX_MODULE_INDEX}")
endfunction()
diff --git a/iree/benchmark/CMakeLists.txt b/iree/benchmark/CMakeLists.txt
new file mode 100644
index 0000000..0e9c88b
--- /dev/null
+++ b/iree/benchmark/CMakeLists.txt
@@ -0,0 +1 @@
+iree_add_all_subdirs()
diff --git a/iree/test/model_benchmarks/CMakeLists.txt b/iree/benchmark/TensorFlow/CMakeLists.txt
similarity index 63%
rename from iree/test/model_benchmarks/CMakeLists.txt
rename to iree/benchmark/TensorFlow/CMakeLists.txt
index a93a4a1..7e0df7d 100644
--- a/iree/test/model_benchmarks/CMakeLists.txt
+++ b/iree/benchmark/TensorFlow/CMakeLists.txt
@@ -9,68 +9,44 @@
# #
# Benchmark models #
# #
-# All the lists should have the same number of elements. Each list describes #
-# one aspect of the model. Elements at the same index are for the same model. #
-# #
-# Normally models to be benchmarked should be placed here becuase all #
-# benchmark cases will be enabled for them. There might exist cases where we #
-# cannot enable all the benchmark cases for one model; then they should be #
-# placed directly in the cmake function calls in the next section. #
+# Each module specification should be a list that contains the following #
+# fields: MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION, #
+# FUNCTION_INPUTS. See iree_mlir_benchmark_suite definition for details about #
+# these fields. #
# #
################################################################################
-set(BENCHMARK_MODULE_NAMES
- "MobileNetV2"
- "MobileNetV3Small"
+set(MOBILENET_V2_MODULE
+ "MobileNetV2" # MODULE_NAME
+ "fp32,imagenet" # MODULE_TAGS
+ "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-b0c5c584.tar.gz" # MLIR_SOURCE
+ "call" # ENTRY_FUNCTION
+ "1x224x224x3xf32" # FUNCTION_INPUTS
)
-# Each element is a comma-separated list.
-set(BENCHMARK_MODULE_TAGS
- "fp32,imagenet"
- "fp32,imagenet"
-)
-
-set(BENCHMARK_MODULE_SOURCES
- "TensorFlow"
- "TensorFlow"
-)
-
-set(BENCHMARK_MLIR_SOURCES
- "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-b0c5c584.tar.gz"
- "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-b0c5c584.tar.gz"
-)
-
-set(BENCHMARK_ENTRY_FUNCTIONS
- "call"
- "call"
-)
-
-# Each element is a comma-separated list.
-set(BENCHMARK_FUNCTION_INPUTS
- "1x224x224x3xf32"
- "1x224x224x3xf32"
+set(MOBILENET_V3SMALL_MODULE
+ "MobileNetV3Small" # MODULE_NAME
+ "fp32,imagenet" # MODULE_TAGS
+ "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-b0c5c584.tar.gz" # MLIR_SOURCE
+ "call" # ENTRY_FUNCTION
+ "1x224x224x3xf32" # FUNCTION_INPUTS
)
################################################################################
# #
-# Benchmark cases #
+# Benchmark suites #
+# #
+# Each suite benchmarks a list of modules with some specific configuration, #
+# typically involving different translation/runtime flags and targeting #
+# different IREE drivers and hardware architectures. #
# #
################################################################################
# CPU, VMVX, 3-thread, little-core, full-inference
iree_mlir_benchmark_suite(
- MODULE_NAMES
- ${BENCHMARK_MODULE_NAMES}
- MODULE_TAGS
- ${BENCHMARK_MODULE_TAGS}
- MODULE_SOURCES
- ${BENCHMARK_MODULE_SOURCES}
- MLIR_SOURCES
- ${BENCHMARK_MLIR_SOURCES}
- ENTRY_FUNCTIONS
- ${BENCHMARK_ENTRY_FUNCTIONS}
- FUNCTION_INPUTS
- ${BENCHMARK_FUNCTION_INPUTS}
+ MODULES
+ ${MOBILENET_V2_MODULE}
+ ${MOBILENET_V3SMALL_MODULE}
BENCHMARK_MODES
"3-thread,little-core,full-inference"
@@ -89,18 +65,9 @@
# CPU, Dylib-Sync, big/little-core, full-inference
iree_mlir_benchmark_suite(
- MODULE_NAMES
- ${BENCHMARK_MODULE_NAMES}
- MODULE_TAGS
- ${BENCHMARK_MODULE_TAGS}
- MODULE_SOURCES
- ${BENCHMARK_MODULE_SOURCES}
- MLIR_SOURCES
- ${BENCHMARK_MLIR_SOURCES}
- ENTRY_FUNCTIONS
- ${BENCHMARK_ENTRY_FUNCTIONS}
- FUNCTION_INPUTS
- ${BENCHMARK_FUNCTION_INPUTS}
+ MODULES
+ ${MOBILENET_V2_MODULE}
+ ${MOBILENET_V3SMALL_MODULE}
BENCHMARK_MODES
"big-core,full-inference"
@@ -122,18 +89,9 @@
# CPU, Dylib, 1-thread, big/little-core, full-inference
iree_mlir_benchmark_suite(
- MODULE_NAMES
- ${BENCHMARK_MODULE_NAMES}
- MODULE_TAGS
- ${BENCHMARK_MODULE_TAGS}
- MODULE_SOURCES
- ${BENCHMARK_MODULE_SOURCES}
- MLIR_SOURCES
- ${BENCHMARK_MLIR_SOURCES}
- ENTRY_FUNCTIONS
- ${BENCHMARK_ENTRY_FUNCTIONS}
- FUNCTION_INPUTS
- ${BENCHMARK_FUNCTION_INPUTS}
+ MODULES
+ ${MOBILENET_V2_MODULE}
+ ${MOBILENET_V3SMALL_MODULE}
BENCHMARK_MODES
"1-thread,big-core,full-inference"
@@ -157,18 +115,9 @@
# CPU, Dylib, 3-thread, big/little-core, full-inference
iree_mlir_benchmark_suite(
- MODULE_NAMES
- ${BENCHMARK_MODULE_NAMES}
- MODULE_TAGS
- ${BENCHMARK_MODULE_TAGS}
- MODULE_SOURCES
- ${BENCHMARK_MODULE_SOURCES}
- MLIR_SOURCES
- ${BENCHMARK_MLIR_SOURCES}
- ENTRY_FUNCTIONS
- ${BENCHMARK_ENTRY_FUNCTIONS}
- FUNCTION_INPUTS
- ${BENCHMARK_FUNCTION_INPUTS}
+ MODULES
+ ${MOBILENET_V2_MODULE}
+ ${MOBILENET_V3SMALL_MODULE}
BENCHMARK_MODES
"3-thread,big-core,full-inference"
@@ -192,18 +141,9 @@
# GPU, Vulkan, Adreno, full-inference
iree_mlir_benchmark_suite(
- MODULE_NAMES
- ${BENCHMARK_MODULE_NAMES}
- MODULE_TAGS
- ${BENCHMARK_MODULE_TAGS}
- MODULE_SOURCES
- ${BENCHMARK_MODULE_SOURCES}
- MLIR_SOURCES
- ${BENCHMARK_MLIR_SOURCES}
- ENTRY_FUNCTIONS
- ${BENCHMARK_ENTRY_FUNCTIONS}
- FUNCTION_INPUTS
- ${BENCHMARK_FUNCTION_INPUTS}
+ MODULES
+ ${MOBILENET_V2_MODULE}
+ ${MOBILENET_V3SMALL_MODULE}
BENCHMARK_MODES
"full-inference"
@@ -223,18 +163,9 @@
# GPU, Vulkan, Adreno, kernel-execution
iree_mlir_benchmark_suite(
- MODULE_NAMES
- ${BENCHMARK_MODULE_NAMES}
- MODULE_TAGS
- ${BENCHMARK_MODULE_TAGS}
- MODULE_SOURCES
- ${BENCHMARK_MODULE_SOURCES}
- MLIR_SOURCES
- ${BENCHMARK_MLIR_SOURCES}
- ENTRY_FUNCTIONS
- ${BENCHMARK_ENTRY_FUNCTIONS}
- FUNCTION_INPUTS
- ${BENCHMARK_FUNCTION_INPUTS}
+ MODULES
+ ${MOBILENET_V2_MODULE}
+ ${MOBILENET_V3SMALL_MODULE}
BENCHMARK_MODES
"kernel-execution"
@@ -257,18 +188,9 @@
# GPU, Vulkan, Mali, full-inference
iree_mlir_benchmark_suite(
- MODULE_NAMES
- ${BENCHMARK_MODULE_NAMES}
- MODULE_TAGS
- ${BENCHMARK_MODULE_TAGS}
- MODULE_SOURCES
- ${BENCHMARK_MODULE_SOURCES}
- MLIR_SOURCES
- ${BENCHMARK_MLIR_SOURCES}
- ENTRY_FUNCTIONS
- ${BENCHMARK_ENTRY_FUNCTIONS}
- FUNCTION_INPUTS
- ${BENCHMARK_FUNCTION_INPUTS}
+ MODULES
+ ${MOBILENET_V2_MODULE}
+ ${MOBILENET_V3SMALL_MODULE}
BENCHMARK_MODES
"full-inference"
@@ -288,18 +210,9 @@
# GPU, Vulkan, Mali, kernel-execution
iree_mlir_benchmark_suite(
- MODULE_NAMES
- ${BENCHMARK_MODULE_NAMES}
- MODULE_TAGS
- ${BENCHMARK_MODULE_TAGS}
- MODULE_SOURCES
- ${BENCHMARK_MODULE_SOURCES}
- MLIR_SOURCES
- ${BENCHMARK_MLIR_SOURCES}
- ENTRY_FUNCTIONS
- ${BENCHMARK_ENTRY_FUNCTIONS}
- FUNCTION_INPUTS
- ${BENCHMARK_FUNCTION_INPUTS}
+ MODULES
+ ${MOBILENET_V2_MODULE}
+ ${MOBILENET_V3SMALL_MODULE}
BENCHMARK_MODES
"kernel-execution"