Adjust module specification in `iree_mlir_benchmark_suite` (#6128)

Previously we listed all fields separately, which meant we had to
keep their order in sync across several different variables. It also
made each call to iree_mlir_benchmark_suite rather lengthy. This
commit aggregates all the bits for a module into one CMake variable
and deduplicates fields across iree_mlir_benchmark_suite calls.

Also moved the benchmark directory out of `iree/test/` and into a
more appropriate `iree/benchmark/` location.
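
For illustration, a module is now described by a single list variable and
passed to `iree_mlir_benchmark_suite` through the new `MODULES` parameter,
roughly as in this sketch (values taken from the TensorFlow suite changes
below; the driver/backend/flag arguments are unchanged and elided here):

```cmake
# One module == one CMake list with five fields, in this fixed order.
set(MOBILENET_V2_MODULE
  "MobileNetV2"     # MODULE_NAME
  "fp32,imagenet"   # MODULE_TAGS
  "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-b0c5c584.tar.gz" # MLIR_SOURCE
  "call"            # ENTRY_FUNCTION
  "1x224x224x3xf32" # FUNCTION_INPUTS
)

# Each suite call now just lists the module variables it wants to benchmark.
iree_mlir_benchmark_suite(
  MODULES
    ${MOBILENET_V2_MODULE}

  BENCHMARK_MODES
    "3-thread,little-core,full-inference"
  # DRIVER, TARGET_BACKEND, TARGET_ARCHITECTURE, TRANSLATION_FLAGS, and
  # RUNTIME_FLAGS are passed as before and omitted from this sketch.
)
```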
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c495613..0edea98 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -446,6 +446,11 @@
 add_subdirectory(iree/testing)
 add_subdirectory(iree/test)
 add_subdirectory(iree/vm)
+
+if(${IREE_BUILD_BENCHMARKS})
+  add_subdirectory(iree/benchmark)
+endif()
+
 if(${IREE_BUILD_EXPERIMENTAL_ROCM})
   add_subdirectory(build_tools/third_party/rocm EXCLUDE_FROM_ALL)
   add_subdirectory(experimental/rocm)
diff --git a/build_tools/cmake/iree_mlir_benchmark_suite.cmake b/build_tools/cmake/iree_mlir_benchmark_suite.cmake
index f905453..8028809 100644
--- a/build_tools/cmake/iree_mlir_benchmark_suite.cmake
+++ b/build_tools/cmake/iree_mlir_benchmark_suite.cmake
@@ -4,36 +4,28 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# iree_check_lists_have_same_size()
-#
-# Note that the caller should pass in the list variables themselves to
-# LIST1 and LIST2, not the list variables' values.
-function(iree_check_lists_have_same_size LIST1 LIST2)
-  list(LENGTH "${LIST1}" _LIST1_COUNT)
-  list(LENGTH "${LIST2}" _LIST2_COUNT)
-  if(NOT _LIST1_COUNT EQUAL _LIST2_COUNT)
-    message(SEND_ERROR "${LIST1} count ${_LIST1_COUNT} does not "
-                       "match ${LIST2} count ${_LIST2_COUNT}"
-    )
-  endif()
-endfunction()
-
 # iree_mlir_benchmark_suite()
 #
 # Generates benchmark suites for MLIR input modules. The generated artifacts
-# will be executed with `iree-benchmark-module`.
+# will be placed in the "<binary-root>/benchmark_suites/<category>" directory,
+# where "<category>" is the name of the immediate directory containing the
+# CMakeLists.txt. The generated artifacts are expected to be executed with
+# `iree-benchmark-module`.
 #
 # Parameters:
-#   MODULE_NAMES: A list of input module names.
-#   MODULE_TAGS: A list of tags for each input module.
-#   MODULE_SOURCES: The initial generating source for each input module.
-#   MLIR_SOURCES: The input file for each input module. It can be a file in
-#       checked in the repository; it can also be a URL for downloading from.
-#       the web. When it's a URL, the file should be a a direct .mlir file
-#       or a tarball containing a .mlir file; for both cases, the .mlir file
-#       should have a name matching the one in MODULE_NAMES.
-#   ENTRY_FUNCTIONS: The entry function name for each input module.
-#   FUNCTION_INPUTS: A list of entry function inputs for each input module.
+#   MODULES: A list of module specifications. Due to CMake's lack of nested
+#       list support, all modules' specifications are flattened into one list,
+#       where each module takes five consecutive elements providing:
+#       - MODULE_NAME: The input module's name.
+#       - MODULE_TAGS: A comma-separated list of tags for the input module.
+#       - MLIR_SOURCE: The input file for the module. It can be a file checked
+#           in the repository; it can also be a URL for downloading from the
+#           web. When it's a URL, the file should be a direct .mlir file or a
+#           tarball containing a .mlir file; in both cases, the .mlir file
+#           should have a name matching MODULE_NAME.
+#       - ENTRY_FUNCTION: The entry function name for the input module.
+#       - FUNCTION_INPUTS: A comma-separated list of entry function inputs for
+#           the input module.
 #   BENCHMARK_MODES: A list of strings, where each one is a comma-separated
 #       list of benchmark mode tags.
 #   TARGET_BACKEND: The compiler target backend.
@@ -50,17 +42,12 @@
 #
 # 1)
 #
-# MODULE_NAMES, MODULE_TAGS, MODULE_SOURCES, MLIR_SOURCES, ENTRY_FUNCTIONS,
-# and FUNCTION_INPUTS together provide good flexiblity for specifying the MLIR
-# input module and its metadata. For example, we can generate modules with
-# idential name from different sources (TensorFlow, TFLite, PyTorch, etc.),
-# and we can transform the same input module differently for benchmarking
-# different aspects like fp32 vs fp16.
-#
-# Note that the above parameters are all lists and they should have the name
-# number of elements. This enables us to use the same CMake function call to
-# generate benchmarks for many models and share the specification of
-# translation/runtime configurations.
+# MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION, and FUNCTION_INPUTS
+# together provide good flexibility for specifying the MLIR input module and
+# its metadata. For example, we can generate modules with identical names from
+# different sources (TensorFlow, TFLite, PyTorch, etc.), and we can transform
+# the same input module differently for benchmarking different aspects like
+# fp32 vs fp16.
 #
 # 2)
 #
@@ -81,31 +68,45 @@
     _RULE
     ""
     "DRIVER;TARGET_BACKEND;TARGET_ARCHITECTURE"
-    "BENCHMARK_MODES;ENTRY_FUNCTIONS;FUNCTION_INPUTS;MLIR_SOURCES;MODULE_NAMES;MODULE_SOURCES;MODULE_TAGS;TRANSLATION_FLAGS;RUNTIME_FLAGS"
+    "BENCHMARK_MODES;MODULES;TRANSLATION_FLAGS;RUNTIME_FLAGS"
   )
 
-  iree_check_lists_have_same_size(_RULE_MODULE_NAMES _RULE_MODULE_TAGS)
-  iree_check_lists_have_same_size(_RULE_MODULE_NAMES _RULE_MODULE_SOURCES)
-  iree_check_lists_have_same_size(_RULE_MODULE_NAMES _RULE_MLIR_SOURCES)
-  iree_check_lists_have_same_size(_RULE_MODULE_NAMES _RULE_ENTRY_FUNCTIONS)
-  iree_check_lists_have_same_size(_RULE_MODULE_NAMES _RULE_FUNCTION_INPUTS)
+  # The names of all fields for each module specification.
+  set(_FIELD_NAMES "_MODULE_NAME" "_MODULE_TAGS"
+                   "_MLIR_SOURCE" "_ENTRY_FUNCTION" "_FUNCTION_INPUTS")
+  list(LENGTH _FIELD_NAMES _FIELD_COUNT)
+  math(EXPR _MAX_FIELD_INDEX "${_FIELD_COUNT} - 1")
 
-  # Loop over all modules and their sources to create targets.
-  list(LENGTH _RULE_MODULE_NAMES _MODULE_NAMES_COUNT)
-  math(EXPR _MAX_INDEX "${_MODULE_NAMES_COUNT} - 1")
-  foreach(_INDEX RANGE 0 "${_MAX_INDEX}")
+  # Make sure we have a multiple of five elements (one full set per module).
+  list(LENGTH _RULE_MODULES _MODULE_TOTAL_ELEMENT_COUNT)
+  math(EXPR _MODULE_COUNT
+       "${_MODULE_TOTAL_ELEMENT_COUNT} / ${_FIELD_COUNT}")
+  math(EXPR _MODULE_ELEMENT_REMAINDER
+       "${_MODULE_TOTAL_ELEMENT_COUNT} % ${_FIELD_COUNT}")
+  if(NOT ${_MODULE_ELEMENT_REMAINDER} EQUAL 0)
+    message(SEND_ERROR "MODULES expected to have some multiple of six "
+                       "elements; some module has missing/redundant fields.")
+  endif()
+
+  # Loop over all modules to create targets.
+  math(EXPR _MAX_MODULE_INDEX "${_MODULE_COUNT} - 1")
+  foreach(_MODULE_INDEX RANGE 0 "${_MAX_MODULE_INDEX}")
+    # Loop over all elements for the current module and assign them to the
+    # corresponding field names for later use.
+    foreach(_FIELD_INDEX RANGE 0 "${_MAX_FIELD_INDEX}")
+      list(GET _FIELD_NAMES ${_FIELD_INDEX} _FIELD_NAME)
+      math(EXPR _INDEX "${_MODULE_INDEX} * ${_FIELD_COUNT} + ${_FIELD_INDEX}")
+      list(GET _RULE_MODULES ${_INDEX} ${_FIELD_NAME})
+    endforeach()
+
+    # Use the last directory's name as the category.
+    get_filename_component(_CATEGORY "${CMAKE_CURRENT_SOURCE_DIR}" NAME)
+
     # Generate all benchmarks into the root build directory. This helps with
     # discovering and executing them on devices.
-    list(GET _RULE_MODULE_SOURCES ${_INDEX} _MODULE_SOURCE)
-    set(_ROOT_ARTIFACTS_DIR "${IREE_BINARY_DIR}/benchmark_suites/${_MODULE_SOURCE}")
+    set(_ROOT_ARTIFACTS_DIR "${IREE_BINARY_DIR}/benchmark_suites/${_CATEGORY}")
     set(_VMFB_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/vmfb")
 
-    list(GET _RULE_MODULE_NAMES ${_INDEX} _MODULE_NAME)
-    list(GET _RULE_MODULE_TAGS ${_INDEX} _MODULE_TAGS)
-    list(GET _RULE_MLIR_SOURCES ${_INDEX} _MLIR_SOURCE)
-    list(GET _RULE_ENTRY_FUNCTIONS ${_INDEX} _ENTRY_FUNCTION)
-    list(GET _RULE_FUNCTION_INPUTS ${_INDEX} _FUNCTION_INPUTS)
-
     # The source file used to generate benchmark artifacts.
     set(_SOURCE_FILE "${_MLIR_SOURCE}")
     # The CMake target's name if we need to download from the web.
@@ -240,5 +241,5 @@
       # Mark dependency so that we have one target to drive them all.
       add_dependencies(iree-benchmark-suites "${_FLAGFILE_GEN_TARGET_NAME}")
     endforeach(_BENCHMARK_MODE IN LISTS _RULE_BENCHMARK_MODES)
-  endforeach(_INDEX RANGE 0 "${_MAX_INDEX}")
+  endforeach(_MODULE_INDEX RANGE 0 "${_MAX_MODULE_INDEX}")
 endfunction()
diff --git a/iree/benchmark/CMakeLists.txt b/iree/benchmark/CMakeLists.txt
new file mode 100644
index 0000000..0e9c88b
--- /dev/null
+++ b/iree/benchmark/CMakeLists.txt
@@ -0,0 +1 @@
+iree_add_all_subdirs()
diff --git a/iree/test/model_benchmarks/CMakeLists.txt b/iree/benchmark/TensorFlow/CMakeLists.txt
similarity index 63%
rename from iree/test/model_benchmarks/CMakeLists.txt
rename to iree/benchmark/TensorFlow/CMakeLists.txt
index a93a4a1..7e0df7d 100644
--- a/iree/test/model_benchmarks/CMakeLists.txt
+++ b/iree/benchmark/TensorFlow/CMakeLists.txt
@@ -9,68 +9,44 @@
 #                                                                              #
 # Benchmark models                                                             #
 #                                                                              #
-# All the lists should have the same number of elements. Each list describes   #
-# one aspect of the model. Elements at the same index are for the same model.  #
-#                                                                              #
-# Normally models to be benchmarked should be placed here becuase all          #
-# benchmark cases will be enabled for them. There might exist cases where we   #
-# cannot enable all the benchmark cases for one model; then they should be     #
-# placed directly in the cmake function calls in the next section.             #
+# Each module specification should be a list that contains the following       #
+# fields: MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION,               #
+# FUNCTION_INPUTS. See iree_mlir_benchmark_suite definition for details about  #
+# these fields.                                                                #
 #                                                                              #
 ################################################################################
 
-set(BENCHMARK_MODULE_NAMES
-  "MobileNetV2"
-  "MobileNetV3Small"
+set(MOBILENET_V2_MODULE
+  "MobileNetV2"     # MODULE_NAME
+  "fp32,imagenet"   # MODULE_TAGS
+  "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-b0c5c584.tar.gz" # MLIR_SOURCE
+  "call"            # ENTRY_FUNCTION
+  "1x224x224x3xf32" # FUNCTION_INPUTS
 )
 
-# Each element is a comma-separated list.
-set(BENCHMARK_MODULE_TAGS
-  "fp32,imagenet"
-  "fp32,imagenet"
-)
-
-set(BENCHMARK_MODULE_SOURCES
-  "TensorFlow"
-  "TensorFlow"
-)
-
-set(BENCHMARK_MLIR_SOURCES
-  "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-b0c5c584.tar.gz"
-  "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-b0c5c584.tar.gz"
-)
-
-set(BENCHMARK_ENTRY_FUNCTIONS
-  "call"
-  "call"
-)
-
-# Each element is a comma-separated list.
-set(BENCHMARK_FUNCTION_INPUTS
-  "1x224x224x3xf32"
-  "1x224x224x3xf32"
+set(MOBILENET_V3SMALL_MODULE
+  "MobileNetV3Small" # MODULE_NAME
+  "fp32,imagenet"    # MODULE_TAGS
+  "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-b0c5c584.tar.gz" # MLIR_SOURCE
+  "call"             # ENTRY_FUNCTION
+  "1x224x224x3xf32"  # FUNCTION_INPUTS
 )
 
 ################################################################################
 #                                                                              #
-# Benchmark cases                                                              #
+# Benchmark suites                                                             #
+#                                                                              #
+# Each suite benchmarks a list of modules with some specific configuration,    #
+# typically involving different translation/runtime flags and targeting        #
+# different IREE drivers and hardware architectures.                           #
 #                                                                              #
 ################################################################################
 
 # CPU, VMVX, 3-thread, little-core, full-inference
 iree_mlir_benchmark_suite(
-  MODULE_NAMES
-    ${BENCHMARK_MODULE_NAMES}
-  MODULE_TAGS
-    ${BENCHMARK_MODULE_TAGS}
-  MODULE_SOURCES
-    ${BENCHMARK_MODULE_SOURCES}
-  MLIR_SOURCES
-    ${BENCHMARK_MLIR_SOURCES}
-  ENTRY_FUNCTIONS
-    ${BENCHMARK_ENTRY_FUNCTIONS}
-  FUNCTION_INPUTS
-    ${BENCHMARK_FUNCTION_INPUTS}
+  MODULES
+    ${MOBILENET_V2_MODULE}
+    ${MOBILENET_V3SMALL_MODULE}
 
   BENCHMARK_MODES
     "3-thread,little-core,full-inference"
@@ -89,18 +65,9 @@
 
 # CPU, Dylib-Sync, big/little-core, full-inference
 iree_mlir_benchmark_suite(
-  MODULE_NAMES
-    ${BENCHMARK_MODULE_NAMES}
-  MODULE_TAGS
-    ${BENCHMARK_MODULE_TAGS}
-  MODULE_SOURCES
-    ${BENCHMARK_MODULE_SOURCES}
-  MLIR_SOURCES
-    ${BENCHMARK_MLIR_SOURCES}
-  ENTRY_FUNCTIONS
-    ${BENCHMARK_ENTRY_FUNCTIONS}
-  FUNCTION_INPUTS
-    ${BENCHMARK_FUNCTION_INPUTS}
+  MODULES
+    ${MOBILENET_V2_MODULE}
+    ${MOBILENET_V3SMALL_MODULE}
 
   BENCHMARK_MODES
     "big-core,full-inference"
@@ -122,18 +89,9 @@
 
 # CPU, Dylib, 1-thread, big/little-core, full-inference
 iree_mlir_benchmark_suite(
-  MODULE_NAMES
-    ${BENCHMARK_MODULE_NAMES}
-  MODULE_TAGS
-    ${BENCHMARK_MODULE_TAGS}
-  MODULE_SOURCES
-    ${BENCHMARK_MODULE_SOURCES}
-  MLIR_SOURCES
-    ${BENCHMARK_MLIR_SOURCES}
-  ENTRY_FUNCTIONS
-    ${BENCHMARK_ENTRY_FUNCTIONS}
-  FUNCTION_INPUTS
-    ${BENCHMARK_FUNCTION_INPUTS}
+  MODULES
+    ${MOBILENET_V2_MODULE}
+    ${MOBILENET_V3SMALL_MODULE}
 
   BENCHMARK_MODES
     "1-thread,big-core,full-inference"
@@ -157,18 +115,9 @@
 
 # CPU, Dylib, 3-thread, big/little-core, full-inference
 iree_mlir_benchmark_suite(
-  MODULE_NAMES
-    ${BENCHMARK_MODULE_NAMES}
-  MODULE_TAGS
-    ${BENCHMARK_MODULE_TAGS}
-  MODULE_SOURCES
-    ${BENCHMARK_MODULE_SOURCES}
-  MLIR_SOURCES
-    ${BENCHMARK_MLIR_SOURCES}
-  ENTRY_FUNCTIONS
-    ${BENCHMARK_ENTRY_FUNCTIONS}
-  FUNCTION_INPUTS
-    ${BENCHMARK_FUNCTION_INPUTS}
+  MODULES
+    ${MOBILENET_V2_MODULE}
+    ${MOBILENET_V3SMALL_MODULE}
 
   BENCHMARK_MODES
     "3-thread,big-core,full-inference"
@@ -192,18 +141,9 @@
 
 # GPU, Vulkan, Adreno, full-inference
 iree_mlir_benchmark_suite(
-  MODULE_NAMES
-    ${BENCHMARK_MODULE_NAMES}
-  MODULE_TAGS
-    ${BENCHMARK_MODULE_TAGS}
-  MODULE_SOURCES
-    ${BENCHMARK_MODULE_SOURCES}
-  MLIR_SOURCES
-    ${BENCHMARK_MLIR_SOURCES}
-  ENTRY_FUNCTIONS
-    ${BENCHMARK_ENTRY_FUNCTIONS}
-  FUNCTION_INPUTS
-    ${BENCHMARK_FUNCTION_INPUTS}
+  MODULES
+    ${MOBILENET_V2_MODULE}
+    ${MOBILENET_V3SMALL_MODULE}
 
   BENCHMARK_MODES
     "full-inference"
@@ -223,18 +163,9 @@
 
 # GPU, Vulkan, Adreno, kernel-execution
 iree_mlir_benchmark_suite(
-  MODULE_NAMES
-    ${BENCHMARK_MODULE_NAMES}
-  MODULE_TAGS
-    ${BENCHMARK_MODULE_TAGS}
-  MODULE_SOURCES
-    ${BENCHMARK_MODULE_SOURCES}
-  MLIR_SOURCES
-    ${BENCHMARK_MLIR_SOURCES}
-  ENTRY_FUNCTIONS
-    ${BENCHMARK_ENTRY_FUNCTIONS}
-  FUNCTION_INPUTS
-    ${BENCHMARK_FUNCTION_INPUTS}
+  MODULES
+    ${MOBILENET_V2_MODULE}
+    ${MOBILENET_V3SMALL_MODULE}
 
   BENCHMARK_MODES
     "kernel-execution"
@@ -257,18 +188,9 @@
 
 # GPU, Vulkan, Mali, full-inference
 iree_mlir_benchmark_suite(
-  MODULE_NAMES
-    ${BENCHMARK_MODULE_NAMES}
-  MODULE_TAGS
-    ${BENCHMARK_MODULE_TAGS}
-  MODULE_SOURCES
-    ${BENCHMARK_MODULE_SOURCES}
-  MLIR_SOURCES
-    ${BENCHMARK_MLIR_SOURCES}
-  ENTRY_FUNCTIONS
-    ${BENCHMARK_ENTRY_FUNCTIONS}
-  FUNCTION_INPUTS
-    ${BENCHMARK_FUNCTION_INPUTS}
+  MODULES
+    ${MOBILENET_V2_MODULE}
+    ${MOBILENET_V3SMALL_MODULE}
 
   BENCHMARK_MODES
     "full-inference"
@@ -288,18 +210,9 @@
 
 # GPU, Vulkan, Mali, kernel-execution
 iree_mlir_benchmark_suite(
-  MODULE_NAMES
-    ${BENCHMARK_MODULE_NAMES}
-  MODULE_TAGS
-    ${BENCHMARK_MODULE_TAGS}
-  MODULE_SOURCES
-    ${BENCHMARK_MODULE_SOURCES}
-  MLIR_SOURCES
-    ${BENCHMARK_MLIR_SOURCES}
-  ENTRY_FUNCTIONS
-    ${BENCHMARK_ENTRY_FUNCTIONS}
-  FUNCTION_INPUTS
-    ${BENCHMARK_FUNCTION_INPUTS}
+  MODULES
+    ${MOBILENET_V2_MODULE}
+    ${MOBILENET_V3SMALL_MODULE}
 
   BENCHMARK_MODES
     "kernel-execution"