Relands "Switch e2e tests and benchmarks to `--iree-flow-enable-data-tiling` on `llvm-cpu`" (#11443)

This relands

https://github.com/iree-org/iree/commit/044017ff9af0bb94128fe0fc1e533952a7a9c013
except that it does not flip the pipeline for MobileBertInt8.
diff --git a/benchmarks/TFLite/android-arm64-v8a.cmake b/benchmarks/TFLite/android-arm64-v8a.cmake
index 3d2e380..297cef3 100644
--- a/benchmarks/TFLite/android-arm64-v8a.cmake
+++ b/benchmarks/TFLite/android-arm64-v8a.cmake
@@ -221,7 +221,7 @@
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
     ${ANDROID_CPU_COMPILATION_FLAGS}
-    "--iree-flow-mmt4d-target-options=arch=aarch64"
+    "--iree-flow-enable-data-tiling"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
     "--iree-llvmcpu-enable-pad-consumer-fusion"
   BENCHMARK_TOOL
@@ -252,6 +252,8 @@
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
     ${ANDROID_CPU_COMPILATION_FLAGS}
+    # TODO(#11434): Switch to use data tiling flag after fixing the long
+    # compilation time issue.
     "--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod"
     "--iree-llvm-target-cpu-features=+dotprod"
   BENCHMARK_TOOL
@@ -290,7 +292,7 @@
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
     ${ANDROID_CPU_COMPILATION_FLAGS}
-    "--iree-flow-mmt4d-target-options=arch=aarch64"
+    "--iree-flow-enable-data-tiling"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
     "--iree-llvmcpu-enable-pad-consumer-fusion"
   BENCHMARK_TOOL
@@ -323,6 +325,8 @@
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
     ${ANDROID_CPU_COMPILATION_FLAGS}
+    # TODO(#11434): Switch to use data tiling flag after fixing the long
+    # compilation time issue.
     "--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod"
     "--iree-llvm-target-cpu-features=+dotprod"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
@@ -428,7 +432,7 @@
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
     ${ANDROID_CPU_COMPILATION_FLAGS}
-    "--iree-flow-mmt4d-target-options=arch=aarch64"
+    "--iree-flow-enable-data-tiling"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
     "--iree-llvmcpu-enable-pad-consumer-fusion"
 
@@ -462,6 +466,8 @@
     "CPU-ARM64-v8A"
   COMPILATION_FLAGS
     ${ANDROID_CPU_COMPILATION_FLAGS}
+    # TODO(#11434): Switch to use data tiling flag after fixing the long
+    # compilation time issue.
     "--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod"
     "--iree-llvm-target-cpu-features=+dotprod"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
diff --git a/build_tools/benchmarks/comparisons/setup_mobile.sh b/build_tools/benchmarks/comparisons/setup_mobile.sh
index e746934..302b5f7 100644
--- a/build_tools/benchmarks/comparisons/setup_mobile.sh
+++ b/build_tools/benchmarks/comparisons/setup_mobile.sh
@@ -120,7 +120,7 @@
     --iree-input-type=tosa \
     --iree-hal-target-backends=llvm-cpu \
     --iree-llvm-target-triple=aarch64-none-linux-android29 \
-    "--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod" \
+    --iree-flow-enable-data-tiling \
     --iree-llvm-target-cpu-features=+dotprod \
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" \
     "--iree-llvmcpu-enable-pad-consumer-fusion" \
@@ -135,7 +135,7 @@
     --iree-input-type=tosa \
     --iree-hal-target-backends=llvm-cpu \
     --iree-llvm-target-triple=aarch64-none-linux-android29 \
-    "--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod" \
+    --iree-flow-enable-data-tiling \
     --iree-llvm-target-cpu-features=+dotprod \
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops" \
     "--iree-llvmcpu-enable-pad-consumer-fusion" \
diff --git a/build_tools/python/benchmark_suites/iree/armv8_a_benchmarks.py b/build_tools/python/benchmark_suites/iree/armv8_a_benchmarks.py
index 5e0af1d..bf4a92b 100644
--- a/build_tools/python/benchmark_suites/iree/armv8_a_benchmarks.py
+++ b/build_tools/python/benchmark_suites/iree/armv8_a_benchmarks.py
@@ -41,7 +41,7 @@
       tags=["experimental-flags", "mmt4d"],
       compile_targets=[ARMV8_A_CPU_TARGET],
       extra_flags=[
-          "--iree-flow-mmt4d-target-options=arch=aarch64",
+          "--iree-flow-enable-data-tiling",
           "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops",
           "--iree-llvmcpu-enable-pad-consumer-fusion"
       ])
@@ -50,6 +50,8 @@
       tags=["experimental-flags", "mmt4d", "dotprod"],
       compile_targets=[ARMV8_A_CPU_TARGET],
       extra_flags=[
+          # TODO(#11434): Switch to use data tiling flag after fixing the long
+          # compilation time issue.
           "--iree-flow-mmt4d-target-options=arch=aarch64 features=+dotprod",
           "--iree-llvm-target-cpu-features=+dotprod",
           "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops",
diff --git a/tests/e2e/matmul/BUILD b/tests/e2e/matmul/BUILD
index 3f4d568..4a442c2 100644
--- a/tests/e2e/matmul/BUILD
+++ b/tests/e2e/matmul/BUILD
@@ -39,7 +39,7 @@
 [iree_generated_trace_runner_test(
     name = "e2e_matmul_mmt4d_%s_small" % lhs_rhs_type,
     compiler_flags = [
-        "--iree-flow-mmt4d-target-options=enable_generic_slow #pass_options_variant#",
+        "--iree-flow-enable-data-tiling",
     ],
     generator = ":generate_e2e_matmul_tests",
     generator_args = [
@@ -63,7 +63,7 @@
 [iree_generated_trace_runner_test(
     name = "e2e_matmul_mmt4d_%s_large" % lhs_rhs_type,
     compiler_flags = [
-        "--iree-flow-mmt4d-target-options=enable_generic_slow #pass_options_variant#",
+        "--iree-flow-enable-data-tiling",
     ],
     generator = ":generate_e2e_matmul_tests",
     generator_args = [
@@ -91,7 +91,7 @@
     name = "e2e_matmul_mmt4d_%s_intrinsics_%s" % (lhs_rhs_type, size),
     compiler_flags = [
         "--iree-codegen-mmt4d-use-intrinsics",
-        "--iree-flow-mmt4d-target-options=enable_generic_slow #pass_options_variant#",
+        "--iree-flow-enable-data-tiling",
     ],
     generator = ":generate_e2e_matmul_tests",
     generator_args = [
diff --git a/tests/e2e/matmul/CMakeLists.txt b/tests/e2e/matmul/CMakeLists.txt
index 0d17086..f8853c8 100644
--- a/tests/e2e/matmul/CMakeLists.txt
+++ b/tests/e2e/matmul/CMakeLists.txt
@@ -57,7 +57,7 @@
   DRIVERS
     "local-task"
   COMPILER_FLAGS
-    "--iree-flow-mmt4d-target-options=enable_generic_slow #pass_options_variant#"
+    "--iree-flow-enable-data-tiling"
   TARGET_CPU_FEATURES_VARIANTS
     "default"
     "aarch64:+dotprod"
@@ -79,7 +79,7 @@
   DRIVERS
     "local-task"
   COMPILER_FLAGS
-    "--iree-flow-mmt4d-target-options=enable_generic_slow #pass_options_variant#"
+    "--iree-flow-enable-data-tiling"
   TARGET_CPU_FEATURES_VARIANTS
     "default"
 )
@@ -99,7 +99,7 @@
   DRIVERS
     "local-task"
   COMPILER_FLAGS
-    "--iree-flow-mmt4d-target-options=enable_generic_slow #pass_options_variant#"
+    "--iree-flow-enable-data-tiling"
   TARGET_CPU_FEATURES_VARIANTS
     "default"
     "aarch64:+dotprod"
@@ -121,7 +121,7 @@
   DRIVERS
     "local-task"
   COMPILER_FLAGS
-    "--iree-flow-mmt4d-target-options=enable_generic_slow #pass_options_variant#"
+    "--iree-flow-enable-data-tiling"
   TARGET_CPU_FEATURES_VARIANTS
     "default"
 )
@@ -142,7 +142,7 @@
     "local-task"
   COMPILER_FLAGS
     "--iree-codegen-mmt4d-use-intrinsics"
-    "--iree-flow-mmt4d-target-options=enable_generic_slow #pass_options_variant#"
+    "--iree-flow-enable-data-tiling"
   TARGET_CPU_FEATURES_VARIANTS
     "default"
     "aarch64:+dotprod"
@@ -165,7 +165,7 @@
     "local-task"
   COMPILER_FLAGS
     "--iree-codegen-mmt4d-use-intrinsics"
-    "--iree-flow-mmt4d-target-options=enable_generic_slow #pass_options_variant#"
+    "--iree-flow-enable-data-tiling"
   TARGET_CPU_FEATURES_VARIANTS
     "default"
 )
diff --git a/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake b/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake
index fd8a96c..9f513d8 100644
--- a/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake
+++ b/tests/e2e/test_artifacts/generated_e2e_test_iree_artifacts.cmake
@@ -103,7 +103,7 @@
     "--iree-hal-target-backends=llvm-cpu"
     "--iree-input-type=tosa"
     "--iree-llvm-target-triple=aarch64-none-linux-android29"
-    "--iree-flow-mmt4d-target-options=arch=aarch64"
+    "--iree-flow-enable-data-tiling"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
     "--iree-llvmcpu-enable-pad-consumer-fusion"
   PUBLIC
@@ -289,7 +289,7 @@
     "--iree-hal-target-backends=llvm-cpu"
     "--iree-input-type=tosa"
     "--iree-llvm-target-triple=aarch64-none-linux-android29"
-    "--iree-flow-mmt4d-target-options=arch=aarch64"
+    "--iree-flow-enable-data-tiling"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
     "--iree-llvmcpu-enable-pad-consumer-fusion"
   PUBLIC
@@ -495,7 +495,7 @@
     "--iree-hal-target-backends=llvm-cpu"
     "--iree-input-type=tosa"
     "--iree-llvm-target-triple=aarch64-none-linux-android29"
-    "--iree-flow-mmt4d-target-options=arch=aarch64"
+    "--iree-flow-enable-data-tiling"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
     "--iree-llvmcpu-enable-pad-consumer-fusion"
   PUBLIC
@@ -724,7 +724,7 @@
     "--iree-hal-target-backends=llvm-cpu"
     "--iree-input-type=tosa"
     "--iree-llvm-target-triple=aarch64-none-linux-android29"
-    "--iree-flow-mmt4d-target-options=arch=aarch64"
+    "--iree-flow-enable-data-tiling"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
     "--iree-llvmcpu-enable-pad-consumer-fusion"
   PUBLIC
@@ -1215,7 +1215,7 @@
     "--iree-hal-target-backends=llvm-cpu"
     "--iree-input-type=tosa"
     "--iree-llvm-target-triple=aarch64-none-linux-android29"
-    "--iree-flow-mmt4d-target-options=arch=aarch64"
+    "--iree-flow-enable-data-tiling"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
     "--iree-llvmcpu-enable-pad-consumer-fusion"
   PUBLIC
@@ -1438,7 +1438,7 @@
     "--iree-hal-target-backends=llvm-cpu"
     "--iree-input-type=tosa"
     "--iree-llvm-target-triple=aarch64-none-linux-android29"
-    "--iree-flow-mmt4d-target-options=arch=aarch64"
+    "--iree-flow-enable-data-tiling"
     "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops"
     "--iree-llvmcpu-enable-pad-consumer-fusion"
   PUBLIC