Move more passes from the Flow stage to the GlobalOptimization stage. (#14707)

- Move four more passes to the GlobalOptimization stage:
  - ConvertElementwiseToLinalgPass
  - GeneralizeLinalgNamedOpsPass
  - FuseDequantizationMatmulPass
  - FoldUnitExtentDimsPass
- Move the Flow transformation_pipeline.mlir test to GlobalOptimization/test. It mainly tests the ConvertElementwiseToLinalg pass, which is also tested upstream, so we can probably remove it in a follow-up.
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
index 212e755..d4c3c69 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/Passes.cpp
@@ -132,12 +132,6 @@
 
   FunctionLikeNest(passManager)
       // Preprocess the input to a form more amenable for fusion
-      // - Convert all elementwise ops to Linalg
-      // - Remove unit-extent dimensions.
-      .addPass(mlir::createConvertElementwiseToLinalgPass)
-      .addPass(createGeneralizeLinalgNamedOpsPass)
-      .addPass(createFuseDequantizationMatmulPass)
-      .addPass(createFoldUnitExtentDimsPass)
       .addPass(createRaiseSpecialOps)
       .addPass(createInterchangeGenericOpsPass)
       .addPass(createCollapseDimsPass)
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD.bazel b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD.bazel
index d85235b..e506ec9 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD.bazel
@@ -57,7 +57,6 @@
             "tensor_pad_to_tensor_insert_slice.mlir",
             "top_level_scf_to_cfg.mlir",
             "transform_dispatch_region_formation.mlir",
-            "transformation_pipeline.mlir",
             "verify_input_ir.mlir",
         ],
         include = ["*.mlir"],
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
index 4618c94..9c72420 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
@@ -55,7 +55,6 @@
     "tensor_pad_to_tensor_insert_slice.mlir"
     "top_level_scf_to_cfg.mlir"
     "transform_dispatch_region_formation.mlir"
-    "transformation_pipeline.mlir"
     "verify_input_ir.mlir"
   TOOLS
     FileCheck
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pipeline_tests.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pipeline_tests.mlir
index dc8c2ff..c1d2245 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pipeline_tests.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/pipeline_tests.mlir
@@ -1,4 +1,5 @@
-// RUN: iree-opt --iree-flow-transformation-pipeline --split-input-file %s | FileCheck %s
+// TODO(hanchung): Split the transformation pipeline tests into two mlir files.
+// RUN: iree-opt --iree-global-optimization-transformation-pipeline --iree-flow-transformation-pipeline --split-input-file %s | FileCheck %s
 
 #map = affine_map<(d0, d1) -> (d0)>
 #map1 = affine_map<(d0, d1) -> (d1)>
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transformation_pipeline.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transformation_pipeline.mlir
deleted file mode 100644
index 425cef7..0000000
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transformation_pipeline.mlir
+++ /dev/null
@@ -1,31 +0,0 @@
-// RUN: iree-opt --split-input-file --iree-flow-transformation-pipeline %s | FileCheck %s
-
-// CHECK-LABEL: @empty
-func.func @empty() {
-  // CHECK-NEXT: return
-  return
-}
-
-// -----
-
-func.func @elementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
-  %0 = arith.addf %arg0, %arg0 : tensor<4xf32>
-  %1 = arith.subf %0, %arg0 : tensor<4xf32>
-  %2 = arith.mulf %1, %arg0 : tensor<4xf32>
-  return %2 : tensor<4xf32>
-}
-
-// CHECK-LABEL: flow.executable private @elementwiseOps_dispatch_0 {
-//  CHECK-NEXT:   flow.executable.export public @elementwiseOps_dispatch_0{{.*}} workgroups() -> (index, index, index) {
-//       CHECK:     %x, %y, %z = flow.dispatch.workgroup_count_from_slice
-//       CHECK:     flow.return %x, %y, %z
-//       CHECK:   module {
-//  CHECK-NEXT:     func.func @elementwiseOps_dispatch_0{{.*}}(%arg0: !flow.dispatch.tensor<readonly:tensor<4xf32>>, %arg1: !flow.dispatch.tensor<writeonly:tensor<4xf32>>) {
-//       CHECK:       %{{.+}} = linalg.generic
-//       CHECK:         %{{.+}} = arith.addf %{{.+}}, %{{.+}} : f32
-//  CHECK-NEXT:         %{{.+}} = arith.subf %{{.+}}, %{{.+}} : f32
-//  CHECK-NEXT:         %{{.+}} = arith.mulf %{{.+}}, %{{.+}} : f32
-//       CHECK: func.func @elementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> {
-//  CHECK-NEXT:   %0 = flow.dispatch @elementwiseOps_dispatch_0::@elementwiseOps_dispatch_0{{.*}}(%arg0) : (tensor<4xf32>) -> tensor<4xf32>
-//  CHECK-NEXT:   return %0 : tensor<4xf32>
-//  CHECK-NEXT: }
diff --git a/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel b/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel
index 703fba9..53b5795 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel
+++ b/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel
@@ -29,6 +29,7 @@
         "@llvm-project//mlir:FuncDialect",
         "@llvm-project//mlir:IR",
         "@llvm-project//mlir:LinalgTransforms",
+        "@llvm-project//mlir:MemRefTransforms",
         "@llvm-project//mlir:Pass",
         "@llvm-project//mlir:Transforms",
     ],
diff --git a/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt b/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt
index 09bbfb2..1679b24 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt
+++ b/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt
@@ -22,6 +22,7 @@
     MLIRFuncDialect
     MLIRIR
     MLIRLinalgTransforms
+    MLIRMemRefTransforms
     MLIRPass
     MLIRTransforms
     iree::compiler::Dialect::Flow::Transforms
diff --git a/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp b/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp
index 06f2014..e8f02cb 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp
+++ b/compiler/src/iree/compiler/GlobalOptimization/Passes.cpp
@@ -10,6 +10,7 @@
 #include "iree/compiler/Utils/PassUtils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Transforms/Passes.h"
 
 namespace mlir {
@@ -51,12 +52,23 @@
   mainPassManager.addPass(IREE::Flow::createEraseUnusedLinalgOperands());
 
   // Expand tensor shapes into SSA values and optimize the whole program.
-  // The more we are able to equate shape dimensions at this level the better
-  // our fusions will be.
+  // The more we are able to equate shape dimensions at this level the
+  // better our fusions will be.
   FunctionLikeNest(mainPassManager)
       .addPass(IREE::Flow::createTopLevelSCFToCFGPass);
   mainPassManager.addPass(IREE::Flow::createExpandTensorShapesPass());
 
+  FunctionLikeNest(mainPassManager)
+      // Preprocess the input to a form more amenable for fusion
+      // - Convert all elementwise ops to Linalg
+      // - Remove unit-extent dimensions.
+      .addPass(mlir::createConvertElementwiseToLinalgPass)
+      .addPass(IREE::Flow::createGeneralizeLinalgNamedOpsPass)
+      .addPass(IREE::Flow::createFuseDequantizationMatmulPass)
+      .addPass(IREE::Flow::createFoldUnitExtentDimsPass)
+      .addPass(mlir::createCanonicalizerPass)
+      .addPass(mlir::createCSEPass);
+
   OpPassManager pipeline(ModuleOp::getOperationName());
   FunctionLikeNest(pipeline)
       // Simplify util.global accesses early on; this can help with dispatch
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/BUILD.bazel b/compiler/src/iree/compiler/GlobalOptimization/test/BUILD.bazel
new file mode 100644
index 0000000..01a1d90
--- /dev/null
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/BUILD.bazel
@@ -0,0 +1,28 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
+load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob")
+
+package(
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+iree_lit_test_suite(
+    name = "lit",
+    srcs = enforce_glob(
+        [
+            "transformation_pipeline.mlir",
+        ],
+        include = ["*.mlir"],
+    ),
+    cfg = "//compiler:lit.cfg.py",
+    tools = [
+        "//tools:iree-opt",
+        "@llvm-project//llvm:FileCheck",
+    ],
+)
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/CMakeLists.txt b/compiler/src/iree/compiler/GlobalOptimization/test/CMakeLists.txt
new file mode 100644
index 0000000..4155595
--- /dev/null
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/CMakeLists.txt
@@ -0,0 +1,23 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# compiler/src/iree/compiler/GlobalOptimization/test/BUILD.bazel               #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_lit_test_suite(
+  NAME
+    lit
+  SRCS
+    "transformation_pipeline.mlir"
+  TOOLS
+    FileCheck
+    iree-opt
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/compiler/src/iree/compiler/GlobalOptimization/test/transformation_pipeline.mlir b/compiler/src/iree/compiler/GlobalOptimization/test/transformation_pipeline.mlir
new file mode 100644
index 0000000..0482ddf
--- /dev/null
+++ b/compiler/src/iree/compiler/GlobalOptimization/test/transformation_pipeline.mlir
@@ -0,0 +1,24 @@
+// RUN: iree-opt --split-input-file --iree-global-optimization-transformation-pipeline %s | FileCheck %s
+
+// CHECK-LABEL: @empty
+func.func @empty() {
+  // CHECK-NEXT: return
+  return
+}
+
+// -----
+
+func.func @elementwiseOps(%arg0 : tensor<4xf32>) -> tensor<4xf32> {
+  %0 = arith.addf %arg0, %arg0 : tensor<4xf32>
+  %1 = arith.subf %0, %arg0 : tensor<4xf32>
+  %2 = arith.mulf %1, %arg0 : tensor<4xf32>
+  return %2 : tensor<4xf32>
+}
+
+// CHECK-LABEL: func.func @elementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> {
+//       CHECK:   %{{.+}} = linalg.generic
+//       CHECK:     %{{.+}} = arith.addf %{{.+}}, %{{.+}} : f32
+//       CHECK:   %{{.+}} = linalg.generic
+//       CHECK:     %{{.+}} = arith.subf %{{.+}}, %{{.+}} : f32
+//       CHECK:   %{{.+}} = linalg.generic
+//       CHECK:     %{{.+}} = arith.mulf %{{.+}}, %{{.+}} : f32
diff --git a/tools/test/compile_pipelines.mlir b/tools/test/compile_pipelines.mlir
index 7275ec9..2fd4a6c 100644
--- a/tools/test/compile_pipelines.mlir
+++ b/tools/test/compile_pipelines.mlir
@@ -1,6 +1,7 @@
 // RUN: iree-opt --iree-common-input-transformation-pipeline %s | \
 // RUN: iree-opt --iree-abi-transformation-pipeline - | \
 // RUN: iree-opt --iree-common-input-transformation-pipeline - | \
+// RUN: iree-opt --iree-global-optimization-transformation-pipeline - | \
 // RUN: iree-opt --iree-flow-transformation-pipeline - | \
 // RUN: iree-opt --iree-stream-transformation-pipeline - | \
 // RUN: iree-opt --iree-hal-transformation-pipeline --iree-hal-target-backends=vmvx - | \