Revert "[DispatchCreation] Run preprocessing before..." (#18934)

The original PR (iree-org/iree#18920) was merged before I was able to resolve
the perf regressions in VAE decode on MI250. See @ScottTodd's comment on the
original PR. I need time to resolve the regressions, but the change can be
relanded once they are resolved.


Reverts iree-org/iree#18920
diff --git a/compiler/src/iree/compiler/DispatchCreation/Passes.cpp b/compiler/src/iree/compiler/DispatchCreation/Passes.cpp
index 9cf5732..afee21c 100644
--- a/compiler/src/iree/compiler/DispatchCreation/Passes.cpp
+++ b/compiler/src/iree/compiler/DispatchCreation/Passes.cpp
@@ -127,12 +127,9 @@
 //===----------------------------------------------------------------------===//
 
 void addDispatchRegionCreationPreprocessingPasses(OpPassManager &passManager) {
+  // 1. Do some simple elementwise op fusion. This could be skipped,
+  //    but could reduce the surface area of ops to handle later.
   FunctionLikeNest(passManager)
-      .addPass(IREE::Flow::createCanonicalizerPass)
-      .addPass(mlir::createCSEPass)
-      .addPass(DispatchCreation::createFusionPreprocessingPass)
-      // 1. Do some simple elementwise op fusion. This could be skipped,
-      //    but could reduce the surface area of ops to handle later.
       .addPass([]() {
         return DispatchCreation::createElementwiseOpFusionPass(
             ElementwiseOpFusionPassOptions{
@@ -151,7 +148,6 @@
 
       // 3. Perform elementwise operation fusion again (now with higher
       //    dimensionality).
-      .addPass(DispatchCreation::createFusionPreprocessingPass)
       .addPass([]() {
         return DispatchCreation::createElementwiseOpFusionPass(
             ElementwiseOpFusionPassOptions{
@@ -298,6 +294,12 @@
         IREE::Util::createFixedPointIteratorPass(std::move(ipoPipeline)));
   }
 
+  FunctionLikeNest(passManager)
+      // Preprocess the input to a form more amenable for fusion.
+      .addPass(DispatchCreation::createFusionPreprocessingPass)
+      .addPass(IREE::Flow::createCanonicalizerPass)
+      .addPass(mlir::createCSEPass);
+
   addDispatchRegionCreationPreprocessingPasses(passManager);
   addDispatchRegionCreationPasses(passManager);