Merge pull request #6114 from rsuderman:main-to-google

PiperOrigin-RevId: 377579548
diff --git a/build_tools/android/run_benchmarks.py b/build_tools/android/run_benchmarks.py
index df083d3..53caac1 100755
--- a/build_tools/android/run_benchmarks.py
+++ b/build_tools/android/run_benchmarks.py
@@ -32,11 +32,11 @@
 BENCHMARK_SUITE_REL_PATH = "benchmark_suites"
 # Relative path against root benchmark suite directory.
 TENSORFLOW_MODEL_SUITE_REL_PATH = "TensorFlow"
+# Relative path against TensorFlow directory.
+VMFB_REL_PATH = "vmfb"
 
 # The flagfile's filename for compiled Python models.
 MODEL_FLAGFILE_NAME = "flagfile"
-# The artifact's filename for compiled Python models.
-MODEL_VMFB_NAME = "compiled.vmfb"
 
 # Root directory to perform benchmarks in on the Android device.
 ANDROID_TMP_DIR = "/data/local/tmp/iree-benchmarks"
@@ -209,14 +209,17 @@
   Returns:
   - A list containing (BenchmarkInfo, context, results) tuples.
   """
-  # Push the benchmark tool to the Android device first.
+  model_root_dir = os.path.join(root_build_dir, BENCHMARK_SUITE_REL_PATH,
+                                TENSORFLOW_MODEL_SUITE_REL_PATH)
+
+  # Push the benchmark vmfb and tool files to the Android device first.
+  adb_push_to_tmp_dir(os.path.join(model_root_dir, VMFB_REL_PATH),
+                      relative_dir="",
+                      verbose=verbose)
   android_tool_path = adb_push_to_tmp_dir(benchmark_tool,
                                           relative_dir="tools",
                                           verbose=verbose)
 
-  model_root_dir = os.path.join(root_build_dir, BENCHMARK_SUITE_REL_PATH,
-                                TENSORFLOW_MODEL_SUITE_REL_PATH)
-
   results = []
 
   # Push all model artifacts to the device and run them.
@@ -225,9 +228,6 @@
                                                    model_benchmark_dir)
     print(f"--> benchmark: {benchmark_info} <--")
     android_relative_dir = os.path.relpath(model_benchmark_dir, model_root_dir)
-    adb_push_to_tmp_dir(os.path.join(model_benchmark_dir, MODEL_VMFB_NAME),
-                        android_relative_dir,
-                        verbose=verbose)
     android_flagfile_path = adb_push_to_tmp_dir(os.path.join(
         model_benchmark_dir, MODEL_FLAGFILE_NAME),
                                                 android_relative_dir,
diff --git a/build_tools/cmake/iree_mlir_benchmark_suite.cmake b/build_tools/cmake/iree_mlir_benchmark_suite.cmake
index ac03097..f905453 100644
--- a/build_tools/cmake/iree_mlir_benchmark_suite.cmake
+++ b/build_tools/cmake/iree_mlir_benchmark_suite.cmake
@@ -98,6 +98,7 @@
     # discovering them and execute them on devices.
     list(GET _RULE_MODULE_SOURCES ${_INDEX} _MODULE_SOURCE)
     set(_ROOT_ARTIFACTS_DIR "${IREE_BINARY_DIR}/benchmark_suites/${_MODULE_SOURCE}")
+    set(_VMFB_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/vmfb")
 
     list(GET _RULE_MODULE_NAMES ${_INDEX} _MODULE_NAME)
     list(GET _RULE_MODULE_TAGS ${_INDEX} _MODULE_TAGS)
@@ -152,27 +153,8 @@
     foreach (_BENCHMARK_MODE IN LISTS _RULE_BENCHMARK_MODES)
       set(_BENCHMARK_DIR_NAME
           "iree-${_RULE_DRIVER}__${_RULE_TARGET_ARCHITECTURE}__${_BENCHMARK_MODE}")
-      set(_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/${_MODULE_DIR_NAME}/${_BENCHMARK_DIR_NAME}")
 
-      set(_TRANSLATION_ARGS "--iree-mlir-to-vm-bytecode-module")
-      list(APPEND _TRANSLATION_ARGS "--iree-hal-target-backends=${_RULE_TARGET_BACKEND}")
-      list(APPEND _TRANSLATION_ARGS ${_RULE_TRANSLATION_FLAGS})
-
-      set(_VMFB_FILE "${_ARTIFACTS_DIR}/compiled.vmfb")
-      add_custom_command(
-        OUTPUT "${_VMFB_FILE}"
-        COMMAND
-          "$<TARGET_FILE:iree_tools_iree-translate>"
-            ${_TRANSLATION_ARGS}
-            "${_SOURCE_FILE}"
-            -o "${_VMFB_FILE}"
-        WORKING_DIRECTORY "${_ARTIFACTS_DIR}"
-        DEPENDS
-          iree_tools_iree-translate
-          "${_DOWNLOAD_TARGET_NAME}"
-        COMMENT "Generating ${_VMFB_FILE}"
-      )
-
+      # A list of name segments for composing unique CMake target names.
       set(_COMMON_NAME_SEGMENTS "${_MODULE_NAME}")
       string(REPLACE "," "-" _TAGS "${_MODULE_TAGS}")
       string(REPLACE "," "-" _MODE "${_BENCHMARK_MODE}")
@@ -180,28 +162,62 @@
            "${_TAGS}" "${_MODE}" "${_RULE_TARGET_BACKEND}"
            "${_RULE_TARGET_ARCHITECTURE}")
 
-      # Construct the benchmark artifact generation target name, which is the module
-      # name, followed by benchmark mode, target backend, and configuration.
-      set(_TRANSLATION_TARGET_NAME_LIST "iree-generate-benchmark-artifact")
-      list(APPEND _TRANSLATION_TARGET_NAME_LIST ${_COMMON_NAME_SEGMENTS})
-      list(JOIN _TRANSLATION_TARGET_NAME_LIST "__" _TRANSLATION_TARGET_NAME)
+      # The full list of translation flags.
+      set(_TRANSLATION_ARGS "--iree-mlir-to-vm-bytecode-module")
+      list(APPEND _TRANSLATION_ARGS "--iree-hal-target-backends=${_RULE_TARGET_BACKEND}")
+      list(SORT _RULE_TRANSLATION_FLAGS)
+      list(APPEND _TRANSLATION_ARGS ${_RULE_TRANSLATION_FLAGS})
 
-      add_custom_target("${_TRANSLATION_TARGET_NAME}"
-        DEPENDS "${_VMFB_FILE}"
+      # Get a unique identifier for this IREE module file by hashing the command
+      # line flags and input file. We will also use this for the CMake target.
+      string(SHA1 _VMFB_HASH "${_TRANSLATION_ARGS};${_SOURCE_FILE}")
+
+      set(_TRANSLATION_TARGET_NAME "iree-generate-benchmark-artifact-${_VMFB_HASH}")
+
+      # Register the target once and share across all benchmarks having the same
+      # MLIR source and translation flags.
+      if(NOT TARGET "${_TRANSLATION_TARGET_NAME}")
+        set(_VMFB_FILE "${_VMFB_ARTIFACTS_DIR}/compiled-${_VMFB_HASH}.vmfb")
+        add_custom_command(
+          OUTPUT "${_VMFB_FILE}"
+          COMMAND
+            "$<TARGET_FILE:iree_tools_iree-translate>"
+              ${_TRANSLATION_ARGS}
+              "${_SOURCE_FILE}"
+              -o "${_VMFB_FILE}"
+          WORKING_DIRECTORY "${_VMFB_ARTIFACTS_DIR}"
+          DEPENDS
+            iree_tools_iree-translate
+            "${_DOWNLOAD_TARGET_NAME}"
+          COMMENT "Generating VMFB for ${_COMMON_NAME_SEGMENTS}"
+        )
+
+        add_custom_target("${_TRANSLATION_TARGET_NAME}"
+          DEPENDS "${_VMFB_FILE}"
+        )
+
+        # Mark dependency so that we have one target to drive them all.
+        add_dependencies(iree-benchmark-suites "${_TRANSLATION_TARGET_NAME}")
+      endif()
+
+      # Add a friendly target alias for this particular benchmark.
+      set(_FRIENDLY_TARGET_NAME_LIST "iree-generate-benchmark-artifact")
+      list(APPEND _FRIENDLY_TARGET_NAME_LIST ${_COMMON_NAME_SEGMENTS})
+      list(JOIN _FRIENDLY_TARGET_NAME_LIST "__" _FRIENDLY_TARGET_NAME)
+      add_custom_target("${_FRIENDLY_TARGET_NAME}"
+        DEPENDS "${_TRANSLATION_TARGET_NAME}"
       )
 
-      # Mark dependency so that we have one target to drive them all.
-      add_dependencies(iree-benchmark-suites "${_TRANSLATION_TARGET_NAME}")
-
       # Finally create the command and target for the flagfile used to execute the
       # generated artifacts.
-      set(_FLAG_FILE "${_ARTIFACTS_DIR}/flagfile")
+      set(_FLAGFILE_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/${_MODULE_DIR_NAME}/${_BENCHMARK_DIR_NAME}")
+      set(_FLAG_FILE "${_FLAGFILE_ARTIFACTS_DIR}/flagfile")
       set(_ADDITIONAL_ARGS_CL "--additional_args=\"${_RULE_RUNTIME_FLAGS}\"")
       add_custom_command(
         OUTPUT "${_FLAG_FILE}"
         COMMAND
           "${Python3_EXECUTABLE}" "${IREE_ROOT_DIR}/scripts/generate_flagfile.py"
-            --module_file=compiled.vmfb
+            --module_file=../../vmfb/compiled-${_VMFB_HASH}.vmfb
             --driver=${_RULE_DRIVER}
             --entry_function=${_ENTRY_FUNCTION}
             --function_inputs=${_FUNCTION_INPUTS}
@@ -209,7 +225,7 @@
             -o "${_FLAG_FILE}"
         DEPENDS
           "${IREE_ROOT_DIR}/scripts/generate_flagfile.py"
-        WORKING_DIRECTORY "${_ARTIFACTS_DIR}"
+        WORKING_DIRECTORY "${_FLAGFILE_ARTIFACTS_DIR}"
         COMMENT "Generating ${_FLAG_FILE}"
       )
 
diff --git a/integrations/tensorflow/e2e/keras/applications/BUILD b/integrations/tensorflow/e2e/keras/applications/BUILD
index 35373f9..cd360b9 100644
--- a/integrations/tensorflow/e2e/keras/applications/BUILD
+++ b/integrations/tensorflow/e2e/keras/applications/BUILD
@@ -108,7 +108,10 @@
                 "iree_llvmaot",
                 "iree_vulkan",
             ],
-            "model": "VGG19",
+            "model": [
+                "VGG16",
+                "VGG19",
+            ],
         },
     ],
     matrix = {
diff --git a/integrations/tensorflow/e2e/keras/applications/CMakeLists.txt b/integrations/tensorflow/e2e/keras/applications/CMakeLists.txt
index 8b0ef04..448e0e4 100644
--- a/integrations/tensorflow/e2e/keras/applications/CMakeLists.txt
+++ b/integrations/tensorflow/e2e/keras/applications/CMakeLists.txt
@@ -27,8 +27,11 @@
     "MobileNet;MobileNetV2;ResNet50;VGG16;VGG19"
     "tf;tflite;iree_llvmaot;iree_vulkan"
   FAILING_CONFIGURATIONS
+    ",,,VGG16,tflite"
     ",,,VGG19,tflite"
+    ",,,VGG16,iree_llvmaot"
     ",,,VGG19,iree_llvmaot"
+    ",,,VGG16,iree_vulkan"
     ",,,VGG19,iree_vulkan"
   LABELS
     "manual"
diff --git a/integrations/tensorflow/iree_tf_compiler/BUILD b/integrations/tensorflow/iree_tf_compiler/BUILD
index 18fdf5b..793693e 100644
--- a/integrations/tensorflow/iree_tf_compiler/BUILD
+++ b/integrations/tensorflow/iree_tf_compiler/BUILD
@@ -103,8 +103,11 @@
         "//iree_tf_compiler/MHLO",
         "@llvm-project//llvm:Support",
         "@llvm-project//mlir:IR",
+        "@llvm-project//mlir:Parser",
         "@llvm-project//mlir:Pass",
+        "@llvm-project//mlir:StandardOps",
         "@llvm-project//mlir:Support",
+        "@org_tensorflow//tensorflow/compiler/mlir/hlo:hlo_dialect_registration",
         "@org_tensorflow//tensorflow/compiler/mlir/xla:hlo_to_mlir_hlo",
         "@org_tensorflow//tensorflow/compiler/xla/service:hlo_parser",
         "@org_tensorflow//tensorflow/compiler/xla/service:hlo_proto_cc",
diff --git a/integrations/tensorflow/iree_tf_compiler/MHLO/BUILD b/integrations/tensorflow/iree_tf_compiler/MHLO/BUILD
index 81092f6..986b490 100644
--- a/integrations/tensorflow/iree_tf_compiler/MHLO/BUILD
+++ b/integrations/tensorflow/iree_tf_compiler/MHLO/BUILD
@@ -42,6 +42,8 @@
         "@llvm-project//llvm:Support",
         "@llvm-project//mlir:IR",
         "@llvm-project//mlir:Pass",
+        "@llvm-project//mlir:SCFToStandard",
+        "@llvm-project//mlir:SCFTransforms",
         "@llvm-project//mlir:Shape",
         "@llvm-project//mlir:ShapeTransforms",
         "@llvm-project//mlir:StandardOps",
diff --git a/integrations/tensorflow/iree_tf_compiler/MHLO/Passes.cpp b/integrations/tensorflow/iree_tf_compiler/MHLO/Passes.cpp
index 4a71f1f..2719f23 100644
--- a/integrations/tensorflow/iree_tf_compiler/MHLO/Passes.cpp
+++ b/integrations/tensorflow/iree_tf_compiler/MHLO/Passes.cpp
@@ -19,6 +19,8 @@
 #include "iree/compiler/Dialect/Shape/Conversion/Passes.h"
 #include "iree/compiler/Dialect/Shape/Transforms/Passes.h"
 #include "mlir-hlo/Dialect/mhlo/transforms/passes.h"
+#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/Dialect/SCF/Passes.h"
 #include "mlir/Dialect/Shape/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Pass/PassRegistry.h"
@@ -42,7 +44,9 @@
   pm.addPass(mlir::createInlinerPass());
   pm.addNestedPass<FuncOp>(mhlo::createControlFlowToScfPass());
   pm.addNestedPass<FuncOp>(mhlo::createLegalizeControlFlowPass());
+  pm.addNestedPass<FuncOp>(mlir::createLowerToCFGPass());
   pm.addPass(createFlattenTuplesInCFGPass());
+  pm.addNestedPass<FuncOp>(mlir::createCanonicalizerPass());
 
   // Mostly delicate to the IREE side MHLO legalization pipeline, now that
   // we have handled the weird that comes from legacy HLO clients.
diff --git a/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp b/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp
index 07562db..dbff7f9 100644
--- a/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp
+++ b/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp
@@ -15,12 +15,15 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "mlir-hlo/Dialect/mhlo/IR/register.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/IR/OperationSupport.h"
 #include "mlir/IR/SymbolTable.h"
+#include "mlir/Parser.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Support/FileUtilities.h"
 #include "tensorflow/compiler/mlir/xla/hlo_to_mlir_hlo.h"
@@ -37,6 +40,7 @@
   binary_proto,
   text_proto,
   hlo_text,
+  mlir_text,
 };
 
 // Error collector that prints errors.
@@ -103,69 +107,40 @@
       llvm::cl::init(""));
   static llvm::cl::opt<XlaFormat> inputFormat(
       "xla-format", cl::desc("XLA Format"),
-      cl::values(clEnumVal(binary_proto, "Parse a binary protocol buffer"),
-                 clEnumVal(text_proto, "Parse a text protocol buffer"),
-                 clEnumVal(hlo_text,
-                           "Parse an HLO module in its native text format")));
+      cl::values(
+          clEnumVal(binary_proto, "Parse a binary protocol buffer"),
+          clEnumVal(text_proto, "Parse a text protocol buffer"),
+          clEnumVal(hlo_text, "Parse an HLO module in its native text format"),
+          clEnumVal(mlir_text, "Parse MLIR text containing MHLO ops")));
 
   // Register any command line options.
   registerAsmPrinterCLOptions();
   registerMLIRContextCLOptions();
+  registerPassManagerCLOptions();
   registerDefaultTimingManagerCLOptions();
   cl::ParseCommandLineOptions(argc, argv);
 
+  auto openInputStream =
+      [&]() -> llvm::Optional<
+                std::pair<std::istream *, std::unique_ptr<std::ifstream>>> {
+    auto fileInputStream = std::make_unique<std::ifstream>();
+    std::istream *inputStream;
+    if (inputPath == "-") {
+      inputStream = &std::cin;
+    } else {
+      fileInputStream->open(inputPath, std::ios::in | std::ios::binary);
+      if (!fileInputStream->is_open()) {
+        llvm::errs() << "Unable to open input file " << inputPath << "\n";
+        return llvm::None;
+      }
+      inputStream = fileInputStream.get();
+    }
+    return std::make_pair(inputStream, std::move(fileInputStream));
+  };
+
   DialectRegistry registry;
-
-  // Read the protocol buffer.
-  std::ifstream fileInputStream;
-  std::istream *inputStream;
-  if (inputPath == "-") {
-    inputStream = &std::cin;
-  } else {
-    fileInputStream.open(inputPath, std::ios::in | std::ios::binary);
-    if (!fileInputStream.is_open()) {
-      llvm::errs() << "Unable to open input file " << inputPath << "\n";
-      return 1;
-    }
-    inputStream = &fileInputStream;
-  }
-
-  xla::HloProto hloProto;
-  switch (inputFormat) {
-    case binary_proto: {
-      if (!hloProto.mutable_hlo_module()->ParseFromIstream(inputStream)) {
-        llvm::errs() << "Could not parse binary protocol buffer from "
-                     << inputPath << "\n";
-        return 1;
-      }
-      break;
-    }
-    case text_proto: {
-      tensorflow::protobuf::TextFormat::Parser parser;
-      PrintErrorCollector collector(inputPath);
-      IStreamCopyingInputStream copyingStream(inputStream);
-      tensorflow::protobuf::io::CopyingInputStreamAdaptor streamAdaptor(
-          &copyingStream);
-      parser.RecordErrorsTo(&collector);
-      parser.Parse(&streamAdaptor, hloProto.mutable_hlo_module());
-      if (collector.hadError) {
-        llvm::errs() << "Unable to parse text format protocol buffer\n";
-        return 1;
-      }
-      break;
-    }
-    case hlo_text: {
-      if (failed(ReadHloTextFormatFromStream(inputStream,
-                                             hloProto.mutable_hlo_module()))) {
-        return 1;
-      }
-      break;
-    }
-    default:
-      llvm_unreachable("illegal XlaFormat");
-  }
-
-  // Convert the Module proto into MLIR.
+  mlir::mhlo::registerAllMhloDialects(registry);
+  registry.insert<mlir::StandardOpsDialect>();
   MLIRContext context;
   OwningModuleRef module = ModuleOp::create(mlir::UnknownLoc::get(&context));
   context.appendDialectRegistry(registry);
@@ -174,12 +149,79 @@
   llvm::SourceMgr sourceMgr;
   mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context);
 
-  auto status =
-      ConvertHloToMlirHlo(module.get(), hloProto.mutable_hlo_module());
-  if (!status.ok()) {
-    llvm::errs() << "Error converting HLO Module Proto to MLIR: "
-                 << status.ToString() << "\n";
-    return 2;
+  auto loadHloProtoIntoModule = [&](xla::HloProto &hloProto) -> LogicalResult {
+    auto status =
+        ConvertHloToMlirHlo(module.get(), hloProto.mutable_hlo_module());
+    if (!status.ok()) {
+      llvm::errs() << "Error converting HLO Module Proto to MLIR: "
+                   << status.ToString() << "\n";
+      return failure();
+    }
+    return success();
+  };
+
+  switch (inputFormat) {
+    case binary_proto: {
+      xla::HloProto hloProto;
+      auto input = openInputStream();
+      if (!input) {
+        return 1;
+      }
+      if (!hloProto.mutable_hlo_module()->ParseFromIstream(input->first)) {
+        llvm::errs() << "Could not parse binary protocol buffer from "
+                     << inputPath << "\n";
+        return 1;
+      }
+      if (failed(loadHloProtoIntoModule(hloProto))) return 2;
+      break;
+    }
+    case text_proto: {
+      xla::HloProto hloProto;
+      auto input = openInputStream();
+      if (!input) {
+        return 1;
+      }
+      tensorflow::protobuf::TextFormat::Parser parser;
+      PrintErrorCollector collector(inputPath);
+      IStreamCopyingInputStream copyingStream(input->first);
+      tensorflow::protobuf::io::CopyingInputStreamAdaptor streamAdaptor(
+          &copyingStream);
+      parser.RecordErrorsTo(&collector);
+      parser.Parse(&streamAdaptor, hloProto.mutable_hlo_module());
+      if (collector.hadError) {
+        llvm::errs() << "Unable to parse text format protocol buffer\n";
+        return 1;
+      }
+      if (failed(loadHloProtoIntoModule(hloProto))) return 2;
+      break;
+    }
+    case hlo_text: {
+      xla::HloProto hloProto;
+      auto input = openInputStream();
+      if (!input) {
+        return 1;
+      }
+      if (failed(ReadHloTextFormatFromStream(input->first,
+                                             hloProto.mutable_hlo_module()))) {
+        return 1;
+      }
+      if (failed(loadHloProtoIntoModule(hloProto))) return 2;
+      break;
+    }
+    case mlir_text: {
+      std::string errorMessage;
+      auto file = openInputFile(inputPath, &errorMessage);
+      if (!file) {
+        llvm::errs() << errorMessage << "\n";
+        return 1;
+      }
+      sourceMgr.AddNewSourceBuffer(std::move(file), SMLoc());
+      module = parseSourceFile(sourceMgr, &context);
+      if (!module) return 2;
+      break;
+    }
+    default:
+      llvm_unreachable("illegal XlaFormat");
   }
 
   // Find the entry function and annotate it as exported.
diff --git a/iree/compiler/Conversion/LinalgToLLVM/BUILD b/iree/compiler/Conversion/LinalgToLLVM/BUILD
index 18241c8..1fe6525 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/BUILD
+++ b/iree/compiler/Conversion/LinalgToLLVM/BUILD
@@ -15,7 +15,6 @@
     srcs = [
         "ConvertToLLVM.cpp",
         "KernelDispatch.cpp",
-        "LLVMCodeGenOptions.cpp",
         "LinalgTileAndVectorizePass.cpp",
         "LinalgVectorizePass.cpp",
         "LowerExecutableTargetPass.cpp",
@@ -26,7 +25,6 @@
     ],
     hdrs = [
         "KernelDispatch.h",
-        "LLVMCodeGenOptions.h",
         "Passes.h",
     ],
     deps = [
diff --git a/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt b/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
index 2fdfd52..5a8318e 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
+++ b/iree/compiler/Conversion/LinalgToLLVM/CMakeLists.txt
@@ -15,12 +15,10 @@
     LinalgToLLVM
   HDRS
     "KernelDispatch.h"
-    "LLVMCodeGenOptions.h"
     "Passes.h"
   SRCS
     "ConvertToLLVM.cpp"
     "KernelDispatch.cpp"
-    "LLVMCodeGenOptions.cpp"
     "LinalgTileAndVectorizePass.cpp"
     "LinalgVectorizePass.cpp"
     "LowerExecutableTargetPass.cpp"
diff --git a/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp b/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
index 3a4645c..b7354c1 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/ConvertToLLVM.cpp
@@ -5,7 +5,6 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 #include "iree/compiler/Conversion/CodegenUtils/FunctionUtils.h"
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
 #include "iree/compiler/Conversion/LinalgToLLVM/Passes.h"
 #include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
@@ -594,8 +593,10 @@
 class ConvertToLLVMPass
     : public PassWrapper<ConvertToLLVMPass, OperationPass<ModuleOp>> {
  public:
-  ConvertToLLVMPass(LLVMCodegenOptions options) : options_(options) {}
-
+  ConvertToLLVMPass(bool unfuseFMA = false) { unfuseFMAOps = unfuseFMA; }
+  ConvertToLLVMPass(const ConvertToLLVMPass &pass) {
+    unfuseFMAOps = pass.unfuseFMAOps;
+  }
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<LLVM::LLVMDialect>();
   }
@@ -603,7 +604,10 @@
   void runOnOperation() override;
 
  private:
-  LLVMCodegenOptions options_;
+  Option<bool> unfuseFMAOps{
+      *this, "unfuse-fma-ops",
+      llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
+      llvm::cl::init(false)};
 };
 
 }  // namespace
@@ -710,7 +714,7 @@
   // Post conversion patterns.
   {
     OwningRewritePatternList postPatterns(&getContext());
-    if (options_.unfuseFMAOps) {
+    if (unfuseFMAOps) {
       populateUnfusedFMAOpsPassPatterns(&getContext(), postPatterns);
       (void)applyPatternsAndFoldGreedily(module, std::move(postPatterns));
     }
@@ -718,18 +722,15 @@
 }
 
 std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass(
-    LLVMCodegenOptions options) {
-  return std::make_unique<ConvertToLLVMPass>(options);
+    bool unfuseFMAOps) {
+  return std::make_unique<ConvertToLLVMPass>(unfuseFMAOps);
 }
 
 static PassRegistration<ConvertToLLVMPass> pass(
     "iree-codegen-convert-to-llvm",
     "Perform final conversion from Linalg/HAL/Shape/Vector/Standard to "
     "LLVMIR dialect",
-    [] {
-      return std::make_unique<ConvertToLLVMPass>(
-          getLLVMCodegenOptionsFromClOptions());
-    });
+    [] { return std::make_unique<ConvertToLLVMPass>(); });
 
 }  // namespace iree_compiler
 }  // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.cpp b/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.cpp
deleted file mode 100644
index b38838b..0000000
--- a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
-
-#include "llvm/Support/CommandLine.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-static llvm::cl::opt<bool> clConvImg2ColConversion(
-    "iree-codegen-linalg-to-llvm-conv-img2col-conversion",
-    llvm::cl::desc("Enable rewriting linalg.conv_2d_input_nhwc_filter_hwcf "
-                   "linalg.generic that does img2col buffer packing + "
-                   "linag.matmul"),
-    llvm::cl::init(false));
-
-static llvm::cl::opt<bool> clUnfusedFMA(
-    "iree-codegen-linalg-to-llvm-use-unfused-fma",
-    llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
-    llvm::cl::init(false));
-
-static llvm::cl::opt<bool> clEnableLinalgOnTensorsToVectors(
-    "iree-codegen-linalg-to-llvm-linalg-on-tensors-to-vectors",
-    llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
-    llvm::cl::init(false));
-
-LLVMCodegenOptions getLLVMCodegenOptionsFromClOptions() {
-  LLVMCodegenOptions options;
-  options.useConvImg2Col = clConvImg2ColConversion;
-  options.unfuseFMAOps = clUnfusedFMA;
-  options.useLinalgOnTensorsToVectors = clEnableLinalgOnTensorsToVectors;
-  return options;
-}
-
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h b/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h
deleted file mode 100644
index b9fe214..0000000
--- a/iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef IREE_COMPILER_CONVERSION_LINALGTOLLVM_LLVMCODEGENOPTIONS_H_
-#define IREE_COMPILER_CONVERSION_LINALGTOLLVM_LLVMCODEGENOPTIONS_H_
-
-#include "llvm/ADT/SmallVector.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-// Options used to configure LLVM passes.
-struct LLVMCodegenOptions {
-  bool useConvImg2Col = false;
-  // Target specific options.
-  bool unfuseFMAOps = false;
-  bool useVectorToAarch64 = false;
-  bool useLinalgOnTensorsToVectors = false;
-};
-
-// Returns LLVM CodeGen options from command-line options.
-LLVMCodegenOptions getLLVMCodegenOptionsFromClOptions();
-
-}  // namespace iree_compiler
-}  // namespace mlir
-
-#endif  // IREE_COMPILER_CONVERSION_LINALGTOLLVM_LLVMCODEGENOPTIONS_H_
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp b/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp
index 65f83b5..fce276c 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/LinalgTileAndVectorizePass.cpp
@@ -68,11 +68,24 @@
 namespace {
 struct TileAndVectorizeWorkgroups
     : public PassWrapper<TileAndVectorizeWorkgroups, FunctionPass> {
+  TileAndVectorizeWorkgroups(bool vectorize = true)
+      : lowerToVectors(vectorize) {}
+  TileAndVectorizeWorkgroups(const TileAndVectorizeWorkgroups &pass) {
+    lowerToVectors = pass.lowerToVectors;
+  }
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<linalg::LinalgDialect, AffineDialect, scf::SCFDialect,
                     vector::VectorDialect>();
   }
   void runOnFunction() override;
+
+ private:
+  /// TODO(ravishankarm): Option to not generate any `vector.` instructions. The
+  /// VMVX backend uses the same lowering as the CPU pass but there is no
+  /// lowering of these `vector.` operations to scalar code. So as a WAR do the
+  /// same tiling scheme but avoid generating vector instructions. When VMVX can
+  /// handle vector instructions, drop this option.
+  bool lowerToVectors;
 };
 }  // namespace
 
@@ -209,6 +222,10 @@
     }
   }
 
+  if (!lowerToVectors) {
+    return;
+  }
+
   // Apply vectorization patterns.
   {
     OwningRewritePatternList vectorizationPatterns(&getContext());
@@ -281,8 +298,9 @@
   }
 }
 
-std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass() {
-  return std::make_unique<TileAndVectorizeWorkgroups>();
+std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass(
+    bool lowerToVectors) {
+  return std::make_unique<TileAndVectorizeWorkgroups>(lowerToVectors);
 }
 
 static PassRegistration<TileAndVectorizeWorkgroups> pass(
diff --git a/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp b/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp
index 0354a35..d307ea2 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/LowerExecutableTargetPass.cpp
@@ -32,9 +32,12 @@
                     LLVM::LLVMDialect>();
   }
 
-  LowerExecutableTargetPass(LLVMCodegenOptions options) : options(options) {}
-  LowerExecutableTargetPass(const LowerExecutableTargetPass &pass)
-      : options(pass.options) {}
+  LowerExecutableTargetPass(bool vectorize = true)
+      : lowerToVectors(vectorize) {}
+  LowerExecutableTargetPass(const LowerExecutableTargetPass &pass) {
+    invokeLoweringPipelines = pass.invokeLoweringPipelines;
+    lowerToVectors = pass.lowerToVectors;
+  }
 
   void runOnOperation() override;
 
@@ -47,7 +50,12 @@
           "can be set to false for testing purposes."),
       llvm::cl::init(true)};
 
-  LLVMCodegenOptions options;
+  /// TODO(ravishankarm): Option to not generate any `vector.` instructions. The
+  /// VMVX backend uses the same lowering as the CPU pass but there is no
+  /// lowering of these `vector.` operations to scalar code. So as a WAR do the
+  /// same tiling scheme but avoid generating vector instructions. When VMVX can
+  /// handle vector instructions, drop this option.
+  bool lowerToVectors;
 };
 }  // namespace
 
@@ -64,19 +72,19 @@
   OpPassManager executableLoweringPipeline(
       IREE::HAL::ExecutableTargetOp::getOperationName());
   executableLoweringPipeline.addPass(createSetNumWorkgroupsPass());
+  OpPassManager &nestedModulePM = executableLoweringPipeline.nest<ModuleOp>();
 
   if (invokeLoweringPipelines) {
     IREE::HAL::DispatchLoweringPassPipeline passPipeline =
         setPipeline.getValue();
     switch (passPipeline) {
       case IREE::HAL::DispatchLoweringPassPipeline::CPUDefault:
-        addCPUDefaultPassPipeline(executableLoweringPipeline, options);
+        addCPUDefaultPassPipeline(nestedModulePM);
         break;
       case IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization:
-        addCPUVectorizationPassPipeline(executableLoweringPipeline, options);
+        addCPUVectorizationPassPipeline(nestedModulePM, lowerToVectors);
         break;
     }
-    addLowerToLLVMPasses(executableLoweringPipeline, options);
   }
 
   if (failed(runPipeline(executableLoweringPipeline, targetOp))) {
@@ -85,18 +93,15 @@
 }
 
 std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>>
-createLowerExecutableTargetPass(LLVMCodegenOptions options) {
-  return std::make_unique<LowerExecutableTargetPass>(options);
+createLowerExecutableTargetPass(bool lowerToVectors) {
+  return std::make_unique<LowerExecutableTargetPass>(lowerToVectors);
 }
 
 static PassRegistration<LowerExecutableTargetPass> pass(
     "iree-lower-executable-target-pass",
-    "Perform lowering of executable target to export dialects. Currently "
-    "lowers to LLVM dialect",
-    [] {
-      return std::make_unique<LowerExecutableTargetPass>(
-          getLLVMCodegenOptionsFromClOptions());
-    });
+    "Perform lowering of executable target using one of the "
+    "IREE::HAL::DispatchLoweringPassPipeline",
+    [] { return std::make_unique<LowerExecutableTargetPass>(); });
 
 }  // namespace iree_compiler
 }  // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
index 9cc021b..3264e6d 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
+++ b/iree/compiler/Conversion/LinalgToLLVM/Passes.cpp
@@ -26,77 +26,72 @@
 }
 
 void addCPUVectorizationPassPipeline(OpPassManager &passManager,
-                                     LLVMCodegenOptions options) {
-  OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
-  nestedModulePM.addPass(createCanonicalizerPass());
+                                     bool lowerToVectors) {
+  passManager.addPass(createCanonicalizerPass());
 
   // TODO(ataei): This causes segmentation fault on Android. Fix it and
   // re-enable.
-  // nestedModulePM.addNestedPass<FuncOp>(createPadLinalgWorkgroupTilesPass());
+  // passManager.addNestedPass<FuncOp>(createPadLinalgWorkgroupTilesPass());
 
-  // TODO(ataei): We want to enable when tensor -> vector pass is fully
-  // supported which requires first moving vector-tiling before this step.
-  if (options.useLinalgOnTensorsToVectors) {
-    nestedModulePM.addNestedPass<FuncOp>(createLinalgVectorizePass());
-  }
   // Use stack allocation on CPU side.
-  addLinalgBufferizePasses(nestedModulePM, cpuAllocationFunction);
+  addLinalgBufferizePasses(passManager, cpuAllocationFunction);
 
   // Tile and vectorize linalg ops.
-  nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
-  nestedModulePM.addNestedPass<FuncOp>(
-      createLinalgTileAndVectorizeWorkgroupsPass());
-  nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
-  nestedModulePM.addNestedPass<FuncOp>(createForOpCanonicalizationPass());
+  passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+  passManager.addNestedPass<FuncOp>(
+      createLinalgTileAndVectorizeWorkgroupsPass(lowerToVectors));
+  passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+  passManager.addNestedPass<FuncOp>(createForOpCanonicalizationPass());
 
-  nestedModulePM.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
+  passManager.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
 }
 
-void addCPUDefaultPassPipeline(OpPassManager &passManager,
-                               LLVMCodegenOptions options) {
-  OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
-  nestedModulePM.addPass(createCanonicalizerPass());
+void addCPUDefaultPassPipeline(OpPassManager &passManager) {
+  passManager.addPass(createCanonicalizerPass());
   // Use stack allocation on CPU side.
-  addLinalgBufferizePasses(nestedModulePM, cpuAllocationFunction);
-  nestedModulePM.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
+  addLinalgBufferizePasses(passManager, cpuAllocationFunction);
+  passManager.addNestedPass<FuncOp>(createPlanConvLoopOrderPass());
 }
 
-void addLowerToLLVMPasses(OpPassManager &passManager,
-                          LLVMCodegenOptions options) {
-  OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
+static void addLowerToLLVMPasses(
+    OpPassManager &passManager,
+    const LLVMTransformPassPipelineOptions &options) {
   // Linalg -> SCF
-  nestedModulePM.addNestedPass<FuncOp>(createConvertLinalgToLoopsPass());
-  nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
-  nestedModulePM.addNestedPass<FuncOp>(createCSEPass());
+  passManager.addNestedPass<FuncOp>(createConvertLinalgToLoopsPass());
+  passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+  passManager.addNestedPass<FuncOp>(createCSEPass());
 
   // SCF -> STD
-  nestedModulePM.addNestedPass<FuncOp>(createLowerToCFGPass());
-  nestedModulePM.addNestedPass<FuncOp>(createCanonicalizerPass());
-  nestedModulePM.addNestedPass<FuncOp>(createCSEPass());
+  passManager.addNestedPass<FuncOp>(createLowerToCFGPass());
+  passManager.addNestedPass<FuncOp>(createCanonicalizerPass());
+  passManager.addNestedPass<FuncOp>(createCSEPass());
 
   // Handled tensor-type constants.
-  nestedModulePM.addPass(createTensorConstantBufferizePass());
-  nestedModulePM.addPass(createFoldTensorExtractOpPass());
+  passManager.addPass(createTensorConstantBufferizePass());
+  passManager.addPass(createFoldTensorExtractOpPass());
 
   // (HAL, IREE, Linalg, STD) -> LLVM
-  nestedModulePM.addPass(createConvertToLLVMPass(options));
+  passManager.addPass(createConvertToLLVMPass());
 
-  nestedModulePM.addPass(createCanonicalizerPass());
-  nestedModulePM.addPass(createCSEPass());
+  passManager.addPass(createCanonicalizerPass());
+  passManager.addPass(createCSEPass());
 }
 
-void buildLLVMTransformPassPipeline(OpPassManager &passManager,
-                                    LLVMCodegenOptions options) {
-  passManager.addPass(createLowerExecutableTargetPass(options));
+void buildLLVMTransformPassPipeline(
+    OpPassManager &passManager,
+    const LLVMTransformPassPipelineOptions &options) {
+  OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
+  addLowerToLLVMPasses(nestedModulePM, options);
 }
 
-static PassPipelineRegistration<> linalgLLVMVPipeline(
-    "iree-codegen-linalg-to-llvm-pipeline",
-    "Runs the progressive lowering pipeline from Linalg to LLVM",
-    [](OpPassManager &passManager) {
-      buildLLVMTransformPassPipeline(passManager,
-                                     getLLVMCodegenOptionsFromClOptions());
-    });
+static PassPipelineRegistration<LLVMTransformPassPipelineOptions>
+    linalgLLVMVPipeline(
+        "iree-codegen-linalg-to-llvm-pipeline",
+        "Runs the progressive lowering pipeline from Linalg to LLVM",
+        [](OpPassManager &passManager,
+           const LLVMTransformPassPipelineOptions &options) {
+          buildLLVMTransformPassPipeline(passManager, options);
+        });
 
 }  // namespace iree_compiler
 }  // namespace mlir
diff --git a/iree/compiler/Conversion/LinalgToLLVM/Passes.h b/iree/compiler/Conversion/LinalgToLLVM/Passes.h
index 7f626d6..252c2b0 100644
--- a/iree/compiler/Conversion/LinalgToLLVM/Passes.h
+++ b/iree/compiler/Conversion/LinalgToLLVM/Passes.h
@@ -7,9 +7,10 @@
 #ifndef IREE_COMPILER_CONVERSION_LINALGTOLLVM_PASSES_H_
 #define IREE_COMPILER_CONVERSION_LINALGTOLLVM_PASSES_H_
 
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
+#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassOptions.h"
 
 namespace mlir {
 namespace iree_compiler {
@@ -22,12 +23,9 @@
 /// vector size.
 std::unique_ptr<OperationPass<FuncOp>> createPadLinalgWorkgroupTilesPass();
 
-/// Distributes linalg ops among hal.interface.workgroup logical threads.
-std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>>
-createLinalgTileAndDistributePass();
-
 /// Vectorizes linalg ops executed in the same hal.interface.workgroup.
-std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass();
+std::unique_ptr<FunctionPass> createLinalgTileAndVectorizeWorkgroupsPass(
+    bool lowerToVectors = true);
 
 /// Replaces llvm.intr.fma with its unfused mul and add ops.
 std::unique_ptr<FunctionPass> createUnfusedFMAOpsPass();
@@ -37,42 +35,50 @@
 
 /// Performs the final conversion to LLVM dialect.
 std::unique_ptr<OperationPass<ModuleOp>> createConvertToLLVMPass(
-    LLVMCodegenOptions options);
+    bool unfuseFMAOps = false);
 
 /// Pass to convert Linalg ops into vector operations.
 std::unique_ptr<FunctionPass> createLinalgVectorizePass();
 
 //===----------------------------------------------------------------------===//
-// Pass Pipelines for CPU Lowering
+// Pass Pipelines for CPU Lowering.
 //===----------------------------------------------------------------------===//
 
 /// Populates the passes to lower to scalars operations for linalg based
 /// code-generation. This pipeline does not vectorize, but instead just converts
 /// to memrefs
-void addCPUDefaultPassPipeline(OpPassManager &passManager,
-                               LLVMCodegenOptions options);
+void addCPUDefaultPassPipeline(OpPassManager &passManager);
 
 /// Populates the passes needed to lower to vector operations using linalg based
 /// progressive lowering with vectorization after bufferization.
 void addCPUVectorizationPassPipeline(OpPassManager &passManager,
-                                     LLVMCodegenOptions options);
-
-/// Populates the passes needed to lower scalar/native vector code to LLVM
-/// Dialect.
-void addLowerToLLVMPasses(OpPassManager &passManager,
-                          LLVMCodegenOptions options);
+                                     bool lowerToVectors = true);
 
 /// Pass to lower the module an hal.executable.target operation to external
 /// dialect. Currently this pass lowers to LLVM dialect, but could be
 /// generalized to lower to any "final" dialect like SPIR-V/NVVM, etc.
 std::unique_ptr<OperationPass<IREE::HAL::ExecutableTargetOp>>
-createLowerExecutableTargetPass(LLVMCodegenOptions options);
+createLowerExecutableTargetPass(bool lowerToVectors = true);
+
+//===----------------------------------------------------------------------===//
+// Pass Pipelines for lowering to LLVM dialect.
+//===----------------------------------------------------------------------===//
+
+/// Options for LLVM pipeline.
+struct LLVMTransformPassPipelineOptions
+    : public PassPipelineOptions<LLVMTransformPassPipelineOptions> {
+  Option<bool> unfuseFMAOps{
+      *this, "unfuse-fma-ops",
+      llvm::cl::desc("Enable rewriting llvm.fma to its unfused version."),
+      llvm::cl::init(false)};
+};
 
 /// Populates passes needed to lower a XLA HLO op to LLVM dialect via the
 /// structured ops path. The pass manager `pm` in here should operate on the
 /// module within the IREE::HAL::ExecutableOp.
-void buildLLVMTransformPassPipeline(OpPassManager &passManager,
-                                    LLVMCodegenOptions options);
+void buildLLVMTransformPassPipeline(
+    OpPassManager &passManager,
+    const LLVMTransformPassPipelineOptions &options);
 
 }  // namespace iree_compiler
 }  // namespace mlir
diff --git a/iree/compiler/Conversion/init_conversions.h b/iree/compiler/Conversion/init_conversions.h
index 60c27fd..7a74a17 100644
--- a/iree/compiler/Conversion/init_conversions.h
+++ b/iree/compiler/Conversion/init_conversions.h
@@ -57,7 +57,8 @@
 
 inline void registerLinalgToLLVMPasses() {
   static bool init_once = []() {
-    createLowerExecutableTargetPass(LLVMCodegenOptions());
+    createLowerExecutableTargetPass();
+    createLinalgVectorizePass();
     // LinalgToLLVM
     createLinalgTileAndVectorizeWorkgroupsPass();
     createUnfusedFMAOpsPass();
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
index 6c79804..ad08cb0 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
@@ -8,7 +8,6 @@
 
 #include <cstdlib>
 
-#include "iree/compiler/Conversion/LinalgToLLVM/LLVMCodeGenOptions.h"
 #include "iree/compiler/Conversion/LinalgToLLVM/Passes.h"
 #include "iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h"
 #include "iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h"
@@ -74,13 +73,13 @@
   }
 
   void buildTranslationPassPipeline(OpPassManager &passManager) override {
-    auto codeGenOptions = getLLVMCodegenOptionsFromClOptions();
+    passManager.addPass(createLowerExecutableTargetPass());
     // Set target specific options.
     // TODO(ataei): This is temporary here, should move when target specific
     // overrides options grows.
     llvm::Triple triple(options_.targetTriple);
+    LLVMTransformPassPipelineOptions codeGenOptions;
     if (triple.isWasm()) {
-      // WebAssembly does not (yet) support FMA ops natively, so unfuse them.
       codeGenOptions.unfuseFMAOps = true;
     }
     buildLLVMTransformPassPipeline(passManager, codeGenOptions);
diff --git a/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp b/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp
index 94829f1..9438429 100644
--- a/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp
+++ b/iree/compiler/Dialect/Modules/VMVX/Transforms/Passes.cpp
@@ -27,44 +27,18 @@
 namespace IREE {
 namespace VMVX {
 
-// NOTE:
-// NOTE:    THIS IS ALL JUST A HACK
-// NOTE:
-// NOTE:    this entire pipeline needs to be reworked - it's been randomly
-// NOTE:    constructed to "work" for a few samples by someone who does not
-// NOTE:    understand the codegen system :)
-// NOTE:
-
 static void buildVectorVMVXTransformPassPipeline(OpPassManager &passManager) {
+  // For now lower using the default CPU pass-pipeline which doesn't
+  // vectorize. When VMVX can lower vector operations, this can be relaxed.
+  passManager.addPass(
+      createLowerExecutableTargetPass(/*lowerToVectors=*/false));
+
   OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
 
   // ---------------------------------------------------------------------------
-  // Configuration
-  // ---------------------------------------------------------------------------
-
-  // TODO(#5925): This can also be modified to just use the dynamic pass
-  // pipeline like the CPU side.
-  // passManager.addPass(createMaterializeCPULaunchConfigurationPass());
-  passManager.addPass(createSetNumWorkgroupsPass());
-
-  // ---------------------------------------------------------------------------
   // Linalg -> Vectors
   // ---------------------------------------------------------------------------
 
-  nestedModulePM.addNestedPass<FuncOp>(createLinalgVectorizePass());
-
-  // Use stack allocation for transient buffers.
-  WorkgroupMemoryAllocationFn allocationFn =
-      [](OpBuilder &builder, Location loc, ArrayRef<int64_t> staticShape,
-         Type elementType, ArrayRef<Value> dynamicSizes) {
-        MemRefType allocType = MemRefType::get(staticShape, elementType);
-        return builder.create<memref::AllocaOp>(loc, allocType, dynamicSizes);
-      };
-  addLinalgBufferizePasses(nestedModulePM, allocationFn);
-  nestedModulePM.addPass(createPromoteBuffersToStackPass(
-      /*maxAllocSizeInBytes=*/1 << 10, /*bitwidthOfIndexType=*/32,
-      /*maxRankOfAllocatedMemRef=*/10));
-
   nestedModulePM.addNestedPass<FuncOp>(createResolveShapeOpsPass());
   nestedModulePM.addNestedPass<FuncOp>(
       Shape::createCleanupShapePlaceholdersPass());
diff --git a/iree/test/e2e/llvm_specific/BUILD b/iree/test/e2e/llvm_specific/BUILD
deleted file mode 100644
index e79dfcf..0000000
--- a/iree/test/e2e/llvm_specific/BUILD
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright 2019 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-# Tests for end-to-end IREE support specific to the LLVM lowering.
-# TODO(ravishankarm): Reorganize these tests.
-
-load("//build_tools/bazel:iree_check_test.bzl", "iree_check_single_backend_test_suite")
-
-package(
-    default_visibility = ["//visibility:public"],
-    features = ["layering_check"],
-    licenses = ["notice"],  # Apache 2.0
-)
-
-iree_check_single_backend_test_suite(
-    name = "check_llvm-aot-conv_img2col",
-    srcs = [
-        "conv.mlir",
-    ],
-    compiler_flags = [
-        "-iree-input-type=mhlo",
-        "-iree-codegen-linalg-to-llvm-conv-img2col-conversion=true",
-    ],
-    driver = "dylib",
-    target_backend = "dylib-llvm-aot",
-)
diff --git a/iree/test/e2e/llvm_specific/CMakeLists.txt b/iree/test/e2e/llvm_specific/CMakeLists.txt
deleted file mode 100644
index ab49aff..0000000
--- a/iree/test/e2e/llvm_specific/CMakeLists.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-################################################################################
-# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
-# iree/test/e2e/llvm_specific/BUILD                                            #
-#                                                                              #
-# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
-# CMake-only content.                                                          #
-#                                                                              #
-# To disable autogeneration for this file entirely, delete this header.        #
-################################################################################
-
-iree_add_all_subdirs()
-
-iree_check_single_backend_test_suite(
-  NAME
-    check_llvm-aot-conv_img2col
-  SRCS
-    "conv.mlir"
-  TARGET_BACKEND
-    "dylib-llvm-aot"
-  DRIVER
-    "dylib"
-  COMPILER_FLAGS
-    "-iree-input-type=mhlo"
-    "-iree-codegen-linalg-to-llvm-conv-img2col-conversion=true"
-)
-
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/test/e2e/llvm_specific/conv.mlir b/iree/test/e2e/llvm_specific/conv.mlir
deleted file mode 100644
index e969347..0000000
--- a/iree/test/e2e/llvm_specific/conv.mlir
+++ /dev/null
@@ -1,257 +0,0 @@
-func @conv2d_nopadding() attributes { iree.module.export } {
-  %inputs = iree.unfoldable_constant dense<[[
-      [[ 1.0,  2.0], [ 3.0,  4.0], [ 5.0,  6.0], [ 7.0,  8.0]],
-      [[11.0, 12.0], [13.0, 14.0], [15.0, 16.0], [17.0, 18.0]],
-      [[21.0, 22.0], [23.0, 24.0], [25.0, 26.0], [27.0, 28.0]],
-      [[31.0, 32.0], [33.0, 34.0], [35.0, 36.0], [37.0, 38.0]]]]> : tensor<1x4x4x2xf32>
-  %weights = iree.unfoldable_constant dense<[
-      [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
-      [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
-      [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
-  %res = "mhlo.convolution"(%inputs, %weights) {
-        batch_group_count = 1 : i64,
-        dimension_numbers = {
-          input_batch_dimension = 0 : i64,
-          input_feature_dimension = 3 : i64,
-          input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
-          kernel_input_feature_dimension = 2 : i64,
-          kernel_output_feature_dimension = 3 : i64,
-          kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
-          output_batch_dimension = 0 : i64,
-          output_feature_dimension = 3 : i64,
-          output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
-        feature_group_count = 1 : i64,
-        rhs_dilation = dense<1> : tensor<2xi64>,
-        window_strides = dense<1> : tensor<2xi64>} : (tensor<1x4x4x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x2x3x1xf32>
-  check.expect_almost_eq_const(%res, dense<[[
-      [[1310.0],[1466.0],[1622.0]],
-      [[2090.0],[2246.0],[2402.0]]
-  ]]> : tensor<1x2x3x1xf32>) : tensor<1x2x3x1xf32>
-  return
-}
-
-func @conv2d_1452x3221_same() attributes { iree.module.export } {
-  %inputs = iree.unfoldable_constant dense<[[
-      [[ 1.0,  2.0], [ 3.0,  4.0], [ 5.0,  6.0], [ 7.0,  8.0], [ 9.0, 10.0]],
-      [[11.0, 12.0], [13.0, 14.0], [15.0, 16.0], [17.0, 18.0], [19.0, 20.0]],
-      [[21.0, 22.0], [23.0, 24.0], [25.0, 26.0], [27.0, 28.0], [29.0, 30.0]],
-      [[31.0, 32.0], [33.0, 34.0], [35.0, 36.0], [37.0, 38.0], [39.0, 40.0]]]]> : tensor<1x4x5x2xf32>
-  %weights = iree.unfoldable_constant dense<[
-      [[[ 1.0], [ 2.0]], [[ 3.0], [ 4.0]]],
-      [[[ 5.0], [ 6.0]], [[ 7.0], [ 8.0]]],
-      [[[ 9.0], [10.0]], [[11.0], [12.0]]]]> : tensor<3x2x2x1xf32>
-  %res = "mhlo.convolution"(%inputs, %weights) {
-       batch_group_count = 1 : i64,
-       dimension_numbers = {
-         input_batch_dimension = 0 : i64,
-         input_feature_dimension = 3 : i64,
-         input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
-         kernel_input_feature_dimension = 2 : i64,
-         kernel_output_feature_dimension = 3 : i64,
-         kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
-         output_batch_dimension = 0 : i64,
-         output_feature_dimension = 3 : i64,
-         output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
-       feature_group_count = 1 : i64,
-       padding = dense<[[1, 1], [0, 1]]> : tensor<2x2xi64>,
-       rhs_dilation = dense<1> : tensor<2xi64>,
-       window_strides = dense<1> : tensor<2xi64>} :
-       (tensor<1x4x5x2xf32>, tensor<3x2x2x1xf32>) -> tensor<1x4x5x1xf32>
-  check.expect_almost_eq_const(%res,  dense<[[
-    [[ 600.0], [ 736.0], [ 872.0], [1008.0], [ 476.0]],
-    [[1310.0], [1466.0], [1622.0], [1778.0], [ 805.0]],
-    [[2090.0], [2246.0], [2402.0], [2558.0], [1135.0]],
-    [[1080.0], [1152.0], [1224.0], [1296.0], [ 524.0]]]]> : tensor<1x4x5x1xf32>) : tensor<1x4x5x1xf32>
-  return
-}
-
-func @conv2d_2451x2311_same() attributes { iree.module.export } {
-  %inputs = iree.unfoldable_constant dense<[
-      [[[ 1.0], [ 2.0], [ 3.0], [ 4.0], [ 5.0]],
-       [[ 6.0], [ 7.0], [ 8.0], [ 9.0], [10.0]],
-       [[11.0], [12.0], [13.0], [14.0], [15.0]],
-       [[16.0], [17.0], [18.0], [19.0], [20.0]]],
-      [[[21.0], [22.0], [23.0], [24.0], [25.0]],
-       [[26.0], [27.0], [28.0], [29.0], [30.0]],
-       [[31.0], [32.0], [33.0], [34.0], [35.0]],
-       [[36.0], [37.0], [38.0], [39.0], [40.0]]]]> : tensor <2x4x5x1xf32>
-  %weights = iree.unfoldable_constant dense<[
-      [[[1.0]], [[2.0]], [[3.0]]],
-      [[[4.0]], [[5.0]], [[6.0]]]]> : tensor <2x3x1x1xf32>
-  %res = "mhlo.convolution"(%inputs, %weights) {
-       batch_group_count = 1 : i64,
-       dimension_numbers = {
-         input_batch_dimension = 0 : i64,
-         input_feature_dimension = 3 : i64,
-         input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
-         kernel_input_feature_dimension = 2 : i64,
-         kernel_output_feature_dimension = 3 : i64,
-         kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
-         output_batch_dimension = 0 : i64,
-         output_feature_dimension = 3 : i64,
-         output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
-       feature_group_count = 1 : i64,
-       padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
-       rhs_dilation = dense<1> : tensor<2xi64>,
-       window_strides = dense<1> : tensor<2xi64>} :
-       (tensor<2x4x5x1xf32>, tensor<2x3x1x1xf32>) -> tensor<2x4x5x1xf32>
-  check.expect_almost_eq_const(%res, dense<[
-    [[[ 80.0], [121.0], [142.0], [163.0], [100.0]],
-     [[160.0], [226.0], [247.0], [268.0], [160.0]],
-     [[240.0], [331.0], [352.0], [373.0], [220.0]],
-     [[ 83.0], [104.0], [110.0], [116.0], [ 59.0]]],
-    [[[400.0], [541.0], [562.0], [583.0], [340.0]],
-     [[480.0], [646.0], [667.0], [688.0], [400.0]],
-     [[560.0], [751.0], [772.0], [793.0], [460.0]],
-     [[183.0], [224.0], [230.0], [236.0], [119.0]]]]> : tensor<2x4x5x1xf32>) : tensor<2x4x5x1xf32>
-  return
-}
-
-func @conv2d_no_padding2() attributes { iree.module.export } {
-  %inputs = iree.unfoldable_constant dense<[
-       [[[  1.0,   2.0,   3.0],
-         [  4.0,   5.0,   6.0],
-         [  7.0,   8.0,   9.0],
-         [ 10.0,  11.0,  12.0],
-         [ 13.0,  14.0,  15.0]],
-        [[ 16.0,  17.0,  18.0],
-         [ 19.0,  20.0,  21.0],
-         [ 22.0,  23.0,  24.0],
-         [ 25.0,  26.0,  27.0],
-         [ 28.0,  29.0,  30.0]],
-        [[ 31.0,  32.0,  33.0],
-         [ 34.0,  35.0,  36.0],
-         [ 37.0,  38.0,  39.0],
-         [ 40.0,  41.0,  42.0],
-         [ 43.0,  44.0,  45.0]],
-        [[ 46.0,  47.0,  48.0],
-         [ 49.0,  50.0,  51.0],
-         [ 52.0,  53.0,  54.0],
-         [ 55.0,  56.0,  57.0],
-         [ 58.0,  59.0,  60.0]]],
-       [[[ 61.0,  62.0,  63.0],
-         [ 64.0,  65.0,  66.0],
-         [ 67.0,  68.0,  69.0],
-         [ 70.0,  71.0,  72.0],
-         [ 73.0,  74.0,  75.0]],
-        [[ 76.0,  77.0,  78.0],
-         [ 79.0,  80.0,  81.0],
-         [ 82.0,  83.0,  84.0],
-         [ 85.0,  86.0,  87.0],
-         [ 88.0,  89.0,  90.0]],
-        [[ 91.0,  92.0,  93.0],
-         [ 94.0,  95.0,  96.0],
-         [ 97.0,  98.0,  99.0],
-         [100.0, 101.0, 102.0],
-         [103.0, 104.0, 105.0]],
-        [[106.0, 107.0, 108.0],
-         [109.0, 110.0, 111.0],
-         [112.0, 113.0, 114.0],
-         [115.0, 116.0, 117.0],
-         [118.0, 119.0, 120.0]]]]> : tensor<2x4x5x3xf32>
-  %weights = iree.unfoldable_constant dense<[
-      [[[  1.0,   2.0,   3.0,   4.0,   5.0,   6.0],
-        [  7.0,   8.0,   9.0,  10.0,  11.0,  12.0],
-        [ 13.0,  14.0,  15.0,  16.0,  17.0,  18.0]],
-       [[ 19.0,  20.0,  21.0,  22.0,  23.0,  24.0],
-        [ 25.0,  26.0,  27.0,  28.0,  29.0,  30.0],
-        [ 31.0,  32.0,  33.0,  34.0,  35.0,  36.0]],
-       [[ 37.0,  38.0,  39.0,  40.0,  41.0,  42.0],
-        [ 43.0,  44.0,  45.0,  46.0,  47.0,  48.0],
-        [ 49.0,  50.0,  51.0,  52.0,  53.0,  54.0]]],
-      [[[ 55.0,  56.0,  57.0,  58.0,  59.0,  60.0],
-        [ 61.0,  62.0,  63.0,  64.0,  65.0,  66.0],
-        [ 67.0,  68.0,  69.0,  70.0,  71.0,  72.0]],
-       [[ 73.0,  74.0,  75.0,  76.0,  77.0,  78.0],
-        [ 79.0,  80.0,  81.0,  82.0,  83.0,  84.0],
-        [ 85.0,  86.0,  87.0,  88.0,  89.0,  90.0]],
-       [[ 91.0,  92.0,  93.0,  94.0,  95.0,  96.0],
-        [ 97.0,  98.0,  99.0, 100.0, 101.0, 102.0],
-        [103.0, 104.0, 105.0, 106.0, 107.0, 108.0]]]]> : tensor<2x3x3x6xf32>
-  %res = "mhlo.convolution"(%inputs, %weights) {
-       batch_group_count = 1 : i64,
-       dimension_numbers = {
-         input_batch_dimension = 0 : i64,
-         input_feature_dimension = 3 : i64,
-         input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>,
-         kernel_input_feature_dimension = 2 : i64,
-         kernel_output_feature_dimension = 3 : i64,
-         kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>,
-         output_batch_dimension = 0 : i64,
-         output_feature_dimension = 3 : i64,
-         output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>},
-       feature_group_count = 1 : i64,
-       rhs_dilation = dense<1> : tensor<2xi64>,
-       window_strides = dense<1> : tensor<2xi64>} :
-       (tensor<2x4x5x3xf32>, tensor<2x3x3x6xf32>) -> tensor<2x3x3x6xf32>
-  check.expect_almost_eq_const(%res, dense<[
-      [[[16065.0,  16290.0,  16515.0,  16740.0,  16965.0,  17190.0],
-        [18873.0,  19152.0,  19431.0,  19710.0,  19989.0,  20268.0],
-        [21681.0,  22014.0,  22347.0,  22680.0,  23013.0,  23346.0]],
-       [[30105.0,  30600.0,  31095.0,  31590.0,  32085.0,  32580.0],
-        [32913.0,  33462.0,  34011.0,  34560.0,  35109.0,  35658.0],
-        [35721.0,  36324.0,  36927.0,  37530.0,  38133.0,  38736.0]],
-       [[44145.0,  44910.0,  45675.0,  46440.0,  47205.0,  47970.0],
-        [46953.0,  47772.0,  48591.0,  49410.0,  50229.0,  51048.0],
-        [49761.0,  50634.0,  51507.0,  52380.0,  53253.0,  54126.0]]],
-      [[[72225.0,  73530.0,  74835.0,  76140.0,  77445.0,  78750.0],
-        [75033.0,  76392.0,  77751.0,  79110.0,  80469.0,  81828.0],
-        [77841.0,  79254.0,  80667.0,  82080.0,  83493.0,  84906.0]],
-       [[86265.0,  87840.0,  89415.0,  90990.0,  92565.0,  94140.0],
-        [89073.0,  90702.0,  92331.0,  93960.0,  95589.0,  97218.0],
-        [91881.0,  93564.0,  95247.0,  96930.0,  98613.0, 100296.0]],
-       [[100305.0, 102150.0, 103995.0, 105840.0, 107685.0, 109530.0],
-        [103113.0, 105012.0, 106911.0, 108810.0, 110709.0, 112608.0],
-        [105921.0, 107874.0, 109827.0, 111780.0, 113733.0, 115686.0]]]]> : tensor<2x3x3x6xf32>) : tensor<2x3x3x6xf32>
-  return
-}
-
-func @conv_1d() {
-  %inputs = iree.unfoldable_constant dense<2.0> : tensor<3x8x1xf32>
-  %weights = iree.unfoldable_constant dense<2.0> : tensor<3x1x1xf32>
-  %res = "mhlo.convolution"(%inputs, %weights) {
-    batch_group_count = 1 : i64,
-    dimension_numbers = {
-      input_batch_dimension = 0 : i64,
-      input_feature_dimension = 2 : i64,
-      input_spatial_dimensions = dense<[1]> : tensor<1xi64>,
-      kernel_input_feature_dimension = 1 : i64,
-      kernel_output_feature_dimension = 2 : i64,
-      kernel_spatial_dimensions = dense<[0]> : tensor<1xi64>,
-      output_batch_dimension = 0 : i64,
-      output_feature_dimension = 2 : i64,
-      output_spatial_dimensions = dense<[1]> : tensor<1xi64>
-    },
-    feature_group_count = 1 : i64,
-    padding = dense<0> : tensor<1x2xi64>,
-    rhs_dilation = dense<1> : tensor<1xi64>,
-    window_strides = dense<1> : tensor<1xi64>
-  } : (tensor<3x8x1xf32>, tensor<3x1x1xf32>) -> tensor<3x6x1xf32>
-  check.expect_almost_eq_const(%res, dense<12.0> : tensor<3x6x1xf32>) : tensor<3x6x1xf32>
-  return
-}
-
-func @conv_3d() {
-  %inputs = iree.unfoldable_constant dense<1.0> : tensor<2x8x8x8x3xf32>
-  %weights = iree.unfoldable_constant dense<1.0> : tensor<2x2x2x3x2xf32>
-  %res = "mhlo.convolution"(%inputs, %weights) {
-    batch_group_count = 1 : i64,
-    dimension_numbers = {
-      input_batch_dimension = 0 : i64,
-      input_feature_dimension = 4 : i64,
-      input_spatial_dimensions = dense<[1, 2, 3]> : tensor<3xi64>,
-      kernel_input_feature_dimension = 3 : i64,
-      kernel_output_feature_dimension = 4 : i64,
-      kernel_spatial_dimensions = dense<[0, 1, 2]> : tensor<3xi64>,
-      output_batch_dimension = 0 : i64,
-      output_feature_dimension = 4 : i64,
-      output_spatial_dimensions = dense<[1, 2, 3]> : tensor<3xi64>
-    },
-    feature_group_count = 1 : i64,
-    padding = dense<0> : tensor<3x2xi64>,
-    rhs_dilation = dense<1> : tensor<3xi64>,
-    window_strides = dense<1> : tensor<3xi64>
-  } : (tensor<2x8x8x8x3xf32>, tensor<2x2x2x3x2xf32>) -> tensor<2x7x7x7x2xf32>
-  check.expect_almost_eq_const(%res, dense<24.0> : tensor<2x7x7x7x2xf32>) : tensor<2x7x7x7x2xf32>
-  return
-}
diff --git a/iree/tools/init_mlir_passes.h b/iree/tools/init_mlir_passes.h
index e1e337e..8c076d0 100644
--- a/iree/tools/init_mlir_passes.h
+++ b/iree/tools/init_mlir_passes.h
@@ -64,6 +64,7 @@
   // SCF
   registerSCFParallelLoopFusionPass();
   registerSCFParallelLoopTilingPass();
+  registerSCFToStandardPass();
 
   // Quant
   quant::registerQuantPasses();