Connect transform dialect passes into IREE. (#8602)

* Add a hook to invoke passes from Sandbox.

This commit adds a flag `-iree-codegen-use-sandbox-passes` that can be
used to run transformations from the sandbox within IREE in an e2e manner.
For now the pipeline just
- Sets the number of workgroups to {1, 1, 1} for sequential execution.
- Bufferizes the operations and lowers them to loops and LLVM IR.

* Connect transform dialect passes into IREE.

This revision provides a first connection between IREE and the transform dialect by
allowing the orthogonal specification of a linalg_transform file (policy) that is
parsed and applied on the fly.

Co-authored-by: Mahesh Ravishankar <ravishankarm@google.com>
diff --git a/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py b/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py
index e35479c..3c3faf1 100644
--- a/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py
+++ b/build_tools/bazel_to_cmake/bazel_to_cmake_targets.py
@@ -28,6 +28,11 @@
     "//llvm-external-projects/iree-dialects:IREEPyDMTransforms": [
         "IREEPyDMPasses"
     ],
+    "//llvm-external-projects/iree-dialects:IREELinalgTransformDialect": [
+        "IREELinalgTransformDialect"
+    ],
+    "//llvm-external-projects/iree-dialects:IREELinalgTransformDialectTransforms":
+        ["IREELinalgTransformDialectTransforms"],
 
     # Disable all hard-coded codegen targets (they are expanded dynamically
     # in CMake).
@@ -82,6 +87,8 @@
     "@llvm-project//mlir:TensorDialect": ["MLIRTensor"],
     "@llvm-project//mlir:NVVMDialect": ["MLIRNVVMIR"],
     "@llvm-project//mlir:ROCDLDialect": ["MLIRROCDLIR"],
+    "@llvm-project//mlir:PDLDialect": ["MLIRPDL"],
+    "@llvm-project//mlir:PDLInterpDialect": ["MLIRPDLInterp"],
     # MHLO.
     # TODO: Rework this upstream so that Bazel and CMake rules match up
     # better.
@@ -175,6 +182,7 @@
     "@vulkan_memory_allocator//:impl_header_only": ["vulkan_memory_allocator"],
 }
 
+
 def _convert_mlir_target(target):
   # Default to a pattern substitution approach.
   # Take "MLIR" and append the name part of the full target identifier, e.g.
diff --git a/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp b/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
index 69ad361..049cc15 100644
--- a/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
+++ b/iree/compiler/Codegen/Common/IREEComprehensiveBufferizePass.cpp
@@ -75,12 +75,20 @@
         memCpyFn(memCpyFn) {}
 
   void getDependentDialects(DialectRegistry &registry) const override {
+    // clang-format off
     registry
-        .insert<arith::ArithmeticDialect, IREE::Util::UtilDialect,
-                linalg::LinalgDialect, memref::MemRefDialect, scf::SCFDialect,
-                func::FuncDialect, tensor::TensorDialect, vector::VectorDialect,
-                AffineDialect, IREE::Flow::FlowDialect,
-                bufferization::BufferizationDialect>();
+        .insert<AffineDialect,
+                arith::ArithmeticDialect,
+                bufferization::BufferizationDialect,
+                func::FuncDialect,
+                IREE::Flow::FlowDialect,
+                IREE::Util::UtilDialect,
+                linalg::LinalgDialect,
+                memref::MemRefDialect,
+                scf::SCFDialect,
+                tensor::TensorDialect,
+                vector::VectorDialect>();
+    // clang-format on
   }
 
   void runOnOperation() override;
diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.td b/iree/compiler/Codegen/Dialect/LoweringConfig.td
index 65b2593..3e3fdc4 100644
--- a/iree/compiler/Codegen/Dialect/LoweringConfig.td
+++ b/iree/compiler/Codegen/Dialect/LoweringConfig.td
@@ -21,27 +21,29 @@
     : I32EnumAttrCase<"CPUConvTileAndDecomposeExpert", 3>;
 def CPU_TileFuseAndVectorize
     : I32EnumAttrCase<"CPUTileFuseAndVectorize", 4>;
+def CPU_SandboxCodegen
+    : I32EnumAttrCase<"LinalgTransformInterpCodegen", 5>;
 
 def LLVMGPU_SimpleDistribute
-    : I32EnumAttrCase<"LLVMGPUDistribute", 5>;
+    : I32EnumAttrCase<"LLVMGPUDistribute",6>;
 def LLVMGPU_Vectorize
-    : I32EnumAttrCase<"LLVMGPUVectorize", 6>;
+    : I32EnumAttrCase<"LLVMGPUVectorize", 7>;
 def LLVMGPU_MatmulSimt
-    : I32EnumAttrCase<"LLVMGPUMatmulSimt", 7>;
+    : I32EnumAttrCase<"LLVMGPUMatmulSimt", 8>;
 def LLVMGPU_MatmulTensorCore
-    : I32EnumAttrCase<"LLVMGPUMatmulTensorCore", 8>;
+    : I32EnumAttrCase<"LLVMGPUMatmulTensorCore", 9>;
 
 def SPIRV_Distribute
-    : I32EnumAttrCase<"SPIRVDistribute", 9>;
+    : I32EnumAttrCase<"SPIRVDistribute", 10>;
 def SPIRV_DistributeCopy
-    : I32EnumAttrCase<"SPIRVDistributeCopy", 10>;
+    : I32EnumAttrCase<"SPIRVDistributeCopy", 11>;
 def SPIRV_Vectorize
-    : I32EnumAttrCase<"SPIRVVectorize", 11>;
+    : I32EnumAttrCase<"SPIRVVectorize", 12>;
 def SPIRV_VectorizeToCooperativeOps
-    : I32EnumAttrCase<"SPIRVVectorizeToCooperativeOps", 12>;
+    : I32EnumAttrCase<"SPIRVVectorizeToCooperativeOps", 13>;
 
 def None
-    : I32EnumAttrCase<"None", 13>;
+    : I32EnumAttrCase<"None", 14>;
 
 // EnumAttrCase for all known lowerings for ops within dispatch region
 // to scalar/native-vector code.
@@ -50,9 +52,9 @@
     "identifier for pass pipeline use to lower dispatch region",
     [CPU_Default, CPU_SingleTilingExpert, CPU_DoubleTilingExpert,
      CPU_ConvTileAndDecomposeExpert, CPU_TileFuseAndVectorize,
-     LLVMGPU_SimpleDistribute, LLVMGPU_Vectorize, LLVMGPU_MatmulSimt,
-     LLVMGPU_MatmulTensorCore, SPIRV_Distribute, SPIRV_DistributeCopy,
-     SPIRV_Vectorize, SPIRV_VectorizeToCooperativeOps,
+     CPU_SandboxCodegen, LLVMGPU_SimpleDistribute, LLVMGPU_Vectorize,
+     LLVMGPU_MatmulSimt, LLVMGPU_MatmulTensorCore, SPIRV_Distribute,
+     SPIRV_DistributeCopy, SPIRV_Vectorize, SPIRV_VectorizeToCooperativeOps,
      None]> {
   let cppNamespace = "::mlir::iree_compiler::IREE::Codegen";
   // Don't generate a C++ class! We want to use the AttrDef
diff --git a/iree/compiler/Codegen/LLVMCPU/BUILD b/iree/compiler/Codegen/LLVMCPU/BUILD
index 2a7956d..1402e67 100644
--- a/iree/compiler/Codegen/LLVMCPU/BUILD
+++ b/iree/compiler/Codegen/LLVMCPU/BUILD
@@ -42,6 +42,8 @@
         "//iree/compiler/Utils",
         "//llvm-external-projects/iree-dialects:IREELinalgExtDialect",
         "//llvm-external-projects/iree-dialects:IREELinalgExtPasses",
+        "//llvm-external-projects/iree-dialects:IREELinalgTransformDialect",
+        "//llvm-external-projects/iree-dialects:IREELinalgTransformDialectTransforms",
         "@llvm-project//llvm:Support",
         "@llvm-project//mlir:AffineToStandardTransforms",
         "@llvm-project//mlir:Analysis",
@@ -49,6 +51,7 @@
         "@llvm-project//mlir:ArithmeticTransforms",
         "@llvm-project//mlir:ArmNeon",
         "@llvm-project//mlir:ArmNeon2dToIntr",
+        "@llvm-project//mlir:BufferizationDialect",
         "@llvm-project//mlir:CFGTransforms",
         "@llvm-project//mlir:ControlFlowToLLVM",
         "@llvm-project//mlir:DialectUtils",
@@ -68,6 +71,8 @@
         "@llvm-project//mlir:MemRefDialect",
         "@llvm-project//mlir:MemRefToLLVM",
         "@llvm-project//mlir:MemRefTransforms",
+        "@llvm-project//mlir:PDLDialect",
+        "@llvm-project//mlir:PDLInterpDialect",
         "@llvm-project//mlir:Pass",
         "@llvm-project//mlir:ReconcileUnrealizedCasts",
         "@llvm-project//mlir:SCFDialect",
diff --git a/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt b/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
index 8e004cd..c3f0c3d 100644
--- a/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
+++ b/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
@@ -28,6 +28,8 @@
   DEPS
     IREELinalgExtDialect
     IREELinalgExtPasses
+    IREELinalgTransformDialect
+    IREELinalgTransformDialectTransforms
     LLVMSupport
     MLIRAffineToStandard
     MLIRAnalysis
@@ -35,6 +37,7 @@
     MLIRArithmeticTransforms
     MLIRArmNeon
     MLIRArmNeon2dToIntr
+    MLIRBufferization
     MLIRControlFlowToLLVM
     MLIRFunc
     MLIRFuncToLLVM
@@ -51,6 +54,8 @@
     MLIRMemRef
     MLIRMemRefToLLVM
     MLIRMemRefTransforms
+    MLIRPDL
+    MLIRPDLInterp
     MLIRPass
     MLIRReconcileUnrealizedCasts
     MLIRSCF
diff --git a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index db883f2..234d588 100644
--- a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -63,6 +63,12 @@
         "linalg.generic and linalg.indexed_generic workgroup tile size"),
     llvm::cl::init(64));
 
+static llvm::cl::opt<bool> useLinalgTransformInterp(
+    "iree-codegen-use-linalg-transform-interp",
+    llvm::cl::desc(
+        "experimental path to use the linalg transform dialect interpreter"),
+    llvm::cl::init(false));
+
 using IREE::Codegen::DispatchLoweringPassPipeline;
 
 /// Looks for the `native_vector_size` attribute in the hal.executable.variant
@@ -913,6 +919,17 @@
     auto entryPointOp = entryPointOps.lookup(funcOp.getName());
     if (!entryPointOp) continue;
     if (getTranslationInfo(entryPointOp)) continue;
+
+    // If using sandbox passes, currently set the workload_per_wg to be
+    // empty for single-threaded execution.
+    if (useLinalgTransformInterp) {
+      auto translationInfo = IREE::Codegen::TranslationInfoAttr::get(
+          moduleOp.getContext(), IREE::Codegen::DispatchLoweringPassPipeline::
+                                     LinalgTransformInterpCodegen);
+      setTranslationInfo(funcOp, translationInfo);
+      continue;
+    }
+
     SmallVector<Operation *> computeOps;
     SmallVector<LoopTilingAndDistributionInfo> tiledLoops;
 
diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
index 4d7f89d..e087c25 100644
--- a/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
@@ -4,13 +4,17 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#include "iree-dialects/Dialect/LinalgTransform/LinalgTransformOps.h"
 #include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/LLVMCPU/KernelDispatch.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/PDL/IR/PDL.h"
+#include "mlir/Dialect/PDLInterp/IR/PDLInterp.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Pass/PassRegistry.h"
@@ -33,9 +37,19 @@
   LLVMCPULowerExecutableTargetPass(
       const LLVMCPULowerExecutableTargetPass &pass) {}
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<IREE::Codegen::IREECodegenDialect, IREE::HAL::HALDialect,
-                    linalg::LinalgDialect, LLVM::LLVMDialect, scf::SCFDialect,
+    // clang-format off
+    registry.insert<IREE::Codegen::IREECodegenDialect,
+                    IREE::HAL::HALDialect,
+                    bufferization::BufferizationDialect,
+                    linalg::LinalgDialect,
+                    linalg::transform::LinalgTransformDialect,
+                    LLVM::LLVMDialect,
+                    pdl::PDLDialect,
+                    pdl_interp::PDLInterpDialect,
+                    scf::SCFDialect,
+                    tensor::TensorDialect,
                     vector::VectorDialect>();
+    // clang-format on
   }
 
   void runOnOperation() override;
@@ -101,6 +115,11 @@
 void LLVMCPULowerExecutableTargetPass::runOnOperation() {
   IREE::HAL::ExecutableVariantOp variantOp = getOperation();
   ModuleOp moduleOp = variantOp.getInnerModule();
+  if (!variantOp || !moduleOp) {
+    getOperation()->emitError(
+        "Expected a variantOp root with an inner ModuleOp");
+    return signalPassFailure();
+  }
 
   OpPassManager executableLoweringPipeline(
       IREE::HAL::ExecutableVariantOp::getOperationName());
@@ -186,6 +205,10 @@
             addConvTileAndDecomposeExpertPassPipeline(nestedModulePM);
             break;
           case IREE::Codegen::DispatchLoweringPassPipeline::
+              LinalgTransformInterpCodegen:
+            addLinalgTransformInterpPasses(executableLoweringPipeline);
+            break;
+          case IREE::Codegen::DispatchLoweringPassPipeline::
               CPUTileFuseAndVectorize:
             addTileFuseAndVectorizePassPipeline(nestedModulePM, lowerToVectors);
             break;
diff --git a/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/iree/compiler/Codegen/LLVMCPU/Passes.cpp
index 1d376b2..ed19536 100644
--- a/iree/compiler/Codegen/LLVMCPU/Passes.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -8,6 +8,7 @@
 
 #include "iree-dialects/Dialect/LinalgExt/IR/TiledOpInterface.h"
 #include "iree-dialects/Dialect/LinalgExt/Passes/Passes.h"
+#include "iree-dialects/Dialect/LinalgTransform/Passes.h"
 #include "iree/compiler/Codegen/LLVMCPU/KernelDispatch.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Sandbox/Passes.h"
@@ -357,6 +358,29 @@
   addLinalgBufferizePasses(passManager, cpuAllocationFunction);
 }
 
+void addLinalgTransformInterpPasses(OpPassManager &passManager) {
+  // Sets the number of workgroups to be {1, 1, 1} for now.
+  passManager.addPass(createSetNumWorkgroupsPass());
+
+  // Give control to the linalg_transform dialect.
+  passManager.addPass(createLinalgTransformInterpreterPass());
+  // Dropping the schedule is only needed if we want to embed the transform in
+  // the module: we should drop the schedule once applied.
+  // This pass does nothing in the case where we apply a separate policy
+  // through a file.
+  passManager.addPass(createDropSchedulePass());
+
+  OpPassManager &modulePM = passManager.nest<ModuleOp>();
+  // Bufferize the dispatch.
+  BufferizationOptions::AllocationFn allocationFn =
+      cpuComprehensiveBufferizeAllocationFn;
+  BufferizationOptions::DeallocationFn deallocationFn =
+      cpuComprehensiveBufferizeDeallocationFn;
+  BufferizationOptions::MemCpyFn memcpyFn = cpuComprehensiveBufferizeCopyFn;
+  addIREEComprehensiveBufferizePasses(modulePM, allocationFn, deallocationFn,
+                                      memcpyFn);
+}
+
 static void addLowerToLLVMPasses(OpPassManager &passManager) {
   // LinalgExt -> SCF
   passManager.addNestedPass<FuncOp>(
diff --git a/iree/compiler/Codegen/LLVMCPU/test/BUILD b/iree/compiler/Codegen/LLVMCPU/test/BUILD
index 57e06d0..7124d07 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/BUILD
+++ b/iree/compiler/Codegen/LLVMCPU/test/BUILD
@@ -25,6 +25,7 @@
             "hal_interface_constants.mlir",
             "hal_interface_workgroup_info.mlir",
             "illegal_configuration.mlir",
+            "linalg_transform.mlir",
             "materialize_launch_configuration.mlir",
             "synchronize_symbol_visibility.mlir",
             "test_config_mmt4d.mlir",
@@ -34,8 +35,13 @@
             "vector_contract_to_arm_intrinsics.mlir",
         ],
         include = ["*.mlir"],
+        # linalg_transform_spec is an MLIR file that specifies a
+        # transformation; it needs to be included as data.
+        exclude = ["linalg_transform_spec.mlir"],
     ),
+    data = ["linalg_transform_spec.mlir"],
     tools = [
+        "//iree/tools:iree-compile",
         "//iree/tools:iree-opt",
         "@llvm-project//llvm:FileCheck",
     ],
diff --git a/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt b/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
index 1cb2a67..b776444 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
+++ b/iree/compiler/Codegen/LLVMCPU/test/CMakeLists.txt
@@ -19,6 +19,7 @@
     "hal_interface_constants.mlir"
     "hal_interface_workgroup_info.mlir"
     "illegal_configuration.mlir"
+    "linalg_transform.mlir"
     "materialize_launch_configuration.mlir"
     "synchronize_symbol_visibility.mlir"
     "test_config_mmt4d.mlir"
@@ -28,7 +29,10 @@
     "vector_contract_to_arm_intrinsics.mlir"
   TOOLS
     FileCheck
+    iree::tools::iree-compile
     iree::tools::iree-opt
+  DATA
+    linalg_transform_spec.mlir
 )
 
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Codegen/LLVMCPU/test/linalg_transform.mlir b/iree/compiler/Codegen/LLVMCPU/test/linalg_transform.mlir
new file mode 100644
index 0000000..68bcef4
--- /dev/null
+++ b/iree/compiler/Codegen/LLVMCPU/test/linalg_transform.mlir
@@ -0,0 +1,32 @@
+// RUN: iree-opt %s  -pass-pipeline='hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target))' --iree-codegen-use-linalg-transform-interp --linalg-transform-file-name=%p/linalg_transform_spec.mlir | FileCheck %s
+
+#device_target_cpu = #hal.device.target<"cpu", {executable_targets = [#hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>]}>
+#executable_layout = #hal.executable.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>
+#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>
+
+hal.executable private @pad_matmul_static_dispatch_0 {
+  hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+    hal.executable.entry_point public @pad_matmul_static_dispatch_0 ordinal(0) layout(#executable_layout)
+    builtin.module {
+      func @pad_matmul_static_dispatch_0() {
+        %c0 = arith.constant 0 : index
+        %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:250x500xf32>
+        %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readonly:500x1020xf32>
+        %2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) offset(%c0) alignment(64) : !flow.dispatch.tensor<readwrite:250x1020xf32>
+        %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [250, 500], strides = [1, 1] : !flow.dispatch.tensor<readonly:250x500xf32> -> tensor<250x500xf32>
+        %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [500, 1020], strides = [1, 1] : !flow.dispatch.tensor<readonly:500x1020xf32> -> tensor<500x1020xf32>
+        %5 = flow.dispatch.tensor.load %2, offsets = [0, 0], sizes = [250, 1020], strides = [1, 1] : !flow.dispatch.tensor<readwrite:250x1020xf32> -> tensor<250x1020xf32>
+        //  CHECK-NOT: flow
+        //      CHECK: scf.for
+        // CHECK-NEXT:   subview
+        // CHECK-NEXT:   subview
+        // CHECK-NEXT:   matmul{{.*}}ins{{.*}}outs
+        // CHECK-NEXT: }
+        // CHECK-NEXT: return
+        %6 = linalg.matmul ins(%3, %4 : tensor<250x500xf32>, tensor<500x1020xf32>) outs(%5 : tensor<250x1020xf32>) -> tensor<250x1020xf32>
+        flow.dispatch.tensor.store %6, %2, offsets = [0, 0], sizes = [250, 1020], strides = [1, 1] : tensor<250x1020xf32> -> !flow.dispatch.tensor<readwrite:250x1020xf32>
+        return
+      }
+    }
+  }
+}
diff --git a/iree/compiler/Codegen/LLVMCPU/test/linalg_transform_spec.mlir b/iree/compiler/Codegen/LLVMCPU/test/linalg_transform_spec.mlir
new file mode 100644
index 0000000..31803ef
--- /dev/null
+++ b/iree/compiler/Codegen/LLVMCPU/test/linalg_transform_spec.mlir
@@ -0,0 +1,14 @@
+// RUN: iree-opt %s 
+
+pdl.pattern @pdl_target : benefit(1) {
+  %args = operands
+  %results = types
+  %0 = operation "linalg.matmul"(%args : !pdl.range<value>) -> (%results : !pdl.range<type>)
+  // TODO: we don't want this, but it is the required terminator for pdl.pattern
+  rewrite %0 with "iree_linalg_transform.apply"
+}
+
+iree_linalg_transform.sequence {
+  %0 = match @pdl_target
+  tile %0 {sizes = [2]}
+}
diff --git a/iree/compiler/Codegen/Passes.h b/iree/compiler/Codegen/Passes.h
index ddda5eb..12beb29 100644
--- a/iree/compiler/Codegen/Passes.h
+++ b/iree/compiler/Codegen/Passes.h
@@ -248,6 +248,11 @@
 // convolution ops using the Codegen drivers from sandbox.
 void addConvTileAndDecomposeExpertPassPipeline(OpPassManager &passManager);
 
+/// Populates the passes from Sandbox for testing transformations from sandbox.
+/// Unlike other pipelines, this pass manager is nested at the
+/// `hal.executable.variant` op.
+void addLinalgTransformInterpPasses(OpPassManager &passManager);
+
 /// Populates the passes needed to multi level tile, fuse and vectorize lowering
 /// of linalg ops on tensors to vectors operations.
 void addTileFuseAndVectorizePassPipeline(OpPassManager &passManager,
diff --git a/iree/tools/init_iree_dialects.h b/iree/tools/init_iree_dialects.h
index d32c5f4..755c7c3 100644
--- a/iree/tools/init_iree_dialects.h
+++ b/iree/tools/init_iree_dialects.h
@@ -15,6 +15,7 @@
 #include "iree-dialects/Dialect/Input/InputDialect.h"
 #include "iree-dialects/Dialect/LinalgExt/IR/LinalgExtDialect.h"
 #include "iree-dialects/Dialect/LinalgExt/IR/TiledOpInterface.h"
+#include "iree-dialects/Dialect/LinalgTransform/LinalgTransformOps.h"
 #include "iree-dialects/Dialect/PyDM/IR/PyDMDialect.h"
 #include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/Interfaces/Interfaces.h"
@@ -37,6 +38,7 @@
                   IREE::Flow::FlowDialect,
                   IREE::HAL::HALDialect,
                   IREE::LinalgExt::IREELinalgExtDialect,
+                  mlir::linalg::transform::LinalgTransformDialect,
                   IREE::Stream::StreamDialect,
                   IREE::Util::UtilDialect,
                   IREE::VM::VMDialect,
diff --git a/iree/tools/init_mlir_dialects.h b/iree/tools/init_mlir_dialects.h
index 198f98c..b429cdf 100644
--- a/iree/tools/init_mlir_dialects.h
+++ b/iree/tools/init_mlir_dialects.h
@@ -22,6 +22,8 @@
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/PDL/IR/PDL.h"
+#include "mlir/Dialect/PDLInterp/IR/PDLInterp.h"
 #include "mlir/Dialect/Quant/QuantOps.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
@@ -49,6 +51,8 @@
                   linalg::LinalgDialect,
                   math::MathDialect,
                   memref::MemRefDialect,
+                  pdl::PDLDialect,
+                  pdl_interp::PDLInterpDialect,
                   scf::SCFDialect,
                   quant::QuantizationDialect,
                   spirv::SPIRVDialect,
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/Transforms/TransformInterpreter.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/Transforms/TransformInterpreter.cpp
index 286e313..96bd277 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/Transforms/TransformInterpreter.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/Transforms/TransformInterpreter.cpp
@@ -279,12 +279,16 @@
     // same module as the IR. The considered ModuleOp is either `getOperation()`
     // if it is already a ModuleOp, or the first parent ModuleOp.
     if (clTransformFileName.empty()) {
+      LLVM_DEBUG(DBGS() << getArgument()
+                        << " with transform embedded in module\n");
       ModuleOp module = dyn_cast<ModuleOp>(getOperation());
       if (!module)
         module = getOperation()->getParentOfType<ModuleOp>();
       return runTransformModuleOnOperation(module, getOperation());
     }
 
+    LLVM_DEBUG(DBGS() << getArgument() << " with transform "
+                      << clTransformFileName << "\n");
     // If a transform file is specified, parse its content into a ModuleOp.
     std::string errorMessage;
     auto memoryBuffer = openInputFile(clTransformFileName, &errorMessage);