iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp - 3p/openxla/iree - Git at Google

 // Copyright 2020 The IREE Authors
 //
 // Licensed under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 #include "iree/compiler/Codegen/LLVMCPU/KernelDispatch.h"

 #include "iree/compiler/Codegen/Transforms/Transforms.h"
 #include "iree/compiler/Codegen/Utils/MarkerUtils.h"
 #include "iree/compiler/Codegen/Utils/Utils.h"
 #include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
 #include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/CommandLine.h"
 #include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"

 namespace mlir {
 namespace iree_compiler {

 // Command line options controlling the tile sizes chosen in this file.

 // TODO(ravishankarm): This needs to be put in a common place for the CPU and
 // GPU backends to use.
 // Comma-separated list option "iree-llvm-tile-size".
 // NOTE(review): no use of this option is visible in this file - confirm
 // whether it is still consumed anywhere.
 static llvm::cl::list<unsigned> clLLVMTileSizes(
     "iree-llvm-tile-size",
     llvm::cl::desc("Set tile sizes to use for tiling Linalg operations in "
                    "LLVM code generation"),
     llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated);

 // Initial workgroup-level tile size for the M and N dimensions of a row-major
 // matmul (default 64). setRootConfig may replace it per dimension when the
 // operand shapes are available.
 // NOTE(review): "spliting" in the help strings below is a typo for
 // "splitting"; the strings are user-visible, so they are left unchanged here.
 static llvm::cl::opt<int> matmulWorkgroupTileSize(
     "iree-codegen-llvm-matmul-workgroup-size",
     llvm::cl::desc(
         "linalg.matmul tile size for workgroups spliting of M, N dimension"),
     llvm::cl::init(64));
 // Initial second-level (L1) tile size for the M, N and K dimensions of a
 // row-major matmul (default 32).
 static llvm::cl::opt<int> matmulL1TileSize(
     "iree-codegen-llvm-matmul-l1-size",
     llvm::cl::desc(
         "linalg.matmul tile size for L1 spliting of M, N, K dimension"),
     llvm::cl::init(32));
 // Third-level tile size and native vector size used for all three matmul
 // dimensions (default 4). The tile sizes picked for static shapes are kept
 // multiples of this value.
 static llvm::cl::opt<int> matmulVectorSize(
     "iree-codegen-llvm-matmul-vector-size",
     llvm::cl::desc("linalg.matmul vector tile size"), llvm::cl::init(4));

 // Workgroup-level tile size for the M and N dimensions of a row-major batch
 // matmul (default 32); the batch dimension is always tiled by 1.
 static llvm::cl::opt<int> batchMatmulWorkgroupTileSize(
     "iree-codegen-llvm-batch-matmul-workgroup-size",
     llvm::cl::desc("linalg.batch_matmul tile size for workgroups spliting of "
                    "M, N dimension"),
     llvm::cl::init(32));
 // Second-level (L1) tile size for the M, N and K dimensions of a row-major
 // batch matmul (default 16).
 static llvm::cl::opt<int> batchMatmulL1TileSize(
     "iree-codegen-llvm-batch-matmul-l1-size",
     llvm::cl::desc("linalg.batch_matmul tile size for L1 spliting of M, N, K "
                    "dimensions"),
     llvm::cl::init(16));
 // Third-level tile size and native vector size for batch matmul (default 4).
 // Note the variable is named "L2" while its flag and help text say "vector".
 static llvm::cl::opt<int> batchMatmulL2TileSize(
     "iree-codegen-llvm-batch-matmul-vector-size",
     llvm::cl::desc("linalg.batch_matmul vector tile size"), llvm::cl::init(4));

 // Comma-separated workgroup tile sizes for linalg.mmt4d. When left empty,
 // setRootConfig falls back to {64, 32}.
 static llvm::cl::list<int> mmt4dWorkgroupTileSizes(
     "iree-codegen-llvm-mmt4d-workgroup-tile-sizes",
     llvm::cl::desc("linalg.mmt4d workgroup tile size"), llvm::cl::ZeroOrMore,
     llvm::cl::MiscFlags::CommaSeparated);

 // Comma-separated L1 tile sizes for linalg.mmt4d. When left empty,
 // setRootConfig falls back to {1, 1, 4, 4, 1, 4}.
 static llvm::cl::list<int> mmt4dL1TileSizes(
     "iree-codegen-llvm-mmt4d-l1-tile-size",
     llvm::cl::desc("linalg.mmt4d L1 tile size"), llvm::cl::ZeroOrMore,
     llvm::cl::MiscFlags::CommaSeparated);

 // Comma-separated vector tile sizes for linalg.mmt4d, also used as the native
 // vector size. When left empty, setRootConfig falls back to
 // {1, 1, 4, 4, 1, 4}.
 static llvm::cl::list<int> mmt4dVectorSizes(
     "iree-codegen-llvm-mmt4d-vector-size",
     llvm::cl::desc("linalg.mmt4d vector tile size"), llvm::cl::ZeroOrMore,
     llvm::cl::MiscFlags::CommaSeparated);

 // Workgroup tile size applied by setDefaultRootConfig to every partitioned
 // loop of an op that has no dedicated configuration (default 128).
 static llvm::cl::opt<int> defaultWorkgroupTileSize(
     "iree-codegen-llvm-generic-ops-workgroup-size",
     llvm::cl::desc(
         "linalg.generic and linalg.indexed_generic workgroup tile size"),
     llvm::cl::init(128));

 /// Attaches a lowering configuration to a dispatch region whose root op
 /// implements the contraction operation interface.
 ///
 /// An op that already carries a lowering configuration is left untouched.
 /// Only row-major matmuls and row-major batch matmuls are configured (both
 /// with the CPUVectorization pipeline); any other contraction returns success
 /// without being modified.
 static LogicalResult setRootConfig(
     FuncOp entryPointFn, linalg::ContractionOpInterface contractionOp) {
   if (getLoweringConfig(contractionOp)) return success();

   if (contractionOp.isRowMajorMatmul()) {
     // Start from the flag values; they are refined below when the operand
     // shapes are available.
     int workgroupM = matmulWorkgroupTileSize;
     int workgroupN = matmulWorkgroupTileSize;
     int l1M = matmulL1TileSize;
     int l1N = matmulL1TileSize;
     int l1K = matmulL1TileSize;

     auto lhsDims = getUntiledShape(contractionOp.lhs());
     auto rhsDims = getUntiledShape(contractionOp.rhs());
     const bool haveShapes = !lhsDims.empty() && !rhsDims.empty();
     if (haveShapes) {
       // Returns the largest value not exceeding min(limit, extent) that both
       // divides `extent` and is a multiple of the matmul vector size.
       // Dynamic extents keep `limit`, extents below the vector size use the
       // vector size, and `limit` is returned when no candidate qualifies.
       auto pickTile = [](int extent, int limit) -> int {
         if (extent == ShapedType::kDynamicSize) return limit;
         if (extent < matmulVectorSize) return matmulVectorSize.getValue();
         int candidate = std::min(limit, extent);
         while (candidate > 0) {
           if (extent % candidate == 0 && candidate % matmulVectorSize == 0) {
             return candidate;
           }
           --candidate;
         }
         return limit;
       };
       workgroupM = pickTile(lhsDims[0], workgroupM);
       workgroupN = pickTile(rhsDims[1], workgroupN);
       // The L1 tiles subdivide the workgroup tiles chosen just above.
       l1M = pickTile(workgroupM, l1M);
       l1N = pickTile(workgroupN, l1N);
       l1K = pickTile(rhsDims[0], l1K);
     }

     // Levels, outermost first: workgroup (M, N), L1 (M, N, K), vector.
     TileSizesListType tileSizes = {
         {workgroupM, workgroupN},
         {l1M, l1N, l1K},
         {matmulVectorSize, matmulVectorSize, matmulVectorSize}};
     SmallVector<int64_t, 4> nativeVectorSize = {
         matmulVectorSize, matmulVectorSize, matmulVectorSize};
     return setOpConfigAndEntryPointFnTranslation(
         entryPointFn, contractionOp, tileSizes, nativeVectorSize,
         IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization);
   }

   // Contractions that are neither matmul nor batch matmul stay unconfigured.
   if (!contractionOp.isRowMajorBatchMatmul()) return success();

   // TODO(ataei, ravishankarm): This should just use the configuration for
   // matmul above. setting the tile size to 1 for all the batch dimensions.
   TileSizesListType batchTileSizes = {
       {1, batchMatmulWorkgroupTileSize, batchMatmulWorkgroupTileSize},
       {1, batchMatmulL1TileSize, batchMatmulL1TileSize,
        batchMatmulL1TileSize},
       {1, batchMatmulL2TileSize, batchMatmulL2TileSize,
        batchMatmulL2TileSize}};
   SmallVector<int64_t, 4> batchNativeVectorSize = {
       1, batchMatmulL2TileSize, batchMatmulL2TileSize, batchMatmulL2TileSize};
   return setOpConfigAndEntryPointFnTranslation(
       entryPointFn, contractionOp, batchTileSizes, batchNativeVectorSize,
       IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization);
 }

 /// Attaches the lowering configuration for a dispatch region rooted at a
 /// linalg.mmt4d op.
 ///
 /// Each of the three tiling levels (workgroup, L1, vector) is taken from its
 /// command line list when that list is non-empty and from a built-in default
 /// otherwise. The vector-level sizes are also used as the native vector size,
 /// and the CPUVectorization pipeline is selected.
 static LogicalResult setRootConfig(FuncOp entryPointFn,
                                    linalg::Mmt4DOp mmt4dOp) {
   // TODO(ataei): These are hand tuned for some performance benchmarks for now,
   // we want to adapt the same strategy as matmul that dynamically sets tile
   // size.

   // Returns the values given on the command line, or `fallback` when the
   // option was not set.
   auto flagOrDefault = [](auto &flagValues,
                           ArrayRef<int64_t> fallback) -> SmallVector<int64_t> {
     if (flagValues.empty()) {
       return SmallVector<int64_t>(fallback.begin(), fallback.end());
     }
     return SmallVector<int64_t>(flagValues.begin(), flagValues.end());
   };

   SmallVector<int64_t, 4> nativeVectorSize =
       flagOrDefault(mmt4dVectorSizes, {1, 1, 4, 4, 1, 4});

   // Levels, outermost first: workgroup, L1, vector.
   TileSizesListType tileSizes = {
       flagOrDefault(mmt4dWorkgroupTileSizes, {64, 32}),
       flagOrDefault(mmt4dL1TileSizes, {1, 1, 4, 4, 1, 4}), nativeVectorSize};

   return setOpConfigAndEntryPointFnTranslation(
       entryPointFn, mmt4dOp, tileSizes, nativeVectorSize,
       IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization);
 }

 /// Attaches the fallback lowering configuration to `op`, for dispatch regions
 /// whose root has no dedicated configuration.
 ///
 /// Ops that already have a lowering configuration, or that have no
 /// partitioned loops, are left untouched and success is returned. Otherwise a
 /// single workgroup tiling level is created that covers loops 0 through the
 /// last partitioned loop: partitioned loops get the default workgroup tile
 /// size and all other loops get 0. No native vector sizes are supplied.
 static LogicalResult setDefaultRootConfig(FuncOp entryPointFn, Operation *op) {
   if (getLoweringConfig(op)) return success();

   auto partitionedLoops = getPartitionedLoops(op);
   // Return success without doing anything. Eventually default will be used.
   if (partitionedLoops.empty()) return success();

   llvm::DenseSet<unsigned> partitionedSet(partitionedLoops.begin(),
                                           partitionedLoops.end());
   const unsigned numLoops = partitionedLoops.back() + 1;
   const int64_t partitionedTileSize = defaultWorkgroupTileSize;

   SmallVector<int64_t, 4> workgroupTileSizes;
   workgroupTileSizes.reserve(numLoops);
   for (unsigned loopIdx = 0; loopIdx < numLoops; ++loopIdx) {
     const bool isPartitioned = partitionedSet.count(loopIdx) != 0;
     workgroupTileSizes.push_back(isPartitioned ? partitionedTileSize : 0);
   }

   TileSizesListType tileSizes = {workgroupTileSizes};
   return setOpConfigAndEntryPointFnTranslation(
       entryPointFn, op, tileSizes, /*nativeVectorSizes=*/ArrayRef<int64_t>{},
       IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization);
 }

 /// Finds the root operation among `computeOps` and attaches a lowering
 /// configuration to it.
 ///
 /// Only ops carrying the workgroup marker are considered. A first pass
 /// configures linalg.mmt4d and contraction ops; if that pass finds no op with
 /// a lowering configuration, a second pass applies the default configuration
 /// to every remaining candidate except linalg.fill. Returns failure if
 /// configuring an op fails, or (with an error emitted on the offending op) if
 /// more than one op ends up with a lowering configuration within a pass. The
 /// root op itself is not returned to the caller.
 static LogicalResult setRootConfig(FuncOp entryPointFn,
                                    ArrayRef<Operation *> computeOps) {
   Operation *rootOp = nullptr;

   // Records `candidate` as the root when it carries a lowering configuration;
   // a second configured op is reported as an error.
   auto claimRoot = [&](Operation *candidate) -> LogicalResult {
     if (!getLoweringConfig(candidate)) return success();
     if (rootOp) {
       return candidate->emitError(
           "unhandled multiple roots in dispatch region");
     }
     rootOp = candidate;
     return success();
   };

   // Runs the op-specific configuration; ops of any other kind are a no-op.
   auto configureKnownOp = [&](Operation *op) -> LogicalResult {
     return TypeSwitch<Operation *, LogicalResult>(op)
         .Case<linalg::Mmt4DOp, linalg::ContractionOpInterface>(
             [&](auto typedOp) { return setRootConfig(entryPointFn, typedOp); })
         .Default([&](Operation *) { return success(); });
   };

   for (Operation *computeOp : computeOps) {
     if (!hasMarker(computeOp, getWorkgroupMarker())) continue;
     if (failed(configureKnownOp(computeOp)) ||
         failed(claimRoot(computeOp))) {
       return failure();
     }
   }
   if (rootOp) return success();

   // If no root operation found, check if the dispatch region contains a single
   // generic op and chose pipeline based on that.
   for (Operation *computeOp : computeOps) {
     if (!hasMarker(computeOp, getWorkgroupMarker())) continue;
     // Ignore fill ops. They never end up in their own dispatch, so are never
     // root ops.
     if (isa<linalg::FillOp>(computeOp)) continue;
     if (failed(setDefaultRootConfig(entryPointFn, computeOp)) ||
         failed(claimRoot(computeOp))) {
       return failure();
     }
   }
   return success();
 }

 /// Assigns translation info to the entry point functions of `moduleOp`.
 ///
 /// Functions without a matching entry point op, or whose entry point already
 /// has translation info, are skipped. For the rest, the root op among the
 /// function's compute ops is configured (when compute ops can be collected
 /// and there is at least one); if the entry point still has no translation
 /// info afterwards, the CPUDefault pipeline is set. Returns failure only when
 /// configuring a root op fails.
 LogicalResult initCPULaunchConfig(ModuleOp moduleOp) {
   llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> entryPointsByName =
       getAllEntryPoints(moduleOp);

   for (auto funcOp : moduleOp.getOps<FuncOp>()) {
     auto entryPointOp = entryPointsByName.lookup(funcOp.getName());
     // Not an entry point, or already configured: nothing to do.
     if (!entryPointOp || getTranslationInfo(entryPointOp)) continue;

     SmallVector<Operation *, 4> computeOps;
     SmallVector<Operation *, 4> tiledLoops;
     // If there are no linalg ops, not using Linalg based lowering.
     const bool haveComputeOps =
         succeeded(getComputeOps(funcOp, computeOps, tiledLoops)) &&
         !computeOps.empty();
     if (haveComputeOps && failed(setRootConfig(funcOp, computeOps))) {
       return failure();
     }

     // If the entry point still does not have translation info at this point,
     // fall back to the default pipeline.
     if (getTranslationInfo(entryPointOp)) continue;
     setTranslationInfo(funcOp,
                        IREE::HAL::DispatchLoweringPassPipeline::CPUDefault);
   }
   return success();
 }

 }  // namespace iree_compiler
 }  // namespace mlir
	// Copyright 2020 The IREE Authors
	//
	// Licensed under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

	#include "iree/compiler/Codegen/LLVMCPU/KernelDispatch.h"

	#include "iree/compiler/Codegen/Transforms/Transforms.h"
	#include "iree/compiler/Codegen/Utils/MarkerUtils.h"
	#include "iree/compiler/Codegen/Utils/Utils.h"
	#include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
	#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
	#include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
	#include "llvm/ADT/TypeSwitch.h"
	#include "llvm/Support/CommandLine.h"
	#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h"
	#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
	#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
	#include "mlir/Dialect/MemRef/IR/MemRef.h"
	#include "mlir/Dialect/StandardOps/IR/Ops.h"
	#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

	namespace mlir {
	namespace iree_compiler {

	// Command line options controlling the tile sizes chosen in this file.

	// TODO(ravishankarm): This needs to be put in a common place for the CPU and
	// GPU backends to use.
	// Comma-separated list option "iree-llvm-tile-size".
	// NOTE(review): no use of this option is visible in this file - confirm
	// whether it is still consumed anywhere.
	static llvm::cl::list<unsigned> clLLVMTileSizes(
	"iree-llvm-tile-size",
	llvm::cl::desc("Set tile sizes to use for tiling Linalg operations in "
	"LLVM code generation"),
	llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated);

	// Initial workgroup-level tile size for the M and N dimensions of a row-major
	// matmul (default 64). setRootConfig may replace it per dimension when the
	// operand shapes are available.
	// NOTE(review): "spliting" in the help strings below is a typo for
	// "splitting"; the strings are user-visible, so they are left unchanged here.
	static llvm::cl::opt<int> matmulWorkgroupTileSize(
	"iree-codegen-llvm-matmul-workgroup-size",
	llvm::cl::desc(
	"linalg.matmul tile size for workgroups spliting of M, N dimension"),
	llvm::cl::init(64));
	// Initial second-level (L1) tile size for the M, N and K dimensions of a
	// row-major matmul (default 32).
	static llvm::cl::opt<int> matmulL1TileSize(
	"iree-codegen-llvm-matmul-l1-size",
	llvm::cl::desc(
	"linalg.matmul tile size for L1 spliting of M, N, K dimension"),
	llvm::cl::init(32));
	// Third-level tile size and native vector size used for all three matmul
	// dimensions (default 4). The tile sizes picked for static shapes are kept
	// multiples of this value.
	static llvm::cl::opt<int> matmulVectorSize(
	"iree-codegen-llvm-matmul-vector-size",
	llvm::cl::desc("linalg.matmul vector tile size"), llvm::cl::init(4));

	// Workgroup-level tile size for the M and N dimensions of a row-major batch
	// matmul (default 32); the batch dimension is always tiled by 1.
	static llvm::cl::opt<int> batchMatmulWorkgroupTileSize(
	"iree-codegen-llvm-batch-matmul-workgroup-size",
	llvm::cl::desc("linalg.batch_matmul tile size for workgroups spliting of "
	"M, N dimension"),
	llvm::cl::init(32));
	// Second-level (L1) tile size for the M, N and K dimensions of a row-major
	// batch matmul (default 16).
	static llvm::cl::opt<int> batchMatmulL1TileSize(
	"iree-codegen-llvm-batch-matmul-l1-size",
	llvm::cl::desc("linalg.batch_matmul tile size for L1 spliting of M, N, K "
	"dimensions"),
	llvm::cl::init(16));
	// Third-level tile size and native vector size for batch matmul (default 4).
	// Note the variable is named "L2" while its flag and help text say "vector".
	static llvm::cl::opt<int> batchMatmulL2TileSize(
	"iree-codegen-llvm-batch-matmul-vector-size",
	llvm::cl::desc("linalg.batch_matmul vector tile size"), llvm::cl::init(4));

	// Comma-separated workgroup tile sizes for linalg.mmt4d. When left empty,
	// setRootConfig falls back to {64, 32}.
	static llvm::cl::list<int> mmt4dWorkgroupTileSizes(
	"iree-codegen-llvm-mmt4d-workgroup-tile-sizes",
	llvm::cl::desc("linalg.mmt4d workgroup tile size"), llvm::cl::ZeroOrMore,
	llvm::cl::MiscFlags::CommaSeparated);

	// Comma-separated L1 tile sizes for linalg.mmt4d. When left empty,
	// setRootConfig falls back to {1, 1, 4, 4, 1, 4}.
	static llvm::cl::list<int> mmt4dL1TileSizes(
	"iree-codegen-llvm-mmt4d-l1-tile-size",
	llvm::cl::desc("linalg.mmt4d L1 tile size"), llvm::cl::ZeroOrMore,
	llvm::cl::MiscFlags::CommaSeparated);

	// Comma-separated vector tile sizes for linalg.mmt4d, also used as the native
	// vector size. When left empty, setRootConfig falls back to
	// {1, 1, 4, 4, 1, 4}.
	static llvm::cl::list<int> mmt4dVectorSizes(
	"iree-codegen-llvm-mmt4d-vector-size",
	llvm::cl::desc("linalg.mmt4d vector tile size"), llvm::cl::ZeroOrMore,
	llvm::cl::MiscFlags::CommaSeparated);

	// Workgroup tile size applied by setDefaultRootConfig to every partitioned
	// loop of an op that has no dedicated configuration (default 128).
	static llvm::cl::opt<int> defaultWorkgroupTileSize(
	"iree-codegen-llvm-generic-ops-workgroup-size",
	llvm::cl::desc(
	"linalg.generic and linalg.indexed_generic workgroup tile size"),
	llvm::cl::init(128));

	/// Attaches a lowering configuration to a dispatch region whose root op
	/// implements the contraction operation interface.
	///
	/// An op that already carries a lowering configuration is left untouched.
	/// Only row-major matmuls and row-major batch matmuls are configured (both
	/// with the CPUVectorization pipeline); any other contraction returns success
	/// without being modified.
	static LogicalResult setRootConfig(
	    FuncOp entryPointFn, linalg::ContractionOpInterface contractionOp) {
	  if (getLoweringConfig(contractionOp)) return success();

	  if (contractionOp.isRowMajorMatmul()) {
	    // Start from the flag values; they are refined below when the operand
	    // shapes are available.
	    int workgroupM = matmulWorkgroupTileSize;
	    int workgroupN = matmulWorkgroupTileSize;
	    int l1M = matmulL1TileSize;
	    int l1N = matmulL1TileSize;
	    int l1K = matmulL1TileSize;

	    auto lhsDims = getUntiledShape(contractionOp.lhs());
	    auto rhsDims = getUntiledShape(contractionOp.rhs());
	    const bool haveShapes = !lhsDims.empty() && !rhsDims.empty();
	    if (haveShapes) {
	      // Returns the largest value not exceeding min(limit, extent) that both
	      // divides `extent` and is a multiple of the matmul vector size.
	      // Dynamic extents keep `limit`, extents below the vector size use the
	      // vector size, and `limit` is returned when no candidate qualifies.
	      auto pickTile = [](int extent, int limit) -> int {
	        if (extent == ShapedType::kDynamicSize) return limit;
	        if (extent < matmulVectorSize) return matmulVectorSize.getValue();
	        int candidate = std::min(limit, extent);
	        while (candidate > 0) {
	          if (extent % candidate == 0 && candidate % matmulVectorSize == 0) {
	            return candidate;
	          }
	          --candidate;
	        }
	        return limit;
	      };
	      workgroupM = pickTile(lhsDims[0], workgroupM);
	      workgroupN = pickTile(rhsDims[1], workgroupN);
	      // The L1 tiles subdivide the workgroup tiles chosen just above.
	      l1M = pickTile(workgroupM, l1M);
	      l1N = pickTile(workgroupN, l1N);
	      l1K = pickTile(rhsDims[0], l1K);
	    }

	    // Levels, outermost first: workgroup (M, N), L1 (M, N, K), vector.
	    TileSizesListType tileSizes = {
	        {workgroupM, workgroupN},
	        {l1M, l1N, l1K},
	        {matmulVectorSize, matmulVectorSize, matmulVectorSize}};
	    SmallVector<int64_t, 4> nativeVectorSize = {
	        matmulVectorSize, matmulVectorSize, matmulVectorSize};
	    return setOpConfigAndEntryPointFnTranslation(
	        entryPointFn, contractionOp, tileSizes, nativeVectorSize,
	        IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization);
	  }

	  // Contractions that are neither matmul nor batch matmul stay unconfigured.
	  if (!contractionOp.isRowMajorBatchMatmul()) return success();

	  // TODO(ataei, ravishankarm): This should just use the configuration for
	  // matmul above. setting the tile size to 1 for all the batch dimensions.
	  TileSizesListType batchTileSizes = {
	      {1, batchMatmulWorkgroupTileSize, batchMatmulWorkgroupTileSize},
	      {1, batchMatmulL1TileSize, batchMatmulL1TileSize,
	       batchMatmulL1TileSize},
	      {1, batchMatmulL2TileSize, batchMatmulL2TileSize,
	       batchMatmulL2TileSize}};
	  SmallVector<int64_t, 4> batchNativeVectorSize = {
	      1, batchMatmulL2TileSize, batchMatmulL2TileSize, batchMatmulL2TileSize};
	  return setOpConfigAndEntryPointFnTranslation(
	      entryPointFn, contractionOp, batchTileSizes, batchNativeVectorSize,
	      IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization);
	}

	/// Attaches the lowering configuration for a dispatch region rooted at a
	/// linalg.mmt4d op.
	///
	/// Each of the three tiling levels (workgroup, L1, vector) is taken from its
	/// command line list when that list is non-empty and from a built-in default
	/// otherwise. The vector-level sizes are also used as the native vector size,
	/// and the CPUVectorization pipeline is selected.
	static LogicalResult setRootConfig(FuncOp entryPointFn,
	                                   linalg::Mmt4DOp mmt4dOp) {
	  // TODO(ataei): These are hand tuned for some performance benchmarks for now,
	  // we want to adapt the same strategy as matmul that dynamically sets tile
	  // size.

	  // Returns the values given on the command line, or `fallback` when the
	  // option was not set.
	  auto flagOrDefault = [](auto &flagValues,
	                          ArrayRef<int64_t> fallback) -> SmallVector<int64_t> {
	    if (flagValues.empty()) {
	      return SmallVector<int64_t>(fallback.begin(), fallback.end());
	    }
	    return SmallVector<int64_t>(flagValues.begin(), flagValues.end());
	  };

	  SmallVector<int64_t, 4> nativeVectorSize =
	      flagOrDefault(mmt4dVectorSizes, {1, 1, 4, 4, 1, 4});

	  // Levels, outermost first: workgroup, L1, vector.
	  TileSizesListType tileSizes = {
	      flagOrDefault(mmt4dWorkgroupTileSizes, {64, 32}),
	      flagOrDefault(mmt4dL1TileSizes, {1, 1, 4, 4, 1, 4}), nativeVectorSize};

	  return setOpConfigAndEntryPointFnTranslation(
	      entryPointFn, mmt4dOp, tileSizes, nativeVectorSize,
	      IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization);
	}

	/// Sets the lowering configuration for dispatch region with root op being a
	/// generic op.
	///
	/// Ops that already have a lowering configuration, or that have no
	/// partitioned loops, are left untouched and success is returned. Otherwise a
	/// single workgroup tiling level is created that covers loops 0 through the
	/// last partitioned loop: partitioned loops get the default workgroup tile
	/// size and all other loops get 0. No native vector sizes are supplied and
	/// the CPUVectorization pipeline is selected.
	static LogicalResult setDefaultRootConfig(FuncOp entryPointFn, Operation *op) {
	  if (getLoweringConfig(op)) return success();
	  auto partitionedLoops = getPartitionedLoops(op);
	  if (partitionedLoops.empty()) {
	    // Return success without doing anything. Eventually default will be used.
	    return success();
	  }
	  // One tile size per loop, up to and including the last partitioned loop.
	  unsigned maxDepth = partitionedLoops.back() + 1;
	  SmallVector<int64_t, 4> workgroupTileSizes(maxDepth,
	                                             defaultWorkgroupTileSize);
	  llvm::DenseSet<unsigned> partitionedLoopsSet(partitionedLoops.begin(),
	                                               partitionedLoops.end());
	  // Loops that are not partitioned get tile size 0.
	  for (auto dim : llvm::seq<int64_t>(0, workgroupTileSizes.size())) {
	    if (!partitionedLoopsSet.count(dim)) {
	      workgroupTileSizes[dim] = 0;
	    }
	  }
	  TileSizesListType tileSizes = {workgroupTileSizes};
	  return setOpConfigAndEntryPointFnTranslation(
	      entryPointFn, op, tileSizes, /*nativeVectorSizes=*/ArrayRef<int64_t>{},
	      IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization);
	}

	/// Finds the root operation among `computeOps` and attaches a lowering
	/// configuration to it.
	///
	/// Only ops carrying the workgroup marker are considered. A first pass
	/// configures linalg.mmt4d and contraction ops; if that pass finds no op with
	/// a lowering configuration, a second pass applies the default configuration
	/// to every remaining candidate except linalg.fill. Returns failure if
	/// configuring an op fails, or (with an error emitted on the offending op) if
	/// more than one op ends up with a lowering configuration within a pass. The
	/// root op itself is not returned to the caller.
	static LogicalResult setRootConfig(FuncOp entryPointFn,
	                                   ArrayRef<Operation *> computeOps) {
	  Operation *rootOp = nullptr;

	  // Records `candidate` as the root when it carries a lowering configuration;
	  // a second configured op is reported as an error.
	  auto claimRoot = [&](Operation *candidate) -> LogicalResult {
	    if (!getLoweringConfig(candidate)) return success();
	    if (rootOp) {
	      return candidate->emitError(
	          "unhandled multiple roots in dispatch region");
	    }
	    rootOp = candidate;
	    return success();
	  };

	  // Runs the op-specific configuration; ops of any other kind are a no-op.
	  auto configureKnownOp = [&](Operation *op) -> LogicalResult {
	    return TypeSwitch<Operation *, LogicalResult>(op)
	        .Case<linalg::Mmt4DOp, linalg::ContractionOpInterface>(
	            [&](auto typedOp) { return setRootConfig(entryPointFn, typedOp); })
	        .Default([&](Operation *) { return success(); });
	  };

	  for (Operation *computeOp : computeOps) {
	    if (!hasMarker(computeOp, getWorkgroupMarker())) continue;
	    if (failed(configureKnownOp(computeOp)) ||
	        failed(claimRoot(computeOp))) {
	      return failure();
	    }
	  }
	  if (rootOp) return success();

	  // If no root operation found, check if the dispatch region contains a single
	  // generic op and chose pipeline based on that.
	  for (Operation *computeOp : computeOps) {
	    if (!hasMarker(computeOp, getWorkgroupMarker())) continue;
	    // Ignore fill ops. They never end up in their own dispatch, so are never
	    // root ops.
	    if (isa<linalg::FillOp>(computeOp)) continue;
	    if (failed(setDefaultRootConfig(entryPointFn, computeOp)) ||
	        failed(claimRoot(computeOp))) {
	      return failure();
	    }
	  }
	  return success();
	}

	/// Assigns translation info to the entry point functions of `moduleOp`.
	///
	/// Functions without a matching entry point op, or whose entry point already
	/// has translation info, are skipped. For the rest, the root op among the
	/// function's compute ops is configured (when compute ops can be collected
	/// and there is at least one); if the entry point still has no translation
	/// info afterwards, the CPUDefault pipeline is set. Returns failure only when
	/// configuring a root op fails.
	LogicalResult initCPULaunchConfig(ModuleOp moduleOp) {
	  llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> entryPointsByName =
	      getAllEntryPoints(moduleOp);

	  for (auto funcOp : moduleOp.getOps<FuncOp>()) {
	    auto entryPointOp = entryPointsByName.lookup(funcOp.getName());
	    // Not an entry point, or already configured: nothing to do.
	    if (!entryPointOp || getTranslationInfo(entryPointOp)) continue;

	    SmallVector<Operation *, 4> computeOps;
	    SmallVector<Operation *, 4> tiledLoops;
	    // If there are no linalg ops, not using Linalg based lowering.
	    const bool haveComputeOps =
	        succeeded(getComputeOps(funcOp, computeOps, tiledLoops)) &&
	        !computeOps.empty();
	    if (haveComputeOps && failed(setRootConfig(funcOp, computeOps))) {
	      return failure();
	    }

	    // If the entry point still does not have translation info at this point,
	    // fall back to the default pipeline.
	    if (getTranslationInfo(entryPointOp)) continue;
	    setTranslationInfo(funcOp,
	                       IREE::HAL::DispatchLoweringPassPipeline::CPUDefault);
	  }
	  return success();
	}

	} // namespace iree_compiler
	} // namespace mlir