// Copyright 2020 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#ifndef IREE_COMPILER_CODEGEN_UTILS_UTILS_H_
#define IREE_COMPILER_CODEGEN_UTILS_UTILS_H_
#include "iree/compiler/Dialect/Flow/IR/PartitionableLoopsInterface.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/BuiltinOps.h"
namespace mlir {
namespace iree_compiler {

/// Maximum number of parallel loop dimensions to distribute; chosen to match
/// the 3-D processor/workgroup grid.
static constexpr unsigned kNumMaxParallelDims = 3;
//===----------------------------------------------------------------------===//
// Utility functions to get entry points
//===----------------------------------------------------------------------===//
/// Returns true if the given `func` is a kernel dispatch entry point.
bool isEntryPoint(FuncOp func);
/// Returns a map from function symbol name to corresponding entry point op.
llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> getAllEntryPoints(
ModuleOp module);
/// Returns the entry point op for the `funcOp`. Returns `nullptr` on failure.
IREE::HAL::ExecutableEntryPointOp getEntryPoint(FuncOp funcOp);
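//
// Usage sketch (illustrative): look up the ExecutableEntryPointOp associated
// with a dispatch entry function. `funcOp` is a hypothetical FuncOp taken from
// the surrounding module.
//
//   if (isEntryPoint(funcOp)) {
//     IREE::HAL::ExecutableEntryPointOp entryPointOp = getEntryPoint(funcOp);
//     if (!entryPointOp) {
//       // `getEntryPoint` returns nullptr on failure.
//     }
//   }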

//===----------------------------------------------------------------------===//
// Utility functions to get backend information
//===----------------------------------------------------------------------===//
bool isX86(IREE::HAL::ExecutableVariantOp variantOp);
inline bool isX86(FuncOp entryPointFn) {
auto variantOp =
entryPointFn->getParentOfType<IREE::HAL::ExecutableVariantOp>();
return isX86(variantOp);
}
inline bool isVMVXBackend(IREE::HAL::ExecutableVariantOp variantOp) {
return variantOp.target().getBackend().getValue() == "vmvx";
}
inline bool isVMVXBackend(FuncOp entryPointFn) {
auto variantOp =
entryPointFn->getParentOfType<IREE::HAL::ExecutableVariantOp>();
return isVMVXBackend(variantOp);
}
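//
// Usage sketch (illustrative): branch on the target backend of an entry point
// function when choosing lowering configurations. `entryPointFn` is a
// hypothetical FuncOp nested inside an IREE::HAL::ExecutableVariantOp.
//
//   if (isVMVXBackend(entryPointFn)) {
//     // VMVX-specific configuration.
//   } else if (isX86(entryPointFn)) {
//     // x86-specific configuration.
//   }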
//===----------------------------------------------------------------------===//
// Utility functions to get untiled op shapes
//===----------------------------------------------------------------------===//
/// Returns the untiled shape of a tiled view for both tensor and memref
/// types. Either walks the `ViewOpInterface` chain (for memrefs) or the
/// extract/load op chain (for tensors).
ArrayRef<int64_t> getUntiledShape(Value tiledView);
/// Returns the untiled result shape for the given Linalg `op` by inspecting
/// the subview chain or the tiled and distributed loop nests around it.
SmallVector<int64_t> getUntiledResultShape(linalg::LinalgOp linalgOp,
unsigned resultNum);
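//
// Usage sketch (illustrative): recover the untiled shape of the first result
// of a tiled linalg op, e.g. when deriving a distribution configuration.
// `linalgOp` is a hypothetical tiled linalg op.
//
//   SmallVector<int64_t> untiledShape =
//       getUntiledResultShape(linalgOp, /*resultNum=*/0);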
//===----------------------------------------------------------------------===//
// Utility functions to set configurations
//===----------------------------------------------------------------------===//
/// Returns the tile sizes to use for the Flow partitioned loops given the
/// workload per workgroup: each partitioned loop gets its tile size from the
/// corresponding workload-per-workgroup entry, and all other loops get a tile
/// size of zero.
SmallVector<int64_t> getDistributedTileSizes(
IREE::Flow::PartitionableLoopsInterface interfaceOp,
ArrayRef<int64_t> workloadPerWorkgroup);
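//
// Usage sketch (illustrative): derive per-loop tile sizes from a
// workload-per-workgroup vector. The interface op and the concrete values
// below are hypothetical.
//
//   auto interfaceOp =
//       cast<IREE::Flow::PartitionableLoopsInterface>(linalgOp.getOperation());
//   SmallVector<int64_t> tileSizes =
//       getDistributedTileSizes(interfaceOp, /*workloadPerWorkgroup=*/{64, 4});
//   // Entries for non-partitioned loops are 0.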
/// Information about a tiled and distributed loop.
///
/// Right now, distribution happens at the same time as we tile the linalg op.
///
/// 0) Given an original loop:
///
/// ```
/// scf.for %iv = %init_lb to %init_ub step %init_step { ... }
/// ```
///
/// 1) After tiling with tile size `%tile_size`, we have:
///
/// ```
/// %tiled_step = %init_step * %tile_size
/// scf.for %iv = %init_lb to %init_ub step %tiled_step { ... }
/// ```
///
/// 2) After distribution with processor `%id` and `%count`, we have:
///
/// ```
/// %dist_lb = %init_lb + %id * %tiled_step
/// %dist_step = %init_step * %tile_size * %count
/// scf.for %iv = %dist_lb to %init_ub step %dist_step { ... }
/// ```
///
/// Given a loop already after 2), this struct contains recovered information
/// about 0) and 1).
struct LoopTilingAndDistributionInfo {
// The tiled and distributed loop.
Operation *loop;
// The lower bound for the original untiled loop.
OpFoldResult untiledLowerBound;
// The upper bound for the original untiled loop.
OpFoldResult untiledUpperBound;
// The step for the original untiled loop.
OpFoldResult untiledStep;
// The tile size used to tile (and not distribute) the original untiled loop.
Optional<int64_t> tileSize;
// The processor dimension this loop is distributed to.
unsigned processorDistributionDim;
};
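//
// For the loops in the example above (illustrative), an instance of this
// struct would record `%init_lb`, `%init_ub`, and `%init_step` as the untiled
// lower bound, upper bound, and step, `%tile_size` as `tileSize` when it is
// statically known, and the dimension that `%id`/`%count` were taken from as
// `processorDistributionDim`.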
/// Assuming that `funcOp` contains a single nest of scf.for loops that
/// represents the tiled+fused+distributed loops, with the distribution being
/// across workgroups, i.e.
///
/// scf.for ... {
/// ...
/// scf.for ... {
/// ...
/// filtered_op.
/// ...
/// filtered_op.
/// ...
/// }
/// }
///
/// Returns the list of filtered operations in the function. If there are no
/// `scf.for` operations in the function, returns the linalg operations in the
/// body of the function if it has a single basic block. Returns failure in all
/// other cases.
using RootOpFilteringFn = std::function<bool(Operation *)>;
LogicalResult getFilteredOps(
FuncOp funcOp, RootOpFilteringFn filteringFn,
SmallVectorImpl<Operation *> &filteredOps,
SmallVectorImpl<LoopTilingAndDistributionInfo> &tiledLoops);
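//
// Usage sketch (illustrative): collect all linalg ops inside the
// tiled+distributed loop nest of `funcOp`, together with information about the
// surrounding loops. `funcOp` is a hypothetical dispatch entry function.
//
//   SmallVector<Operation *> filteredOps;
//   SmallVector<LoopTilingAndDistributionInfo> tiledLoops;
//   if (failed(getFilteredOps(
//           funcOp, [](Operation *op) { return isa<linalg::LinalgOp>(op); },
//           filteredOps, tiledLoops))) {
//     // The function body did not match the expected structure.
//   }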
/// Specialization of `getFilteredOps` for filtering `LinalgOp`s and
/// `LinalgExtOp`s.
LogicalResult getComputeOps(
FuncOp funcOp, SmallVectorImpl<Operation *> &computeOps,
SmallVectorImpl<LoopTilingAndDistributionInfo> &tiledLoops);
/// If the given `forOp` is a tiled and distributed loop, returns its tiling and
/// distribution information.
Optional<LoopTilingAndDistributionInfo> isTiledAndDistributedLoop(
scf::ForOp forOp);
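//
// Usage sketch (illustrative): recover the original (untiled) bounds of a
// distributed scf.for loop. `forOp` is a hypothetical loop produced by
// tiling + distribution as described above.
//
//   if (Optional<LoopTilingAndDistributionInfo> info =
//           isTiledAndDistributedLoop(forOp)) {
//     OpFoldResult untiledLb = info->untiledLowerBound;
//     OpFoldResult untiledUb = info->untiledUpperBound;
//     unsigned dim = info->processorDistributionDim;
//   }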
/// Collects information about loops matching tiled+distribute pattern.
SmallVector<LoopTilingAndDistributionInfo> getTiledAndDistributedLoopInfo(
FuncOp funcOp);

/// Creates an operation that copies `from` into `to` using a linalg op, and
/// returns the created operation.
Operation *createLinalgCopyOp(OpBuilder &b, Location loc, Value from, Value to);
} // namespace iree_compiler
} // namespace mlir
#endif // IREE_COMPILER_CODEGEN_UTILS_UTILS_H_