// iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp - 3p/openxla/iree - Git at Google

 // Copyright 2021 The IREE Authors
 //
 // Licensed under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //===- NVIDIAConfig.cpp - NVIDIA CodeGen Configurations -------------------===//
 //
 // This file contains CodeGen configurations for NVIDIA GPUs.
 //
 //===----------------------------------------------------------------------===//

 #include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
 #include "iree/compiler/Codegen/Utils/Utils.h"
 #include "llvm/Support/Debug.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/IR/BuiltinOps.h"

 #define DEBUG_TYPE "iree-spirv-nvidia-config"

 namespace mlir {
 namespace iree_compiler {
 namespace detail {

 /// The (M, N, K) sizes of one cooperative matrix multiplication shape, as
 /// read from the target's cooperative matrix properties.
 struct CooperativeMatrixSize {
   /// Size along the M dimension.
   int64_t m;
   /// Size along the N dimension.
   int64_t n;
   /// Size along the K dimension.
   int64_t k;
 };

 /// Scans the cooperative matrix properties listed in `resourceLimits` for a
 /// subgroup-scoped entry whose A/B element types equal `lhsType`/`rhsType`,
 /// whose C and result element types both equal `resultType`, and whose
 /// (M, N, K) sizes evenly divide the given `m`, `n`, and `k`.
 ///
 /// Returns the sizes of the first such entry, or llvm::None if none qualifies.
 static Optional<CooperativeMatrixSize> getCooperativeMatrixSize(
     spirv::ResourceLimitsAttr resourceLimits, Type lhsType, Type rhsType,
     Type resultType, int64_t m, int64_t n, int64_t k) {
   auto candidates = resourceLimits.cooperative_matrix_properties_nv()
                         .getAsRange<spirv::CooperativeMatrixPropertiesNVAttr>();
   for (auto candidate : candidates) {
     // Skip entries whose element types or scope differ from the request.
     bool isCompatible =
         candidate.a_type().getValue() == lhsType &&
         candidate.b_type().getValue() == rhsType &&
         candidate.c_type().getValue() == resultType &&
         candidate.result_type().getValue() == resultType &&
         candidate.scope().getValue() == spirv::Scope::Subgroup;
     if (!isCompatible) continue;

     int64_t nativeM = candidate.m_size().getValue().getZExtValue();
     int64_t nativeN = candidate.n_size().getValue().getZExtValue();
     int64_t nativeK = candidate.k_size().getValue().getZExtValue();

     // Only accept sizes that tile the whole problem without a remainder.
     bool dividesEvenly =
         m % nativeM == 0 && n % nativeN == 0 && k % nativeK == 0;
     if (dividesEvenly) {
       return CooperativeMatrixSize{nativeM, nativeN, nativeK};
     }
   }
   return llvm::None;
 }

 /// Tries to configure `op` for the cooperative matrix vectorization pipeline.
 ///
 /// Returns success() without attaching any configuration when the target does
 /// not allow the NV cooperative matrix capability and extension, when the
 /// untiled operand shapes are not static rank-2 shapes, or when no supported
 /// cooperative matrix size evenly divides the problem. Otherwise returns the
 /// result of setOpConfigAndEntryPointFnTranslation().
 static LogicalResult setOpConfig(const spirv::TargetEnv &targetEnv,
                                  linalg::MatmulOp op) {
   // This configuration is only for cooperative matrix.
   if (!targetEnv.allows(spirv::Capability::CooperativeMatrixNV) ||
       !targetEnv.allows(spirv::Extension::SPV_NV_cooperative_matrix)) {
     return success();
   }

   Value lhs = op.inputs()[0];
   Value rhs = op.inputs()[1];
   Value init = op.outputs()[0];

   ArrayRef<int64_t> lhsShape = getUntiledShape(lhs);
   ArrayRef<int64_t> rhsShape = getUntiledShape(rhs);
   // The (M, N, K) extraction below indexes dimensions 0 and 1 of these shapes.
   // getUntiledShape() is defined elsewhere, so bail out instead of indexing
   // out of bounds if it hands back anything other than a rank-2 shape.
   if (lhsShape.size() != 2 || rhsShape.size() != 2) return success();
   if (llvm::any_of(lhsShape, ShapedType::isDynamic)) return success();
   if (llvm::any_of(rhsShape, ShapedType::isDynamic)) return success();

   // TODO: Cooperative matrix support is fairly restricted. We can only have
   // a curated list of fused element wise ops as defined in the extension
   // SPV_NV_cooperative_matrix. Check that once we move bufferization after
   // vectorization.

   auto getElementType = [](Value v) {
     return v.getType().cast<ShapedType>().getElementType();
   };

   // M = lhs dim 0, N = rhs dim 1, K = lhs dim 1.
   auto resourceLimits = targetEnv.getResourceLimits();
   auto coopMatSize = getCooperativeMatrixSize(
       resourceLimits, getElementType(lhs), getElementType(rhs),
       getElementType(init), lhsShape[0], rhsShape[1], lhsShape[1]);
   if (!coopMatSize) return success();

   auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::
       SPIRVVectorizeToCooperativeOps;

   // For now only support one subgroup per workgroup because in the above
   // configuration deduction step we only consider whether the input workload is
   // perfectly divisible by some native cooperative matrix size.
   //
   // TODO: Use some heuristics to deduce how many subgroups should be used and
   // the tile sizes for each subgroup, considering the input workload size and
   // native cooperative matrix size choices.
   int64_t subgroupSize = resourceLimits.subgroup_size().getInt();
   std::array<int64_t, 3> workgroupSize = {subgroupSize, 1, 1};

   TileSizesListType tileSizes;
   // Again because we only consider whether the input workload is perfectly
   // divisible by some native cooperative matrix size, not some multiples of it,
   // need to make sure the subgroup tile sizes are the same as the workgroup
   // one.
   tileSizes.push_back({coopMatSize->m, coopMatSize->n, coopMatSize->k});
   tileSizes.push_back({coopMatSize->m, coopMatSize->n, coopMatSize->k});

   return setOpConfigAndEntryPointFnTranslation(op->getParentOfType<FuncOp>(),
                                                op, tileSizes, {}, pipeline,
                                                workgroupSize);
 }

 /// Applies the NVIDIA-specific CodeGen configuration to `rootOp`.
 ///
 /// Only linalg.matmul ops are handled, by delegating to setOpConfig(); every
 /// other op is left untouched and success() is returned.
 LogicalResult setNVIDIACodeGenConfig(const spirv::TargetEnv &targetEnv,
                                      Operation *rootOp) {
   auto matmulOp = dyn_cast<linalg::MatmulOp>(rootOp);
   if (!matmulOp) return success();
   return setOpConfig(targetEnv, matmulOp);
 }

 }  // namespace detail
 }  // namespace iree_compiler
 }  // namespace mlir
	// Copyright 2021 The IREE Authors
	//
	// Licensed under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

	//===- NVIDIAConfig.cpp - NVIDIA CodeGen Configurations -------------------===//
	//
	// This file contains CodeGen configurations for NVIDIA GPUs.
	//
	//===----------------------------------------------------------------------===//

	#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
	#include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
	#include "iree/compiler/Codegen/Utils/Utils.h"
	#include "llvm/Support/Debug.h"
	#include "mlir/Dialect/Linalg/IR/Linalg.h"
	#include "mlir/IR/BuiltinOps.h"

	#define DEBUG_TYPE "iree-spirv-nvidia-config"

	namespace mlir {
	namespace iree_compiler {
	namespace detail {

	/// The (M, N, K) sizes of one cooperative matrix multiplication shape, as
	/// read from the target's cooperative matrix properties.
	struct CooperativeMatrixSize {
	  /// Size along the M dimension.
	  int64_t m;
	  /// Size along the N dimension.
	  int64_t n;
	  /// Size along the K dimension.
	  int64_t k;
	};

	/// Scans the cooperative matrix properties listed in `resourceLimits` for a
	/// subgroup-scoped entry whose A/B element types equal `lhsType`/`rhsType`,
	/// whose C and result element types both equal `resultType`, and whose
	/// (M, N, K) sizes evenly divide the given `m`, `n`, and `k`.
	///
	/// Returns the sizes of the first such entry, or llvm::None if none qualifies.
	static Optional<CooperativeMatrixSize> getCooperativeMatrixSize(
	    spirv::ResourceLimitsAttr resourceLimits, Type lhsType, Type rhsType,
	    Type resultType, int64_t m, int64_t n, int64_t k) {
	  auto candidates = resourceLimits.cooperative_matrix_properties_nv()
	                        .getAsRange<spirv::CooperativeMatrixPropertiesNVAttr>();
	  for (auto candidate : candidates) {
	    // Skip entries whose element types or scope differ from the request.
	    bool isCompatible =
	        candidate.a_type().getValue() == lhsType &&
	        candidate.b_type().getValue() == rhsType &&
	        candidate.c_type().getValue() == resultType &&
	        candidate.result_type().getValue() == resultType &&
	        candidate.scope().getValue() == spirv::Scope::Subgroup;
	    if (!isCompatible) continue;

	    int64_t nativeM = candidate.m_size().getValue().getZExtValue();
	    int64_t nativeN = candidate.n_size().getValue().getZExtValue();
	    int64_t nativeK = candidate.k_size().getValue().getZExtValue();

	    // Only accept sizes that tile the whole problem without a remainder.
	    bool dividesEvenly =
	        m % nativeM == 0 && n % nativeN == 0 && k % nativeK == 0;
	    if (dividesEvenly) {
	      return CooperativeMatrixSize{nativeM, nativeN, nativeK};
	    }
	  }
	  return llvm::None;
	}

	/// Tries to configure `op` for the cooperative matrix vectorization pipeline.
	///
	/// Returns success() without attaching any configuration when the target does
	/// not allow the NV cooperative matrix capability and extension, when the
	/// untiled operand shapes are not static rank-2 shapes, or when no supported
	/// cooperative matrix size evenly divides the problem. Otherwise returns the
	/// result of setOpConfigAndEntryPointFnTranslation().
	static LogicalResult setOpConfig(const spirv::TargetEnv &targetEnv,
	                                 linalg::MatmulOp op) {
	  // This configuration is only for cooperative matrix.
	  if (!targetEnv.allows(spirv::Capability::CooperativeMatrixNV) ||
	      !targetEnv.allows(spirv::Extension::SPV_NV_cooperative_matrix)) {
	    return success();
	  }

	  Value lhs = op.inputs()[0];
	  Value rhs = op.inputs()[1];
	  Value init = op.outputs()[0];

	  ArrayRef<int64_t> lhsShape = getUntiledShape(lhs);
	  ArrayRef<int64_t> rhsShape = getUntiledShape(rhs);
	  // The (M, N, K) extraction below indexes dimensions 0 and 1 of these shapes.
	  // getUntiledShape() is defined elsewhere, so bail out instead of indexing
	  // out of bounds if it hands back anything other than a rank-2 shape.
	  if (lhsShape.size() != 2 || rhsShape.size() != 2) return success();
	  if (llvm::any_of(lhsShape, ShapedType::isDynamic)) return success();
	  if (llvm::any_of(rhsShape, ShapedType::isDynamic)) return success();

	  // TODO: Cooperative matrix support is fairly restricted. We can only have
	  // a curated list of fused element wise ops as defined in the extension
	  // SPV_NV_cooperative_matrix. Check that once we move bufferization after
	  // vectorization.

	  auto getElementType = [](Value v) {
	    return v.getType().cast<ShapedType>().getElementType();
	  };

	  // M = lhs dim 0, N = rhs dim 1, K = lhs dim 1.
	  auto resourceLimits = targetEnv.getResourceLimits();
	  auto coopMatSize = getCooperativeMatrixSize(
	      resourceLimits, getElementType(lhs), getElementType(rhs),
	      getElementType(init), lhsShape[0], rhsShape[1], lhsShape[1]);
	  if (!coopMatSize) return success();

	  auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::
	      SPIRVVectorizeToCooperativeOps;

	  // For now only support one subgroup per workgroup because in the above
	  // configuration deduction step we only consider whether the input workload is
	  // perfectly divisible by some native cooperative matrix size.
	  //
	  // TODO: Use some heuristics to deduce how many subgroups should be used and
	  // the tile sizes for each subgroup, considering the input workload size and
	  // native cooperative matrix size choices.
	  int64_t subgroupSize = resourceLimits.subgroup_size().getInt();
	  std::array<int64_t, 3> workgroupSize = {subgroupSize, 1, 1};

	  TileSizesListType tileSizes;
	  // Again because we only consider whether the input workload is perfectly
	  // divisible by some native cooperative matrix size, not some multiples of it,
	  // need to make sure the subgroup tile sizes are the same as the workgroup
	  // one.
	  tileSizes.push_back({coopMatSize->m, coopMatSize->n, coopMatSize->k});
	  tileSizes.push_back({coopMatSize->m, coopMatSize->n, coopMatSize->k});

	  return setOpConfigAndEntryPointFnTranslation(op->getParentOfType<FuncOp>(),
	                                               op, tileSizes, {}, pipeline,
	                                               workgroupSize);
	}

	/// Applies the NVIDIA-specific CodeGen configuration to `rootOp`.
	///
	/// Only linalg.matmul ops are handled, by delegating to setOpConfig(); every
	/// other op is left untouched and success() is returned.
	LogicalResult setNVIDIACodeGenConfig(const spirv::TargetEnv &targetEnv,
	                                     Operation *rootOp) {
	  auto matmulOp = dyn_cast<linalg::MatmulOp>(rootOp);
	  if (!matmulOp) return success();
	  return setOpConfig(targetEnv, matmulOp);
	}

	} // namespace detail
	} // namespace iree_compiler
	} // namespace mlir