iree/compiler/Codegen/SPIRV/AMDConfig.cpp - 3p/openxla/iree - Git at Google

 // Copyright 2022 The IREE Authors
 //
 // Licensed under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //===- AMDConfig.cpp - AMD CodeGen Configurations -------------------------===//
 //
 // This file contains CodeGen configurations for AMD GPUs.
 //
 //===----------------------------------------------------------------------===//

 #include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
 #include "iree/compiler/Codegen/Utils/Utils.h"
 #include "llvm/Support/Debug.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/IR/BuiltinOps.h"

 #define DEBUG_TYPE "iree-spirv-amd-config"

 namespace mlir {
 namespace iree_compiler {
 namespace detail {

 // RDNA architecture:
 // https://gpuopen.com/wp-content/uploads/2019/08/RDNA_Architecture_public.pdf
 //
 // Workgroup Processor (WGP) is the block for workgroups in RDNA; it has its own
 // instruction/constant cache, L0 cache x2, Local Data Share (LDS, a.k.a. shared
 // memory), SALU x4, SIMD32 x4.
 //
 // * 1024 registers per SIMD32
 // * 128KB LDS per WGP
 // * Max 20 waves per SIMD32
 // * Max 64KB LDS per workgroup

 // Selects the AMD-specific CodeGen configuration for `rootOp`.
 //
 // The subgroup size is read from the resource limits of `targetEnv`. When
 // `rootOp` is a linalg::MatmulOp, the result of setMatmulOpConfig() is
 // returned, called with:
 //   * workgroup X/Y of {subgroupSize / 2, 8},
 //   * thread M/N/K of {8, 4, 32},
 //   * useWorkgroupMemory = true.
 // Any other op is left untouched and success() is returned.
 LogicalResult setAMDCodeGenConfig(const spirv::TargetEnv &targetEnv,
                                   Operation *rootOp) {
   int64_t subgroupSize =
       targetEnv.getResourceLimits().subgroup_size().getInt();

   // Only matmul has an AMD-specific configuration here; bail out early for
   // everything else.
   auto matmulOp = dyn_cast<linalg::MatmulOp>(rootOp);
   if (!matmulOp) return success();

   std::array<int64_t, 2> workgroupSizeXY = {subgroupSize / 2, 8};
   std::array<int64_t, 3> threadTileMNK = {8, 4, 32};
   return setMatmulOpConfig(matmulOp, subgroupSize, workgroupSizeXY,
                            threadTileMNK, /*useWorkgroupMemory=*/true);
 }

 }  // namespace detail
 }  // namespace iree_compiler
 }  // namespace mlir
	// Copyright 2022 The IREE Authors
	//
	// Licensed under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

	//===- AMDConfig.cpp - AMD CodeGen Configurations -------------------------===//
	//
	// This file contains CodeGen configurations for AMD GPUs.
	//
	//===----------------------------------------------------------------------===//

	#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
	#include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
	#include "iree/compiler/Codegen/Utils/Utils.h"
	#include "llvm/Support/Debug.h"
	#include "mlir/Dialect/Linalg/IR/Linalg.h"
	#include "mlir/IR/BuiltinOps.h"

	#define DEBUG_TYPE "iree-spirv-amd-config"

	namespace mlir {
	namespace iree_compiler {
	namespace detail {

	// RDNA architecture:
	// https://gpuopen.com/wp-content/uploads/2019/08/RDNA_Architecture_public.pdf
	//
	// Workgroup Processor (WGP) is the block for workgroups in RDNA; it has its own
	// instruction/constant cache, L0 cache x2, Local Data Share (LDS, a.k.a. shared
	// memory), SALU x4, SIMD32 x4.
	//
	// * 1024 registers per SIMD32
	// * 128KB LDS per WGP
	// * Max 20 waves per SIMD32
	// * Max 64KB LDS per workgroup

	// Sets the AMD-specific CodeGen configuration for `rootOp`.
	//
	// Reads the subgroup size from the resource limits of `targetEnv`. If
	// `rootOp` is a linalg::MatmulOp, forwards to setMatmulOpConfig() with a
	// workgroup X/Y of {subgroupSize / 2, 8}, a thread M/N/K of {8, 4, 32}, and
	// workgroup memory enabled, and returns its result. For every other op no
	// configuration is set and success() is returned.
	LogicalResult setAMDCodeGenConfig(const spirv::TargetEnv &targetEnv,
	                                  Operation *rootOp) {
	  int64_t subgroupSize = targetEnv.getResourceLimits().subgroup_size().getInt();
	  if (auto matmulOp = dyn_cast<linalg::MatmulOp>(rootOp)) {
	    std::array<int64_t, 2> workgroupXY = {subgroupSize / 2, 8};
	    std::array<int64_t, 3> threadMNK = {8, 4, 32};
	    // The argument-name comment must use /*...*/ delimiters; without the
	    // asterisks the call expression does not parse.
	    return setMatmulOpConfig(matmulOp, subgroupSize, workgroupXY, threadMNK,
	                             /*useWorkgroupMemory=*/true);
	  }
	  return success();
	}

	} // namespace detail
	} // namespace iree_compiler
	} // namespace mlir