[NFC] Add proper debug messages to LLVMGPUUtils to be better informed… (#12437)
… of all the possible failure cases of createAsyncGroups
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp
index 9c41b8f..4e34f87 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.cpp
@@ -7,20 +7,31 @@
#include "iree/compiler/Codegen/LLVMGPU/Utils/LLVMGPUUtils.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Debug.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Visitors.h"
+using namespace mlir;
+
+#define DEBUG_TYPE "llvm-gpu-utils"
+#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
+
namespace mlir {
namespace iree_compiler {
void createAsyncGroups(func::FuncOp funcOp, bool useMMASync) {
+ LLVM_DEBUG(DBGS() << "Start asyncGroups: useMMASync=" << useMMASync << "\n");
llvm::SmallSetVector<vector::TransferWriteOp, 16> copyToSharedMem;
// Look for all the copy that can be converted to async copy ops.
funcOp.walk([&](vector::TransferWriteOp writeOp) {
+ LLVM_DEBUG(DBGS() << "--candidate writeOp: " << writeOp << "\n");
if (!writeOp.getPermutationMap().isMinorIdentity() ||
writeOp.getVectorType().getRank() != 1 || !writeOp.isDimInBounds(0)) {
+ LLVM_DEBUG(
+ DBGS()
+ << "----writeOp is not an inbounds 1-D minor identity -> Skip \n");
return WalkResult::advance();
}
auto addressSpaceAttr = writeOp.getShapedType()
@@ -29,17 +40,34 @@
.dyn_cast_or_null<gpu::AddressSpaceAttr>();
if (!addressSpaceAttr || addressSpaceAttr.getValue() !=
gpu::GPUDialect::getWorkgroupAddressSpace()) {
+ LLVM_DEBUG(DBGS() << "----address space is not workgroup -> Skip \n");
return WalkResult::advance();
}
- auto read = writeOp.getVector().getDefiningOp<vector::TransferReadOp>();
- if (!read || read.getVectorType() != writeOp.getVectorType() ||
- !read.isDimInBounds(0) || !read.getPermutationMap().isMinorIdentity())
+ auto readOp = writeOp.getVector().getDefiningOp<vector::TransferReadOp>();
+ if (!readOp) {
+ LLVM_DEBUG(DBGS() << "----no readOp defining the writeOp -> Skip \n");
return WalkResult::advance();
- if (!((read.getVectorType().getElementType().isF32() &&
- read.getVectorType().getNumElements() <= 4) ||
- (read.getVectorType().getElementType().isF16() &&
- read.getVectorType().getNumElements() <= 8)))
+ }
+
+ LLVM_DEBUG(DBGS() << "--candidate readOp: " << readOp << " \n");
+ if (readOp.getVectorType() != writeOp.getVectorType() ||
+ !readOp.isDimInBounds(0) ||
+ !readOp.getPermutationMap().isMinorIdentity()) {
+ LLVM_DEBUG(
+ DBGS()
+ << "----readOp is not an in-bounds 1-D minor identity -> Skip \n");
return WalkResult::advance();
+ }
+ if (!((readOp.getVectorType().getElementType().isF32() &&
+ readOp.getVectorType().getNumElements() <= 4) ||
+ (readOp.getVectorType().getElementType().isF16() &&
+ readOp.getVectorType().getNumElements() <= 8))) {
+ LLVM_DEBUG(
+ DBGS() << "----readOp is not (<=4)xf32 or (<=8)xf16 -> Skip \n");
+ return WalkResult::advance();
+ }
+
+ LLVM_DEBUG(DBGS() << "--writeOp can be made async -> SUCCESS\n");
copyToSharedMem.insert(writeOp);
return WalkResult::advance();
});