Adds LLVMGPUCheckIRBeforeLLVMConversionPass to check shared memory allocation before lowering to LLVM (#12743) Applies a static shared memory check before LLVM conversion. The current limit is set to 163 KiB (163 * 1024 = 166912 bytes) based on `sm_86` (the maximum we expect to see at any given point). Will follow up with target-specific checks based on the SM architecture. Similar to LLVMCPUCheckIRBeforeLLVMConversionPass, but that pass cannot be reused because the allocation types differ.

commit: add3b023ca8c7f08352d8f8e53e0ff7706e77b40 [log] [tgz]
author: Kojo Acquah <KoolJBlack@users.noreply.github.com> Wed Apr 12 09:42:15 2023 -0700
committer: GitHub <noreply@github.com> Wed Apr 12 09:42:15 2023 -0700
tree: 881815477c0442f3c9a4d9c129de5ddbbc04472a
parent: 2f6808e53500e5ed4c914b8b234b1e3fe298e653 [diff]
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel b/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel
index 57fd5ed..7b66eb5 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel

@@ -20,6 +20,7 @@
         "ConvertToROCDL.cpp",
         "ExtractAddressComputationGPUPass.cpp",
         "KernelConfig.cpp",
+        "LLVMGPUCheckIRBeforeLLVMConversion.cpp",
         "LLVMGPUDistribute.cpp",
         "LLVMGPULowerExecutableTarget.cpp",
         "LLVMGPUPackSharedMemoryAlloc.cpp",

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt
index 24f489d..3fc98ef 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt

@@ -24,6 +24,7 @@
     "ConvertToROCDL.cpp"
     "ExtractAddressComputationGPUPass.cpp"
     "KernelConfig.cpp"
+    "LLVMGPUCheckIRBeforeLLVMConversion.cpp"
     "LLVMGPUDistribute.cpp"
     "LLVMGPULowerExecutableTarget.cpp"
     "LLVMGPUPackSharedMemoryAlloc.cpp"

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCheckIRBeforeLLVMConversion.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCheckIRBeforeLLVMConversion.cpp
new file mode 100644
index 0000000..3bd3c21
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/LLVMGPUCheckIRBeforeLLVMConversion.cpp

@@ -0,0 +1,96 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Codegen/PassDetail.h"
+#include "iree/compiler/Codegen/Passes.h"
+#include "iree/compiler/Codegen/Utils/GPUUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+namespace iree_compiler {
+
+// Command-line override for the shared memory budget, in bytes, that
+// checkGPUAllocationSize enforces per function. Defaults to 163 * 1024.
+static llvm::cl::opt<int> clMaxGPUSharedMemSize(
+    "iree-llvmgpu-shared-mem-allocation-limit",
+    llvm::cl::init(163 * 1024),
+    llvm::cl::desc("maximum allowed shared memory size in bytes"));
+
+namespace {
+/// Pass that validates the IR before it is lowered to LLVM. Only
+/// `runOnOperation` is overridden; its definition appears further down in
+/// this file.
+struct LLVMGPUCheckIRBeforeLLVMConversionPass
+    : public LLVMGPUCheckIRBeforeLLVMConversionBase<
+          LLVMGPUCheckIRBeforeLLVMConversionPass> {
+  void runOnOperation() override;
+};
+}  // namespace
+
+/// Returns the statically known size of `shapedType` in *bits* (not bytes).
+///
+/// The result is the product of every static dimension and the element bit
+/// width. Dynamic dimensions are skipped (they contribute a factor of 1), so
+/// callers that care about them must reject dynamic shapes themselves. A
+/// shaped element type (e.g. a memref of vectors) is handled recursively.
+///
+/// The arithmetic is done in 64 bits: with a 32-bit `int`, a shape such as
+/// 65536x65536xi8 overflows (undefined behaviour) and could wrap to a small
+/// value that slips under the allocation limit.
+static int64_t shapedTypeStaticSize(ShapedType shapedType) {
+  int64_t sizeInBits = 1;
+  for (int64_t dimSize : shapedType.getShape()) {
+    if (ShapedType::isDynamic(dimSize)) continue;
+    sizeInBits *= dimSize;
+  }
+
+  Type elementType = shapedType.getElementType();
+  if (auto nestedType = elementType.dyn_cast<ShapedType>()) {
+    return sizeInBits * shapedTypeStaticSize(nestedType);
+  }
+  // `getIntOrFloatBitWidth` asserts on anything that is not an integer or
+  // float, which includes `index`. Count `index` with its internal storage
+  // width instead of crashing.
+  // NOTE(review): this assumes a 64-bit index, which is an upper bound for
+  // targets lowered with a narrower index type -- confirm against the
+  // index bitwidth used by the LLVM conversion.
+  if (elementType.isIndex()) {
+    return sizeInBits * IndexType::kInternalStorageBitWidth;
+  }
+  return sizeInBits * elementType.getIntOrFloatBitWidth();
+}
+
+/// Returns success if the cumulative size of all shared memory allocations
+/// (`memref.alloc` ops) in `funcOp` does not exceed the limit set by
+/// clMaxGPUSharedMemSize (in bytes).
+///
+/// Returns failure, after emitting a diagnostic, when:
+///   * a shared memory allocation has dynamic sizes (error on the alloc op);
+///   * the cumulative size is over the limit (error on the function).
+/// Allocations outside the shared memory address space are ignored, and a
+/// function without a body trivially succeeds.
+static LogicalResult checkGPUAllocationSize(func::FuncOp funcOp) {
+  if (funcOp.getBody().empty()) return success();
+
+  // Accumulate in 64 bits so that large shapes cannot wrap around and
+  // accidentally pass the check.
+  int64_t cumSize = 0;
+  WalkResult walkResult = funcOp.walk([&](memref::AllocOp allocOp) {
+    auto allocType = allocOp.getType().cast<MemRefType>();
+    if (!hasSharedMemoryAddressSpace(allocType)) {
+      return WalkResult::advance();
+    }
+    if (!allocOp.getDynamicSizes().empty()) {
+      allocOp.emitOpError("dynamic shared memory allocations unsupported.");
+      return WalkResult::interrupt();
+    }
+
+    // shapedTypeStaticSize reports bits; keep working in bits until the
+    // final conversion to bytes.
+    uint64_t allocSizeInBits = shapedTypeStaticSize(allocType);
+    // A zero alignment would divide by zero below; treat it like a missing
+    // alignment attribute.
+    auto alignment = allocOp.getAlignment();
+    if (alignment && *alignment > 0) {
+      uint64_t alignmentInBits = static_cast<uint64_t>(*alignment) * 8;
+      allocSizeInBits =
+          llvm::divideCeil(allocSizeInBits, alignmentInBits) * alignmentInBits;
+    }
+    // Round up to whole bytes so that a sub-byte remainder (e.g. a handful
+    // of i1 elements) is not silently dropped from the total.
+    cumSize += static_cast<int64_t>(
+        llvm::divideCeil(allocSizeInBits, static_cast<uint64_t>(8)));
+    return WalkResult::advance();
+  });
+  if (walkResult.wasInterrupted()) return failure();
+
+  if (cumSize > static_cast<int64_t>(clMaxGPUSharedMemSize.getValue())) {
+    return funcOp.emitOpError("exceeded GPU memory limit of ")
+           << clMaxGPUSharedMemSize.getValue() << " bytes for function. Got "
+           << cumSize << " bytes";
+  }
+  return success();
+}
+
+/// Runs the allocation check on each `func.func` directly nested in the
+/// module and fails the pass at the first function that does not pass it.
+void LLVMGPUCheckIRBeforeLLVMConversionPass::runOnOperation() {
+  for (func::FuncOp candidate : getOperation().getOps<func::FuncOp>()) {
+    if (succeeded(checkGPUAllocationSize(candidate))) continue;
+    signalPassFailure();
+    return;
+  }
+}
+
+/// Factory returning a new instance of the check pass.
+std::unique_ptr<OperationPass<ModuleOp>>
+createLLVMGPUCheckIRBeforeLLVMConversionPass() {
+  using PassT = LLVMGPUCheckIRBeforeLLVMConversionPass;
+  return std::make_unique<PassT>();
+}
+
+}  // namespace iree_compiler
+}  // namespace mlir

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
index f75cf6a..5566877 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp

@@ -504,6 +504,9 @@
   addLowerAndOptimzeAddressComputation(pm);
   // THIS NEEDS TO RUN BEFORE SCF ->CF OFF
 
+  // Run checks on shared memory usage.
+  pm.addPass(createLLVMGPUCheckIRBeforeLLVMConversionPass());
+
   // SCF -> STD
   pm.addNestedPass<func::FuncOp>(createConvertSCFToCFPass());
   pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Verifiers.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Verifiers.cpp
index 831ced7..cc52276 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Verifiers.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Verifiers.cpp

@@ -167,9 +167,6 @@
            << pipelineName;
   }
 
-  // Verify shared memory usage is within the limit.
-  // TODO(KoolJBlack): working on adding check shared memory usage.
-
   // Return success for SIMT/CUDA cores.
   if (pipeline.getValue() ==
       IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt)

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/BUILD.bazel
index fd3b1cf..1609fc8 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/BUILD.bazel

@@ -19,6 +19,7 @@
     srcs = enforce_glob(
         [
             "attention.mlir",
+            "check_ir_before_llvm_conversion.mlir",
             "conv_pipeline_test.mlir",
             "convert_to_nvvm.mlir",
             "convert_to_rocdl.mlir",

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/CMakeLists.txt
index e78caf7..5b16012 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/CMakeLists.txt

@@ -15,6 +15,7 @@
     lit
   SRCS
     "attention.mlir"
+    "check_ir_before_llvm_conversion.mlir"
     "conv_pipeline_test.mlir"
     "convert_to_nvvm.mlir"
     "convert_to_rocdl.mlir"

diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/check_ir_before_llvm_conversion.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/check_ir_before_llvm_conversion.mlir
new file mode 100644
index 0000000..73e9a68
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/check_ir_before_llvm_conversion.mlir

@@ -0,0 +1,9 @@
+// RUN: iree-opt --iree-llvmgpu-check-ir-before-llvm-conversion %s --verify-diagnostics -split-input-file
+
+// A single 274432-byte allocation in the workgroup address space is larger
+// than the default limit of 166912 bytes (163 * 1024), so the pass is expected
+// to report an error on the enclosing function.
+module {
+  // expected-error @+1 {{'func.func' op exceeded GPU memory limit of 166912 bytes for function. Got 274432 bytes}}
+  func.func @shared_mem_alloc(%arg0: index) {
+    %alloc = memref.alloc() : memref<274432xi8, #gpu.address_space<workgroup>>
+    return
+  }
+}

diff --git a/compiler/src/iree/compiler/Codegen/Passes.h b/compiler/src/iree/compiler/Codegen/Passes.h
index 49eb4b5..2e07a02 100644
--- a/compiler/src/iree/compiler/Codegen/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/Passes.h

@@ -601,6 +601,10 @@
 std::unique_ptr<OperationPass<func::FuncOp>>
 createLLVMGPUPackSharedMemoryAlloc();
 
+/// Checks GPU backend specific IR constraints such as shared memory limits.
+/// The pass operates on a ModuleOp and fails when a function's shared memory
+/// allocations exceed `--iree-llvmgpu-shared-mem-allocation-limit` (bytes).
+std::unique_ptr<OperationPass<ModuleOp>>
+createLLVMGPUCheckIRBeforeLLVMConversionPass();
+
 //------------------------------------------------------------------------------
 // SPIR-V Passes
 //------------------------------------------------------------------------------

diff --git a/compiler/src/iree/compiler/Codegen/Passes.td b/compiler/src/iree/compiler/Codegen/Passes.td
index 3fa680c..2b7e11b 100644
--- a/compiler/src/iree/compiler/Codegen/Passes.td
+++ b/compiler/src/iree/compiler/Codegen/Passes.td

@@ -733,6 +733,12 @@
   let constructor = "mlir::iree_compiler::createLLVMGPUTensorPadPass()";
 }
 
+// Module-level check pass, registered under the flag
+// `iree-llvmgpu-check-ir-before-llvm-conversion`.
+def LLVMGPUCheckIRBeforeLLVMConversion :
+    Pass<"iree-llvmgpu-check-ir-before-llvm-conversion", "ModuleOp"> {
+  let summary = "Checks GPU backend specific IR constraints such as shared memory limits";
+  let constructor = "mlir::iree_compiler::createLLVMGPUCheckIRBeforeLLVMConversionPass()";
+}
+
 //------------------------------------------------------------------------------
 // SPIR-V
 //------------------------------------------------------------------------------
commit	add3b023ca8c7f08352d8f8e53e0ff7706e77b40	[log] [tgz]
author	Kojo Acquah <KoolJBlack@users.noreply.github.com>	Wed Apr 12 09:42:15 2023 -0700
committer	GitHub <noreply@github.com>	Wed Apr 12 09:42:15 2023 -0700
tree	881815477c0442f3c9a4d9c129de5ddbbc04472a
parent	2f6808e53500e5ed4c914b8b234b1e3fe298e653 [diff]