// blob: ef51a6a9a8837acc82a0e08b7b069fc9c44c1282 (git web-viewer header; kept as a comment so the file stays valid TableGen)
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#ifndef IREE_CODEGEN_LLVMGPU_PASSES
#define IREE_CODEGEN_LLVMGPU_PASSES
include "mlir/Pass/PassBase.td"
//------------------------------------------------------------------------------
// LLVMGPU Passes (keep alphabetical)
//------------------------------------------------------------------------------
// Function-scoped pass (InterfacePass over FunctionOpInterface) that rewrites
// chains of matmul_transpose_b ops by swapping operands and transposing the
// accumulator/result, as illustrated in the description below.
def AMDGPUPrepareForChainedMatmulPass :
InterfacePass<"iree-amdgpu-prepare-chained-matmul", "mlir::FunctionOpInterface"> {
let summary = "Pass to swap operands and transpose accumulator and result";
let description = [{
Given a chain of matmuls with some or no operations
in between, like
d = matmul_transpose_b(a, b) + c
...
e = matmul_transpose_b(d, f) + g
this pattern transforms the above IR to
c.t = transpose c
d = matmul_transpose_b(b, a) + c.t
d.t = transpose d
...
g.t = transpose g
e = matmul_transpose_b(f, d.t) + g.t
e.t = transpose e
On CDNA architectures, where the layouts of the RHS and result
are the same and transposed from the LHS layout, this type
of transformation can avoid trips to shared memory/shuffle instructions
on operators like Flash Attention.
}];
}
// TODO: Bring the argument in line with the names used elsewhere.
// Module-level pass: final dialect conversion to LLVM + NVVM for CUDA targets.
def ConvertToNVVMPass :
Pass<"iree-convert-to-nvvm", "ModuleOp"> {
let summary = "Perform final conversion from builtin/GPU/HAL/standard dialect to LLVM "
"and NVVM dialects";
}
// TODO: Bring the argument in line with the names used elsewhere.
// Module-level pass: final dialect conversion to LLVM + ROCDL for AMD targets.
// Mirrors ConvertToNVVMPass above but emits ROCDL.
def ConvertToROCDLPass :
Pass<"iree-convert-to-rocdl", "ModuleOp"> {
let summary = "Perform final conversion from builtin/GPU/HAL/standard dialect to LLVM "
"and ROCDL dialects";
}
// NOTE(review): the CLI argument lacks the "iree-" prefix every sibling pass
// uses — presumably intentional to mirror upstream's
// `extract-address-computation`, but worth confirming.
def ExtractAddressComputationGPUPass: Pass<"extract-address-computation-gpu"> {
let summary = "Extract address computations from memory accesses";
let description = [{
This pass is similar to `extract-address-computation` except it also
supports memory accesses that are specific to GPUs.
}];
// Dialects whose ops this pass may create; needed for pass registration.
let dependentDialects = [
"memref::MemRefDialect", "nvgpu::NVGPUDialect", "affine::AffineDialect"
];
}
// Module-level pass (needs to see both call sites and callees) that casts
// pointer address spaces to generic across CallOp/FuncOp boundaries.
def LLVMGPUCastAddressSpaceFunctionPass :
Pass<"iree-llvmgpu-cast-address-space-function", "ModuleOp"> {
let summary = "Cast address space to generic in CallOp and FuncOp";
}
// Function-scoped pass that extends/truncates vector.contract element types so
// they match what the target's MMA intrinsics accept.
def LLVMGPUCastTypeToFitMMAPass : InterfacePass<"iree-llvmgpu-cast-type-to-fit-mma",
"mlir::FunctionOpInterface"> {
let summary = "Perform type extension/truncation over vector.contract types "
"to target GPU MMA intrinsics";
}
// Assigns layouts at the tensor level, ahead of vector distribution.
// Companion to LLVMGPUConfigureVectorLayoutsPass below, which works on vectors.
def LLVMGPUConfigureTensorLayoutsPass :
InterfacePass<"iree-llvmgpu-configure-tensor-layouts", "mlir::FunctionOpInterface"> {
let summary = "Pass to set layouts on tensors for later vector distribution";
}
// Assigns layouts at the vector level for vector distribution.
// Companion to LLVMGPUConfigureTensorLayoutsPass above, which works on tensors.
def LLVMGPUConfigureVectorLayoutsPass :
InterfacePass<"iree-llvmgpu-configure-vector-layouts", "mlir::FunctionOpInterface"> {
let summary = "Pass to set layouts for vector distribution";
}
// Executes the pipeline chosen by LLVMGPUSelectLoweringStrategyPass (defined
// later in this file) on each function.
def LLVMGPULowerExecutableTargetPass :
InterfacePass<"iree-llvmgpu-lower-executable-target", "mlir::FunctionOpInterface"> {
let summary = "Perform lowering of executable target using one of the IREE::HAL::DispatchLoweringPassPipeline";
}
// Function-scoped pass that packs shared-memory allocations (reusing the same
// storage for allocations with disjoint lifetimes) to reduce the total shared
// memory footprint of a dispatch.
def LLVMGPUPackSharedMemoryAllocPass :
InterfacePass<"iree-llvmgpu-pack-shared-memory-alloc", "mlir::FunctionOpInterface"> {
// Fixed ungrammatical summary text ("Pass pack shared memory allocation");
// this string surfaces in generated pass documentation and --help output.
let summary = "Pass to pack shared memory allocations in order to reduce memory usage.";
}
// Software-pipelines scf.for loops: rotates the loop so shared-memory loads for
// the next iteration are issued one iteration ahead (prefetch distance 1).
def LLVMGPUPrefetchSharedMemoryPass :
InterfacePass<"iree-llvmgpu-prefetch-shared-memory", "mlir::FunctionOpInterface"> {
let summary = "Rotate scf.for loops to prefetch shared memory with distance 1";
}
// Function-scoped pass that pads contraction ops so their shapes fit the
// target's MMA intrinsic shapes. The `target-dimensions` option chooses which
// dimension group (parallel vs. reduction) gets padded.
def LLVMGPUPromoteMatmulToFitMMAPass :
InterfacePass<"iree-llvmgpu-promote-matmul-to-fit-mma", "mlir::FunctionOpInterface"> {
let summary = "Pass to promote contraction ops to fit mma shapes";
let options = [
Option<"targetDimensions", "target-dimensions", "mlir::iree_compiler::LLVMGPUMatmulPadOption",
/*default=*/"mlir::iree_compiler::LLVMGPUMatmulPadOption::ParallelDims",
// Fixed copy-pasted option description: the old text referenced
// multi_reduction lowering, which this pass does not perform — the enum
// values below select which contraction dimensions are padded.
"Select which dimensions of contraction ops to pad.",
[{::llvm::cl::values(
clEnumValN(mlir::iree_compiler::LLVMGPUMatmulPadOption::ParallelDims,
"parallel",
"Pad all the parallel dims for contraction ops."),
clEnumValN(mlir::iree_compiler::LLVMGPUMatmulPadOption::ReductionDims,
"reduction",
"Pad all the reduction dims for contraction ops.")
)}]>
];
}
// Module-level pass that picks the dispatch lowering pipeline; the chosen
// pipeline is then executed by LLVMGPULowerExecutableTargetPass (defined
// earlier in this file).
def LLVMGPUSelectLoweringStrategyPass :
Pass<"iree-llvmgpu-select-lowering-strategy", "ModuleOp"> {
let summary = "Select a IREE::HAL::DispatchLoweringPassPipeline for lowering the target variant";
}
// Vectorizes linalg ops into vector ops shaped so they can later be lowered to
// GPU MMA (tensor-core) operations.
def LLVMGPUTensorCoreVectorizationPass :
InterfacePass<"iree-llvmgpu-tensorcore-vectorization", "mlir::FunctionOpInterface"> {
let summary = "Pass to convert linalg into Vector and transform it to a form that can be lowered to GPU MMA ops";
}
// Pads tensors with dynamic dimensions out to static dimensions.
def LLVMGPUTensorPadPass :
InterfacePass<"iree-llvmgpu-tensor-pad", "mlir::FunctionOpInterface"> {
let summary = "Pass to pad out tensors up to static dimensions.";
}
// Tiles linalg ops and distributes the tiles across threads within a
// workgroup.
def LLVMGPUTileAndDistributePass :
InterfacePass<"iree-llvmgpu-tile-and-distribute", "mlir::FunctionOpInterface"> {
let summary = "Pass to tile and distribute linalg ops within a workgroup.";
}
// Distributes vectorized code using the layouts set by the
// configure-layouts passes defined earlier in this file.
def LLVMGPUVectorDistributePass :
InterfacePass<"iree-llvmgpu-vector-distribute", "mlir::FunctionOpInterface"> {
let summary = "Pass to distribute vectorized functions.";
}
// Lowers high-level vector ops into forms the LLVM conversion (ConvertToNVVM /
// ConvertToROCDL above) can handle.
def LLVMGPUVectorLoweringPass :
InterfacePass<"iree-llvmgpu-vector-lowering", "mlir::FunctionOpInterface"> {
let summary = "Pass to lower Vector ops before conversion to LLVM.";
}
// Converts vector dialect ops to GPU dialect ops.
def LLVMGPUVectorToGPUPass :
InterfacePass<"iree-llvmgpu-vector-to-gpu", "mlir::FunctionOpInterface"> {
let summary = "Pass to convert vector to gpu.";
}
//------------------------------------------------------------------------------
// Test Passes
//------------------------------------------------------------------------------
// Test-only pass exercising legalization patterns.
// NOTE(review): def name says "ScalarizeMathOp" but the CLI argument is
// "legalize-ops" — presumably the pass outgrew its original name; confirm
// before renaming either side.
def TestLLVMGPUScalarizeMathOpPass :
Pass<"iree-test-llvmgpu-legalize-ops", "ModuleOp"> {
let summary = "Test pass for several legalization patterns.";
}
#endif // IREE_CODEGEN_LLVMGPU_PASSES