| // Copyright 2020 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| //===- GPUKernelOutlining.cpp - Generate GPU device-side code -------------===// |
| // |
// Implements a pass that converts a gpu.launch operation into device-side
// code. A separate pass is used because the pass from core always puts the
// gpu.module at the module scope and does not allow choosing where it is
// placed. Since we don't need the host-side aspects of the GPU dialect, the
// pass here only cares about the device side.
| // |
| //===----------------------------------------------------------------------===// |
| #include "iree/compiler/Translation/CodegenUtils/CodegenUtils.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "mlir/Dialect/GPU/GPUDialect.h" |
| #include "mlir/Dialect/GPU/Utils.h" |
| #include "mlir/Dialect/SPIRV/TargetAndABI.h" |
| #include "mlir/Dialect/StandardOps/IR/Ops.h" |
| #include "mlir/IR/BlockAndValueMapping.h" |
| #include "mlir/IR/Function.h" |
| #include "mlir/IR/PatternMatch.h" |
| #include "mlir/IR/Value.h" |
| #include "mlir/Pass/Pass.h" |
| #include "mlir/Transforms/RegionUtils.h" |
| |
| namespace mlir { |
| namespace iree_compiler { |
| |
| namespace { |
| // Pattern to get the gpu.GPUModuleOp from the gpu.LaunchOp. |
| struct ConvertToGPUFuncOp : public OpRewritePattern<gpu::LaunchOp> { |
| using OpRewritePattern<gpu::LaunchOp>::OpRewritePattern; |
| LogicalResult matchAndRewrite(gpu::LaunchOp launchOp, |
| PatternRewriter &rewriter) const final; |
| }; |
| |
| // Pass to outline the region of the gpu.LaunchOp. |
| class IREEGpuKernelOutliningPass |
| : public ModulePass<IREEGpuKernelOutliningPass> { |
| public: |
| void runOnModule() override; |
| }; |
| } // namespace |
| |
| LogicalResult ConvertToGPUFuncOp::matchAndRewrite( |
| gpu::LaunchOp launchOp, PatternRewriter &rewriter) const { |
| OpBuilder::InsertionGuard guard(rewriter); |
| auto funcOp = launchOp.getParentOfType<FuncOp>(); |
| SmallVector<int32_t, 3> workGroupSize; |
| if (failed(getWorkGroupSize(funcOp, workGroupSize))) return failure(); |
| |
| if (failed(sinkOperationsIntoLaunchOp(launchOp))) return failure(); |
| |
| // The arguments of the funcOp must be the arguments of the launchOp, in the |
| // same order. |
| SmallVector<Value, 4> arguments(funcOp.args_begin(), funcOp.args_end()); |
| gpu::GPUFuncOp gpuFuncOp = |
| outlineKernelFunc(launchOp, funcOp.getName(), arguments); |
| |
| // Add the SPIR-V ABI attr here since it is needed for the SPIR-V lowering. |
| // TODO(ravishankarm/antiagainst) : When there is a mirror of the |
| // workgroup-size attribute in GPU dialect use that instead. |
| StringRef abiAttrName = spirv::getEntryPointABIAttrName(); |
| auto abiAttr = |
| spirv::getEntryPointABIAttr(workGroupSize, rewriter.getContext()); |
| gpuFuncOp.setAttr(abiAttrName, abiAttr); |
| |
| // If any additional arguments are needed, then the launch op cannot be |
| // converted. |
| if (arguments.size() != gpuFuncOp.getNumArguments()) return failure(); |
| |
| // Wrap this within a gpu.module |
| rewriter.setInsertionPoint(funcOp); |
| std::string moduleName = Twine(funcOp.getName(), "_gpumodule").str(); |
| auto kernelModule = |
| rewriter.create<gpu::GPUModuleOp>(funcOp.getLoc(), moduleName); |
| SymbolTable symbolTable(kernelModule); |
| symbolTable.insert(gpuFuncOp); |
| |
| // Set the conversion target attributes on the GPU module. |
| auto targetEnvAttrName = spirv::getTargetEnvAttrName(); |
| kernelModule.setAttr(targetEnvAttrName, |
| spirv::lookupTargetEnvOrDefault(funcOp)); |
| |
| rewriter.eraseOp(launchOp); |
| return success(); |
| } |
| |
| void IREEGpuKernelOutliningPass::runOnModule() { |
| OwningRewritePatternList patterns; |
| ModuleOp moduleOp = getModule(); |
| SmallVector<gpu::LaunchOp, 1> gpuLaunchOp; |
| moduleOp.walk( |
| [&gpuLaunchOp](gpu::LaunchOp op) { gpuLaunchOp.push_back(op); }); |
| if (!mlir::has_single_element(gpuLaunchOp)) { |
| moduleOp.emitError( |
| "expected single gpu.launch operation within translation module"); |
| return signalPassFailure(); |
| } |
| patterns.insert<ConvertToGPUFuncOp>(moduleOp.getContext()); |
| applyPatternsGreedily(moduleOp.getOperation(), patterns); |
| } |
| |
| std::unique_ptr<OpPassBase<ModuleOp>> createIREEGpuKernelOutliningPass() { |
| return std::make_unique<IREEGpuKernelOutliningPass>(); |
| } |
| } // namespace iree_compiler |
| } // namespace mlir |