// Copyright 2021 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "iree-dialects/Dialect/LinalgExt/IR/TiledOpInterface.h"

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "iree-tiled-op-interface"

using namespace mlir;
namespace IREE = mlir::iree_compiler::IREE;
using namespace IREE::LinalgExt;

#include "iree-dialects/Dialect/LinalgExt/IR/TiledOpInterface.cpp.inc"

/// Converts an `OpFoldResult` to a `Value`, building a constant op when the
/// `OpFoldResult` is an `IntegerAttr`.
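/// For example, `getValue(b, loc, b.getIndexAttr(4))` materializes and returns
/// an `arith.constant 4 : index`.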
static Value getValue(OpBuilder &builder, Location loc,
                      OpFoldResult valueOrAttr) {
  if (auto attr = valueOrAttr.dyn_cast<Attribute>()) {
    return builder.create<arith::ConstantIndexOp>(
        loc, attr.cast<IntegerAttr>().getInt());
  }
  return valueOrAttr.get<Value>();
}

//===----------------------------------------------------------------------===//
// Interface implementations for external operations.
//===----------------------------------------------------------------------===//

namespace {

/// External model for `tensor.extract_slice`.
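/// As a sketch (value names here are illustrative, strides assumed unit),
/// tiling
///   %0 = tensor.extract_slice %src[%o][%sz][1]
///       : tensor<?xf32> to tensor<?xf32>
/// with tile offset %iv and tile size %ts produces
///   %t = tensor.extract_slice %src[%o + %iv][%ts][1]
///   %r = tensor.insert_slice %t into %init[%iv][%ts][1]
/// where %init is a `linalg.init_tensor` of the same shape as the result.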
struct ExtractSliceTiledOpInterface
    : public TiledOpInterface::ExternalModel<ExtractSliceTiledOpInterface,
                                             tensor::ExtractSliceOp> {
  SmallVector<Value> getDestinationOperands(Operation *op, OpBuilder &b) const {
    // No operand of `tensor.extract_slice` serves as a destination, so create
    // an `init_tensor` op of the same shape as the result.
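    // (`linalg.init_tensor` only carries a shape; its contents are undefined,
    // which is fine here since the tiled inserts collectively overwrite every
    // element of the result.)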
    auto extractSliceOp = cast<tensor::ExtractSliceOp>(op);
    SmallVector<Value> dest;
    ReifiedRankedShapedTypeDims returnShape;
    (void)extractSliceOp.reifyResultShapes(b, returnShape);
    auto ofrShape = llvm::to_vector<4>(llvm::map_range(
        returnShape[0], [](Value v) { return getAsOpFoldResult(v); }));
    Value initTensor = b.create<linalg::InitTensorOp>(
        op->getLoc(), ofrShape, extractSliceOp.getType().getElementType());
    return {initTensor};
  }

  SmallVector<StringRef> getLoopIteratorTypes(Operation *op) const {
    auto extractSliceOp = cast<tensor::ExtractSliceOp>(op);
    return SmallVector<StringRef>(extractSliceOp.getType().getRank(),
                                  getParallelIteratorTypeName());
  }

  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    auto extractSliceOp = cast<tensor::ExtractSliceOp>(op);
    SmallVector<Value> dest;
    ReifiedRankedShapedTypeDims returnShape;
    (void)extractSliceOp.reifyResultShapes(b, returnShape);
    Location loc = op->getLoc();
    Value zero = b.create<arith::ConstantIndexOp>(loc, 0);
    Value one = b.create<arith::ConstantIndexOp>(loc, 1);
    SmallVector<Range> loopRanges(returnShape[0].size(),
                                  Range{zero, nullptr, one});
    for (auto ub : enumerate(returnShape[0])) {
      loopRanges[ub.index()].size = ub.value();
    }
    return loopRanges;
  }

  Operation *getTiledImplementation(Operation *op, OpBuilder &b,
                                    ValueRange outputs,
                                    ArrayRef<OpFoldResult> offsets,
                                    ArrayRef<OpFoldResult> sizes,
                                    SmallVectorImpl<Value> &results) const {
    auto extractOp = cast<tensor::ExtractSliceOp>(op);
    Location loc = extractOp.getLoc();
    // Unit stride used below when inserting the tiled result into the
    // destination; the tiled extract itself keeps the op's original strides.
    auto oneAttr = b.getI64IntegerAttr(1);

    // Compute the offsets and sizes for the tiled `tensor.extract_slice`
    // operation.
    llvm::SmallBitVector droppedDims = extractOp.getDroppedDims();
    unsigned resultDimPos = 0;
    auto opOffsets = extractOp.getMixedOffsets();
    auto opSizes = extractOp.getMixedSizes();
    auto opStrides = extractOp.getMixedStrides();
    MLIRContext *context = b.getContext();
    SmallVector<OpFoldResult> newOffset, newSizes, newStrides;
    for (auto opOffset : enumerate(opOffsets)) {
      // If the dimension is dropped, reuse the op's original offset and size.
      if (droppedDims.test(opOffset.index())) {
        newOffset.push_back(opOffset.value());
        newSizes.push_back(opSizes[opOffset.index()]);
      } else {
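        // The tiled op's offset along this dimension is
        //   newOffset = tileOffset * opStride + opOffset,
        // materialized as an `affine.apply`.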
        AffineExpr d0, s0, s1;
        bindDims(context, d0);
        bindSymbols(context, s0, s1);
        AffineMap map = AffineMap::get(1, 2, d0 * s0 + s1);
        SmallVector<Value> operands = {
            getValue(b, loc, offsets[resultDimPos]),
            getValue(b, loc, opStrides[opOffset.index()]),
            getValue(b, loc, opOffset.value())};
        Value offset = b.create<AffineApplyOp>(loc, map, operands);
        newOffset.push_back(offset);
        newSizes.push_back(sizes[resultDimPos]);
        resultDimPos++;
      }
      newStrides.push_back(opStrides[opOffset.index()]);
    }

    // Generate the tiled `tensor.extract_slice` operation.
    Type resultType = tensor::ExtractSliceOp::inferRankReducedResultType(
        extractOp.getType().getRank(), extractOp.getSourceType(), newOffset,
        newSizes, newStrides);
    auto tiledExtractOp = b.create<tensor::ExtractSliceOp>(
        loc, resultType.cast<RankedTensorType>(), extractOp.source(), newOffset,
        newSizes, newStrides);

    // Insert the tiled extract into the result tensor.
    SmallVector<OpFoldResult> resultStrides(offsets.size(), oneAttr);
    auto tiledInsertOp = b.create<tensor::InsertSliceOp>(
        loc, tiledExtractOp.result(), outputs[0], offsets, sizes,
        resultStrides);
    results.push_back(tiledInsertOp.result());
    return tiledExtractOp;
  }
};

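/// External model for `tensor.insert_slice`. As a sketch (value names here are
/// illustrative, strides assumed unit), tiling
///   %0 = tensor.insert_slice %src into %dst[%o][%sz][1]
///       : tensor<?xf32> into tensor<?xf32>
/// with tile offset %iv and tile size %ts produces
///   %t = tensor.extract_slice %src[%iv][%ts][1]
///   %r = tensor.insert_slice %t into %dst[%o + %iv][%ts][1]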
struct InsertSliceTiledOpInterface
    : public TiledOpInterface::ExternalModel<InsertSliceTiledOpInterface,
                                             tensor::InsertSliceOp> {
  SmallVector<Value> getDestinationOperands(Operation *op, OpBuilder &b) const {
    SmallVector<Value> dest;
    dest.push_back(cast<tensor::InsertSliceOp>(op).dest());
    return dest;
  }

  SmallVector<StringRef> getLoopIteratorTypes(Operation *op) const {
    auto insertSliceOp = cast<tensor::InsertSliceOp>(op);
    return SmallVector<StringRef>(insertSliceOp.getSourceType().getRank(),
                                  getParallelIteratorTypeName());
  }

  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    auto insertSliceOp = cast<tensor::InsertSliceOp>(op);
    Value source = insertSliceOp.source();
    RankedTensorType sourceType = insertSliceOp.getSourceType();
    Location loc = op->getLoc();
    Value zero = b.create<arith::ConstantIndexOp>(loc, 0);
    Value one = b.create<arith::ConstantIndexOp>(loc, 1);
    SmallVector<Range> loopBounds(sourceType.getRank(),
                                  Range{zero, nullptr, one});
    for (auto dim : llvm::seq<int64_t>(0, sourceType.getRank())) {
      loopBounds[dim].size = b.create<tensor::DimOp>(loc, source, dim);
    }
    return loopBounds;
  }

  Operation *getTiledImplementation(Operation *op, OpBuilder &b,
                                    ValueRange outputs,
                                    ArrayRef<OpFoldResult> offsets,
                                    ArrayRef<OpFoldResult> sizes,
                                    SmallVectorImpl<Value> &results) const {
    auto insertOp = cast<tensor::InsertSliceOp>(op);
    // Compute a subtensor of the source based on the offsets.
    auto opStrides = insertOp.getMixedStrides();
    if (!llvm::all_of(opStrides, [&](OpFoldResult valueOrAttr) {
          Optional<int64_t> intVal = getConstantIntValue(valueOrAttr);
          return intVal && *intVal == 1;
        })) {
      op->emitOpError("unable to tile operation with non-unit stride");
      return nullptr;
    }
    MLIRContext *context = b.getContext();
    Location loc = insertOp.getLoc();
    auto oneAttr = b.getI64IntegerAttr(1);
    SmallVector<OpFoldResult> strides(offsets.size(), oneAttr);
    auto extractSliceOp = b.create<tensor::ExtractSliceOp>(
        loc, insertOp.source(), offsets, sizes, strides);

    // The offsets for the insert are the op's offsets plus the offsets of the
    // tiled loops passed in.
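    // That is, resultOffset[d] = opOffset[d] + tileOffset[d] for every
    // non-dropped dimension d.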
    auto opOffsets = insertOp.getMixedOffsets();
    auto opSizes = insertOp.getMixedSizes();
    unsigned offsetIndex = 0;
    ArrayRef<int64_t> sourceShape = insertOp.getSourceType().getShape();
    int64_t destRank = insertOp.getType().getRank();
    SmallVector<OpFoldResult> resultOffsets(destRank);
    SmallVector<OpFoldResult> resultSizes(destRank);
    for (auto opOffset : llvm::enumerate(opOffsets)) {
      // Detect rank-reducing (dropped) dimensions: either all source
      // dimensions have already been consumed, or the corresponding opSize
      // value is 1 while the current source dimension size is not 1. For such
      // a dimension keep the op's offset and a unit size, and skip it.
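      // E.g. for a rank-reducing insert such as (a sketch)
      //   tensor.insert_slice %s into %d[0, %o, 0][1, %sz, 1][1, 1, 1]
      //       : tensor<?xf32> into tensor<1x?x1xf32>
      // the offsets at positions 0 and 2 belong to dropped dimensions.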
      unsigned opOffsetIndex = opOffset.index();
      OpFoldResult opOffsetVal = opOffset.value();
      Optional<int64_t> opSizeVal = getConstantIntValue(opSizes[opOffsetIndex]);
      if (offsetIndex >= sourceShape.size() ||
          (opSizeVal && *opSizeVal == 1 && sourceShape[offsetIndex] != 1)) {
        resultOffsets[opOffsetIndex] = opOffsetVal;
        resultSizes[opOffsetIndex] = oneAttr;
        continue;
      }
      OpFoldResult offset = offsets[offsetIndex];
      if (opOffsetVal.is<Attribute>() && offset.is<Attribute>()) {
        resultOffsets[opOffsetIndex] = b.getI64IntegerAttr(
            *getConstantIntValue(opOffsetVal) + *getConstantIntValue(offset));
      } else {
        AffineExpr d0, s0;
        bindDims(context, d0);
        bindSymbols(context, s0);
        AffineMap map = AffineMap::get(1, 1, d0 + s0);
        SmallVector<Value> operands = {getValue(b, loc, offset),
                                       getValue(b, loc, opOffsetVal)};
        resultOffsets[opOffsetIndex] =
            b.create<AffineApplyOp>(loc, map, operands).getResult();
      }
      resultSizes[opOffsetIndex] = sizes[offsetIndex];
      offsetIndex++;
    }
    SmallVector<OpFoldResult> resultStrides(destRank, oneAttr);
    auto tiledInsertOp = b.create<tensor::InsertSliceOp>(
        loc, extractSliceOp.result(), outputs[0], resultOffsets, resultSizes,
        resultStrides);
    results.push_back(tiledInsertOp.result());
    return extractSliceOp;
  }
};

/// Forwards the implementation of `TiledOpInterface` to the upstream
/// `TilingInterface`. Note that this forwarding is only valid when the
/// iteration space is the same as the data space of the result(s). This
/// restriction stems from the difference between the tiling algorithm being
/// developed around `TilingInterface` and the one used with
/// `TiledOpInterface`: the former only needs the tiled operation, not the
/// value of the whole result tensor.
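/// To bridge that gap, `getTiledImplementation` below wraps the op returned by
/// `TilingInterface` in a `tensor.insert_slice` into the destination tensor at
/// the tile's offsets and sizes.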
template <typename OpTy>
struct ForwardToTilingInterface
    : public TiledOpInterface::ExternalModel<ForwardToTilingInterface<OpTy>,
                                             OpTy> {
  SmallVector<Value> getDestinationOperands(Operation *op, OpBuilder &b) const {
    return cast<OpTy>(op).getDestinationOperands(b);
  }

  SmallVector<StringRef> getLoopIteratorTypes(Operation *op) const {
    return cast<OpTy>(op).getLoopIteratorTypes();
  }
  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
    return cast<OpTy>(op).getIterationDomain(b);
  }
  Operation *getTiledImplementation(Operation *op, OpBuilder &b,
                                    ValueRange dest,
                                    ArrayRef<OpFoldResult> offsets,
                                    ArrayRef<OpFoldResult> sizes,
                                    SmallVectorImpl<Value> &results) const {
    SmallVector<Operation *> tiledOps = cast<OpTy>(op).getTiledImplementation(
        b, dest, offsets, sizes, /*tileDestOperands=*/true);
    if (tiledOps.empty()) {
      op->emitOpError("failed to tile operation");
      return nullptr;
    }
    assert(tiledOps.size() == 1 && "expected single tiled op");
    Operation *tiledOp = tiledOps.front();
    if (tiledOp->getNumResults() != dest.size()) {
      op->emitOpError(
          "mismatch in the number of results of the tiled operation and the "
          "number of results expected");
      return nullptr;
    }
    Location loc = op->getLoc();
    auto oneAttr = b.getI64IntegerAttr(1);
    SmallVector<OpFoldResult> strides(offsets.size(), oneAttr);
    for (auto result : llvm::enumerate(tiledOp->getResults())) {
      // Assume that the shape of the result is the same as the loop bounds of
      // the op. This implies the tiled result can be inserted into `dest` at
      // `offsets` with `sizes`; it would be illegal if that is not the case.
      // This is a point of difference between the `TiledOpInterface` in IREE
      // and the `TilingInterface` in MLIR: the latter treats fusion and tiling
      // as the same thing, so it returns just the tiled op and not the value
      // of the whole tensor that the current tiling algorithm expects.
      auto tiledInsertOp = b.create<tensor::InsertSliceOp>(
          loc, result.value(), dest[result.index()], offsets, sizes, strides);
      results.push_back(tiledInsertOp);
    }
    return tiledOp;
  }
};

} // namespace

void IREE::LinalgExt::registerTiledOpInterfaceExternalModels(
    DialectRegistry &registry) {
  LLVM_DEBUG(
      { llvm::dbgs() << "Adding external models of tiled op interface\n"; });

  registry.addExtension(+[](MLIRContext *ctx, tensor::TensorDialect *dialect) {
    tensor::ExtractSliceOp::attachInterface<ExtractSliceTiledOpInterface>(*ctx);
    tensor::InsertSliceOp::attachInterface<InsertSliceTiledOpInterface>(*ctx);
  });

  // TODO(ravishankarm): Needs custom PadTiledOpInterface or equiv.
  // registry.addOpInterface<tensor::PadOp,
  //                         ForwardToTilingInterface<tensor::PadOp>>();
}