[GPU][DT] Add support for materializing tensor.empty and linalg.fill ops (#18563)
This revision moves the materialization patterns for tensor.empty and
linalg.fill into the "populateShapeIndependentMaterializeEncodingPatterns"
set and updates the comments accordingly. This set of patterns lowers
ops with encodings to the same ops with materialized types.
It also adds tile swizzle shape inference to the tensor.empty pattern and
moves the getSwizzledShape utility to the "Utility methods" section (the
function body is unchanged).
This is a step towards https://github.com/iree-org/iree/issues/18554
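For example (taken from the new GPU test below, which uses a data-tiled
MFMA_F32_16x16x4_F32 layout with 8x8x4 unrolling), an encoded empty op

    %1 = tensor.empty() : tensor<255x513xf32, #encoding>

is materialized, including the swizzled tile dims, as

    %1 = tensor.empty() : tensor<2x33x8x4x16x4xf32>

and the linalg.fill that consumes it is rewritten to fill the new shape.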
---------
Signed-off-by: hanhanW <hanhan0912@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp
index 21f988f..9725432 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp
@@ -466,7 +466,7 @@
auto materializeEncodingValueFn = getMaterializeEncodingValueFn(targetAttr);
populateMaterializeEncodingIntoPackUnPackPatterns(
materializeEncodingPattern, typeConverter, materializeEncodingValueFn);
- populateIREEMaterializeEncodingIntoPackUnPackPatterns(
+ populateShapeIndependentMaterializeEncodingPatterns(
materializeEncodingPattern, target, typeConverter,
materializeEncodingValueFn);
diff --git a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
index bfe1902..b7d75c9 100644
--- a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
+++ b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
@@ -129,9 +129,10 @@
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);
-/// Pouplates the set of patterns that lowers IREE dialect (e.g., Flow, Hal,
-/// etc) ops with encoding types to pack/unpack ops.
-void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
+/// Populates the set of patterns that lower shape-like operations (e.g., Flow
+/// ops, HAL ops, tensor.empty, linalg.fill, etc.) with encoding types to the
+/// same op with materialized shapes.
+void populateShapeIndependentMaterializeEncodingPatterns(
RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target,
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn);
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp
index feb5553..9275d4e 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp
@@ -489,7 +489,7 @@
MaterializeEncodingValueFn materializeEncodingValueFn =
[](RankedTensorType, OpBuilder,
Location) -> FailureOr<MaterializeEncodingValueInfo> { return {}; };
- populateIREEMaterializeEncodingIntoPackUnPackPatterns(
+ populateShapeIndependentMaterializeEncodingPatterns(
patterns, target, typeConverter, materializeEncodingValueFn);
patterns.insert<GPUSetEncodingOpLoweringConversion,
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding.mlir
index 209b29c..5b0be32 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding.mlir
@@ -13,6 +13,28 @@
#hal.pipeline.binding<storage_buffer>,
#hal.pipeline.binding<storage_buffer>
]>
+func.func @empty_fill_encoding_unroll8x8x4_MFMA_F32_16x16x4_F32() {
+ %c0 = arith.constant 0 : index
+ %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<255x513xf32, #encoding>>
+ %cst = arith.constant 0.0 : f32
+ %1 = tensor.empty() : tensor<255x513xf32, #encoding>
+ %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<255x513xf32, #encoding>) -> tensor<255x513xf32, #encoding>
+ flow.dispatch.tensor.store %2, %0, offsets = [0, 0], sizes = [255, 513], strides = [1, 1] : tensor<255x513xf32, #encoding> -> !flow.dispatch.tensor<writeonly:tensor<255x513xf32, #encoding>>
+ return
+}
+// CHECK-LABEL: func.func @empty_fill_encoding_unroll8x8x4_MFMA_F32_16x16x4_F32
+// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<2x33x8x4x16x4xf32>
+// CHECK: %{{.+}} = linalg.fill ins({{.+}}) outs(%[[EMPTY]]
+
+// -----
+
+#encoding = #iree_encoding.encoding<operand_index = 0, op_type = matmul, element_types = [f32, f32, f32], original_type = tensor<255x513xf32>,
+ user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
+ round_dims_to = array<i64: 16, 16, 16>>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
+]>
func.func @set_encoding_LHS_unroll8x8x4_MFMA_F32_16x16x4_F32() {
%c0 = arith.constant 0 : index
%0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<255x513xf32>>
diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp
index 8024a7a..6a7230d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp
@@ -49,7 +49,7 @@
MaterializeEncodingConversionTarget target(*context);
populateMaterializeEncodingIntoPackUnPackPatterns(
materializeEncodingPattern, typeConverter, materializeEncodingValueFn);
- populateIREEMaterializeEncodingIntoPackUnPackPatterns(
+ populateShapeIndependentMaterializeEncodingPatterns(
materializeEncodingPattern, target, typeConverter,
materializeEncodingValueFn);
diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
index 8521832..d487e95 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
@@ -31,6 +31,33 @@
// Utility methods
//===---------------------------------------------------------------------===//
+// Utility to apply a tile-swizzling to a packed shape.
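+//
+// A hypothetical worked example: given packedShape = [2, 33, 128, 16] with two
+// inner tiles, a swizzle that expands the 128x16 tile to 8x16x4x4, and a
+// permutation [0, 2, 1, 3] over the expanded tile dims, the outer dims [2, 33]
+// stay in place and the result is [2, 33, 8, 4, 16, 4].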
+static SmallVector<OpFoldResult>
+getSwizzledShape(ArrayRef<OpFoldResult> packedShape,
+ MaterializeEncodingInfo encodingInfo) {
+ if (packedShape.empty() || !encodingInfo.swizzle) {
+ return SmallVector<OpFoldResult>(packedShape);
+ }
+
+ int64_t srcRank = packedShape.size() - encodingInfo.innerTileSizes.size();
+ SmallVector<int64_t> perm = llvm::to_vector(llvm::seq<int64_t>(0, srcRank));
+ for (auto i : encodingInfo.swizzle->permutation) {
+ perm.push_back(i + srcRank);
+ }
+
+ SmallVector<OpFoldResult> newShape(packedShape.take_front(srcRank));
+ SmallVector<int64_t> expandedTileShape =
+ getExpandedTileShape(encodingInfo.swizzle->expandShape);
+ MLIRContext *ctx = packedShape[0].getContext();
+ Builder b(ctx);
+ for (int64_t d : expandedTileShape) {
+ newShape.push_back(b.getIndexAttr(d));
+ }
+ applyPermutationToVector(newShape, perm);
+
+ return newShape;
+}
+
static Operation *dropEncodingAndCloneOp(OpBuilder &builder, Operation *op,
ValueRange convertedInputOperands,
ValueRange convertedOutputOperands) {
@@ -368,6 +395,7 @@
SmallVector<OpFoldResult> newShape = tensor::PackOp::getResultShape(
rewriter, loc, sourceDims, *innerTileSizesOfr, encodingInfo->innerDimsPos,
encodingInfo->outerDimsPerm);
+ newShape = getSwizzledShape(newShape, *encodingInfo);
Operation *newEmptyOp = rewriter.create<tensor::EmptyOp>(
loc, newShape, emptyType.getElementType());
return newEmptyOp;
@@ -507,33 +535,6 @@
.Default([](Operation *op) { return failure(); });
}
-// Utility to apply a tile-swizzling to a packed shape.
-static SmallVector<OpFoldResult>
-getSwizzledShape(ArrayRef<OpFoldResult> packedShape,
- MaterializeEncodingInfo encodingInfo) {
- if (packedShape.empty() || !encodingInfo.swizzle) {
- return SmallVector<OpFoldResult>(packedShape);
- }
-
- int64_t srcRank = packedShape.size() - encodingInfo.innerTileSizes.size();
- SmallVector<int64_t> perm = llvm::to_vector(llvm::seq<int64_t>(0, srcRank));
- for (auto i : encodingInfo.swizzle->permutation) {
- perm.push_back(i + srcRank);
- }
-
- SmallVector<OpFoldResult> newShape(packedShape.take_front(srcRank));
- SmallVector<int64_t> expandedTileShape =
- getExpandedTileShape(encodingInfo.swizzle->expandShape);
- MLIRContext *ctx = packedShape[0].getContext();
- Builder b(ctx);
- for (int64_t d : expandedTileShape) {
- newShape.push_back(b.getIndexAttr(d));
- }
- applyPermutationToVector(newShape, perm);
-
- return newShape;
-}
-
/// For `dispatchTensorType` that bind a `RankedTensorType` with encoding,
/// returns the materialized shape of the `dispatchTensorType`. The
/// dynamic dimensions of the `dispatchTensorType` are provided in
@@ -818,6 +819,11 @@
};
/// Generic pattern to convert operation that is in Destination Passing Style.
+/// TODO(hanchung): Implement separate patterns for non-elementwise operations,
+/// because those should implement their own patterns based on the backend.
+/// Elementwise operations are just like shape-like ops in the data-tiling
+/// concept: they perform the same computation, but on different shapes.
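+/// For example, an elementwise linalg.generic on tensor<255x513xf32, #encoding>
+/// is rewritten to the same generic on the materialized type (e.g.,
+/// tensor<2x33x8x4x16x4xf32> in the new GPU test).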
template <typename OpTy>
struct MaterializeDPSOperation : public OpMaterializeEncodingPattern<OpTy> {
using OpMaterializeEncodingPattern<OpTy>::OpMaterializeEncodingPattern;
@@ -914,16 +920,14 @@
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn) {
MLIRContext *context = patterns.getContext();
- patterns.insert<MaterializeDPSOperation<linalg::FillOp>,
- MaterializeDPSOperation<linalg::GenericOp>,
- MaterializeOperation<tensor::EmptyOp>,
+ patterns.insert<MaterializeDPSOperation<linalg::GenericOp>,
MaterializeContractionOp, SetEncodingOpToPackOpConversion,
UnsetEncodingOpToUnPackOpConversion>(
context, typeConverter, materializeEncodingValueFn);
memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns);
}
-void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
+void populateShapeIndependentMaterializeEncodingPatterns(
RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target,
MaterializeEncodingTypeConverter &typeConverter,
MaterializeEncodingValueFn materializeEncodingValueFn) {
@@ -949,7 +953,9 @@
return resultType == typeConverter.convertType(resultType);
});
- patterns.insert<MaterializeFlowDispatchTensorLoadOp,
+ patterns.insert<MaterializeDPSOperation<linalg::FillOp>,
+ MaterializeOperation<tensor::EmptyOp>,
+ MaterializeFlowDispatchTensorLoadOp,
MaterializeFlowDispatchTensorStoreOp,
MaterializeInterfaceBindingEncoding>(
context, typeConverter, materializeEncodingValueFn);