[Flow] Fixed dropped dim computations to handle some ambiguous cases. (#15035)
The rank-reduced version of `flow.dispatch.tensor.load/store` suffers
from the same issue as upstream `tensor.extract_slice/insert_slice`:
the dropped dims computation is inherently ambiguous. Fixing this in
general is ongoing work (see
https://github.com/openxla/iree/pull/14851). Here, once the expected
number of dropped dimensions (source rank minus result rank) has been
found while iterating from outer to inner, no further dimensions are dropped.
Fixes #15016
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
index 62547e8..a5925f4 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
@@ -109,10 +109,12 @@
ArrayRef<OpFoldResult> mixedSizes) {
ArrayRef<int64_t> resultShape = slicedObjectType.getShape();
llvm::SmallBitVector droppedDims(mixedSizes.size());
- if (slicedObjectType.getRank() == mixedSizes.size()) {
+ size_t maxDroppedDims = mixedSizes.size() - resultShape.size();
+ if (maxDroppedDims == 0) {
return droppedDims;
}
unsigned shapePos = 0;
+ int numSet = 0;
for (const auto &size : llvm::enumerate(mixedSizes)) {
std::optional<int64_t> sizeVal = getConstantIntValue(size.value());
// If the size is not 1, or if the current matched dimension of the result
@@ -124,6 +126,10 @@
continue;
}
droppedDims.set(size.index());
+ numSet++;
+ if (numSet == maxDroppedDims) {
+ break;
+ }
}
return droppedDims;
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir b/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
index 6f579c1..be71444 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
@@ -695,3 +695,17 @@
%1 = flow.tensor.reshape %0 : tensor<?x4xf32>{%arg1} -> tensor<?x?xf32>{%arg2, %arg3}
return %1 : tensor<?x?xf32>
}
+
+// -----
+
+func.func @innermost_unit_dim(%4: !flow.dispatch.tensor<readonly:tensor<3x1x16x257x88xf16>>,
+ %arg0: index, %arg2 : index, %10 : index, %9 : index) -> tensor<?x?x?xf16> {
+ %c16 = arith.constant 16 : index
+ %c1 = arith.constant 1 : index
+ %11 = flow.dispatch.tensor.load %4, offsets = [1, 0, %arg0, %10, %arg2], sizes = [1, 1, %c16, %9, %c1], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<3x1x16x257x88xf16>> -> tensor<?x?x?xf16>
+ return %11 : tensor<?x?x?xf16>
+}
+// CHECK-LABEL: func @innermost_unit_dim
+// CHECK-SAME: %[[DYNAMIC_DIM:[a-zA-Z0-9]+]]: index)
+// CHECK: flow.dispatch.tensor.load
+// CHECK-SAME: sizes = [1, 1, 16, %[[DYNAMIC_DIM]], 1]