[Codegen][NFC] Add dynamic tests for DecomposeBoundaryPackUnPackOpsPass (#19079)
This adds tests with dynamic shapes to decompose_boundary_pack_unpack_ops.mlir
and uses util.optimization_barrier in the existing tests to separate
independent parts of the test functions.
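
As a sketch of the barrier pattern (taken from the updated pack_at_source
test in the diff below; the SSA names are local to that test), the barrier
sits between the pack and the store so the two ops stay independent:

  %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32>
  %barrier = util.optimization_barrier %pack : tensor<4x4x4x4xf32>
  flow.dispatch.tensor.store %barrier, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>>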
Signed-off-by: Max Dawkins <max.dawkins@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir
index 6ff5bed..6be2a7d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir
@@ -11,7 +11,8 @@
%src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x16xf32>> -> tensor<16x16xf32>
%dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>> -> tensor<4x4x4x4xf32>
%pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32>
- flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>>
+ %barrier = util.optimization_barrier %pack : tensor<4x4x4x4xf32>
+ flow.dispatch.tensor.store %barrier, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>>
return
}
// CHECK-LABEL: func.func @pack_at_source
@@ -30,8 +31,8 @@
%src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x4x4x4xf32>> -> tensor<4x4x4x4xf32>
%dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>> -> tensor<16x16xf32>
%unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32>
- %copy = linalg.copy ins(%unpack : tensor<16x16xf32>) outs(%dest : tensor<16x16xf32>) -> tensor<16x16xf32>
- flow.dispatch.tensor.store %copy, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
+ %barrier = util.optimization_barrier %unpack : tensor<16x16xf32>
+ flow.dispatch.tensor.store %barrier, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
return
}
// CHECK-LABEL: func.func @unpack_at_source
@@ -49,9 +50,8 @@
%1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>>
%src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x16xf32>> -> tensor<16x16xf32>
%dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>> -> tensor<4x4x4x4xf32>
- %empty = tensor.empty() : tensor<16x16xf32>
- %copy = linalg.copy ins(%src : tensor<16x16xf32>) outs(%empty : tensor<16x16xf32>) -> tensor<16x16xf32>
- %pack = tensor.pack %copy inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32>
+ %barrier = util.optimization_barrier %src : tensor<16x16xf32>
+ %pack = tensor.pack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32>
flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>>
return
}
@@ -70,7 +70,8 @@
%1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
%src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x4x4x4xf32>> -> tensor<4x4x4x4xf32>
%dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>> -> tensor<16x16xf32>
- %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32>
+ %barrier = util.optimization_barrier %src : tensor<4x4x4x4xf32>
+ %unpack = tensor.unpack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32>
flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
return
}
@@ -118,6 +119,53 @@
// -----
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
+]>
+func.func @dynamic_pack() {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f32
+ %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
+ %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
+ %2 = arith.index_castui %0 : i32 to index
+ %3 = arith.index_castui %1 : i32 to index
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %2}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<?x?x4x4xf32>>{%3, %3}
+ %src = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %2} -> tensor<?x?xf32>
+ %dest = flow.dispatch.tensor.load %5, offsets = [0, 0, 0, 0], sizes = [%3, %3, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<?x?x4x4xf32>>{%3, %3} -> tensor<?x?x4x4xf32>
+ %pack = tensor.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<?x?xf32> -> tensor<?x?x4x4xf32>
+ flow.dispatch.tensor.store %pack, %5, offsets = [0, 0, 0, 0], sizes = [%3, %3, 4, 4], strides = [1, 1, 1, 1] : tensor<?x?x4x4xf32> -> !flow.dispatch.tensor<readwrite:tensor<?x?x4x4xf32>>{%3, %3}
+ return
+}
+// CHECK-LABEL: func.func @dynamic_pack
+// CHECK: tensor.pack
+
+// -----
+
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+ #hal.pipeline.binding<storage_buffer>,
+ #hal.pipeline.binding<storage_buffer>
+]>
+func.func @dynamic_unpack() {
+ %c0 = arith.constant 0 : index
+ %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
+ %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
+ %2 = arith.index_castui %0 : i32 to index
+ %3 = arith.index_castui %1 : i32 to index
+ %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x4x4xf32>>{%2, %2}
+ %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%3, %3}
+ %src = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%2, %2, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x4x4xf32>>{%2, %2} -> tensor<?x?x4x4xf32>
+ %dest = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%3, %3], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%3, %3} -> tensor<?x?xf32>
+ %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<?x?x4x4xf32> -> tensor<?x?xf32>
+ flow.dispatch.tensor.store %unpack, %5, offsets = [0, 0], sizes = [%3, %3], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%3, %3}
+ return
+}
+// CHECK-LABEL: func.func @dynamic_unpack
+// CHECK: tensor.unpack
+
+// -----
+
#pipeline_layout = #hal.pipeline.layout<bindings = [
#hal.pipeline.binding<storage_buffer>,
#hal.pipeline.binding<storage_buffer>