[Codegen][NFC] Add dynamic tests for DecomposeBoundaryPackUnPackOpsPass (#19079)

This adds tests with dynamic shapes to decompose_boundary_pack_unpack_ops.mlir,
and uses util.optimization_barrier in the existing tests to separate
independent parts of the test functions.
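
The barrier forwards its operand unchanged but is opaque to the compiler,
which keeps the two halves of each test function from folding into one
another. For example, the updated @pack_at_source test now routes the
pack result through a barrier before the store (snippet lifted from the
test in the diff below):

  %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4]
      into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32>
  // The barrier returns %pack unchanged but blocks folding across it,
  // so the pack no longer feeds the dispatch tensor store directly.
  %barrier = util.optimization_barrier %pack : tensor<4x4x4x4xf32>
  flow.dispatch.tensor.store %barrier, %1, offsets = [0, 0, 0, 0],
      sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1]
      : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>>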

Signed-off-by: Max Dawkins <max.dawkins@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir
index 6ff5bed..6be2a7d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/decompose_boundary_pack_unpack_ops.mlir
@@ -11,7 +11,8 @@
   %src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x16xf32>> -> tensor<16x16xf32>
   %dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>> -> tensor<4x4x4x4xf32>
   %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32>
-  flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>>
+  %barrier = util.optimization_barrier %pack : tensor<4x4x4x4xf32>
+  flow.dispatch.tensor.store %barrier, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>>
   return
 }
 // CHECK-LABEL: func.func @pack_at_source
@@ -30,8 +31,8 @@
   %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x4x4x4xf32>> -> tensor<4x4x4x4xf32>
   %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>> -> tensor<16x16xf32>
   %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32>
-  %copy = linalg.copy ins(%unpack : tensor<16x16xf32>) outs(%dest : tensor<16x16xf32>) -> tensor<16x16xf32>
-  flow.dispatch.tensor.store %copy, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
+  %barrier = util.optimization_barrier %unpack : tensor<16x16xf32>
+  flow.dispatch.tensor.store %barrier, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
   return
 }
 // CHECK-LABEL: func.func @unpack_at_source
@@ -49,9 +50,8 @@
   %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>>
   %src = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<16x16xf32>> -> tensor<16x16xf32>
   %dest = flow.dispatch.tensor.load %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>> -> tensor<4x4x4x4xf32>
-  %empty = tensor.empty() : tensor<16x16xf32>
-  %copy = linalg.copy ins(%src : tensor<16x16xf32>) outs(%empty : tensor<16x16xf32>) -> tensor<16x16xf32>
-  %pack = tensor.pack %copy inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32>
+  %barrier = util.optimization_barrier %src : tensor<16x16xf32>
+  %pack = tensor.pack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<16x16xf32> -> tensor<4x4x4x4xf32>
   flow.dispatch.tensor.store %pack, %1, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : tensor<4x4x4x4xf32> -> !flow.dispatch.tensor<readwrite:tensor<4x4x4x4xf32>>
   return
 }
@@ -70,7 +70,8 @@
   %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
   %src = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0], sizes = [4, 4, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<4x4x4x4xf32>> -> tensor<4x4x4x4xf32>
   %dest = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<16x16xf32>> -> tensor<16x16xf32>
-  %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32>
+  %barrier = util.optimization_barrier %src : tensor<4x4x4x4xf32>
+  %unpack = tensor.unpack %barrier inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<4x4x4x4xf32> -> tensor<16x16xf32>
   flow.dispatch.tensor.store %unpack, %1, offsets = [0, 0], sizes = [16, 16], strides = [1, 1] : tensor<16x16xf32> -> !flow.dispatch.tensor<readwrite:tensor<16x16xf32>>
   return
 }
@@ -118,6 +119,53 @@
 
 // -----
 
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+  #hal.pipeline.binding<storage_buffer>,
+  #hal.pipeline.binding<storage_buffer>
+]>
+func.func @dynamic_pack() {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
+  %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
+  %2 = arith.index_castui %0 : i32 to index
+  %3 = arith.index_castui %1 : i32 to index
+  %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %2}
+  %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<?x?x4x4xf32>>{%3, %3}
+  %src = flow.dispatch.tensor.load %4, offsets = [0, 0], sizes = [%2, %2], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?xf32>>{%2, %2} -> tensor<?x?xf32>
+  %dest = flow.dispatch.tensor.load %5, offsets = [0, 0, 0, 0], sizes = [%3, %3, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readwrite:tensor<?x?x4x4xf32>>{%3, %3} -> tensor<?x?x4x4xf32>
+  %pack = tensor.pack %src padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<?x?xf32> -> tensor<?x?x4x4xf32>
+  flow.dispatch.tensor.store %pack, %5, offsets = [0, 0, 0, 0], sizes = [%3, %3, 4, 4], strides = [1, 1, 1, 1] : tensor<?x?x4x4xf32> -> !flow.dispatch.tensor<readwrite:tensor<?x?x4x4xf32>>{%3, %3}
+  return
+}
+// CHECK-LABEL: func.func @dynamic_pack
+// CHECK:         tensor.pack
+
+// -----
+
+#pipeline_layout = #hal.pipeline.layout<constants = 2, bindings = [
+  #hal.pipeline.binding<storage_buffer>,
+  #hal.pipeline.binding<storage_buffer>
+]>
+func.func @dynamic_unpack() {
+  %c0 = arith.constant 0 : index
+  %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32
+  %1 = hal.interface.constant.load layout(#pipeline_layout) ordinal(1) : i32
+  %2 = arith.index_castui %0 : i32 to index
+  %3 = arith.index_castui %1 : i32 to index
+  %4 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<readonly:tensor<?x?x4x4xf32>>{%2, %2}
+  %5 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%3, %3}
+  %src = flow.dispatch.tensor.load %4, offsets = [0, 0, 0, 0], sizes = [%2, %2, 4, 4], strides = [1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<?x?x4x4xf32>>{%2, %2} -> tensor<?x?x4x4xf32>
+  %dest = flow.dispatch.tensor.load %5, offsets = [0, 0], sizes = [%3, %3], strides = [1, 1] : !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%3, %3} -> tensor<?x?xf32>
+  %unpack = tensor.unpack %src inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %dest : tensor<?x?x4x4xf32> -> tensor<?x?xf32>
+  flow.dispatch.tensor.store %unpack, %5, offsets = [0, 0], sizes = [%3, %3], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<readwrite:tensor<?x?xf32>>{%3, %3}
+  return
+}
+// CHECK-LABEL: func.func @dynamic_unpack
+// CHECK:         tensor.unpack
+
+// -----
+
 #pipeline_layout = #hal.pipeline.layout<bindings = [
   #hal.pipeline.binding<storage_buffer>,
   #hal.pipeline.binding<storage_buffer>