[LinalgExt][NFC] Split tiling tests into tiling and distribution tests. (#15903)
This is a step towards retiring the LinalgExt tiling patterns. The upstream
methods model distribution through scf.forall ops and tiling through
scf.for ops. Splitting the tests allows us to track the two separately.
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/distribution.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/distribution.mlir
new file mode 100644
index 0000000..6203e18
--- /dev/null
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/distribution.mlir
@@ -0,0 +1,158 @@
+// RUN: iree-dialects-opt --iree-linalg-ext-tile --split-input-file -cse %s | FileCheck %s
+
+func.func @scatter_tiling_distribution(
+ %original: tensor<?x?xf32>, %indices: tensor<?x1xi32>,
+ %update : tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = iree_linalg_ext.scatter
+ {__internal_linalg_transform__ = "distribute_input"}
+ dimension_map = [0]
+ unique_indices(true)
+ ins(%update, %indices : tensor<?x?xf32>, tensor<?x1xi32>)
+ outs(%original : tensor<?x?xf32>) {
+ ^bb0(%arg1: f32, %arg2: f32):
+ %1 = arith.addf %arg1, %arg2 : f32
+ iree_linalg_ext.yield %1 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 * 10)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
+// CHECK: func.func @scatter_tiling_distribution(
+// CHECK-SAME: %[[ORIGINAL:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[INDICES:[a-zA-Z0-9_]+]]: tensor<?x1xi32>
+// CHECK-SAME: %[[UPDATES:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[TILESIZE:.+]] = arith.constant 10 : index
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[UPDATES]], %[[C0]]
+// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[UPDATES]], %[[C1]]
+// CHECK-DAG: %[[ID:.+]] = iree_input.dispatch.workgroup.id[0]
+// CHECK-DAG: %[[COUNT:.+]] = iree_input.dispatch.workgroup.count[0]
+// CHECK-DAG: %[[OFFSET:.+]] = affine.apply #[[MAP0]]()[%[[ID]]]
+// CHECK-DAG: %[[STEP:.+]] = affine.apply #[[MAP0]]()[%[[COUNT]]]
+// CHECK: %[[RESULT:.+]] = scf.for %[[IV:.+]] = %[[OFFSET]] to %[[D0]] step %[[STEP]]
+// CHECK-SAME: iter_args(%[[INIT:.+]] = %[[ORIGINAL]])
+// CHECK: %[[USED_TILESIZE:.+]] = affine.min #[[MAP1]](%[[IV]])[%[[TILESIZE]], %[[D0]]]
+// CHECK: %[[UPDATE_SLICE:.+]] = tensor.extract_slice %[[UPDATES]][%[[IV]], 0]
+// CHECK-SAME: [%[[USED_TILESIZE]], %[[D1]]]
+// CHECK: %[[INDEX_SLICE:.+]] = tensor.extract_slice %[[INDICES]][%[[IV]], 0]
+// CHECK-SAME: [%[[USED_TILESIZE]], 1]
+// CHECK: %[[D2:.+]] = tensor.dim %[[ORIGINAL]], %[[C0]]
+// CHECK: %[[ORIGINAL_SLICE:.+]] = tensor.extract_slice %[[ORIGINAL]][0, 0]
+// CHECK-SAME: [%[[D2]], %[[D1]]]
+// CHECK: %[[SCATTER_TILE:.+]] = iree_linalg_ext.scatter
+// CHECK-SAME: __internal_linalg_transform__ = "distribute_output"
+// CHECK-SAME: unique_indices(true)
+// CHECK-SAME: ins(%[[UPDATE_SLICE]], %[[INDEX_SLICE]]
+// CHECK-SAME: outs(%[[ORIGINAL_SLICE]]
+// CHECK: %[[YIELD:.+]] = tensor.insert_slice %[[SCATTER_TILE]] into %[[INIT]][0, 0]
+// CHECK-SAME: [%[[D2]], %[[D1]]]
+// CHECK: return %[[RESULT]]
+
+// -----
+
+func.func @sort_3d_multi_result_distribute(
+ %arg0: tensor<?x?x?xi32>, %arg1 : tensor<?x?x?xf32>)
+ -> (tensor<?x?x?xi32>, tensor<?x?x?xf32>) {
+ %0, %1 = iree_linalg_ext.sort
+ {__internal_linalg_transform__ = "distribute_input"}
+ dimension(1)
+ outs(%arg0, %arg1 : tensor<?x?x?xi32>, tensor<?x?x?xf32>) {
+ ^bb0(%arg2: i32, %arg3: i32, %arg4 : f32, %arg5 : f32): // no predecessors
+ %2 = arith.cmpf ogt, %arg4, %arg5 : f32
+ iree_linalg_ext.yield %2 : i1
+ } -> tensor<?x?x?xi32>, tensor<?x?x?xf32>
+ return %0, %1 : tensor<?x?x?xi32>, tensor<?x?x?xf32>
+}
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 * 10)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
+// CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0] -> (s0 * 30)>
+// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0, s1] -> (30, -d0 + s1)>
+// CHECK: func.func @sort_3d_multi_result_distribute(
+// CHECK-SAME: %[[OPERAND1:[a-zA-Z0-9_]+]]: tensor<?x?x?xi32>
+// CHECK-SAME: %[[OPERAND2:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
+// CHECK-DAG: %[[TILESIZE1:.+]] = arith.constant 10 : index
+// CHECK-DAG: %[[TILESIZE2:.+]] = arith.constant 30 : index
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
+// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[OPERAND1]], %[[C0]]
+// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[OPERAND1]], %[[C1]]
+// CHECK-DAG: %[[D2:.+]] = tensor.dim %[[OPERAND1]], %[[C2]]
+// CHECK-DAG: %[[IDX:.+]] = iree_input.dispatch.workgroup.id[0]
+// CHECK-DAG: %[[COUNTX:.+]] = iree_input.dispatch.workgroup.count[0]
+// CHECK-DAG: %[[IDY:.+]] = iree_input.dispatch.workgroup.id[1]
+// CHECK-DAG: %[[COUNTY:.+]] = iree_input.dispatch.workgroup.count[1]
+// CHECK-DAG: %[[OFFSETY:.+]] = affine.apply #[[MAP0]]()[%[[IDY]]]
+// CHECK-DAG: %[[STEPY:.+]] = affine.apply #[[MAP0]]()[%[[COUNTY]]]
+// CHECK: %[[RESULT:.+]]:2 = scf.for %[[IV0:.+]] = %[[OFFSETY]] to %[[D0]] step %[[STEPY]]
+// CHECK-SAME: iter_args(%[[INIT1:.+]] = %[[OPERAND1]], %[[INIT2:.+]] = %[[OPERAND2]])
+// CHECK-DAG: %[[USED_TILESIZE1:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[TILESIZE1]], %[[D0]]]
+// CHECK-DAG: %[[OFFSETX:.+]] = affine.apply #[[MAP2]]()[%[[IDX]]]
+// CHECK-DAG: %[[STEPX:.+]] = affine.apply #[[MAP2]]()[%[[COUNTX]]]
+// CHECK: %[[RESULT_INNER:.+]]:2 = scf.for %[[IV1:.+]] = %[[OFFSETX]] to %[[D2]] step %[[STEPX]]
+// CHECK-SAME: iter_args(%[[INIT3:.+]] = %[[INIT1]], %[[INIT4:.+]] = %[[INIT2]])
+// CHECK-DAG: %[[USED_TILESIZE2:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[TILESIZE2]], %[[D2]]]
+// CHECK: %[[OPERAND1_SLICE:.+]] = tensor.extract_slice %[[OPERAND1]][%[[IV0]], 0, %[[IV1]]]
+// CHECK-SAME: [%[[USED_TILESIZE1]], %[[D1]], %[[USED_TILESIZE2]]]
+// CHECK: %[[OPERAND2_SLICE:.+]] = tensor.extract_slice %[[OPERAND2]][%[[IV0]], 0, %[[IV1]]]
+// CHECK-SAME: [%[[USED_TILESIZE1]], %[[D1]], %[[USED_TILESIZE2]]]
+// CHECK: %[[SORT_SLICE:.+]]:2 = iree_linalg_ext.sort
+// CHECK-SAME: __internal_linalg_transform__ = "distribute_output"
+// CHECK-SAME: outs(%[[OPERAND1_SLICE]], %[[OPERAND2_SLICE]]
+// CHECK: %[[YIELD1:.+]] = tensor.insert_slice %[[SORT_SLICE]]#0
+// CHECK-SAME: into %[[INIT3]][%[[IV0]], 0, %[[IV1]]]
+// CHECK: %[[YIELD2:.+]] = tensor.insert_slice %[[SORT_SLICE]]#1
+// CHECK-SAME: into %[[INIT4]][%[[IV0]], 0, %[[IV1]]]
+// CHECK: scf.yield %[[YIELD1]], %[[YIELD2]]
+// CHECK: scf.yield %[[RESULT_INNER]]#0, %[[RESULT_INNER]]#1
+// CHECK: return %[[RESULT]]#0, %[[RESULT]]#1
+
+// -----
+
+func.func @sort_3d_multi_result_distribute_memref(
+ %arg0: memref<?x?x?xi32>, %arg1 : memref<?x?x?xf32>) {
+ iree_linalg_ext.sort
+ {__internal_linalg_transform__ = "distribute_input"}
+ dimension(1)
+ outs(%arg0, %arg1 : memref<?x?x?xi32>, memref<?x?x?xf32>) {
+ ^bb0(%arg2: i32, %arg3: i32, %arg4 : f32, %arg5 : f32): // no predecessors
+ %0 = arith.cmpf ogt, %arg4, %arg5 : f32
+ iree_linalg_ext.yield %0 : i1
+ }
+ return
+}
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 * 10)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
+// CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0] -> (s0 * 30)>
+// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0, s1] -> (30, -d0 + s1)>
+// CHECK: func.func @sort_3d_multi_result_distribute_memref(
+// CHECK-SAME: %[[OPERAND1:[a-zA-Z0-9_]+]]: memref<?x?x?xi32>
+// CHECK-SAME: %[[OPERAND2:[a-zA-Z0-9_]+]]: memref<?x?x?xf32>
+// CHECK-DAG: %[[TILESIZE1:.+]] = arith.constant 10 : index
+// CHECK-DAG: %[[TILESIZE2:.+]] = arith.constant 30 : index
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
+// CHECK-DAG: %[[D0:.+]] = memref.dim %[[OPERAND1]], %[[C0]]
+// CHECK-DAG: %[[D1:.+]] = memref.dim %[[OPERAND1]], %[[C1]]
+// CHECK-DAG: %[[D2:.+]] = memref.dim %[[OPERAND1]], %[[C2]]
+// CHECK-DAG: %[[IDX:.+]] = iree_input.dispatch.workgroup.id[0]
+// CHECK-DAG: %[[COUNTX:.+]] = iree_input.dispatch.workgroup.count[0]
+// CHECK-DAG: %[[IDY:.+]] = iree_input.dispatch.workgroup.id[1]
+// CHECK-DAG: %[[COUNTY:.+]] = iree_input.dispatch.workgroup.count[1]
+// CHECK-DAG: %[[OFFSETY:.+]] = affine.apply #[[MAP0]]()[%[[IDY]]]
+// CHECK-DAG: %[[STEPY:.+]] = affine.apply #[[MAP0]]()[%[[COUNTY]]]
+// CHECK: scf.for %[[IV0:.+]] = %[[OFFSETY]] to %[[D0]] step %[[STEPY]]
+// CHECK-DAG: %[[USED_TILESIZE1:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[TILESIZE1]], %[[D0]]]
+// CHECK-DAG: %[[OFFSETX:.+]] = affine.apply #[[MAP2]]()[%[[IDX]]]
+// CHECK-DAG: %[[STEPX:.+]] = affine.apply #[[MAP2]]()[%[[COUNTX]]]
+// CHECK: scf.for %[[IV1:.+]] = %[[OFFSETX]] to %[[D2]] step %[[STEPX]]
+// CHECK-DAG: %[[USED_TILESIZE2:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[TILESIZE2]], %[[D2]]]
+// CHECK: %[[OPERAND1_SLICE:.+]] = memref.subview %[[OPERAND1]][%[[IV0]], 0, %[[IV1]]]
+// CHECK-SAME: [%[[USED_TILESIZE1]], %[[D1]], %[[USED_TILESIZE2]]]
+// CHECK: %[[OPERAND2_SLICE:.+]] = memref.subview %[[OPERAND2]][%[[IV0]], 0, %[[IV1]]]
+// CHECK-SAME: [%[[USED_TILESIZE1]], %[[D1]], %[[USED_TILESIZE2]]]
+// CHECK: iree_linalg_ext.sort
+// CHECK-SAME: __internal_linalg_transform__ = "distribute_output"
+// CHECK-SAME: outs(%[[OPERAND1_SLICE]], %[[OPERAND2_SLICE]]
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/tiling.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/tiling.mlir
index f86e5a3..0803b0c 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/tiling.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/tiling.mlir
@@ -99,57 +99,6 @@
// -----
-func.func @scatter_tiling_distribution(
- %original: tensor<?x?xf32>, %indices: tensor<?x1xi32>,
- %update : tensor<?x?xf32>) -> tensor<?x?xf32> {
- %0 = iree_linalg_ext.scatter
- {__internal_linalg_transform__ = "distribute_input"}
- dimension_map = [0]
- unique_indices(true)
- ins(%update, %indices : tensor<?x?xf32>, tensor<?x1xi32>)
- outs(%original : tensor<?x?xf32>) {
- ^bb0(%arg1: f32, %arg2: f32):
- %1 = arith.addf %arg1, %arg2 : f32
- iree_linalg_ext.yield %1 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 * 10)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
-// CHECK: func.func @scatter_tiling_distribution(
-// CHECK-SAME: %[[ORIGINAL:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
-// CHECK-SAME: %[[INDICES:[a-zA-Z0-9_]+]]: tensor<?x1xi32>
-// CHECK-SAME: %[[UPDATES:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
-// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
-// CHECK-DAG: %[[TILESIZE:.+]] = arith.constant 10 : index
-// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[UPDATES]], %[[C0]]
-// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[UPDATES]], %[[C1]]
-// CHECK-DAG: %[[ID:.+]] = iree_input.dispatch.workgroup.id[0]
-// CHECK-DAG: %[[COUNT:.+]] = iree_input.dispatch.workgroup.count[0]
-// CHECK-DAG: %[[OFFSET:.+]] = affine.apply #[[MAP0]]()[%[[ID]]]
-// CHECK-DAG: %[[STEP:.+]] = affine.apply #[[MAP0]]()[%[[COUNT]]]
-// CHECK: %[[RESULT:.+]] = scf.for %[[IV:.+]] = %[[OFFSET]] to %[[D0]] step %[[STEP]]
-// CHECK-SAME: iter_args(%[[INIT:.+]] = %[[ORIGINAL]])
-// CHECK: %[[USED_TILESIZE:.+]] = affine.min #[[MAP1]](%[[IV]])[%[[TILESIZE]], %[[D0]]]
-// CHECK: %[[UPDATE_SLICE:.+]] = tensor.extract_slice %[[UPDATES]][%[[IV]], 0]
-// CHECK-SAME: [%[[USED_TILESIZE]], %[[D1]]]
-// CHECK: %[[INDEX_SLICE:.+]] = tensor.extract_slice %[[INDICES]][%[[IV]], 0]
-// CHECK-SAME: [%[[USED_TILESIZE]], 1]
-// CHECK: %[[D2:.+]] = tensor.dim %[[ORIGINAL]], %[[C0]]
-// CHECK: %[[ORIGINAL_SLICE:.+]] = tensor.extract_slice %[[ORIGINAL]][0, 0]
-// CHECK-SAME: [%[[D2]], %[[D1]]]
-// CHECK: %[[SCATTER_TILE:.+]] = iree_linalg_ext.scatter
-// CHECK-SAME: __internal_linalg_transform__ = "distribute_output"
-// CHECK-SAME: unique_indices(true)
-// CHECK-SAME: ins(%[[UPDATE_SLICE]], %[[INDEX_SLICE]]
-// CHECK-SAME: outs(%[[ORIGINAL_SLICE]]
-// CHECK: %[[YIELD:.+]] = tensor.insert_slice %[[SCATTER_TILE]] into %[[INIT]][0, 0]
-// CHECK-SAME: [%[[D2]], %[[D1]]]
-// CHECK: return %[[RESULT]]
-
-// -----
-
func.func @scatter_no_tiling(
%original: tensor<?x?xf32>, %indices: tensor<?x1xi32>,
%update : tensor<?x?xf32>) -> tensor<?x?xf32> {
@@ -407,114 +356,6 @@
// -----
-func.func @sort_3d_multi_result_distribute(
- %arg0: tensor<?x?x?xi32>, %arg1 : tensor<?x?x?xf32>)
- -> (tensor<?x?x?xi32>, tensor<?x?x?xf32>) {
- %0, %1 = iree_linalg_ext.sort
- {__internal_linalg_transform__ = "distribute_input"}
- dimension(1)
- outs(%arg0, %arg1 : tensor<?x?x?xi32>, tensor<?x?x?xf32>) {
- ^bb0(%arg2: i32, %arg3: i32, %arg4 : f32, %arg5 : f32): // no predecessors
- %2 = arith.cmpf ogt, %arg4, %arg5 : f32
- iree_linalg_ext.yield %2 : i1
- } -> tensor<?x?x?xi32>, tensor<?x?x?xf32>
- return %0, %1 : tensor<?x?x?xi32>, tensor<?x?x?xf32>
-}
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 * 10)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0] -> (s0 * 30)>
-// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0, s1] -> (30, -d0 + s1)>
-// CHECK: func.func @sort_3d_multi_result_distribute(
-// CHECK-SAME: %[[OPERAND1:[a-zA-Z0-9_]+]]: tensor<?x?x?xi32>
-// CHECK-SAME: %[[OPERAND2:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
-// CHECK-DAG: %[[TILESIZE1:.+]] = arith.constant 10 : index
-// CHECK-DAG: %[[TILESIZE2:.+]] = arith.constant 30 : index
-// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
-// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
-// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[OPERAND1]], %[[C0]]
-// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[OPERAND1]], %[[C1]]
-// CHECK-DAG: %[[D2:.+]] = tensor.dim %[[OPERAND1]], %[[C2]]
-// CHECK-DAG: %[[IDX:.+]] = iree_input.dispatch.workgroup.id[0]
-// CHECK-DAG: %[[COUNTX:.+]] = iree_input.dispatch.workgroup.count[0]
-// CHECK-DAG: %[[IDY:.+]] = iree_input.dispatch.workgroup.id[1]
-// CHECK-DAG: %[[COUNTY:.+]] = iree_input.dispatch.workgroup.count[1]
-// CHECK-DAG: %[[OFFSETY:.+]] = affine.apply #[[MAP0]]()[%[[IDY]]]
-// CHECK-DAG: %[[STEPY:.+]] = affine.apply #[[MAP0]]()[%[[COUNTY]]]
-// CHECK: %[[RESULT:.+]]:2 = scf.for %[[IV0:.+]] = %[[OFFSETY]] to %[[D0]] step %[[STEPY]]
-// CHECK-SAME: iter_args(%[[INIT1:.+]] = %[[OPERAND1]], %[[INIT2:.+]] = %[[OPERAND2]])
-// CHECK-DAG: %[[USED_TILESIZE1:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[TILESIZE1]], %[[D0]]]
-// CHECK-DAG: %[[OFFSETX:.+]] = affine.apply #[[MAP2]]()[%[[IDX]]]
-// CHECK-DAG: %[[STEPX:.+]] = affine.apply #[[MAP2]]()[%[[COUNTX]]]
-// CHECK: %[[RESULT_INNER:.+]]:2 = scf.for %[[IV1:.+]] = %[[OFFSETX]] to %[[D2]] step %[[STEPX]]
-// CHECK-SAME: iter_args(%[[INIT3:.+]] = %[[INIT1]], %[[INIT4:.+]] = %[[INIT2]])
-// CHECK-DAG: %[[USED_TILESIZE2:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[TILESIZE2]], %[[D2]]]
-// CHECK: %[[OPERAND1_SLICE:.+]] = tensor.extract_slice %[[OPERAND1]][%[[IV0]], 0, %[[IV1]]]
-// CHECK-SAME: [%[[USED_TILESIZE1]], %[[D1]], %[[USED_TILESIZE2]]]
-// CHECK: %[[OPERAND2_SLICE:.+]] = tensor.extract_slice %[[OPERAND2]][%[[IV0]], 0, %[[IV1]]]
-// CHECK-SAME: [%[[USED_TILESIZE1]], %[[D1]], %[[USED_TILESIZE2]]]
-// CHECK: %[[SORT_SLICE:.+]]:2 = iree_linalg_ext.sort
-// CHECK-SAME: __internal_linalg_transform__ = "distribute_output"
-// CHECK-SAME: outs(%[[OPERAND1_SLICE]], %[[OPERAND2_SLICE]]
-// CHECK: %[[YIELD1:.+]] = tensor.insert_slice %[[SORT_SLICE]]#0
-// CHECK-SAME: into %[[INIT3]][%[[IV0]], 0, %[[IV1]]]
-// CHECK: %[[YIELD2:.+]] = tensor.insert_slice %[[SORT_SLICE]]#1
-// CHECK-SAME: into %[[INIT4]][%[[IV0]], 0, %[[IV1]]]
-// CHECK: scf.yield %[[YIELD1]], %[[YIELD2]]
-// CHECK: scf.yield %[[RESULT_INNER]]#0, %[[RESULT_INNER]]#1
-// CHECK: return %[[RESULT]]#0, %[[RESULT]]#1
-
-// -----
-
-func.func @sort_3d_multi_result_distribute_memref(
- %arg0: memref<?x?x?xi32>, %arg1 : memref<?x?x?xf32>) {
- iree_linalg_ext.sort
- {__internal_linalg_transform__ = "distribute_input"}
- dimension(1)
- outs(%arg0, %arg1 : memref<?x?x?xi32>, memref<?x?x?xf32>) {
- ^bb0(%arg2: i32, %arg3: i32, %arg4 : f32, %arg5 : f32): // no predecessors
- %0 = arith.cmpf ogt, %arg4, %arg5 : f32
- iree_linalg_ext.yield %0 : i1
- }
- return
-}
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 * 10)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0] -> (s0 * 30)>
-// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0, s1] -> (30, -d0 + s1)>
-// CHECK: func.func @sort_3d_multi_result_distribute_memref(
-// CHECK-SAME: %[[OPERAND1:[a-zA-Z0-9_]+]]: memref<?x?x?xi32>
-// CHECK-SAME: %[[OPERAND2:[a-zA-Z0-9_]+]]: memref<?x?x?xf32>
-// CHECK-DAG: %[[TILESIZE1:.+]] = arith.constant 10 : index
-// CHECK-DAG: %[[TILESIZE2:.+]] = arith.constant 30 : index
-// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
-// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
-// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
-// CHECK-DAG: %[[D0:.+]] = memref.dim %[[OPERAND1]], %[[C0]]
-// CHECK-DAG: %[[D1:.+]] = memref.dim %[[OPERAND1]], %[[C1]]
-// CHECK-DAG: %[[D2:.+]] = memref.dim %[[OPERAND1]], %[[C2]]
-// CHECK-DAG: %[[IDX:.+]] = iree_input.dispatch.workgroup.id[0]
-// CHECK-DAG: %[[COUNTX:.+]] = iree_input.dispatch.workgroup.count[0]
-// CHECK-DAG: %[[IDY:.+]] = iree_input.dispatch.workgroup.id[1]
-// CHECK-DAG: %[[COUNTY:.+]] = iree_input.dispatch.workgroup.count[1]
-// CHECK-DAG: %[[OFFSETY:.+]] = affine.apply #[[MAP0]]()[%[[IDY]]]
-// CHECK-DAG: %[[STEPY:.+]] = affine.apply #[[MAP0]]()[%[[COUNTY]]]
-// CHECK: scf.for %[[IV0:.+]] = %[[OFFSETY]] to %[[D0]] step %[[STEPY]]
-// CHECK-DAG: %[[USED_TILESIZE1:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[TILESIZE1]], %[[D0]]]
-// CHECK-DAG: %[[OFFSETX:.+]] = affine.apply #[[MAP2]]()[%[[IDX]]]
-// CHECK-DAG: %[[STEPX:.+]] = affine.apply #[[MAP2]]()[%[[COUNTX]]]
-// CHECK: scf.for %[[IV1:.+]] = %[[OFFSETX]] to %[[D2]] step %[[STEPX]]
-// CHECK-DAG: %[[USED_TILESIZE2:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[TILESIZE2]], %[[D2]]]
-// CHECK: %[[OPERAND1_SLICE:.+]] = memref.subview %[[OPERAND1]][%[[IV0]], 0, %[[IV1]]]
-// CHECK-SAME: [%[[USED_TILESIZE1]], %[[D1]], %[[USED_TILESIZE2]]]
-// CHECK: %[[OPERAND2_SLICE:.+]] = memref.subview %[[OPERAND2]][%[[IV0]], 0, %[[IV1]]]
-// CHECK-SAME: [%[[USED_TILESIZE1]], %[[D1]], %[[USED_TILESIZE2]]]
-// CHECK: iree_linalg_ext.sort
-// CHECK-SAME: __internal_linalg_transform__ = "distribute_output"
-// CHECK-SAME: outs(%[[OPERAND1_SLICE]], %[[OPERAND2_SLICE]]
-
-// -----
-
func.func @fft_1d_stage_5(%arg0: tensor<1024xf32>, %arg1: tensor<1024xf32>,
%arg2: tensor<16xf32>, %arg3: tensor<16xf32>) -> (tensor<1024xf32>, tensor<1024xf32>) {
%cst1 = arith.constant 5 : index