Use better Linalg transform op builders (#11551)

This PR cherry-picks the following LLVM commit:
  f27514800cc50677d640deae555bf999653a4c6f

@hanhan FYI for the next LLVM integrate.
diff --git a/compiler/src/iree/compiler/Codegen/Common/TransformDialectStrategiesGPU.cpp b/compiler/src/iree/compiler/Codegen/Common/TransformDialectStrategiesGPU.cpp
index 50c1e32..ff5bb28 100644
--- a/compiler/src/iree/compiler/Codegen/Common/TransformDialectStrategiesGPU.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/TransformDialectStrategiesGPU.cpp
@@ -165,8 +165,7 @@
   // Split the reduction into a parallel and combiner part, then tile the
   // parallel part and map it to a full warp so it works on vectors.
   auto tileReduction = b.create<transform::TileReductionUsingScfOp>(
-      pdlOperation, pdlOperation, pdlOperation, pdlOperation, gridReductionH,
-      b.getI64ArrayAttr({0, firstReductionSize}));
+      gridReductionH, ArrayRef<int64_t>({0, firstReductionSize}));
   Value blockParallelFillH = tileReduction.getFillOp();
   Value blockParallelOpH = tileReduction.getSplitLinalgOp();
   Value blockCombinerOpH = tileReduction.getCombiningLinalgOp();
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy.mlir
index e25a9d5..0f3ab31 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/set_transform_strategy.mlir
@@ -35,7 +35,7 @@
 //         CHECK:   transform.iree.tile_to_foreach_thread_and_workgroup_count_region {{.*}} tile_sizes [1](mapping = [#gpu.block<x>])
 // CHECK-COUNT-3:   transform.structured.fuse_into_containing_op
 //         CHECK:   transform.iree.take_first
-//         CHECK:   transform.structured.tile_reduction_using_scf %{{.*}} {tile_sizes = [0, 64]}
+//         CHECK:   transform.structured.tile_reduction_using_scf %{{.*}} by tile_sizes = [0, 64]
 //         CHECK:   transform.structured.tile_to_foreach_thread_op %{{.*}} num_threads [0, 32]
 //    CHECK-SAME:      (mapping = [#gpu.thread<x>])
 //         CHECK:   transform.structured.tile_to_foreach_thread_op %{{.*}} tile_sizes [0, 2](mapping = [#gpu.thread<x>])
@@ -95,7 +95,7 @@
 
 //   CHECK-LABEL: func.func @group_reduction_128
 //         CHECK:   transform.structured.canonicalized_sequence failures(propagate)
-//         CHECK:   transform.structured.tile_reduction_using_scf %{{.*}} {tile_sizes = [0, 128]}
+//         CHECK:   transform.structured.tile_reduction_using_scf %{{.*}} by tile_sizes = [0, 128]
 //         CHECK:   transform.structured.tile_to_foreach_thread_op %{{.*}} num_threads [0, 32]
 //    CHECK-SAME:      (mapping = [#gpu.thread<x>])
 //         CHECK:   transform.structured.tile_to_foreach_thread_op %{{.*}} tile_sizes [0, 4](mapping = [#gpu.thread<x>])
@@ -136,7 +136,7 @@
 
 //   CHECK-LABEL: func.func @group_reduction_32
 //         CHECK:   transform.structured.canonicalized_sequence failures(propagate)
-//         CHECK:   transform.structured.tile_reduction_using_scf %{{.*}} {tile_sizes = [0, 32]}
+//         CHECK:   transform.structured.tile_reduction_using_scf %{{.*}} by tile_sizes = [0, 32]
 //         CHECK:   transform.structured.tile_to_foreach_thread_op %{{.*}} num_threads [0, 32]
 //    CHECK-SAME:      (mapping = [#gpu.thread<x>])
 //         CHECK:   transform.structured.tile_to_foreach_thread_op %{{.*}} tile_sizes [0, 1](mapping = [#gpu.thread<x>])
diff --git a/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir b/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir
index eb0a481..5d171d0 100644
--- a/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir
@@ -15,7 +15,7 @@
   // Step 2. Split the reduction to get meatier parallelism.
   // ===========================================================================
   %foreach_thread, %block_more_parallel_fill_op_2, %block_more_parallel_op_2, %block_combiner_op_2 = 
-    transform.structured.tile_reduction_using_scf %grid_reduction { tile_sizes = [0, 128] }
+    transform.structured.tile_reduction_using_scf %grid_reduction by tile_sizes = [0, 128]
   %_1:2 =
     transform.structured.tile_to_foreach_thread_op %block_more_parallel_op_2 num_threads [0, 32] 
     ( mapping = [#gpu.thread<x>] )
diff --git a/tests/transform_dialect/cuda/reduction_v3_codegen_spec.mlir b/tests/transform_dialect/cuda/reduction_v3_codegen_spec.mlir
index 93107bb..31845ec 100644
--- a/tests/transform_dialect/cuda/reduction_v3_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_v3_codegen_spec.mlir
@@ -17,10 +17,12 @@
   // ===========================================================================
   %foreach_thread, %block_more_parallel_fill_op_2, %block_more_parallel_op_2, %block_combiner_op_2 = 
      transform.structured.tile_reduction_using_foreach_thread %grid_reduction 
-       { num_threads = [0, 1024], tile_sizes = [0, 1], mapping = [#gpu.thread<x>] }
+        by num_threads = [0, 1024], tile_sizes = [0, 1], mapping = [#gpu.thread<x>]
+
   // Fuse the fill and pointwise to privatize them. 
   transform.structured.fuse_into_containing_op %block_more_parallel_fill_op_2
     into %foreach_thread
+
   // block_combiner_op_2 op is [parallel, reduction] of 1x384 that cannot fuse.
   // map the 1-dim to threadIdx.y to trigger mapping of the reduction to 
   // threadIdx.x via predication via `if (x==0)`.
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 1f2fc6b..f1757ad 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 1f2fc6b557655caf802dac906768a5c8880a731c
+Subproject commit f1757ad433874b7c9f0cd755e90e221ddb5bde48