Move Codegen pass pipelines to nest on `FunctionOpInterface`. (#16665)

This PR modifies the codegen backends to have the lowering pass
pipelines nest on `FunctionOpInterface`. This allows running different
pass pipelines on different functions within the same dispatch,
enabling structures like

```
func.func @foo_pipeline(...) {

}

func.func @bar_pipeline(...) {

}

func.func @entry_point() {
  if (<condition for foo-pipeline-based lowering>) {
    foo_pipeline()
  } else {
    bar_pipeline()
  }
}
```

To connect everything, the following changes are made:

1) The `iree_codegen.translation_info` attribute that used to be set
   on entry point operations is now set on the surrounding function.
   This allows implementing a lowering strategy per function.

2) The GPU backends used to set the `workgroup_size` and
   `subgroup_size` on the `hal.executable.export` operation. To unwind
   this, `translation_info` now has fields for `workgroup_size` and
   `subgroup_size`, so GPU backends can set the expected
   `workgroup_size` and `subgroup_size` on the `translation_info`
   itself (which now lives on the surrounding function).

3) A new pass, `ReconcileTranslationInfo`, is added after the lowering
   strategies are applied. The intent of this pass is to take the
   `translation_info` on each function and set the values for
   `workgroup_size` and `subgroup_size` on the
   `hal.executable.export` op (see the sketch after this list).
   Eventually this would also be the place where the number of
   workgroups is populated on the `hal.executable.export` (instead of
   doing it in `TileAndDistributeToWorkgroups` as is done today).

4) The `*SelectLoweringStrategy` passes of all backends remain Module
   passes. They need to be Module passes since the transform dialect
   tends to inject the transform script into the module.

5) The `*LowerExecutableStrategy` passes now run on
   `FunctionOpInterface`.

6) The transform dialect interpreter has to run at `Module`
   granularity, so a new pass, `LowerExecutableUsingTransformDialect`,
   is added. It runs the transform interpreter before
   `*SelectLoweringStrategy`. After this pass runs, the
   `translation_info` is expected to have its pipeline set to `None`
   so that subsequent lowering pipelines are skipped.

7) Most tests are updated to drop the boilerplate `hal.executable` and
   `hal.executable.variant` ops that used to surround them.

This does most of the heavy lifting for running lowering strategies
per function-like op. The biggest missing pieces are

1) The `TileAndDistributeToWorkgroups` pass still cannot really be run
   on a dispatch with multiple functions since it updates the
   `hal.executable.export` op. To address this, the pass will have to
   move to using `scf.forall` (a sketch of that direction follows this
   list).

2) Some optimizations expect a static workgroup count. They currently
   walk up to the `hal.executable.export` op to get these values
   (which were populated by `TileAndDistributeToWorkgroups`). When
   moving to `scf.forall` this information will be available within
   the function.
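
As a rough sketch of that direction (illustrative only; the function name
and trip counts are made up, and this is not what the pass currently
produces), distribution to workgroups expressed as an `scf.forall` keeps the
workgroup count visible inside the function:

```
// Illustrative only: distributing to workgroups with scf.forall makes the
// workgroup count a property of the loop inside the function instead of an
// attribute on hal.executable.export.
func.func @example_dispatch() {
  scf.forall (%wg_y, %wg_x) in (4, 8) {
    // ... per-workgroup tile of the computation ...
  }
  return
}
```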


ci-extra: build_test_all_arm64, build_test_all_windows,
build_test_all_macos_arm64, build_test_all_macos_x86_64,
test_nvidia_a100
diff --git a/samples/transform_dialect/example_module.mlir b/samples/transform_dialect/example_module.mlir
index 9ea0f78..13128e1 100644
--- a/samples/transform_dialect/example_module.mlir
+++ b/samples/transform_dialect/example_module.mlir
@@ -125,7 +125,7 @@
 // CODEGEN-PRINTER:     IR printer: Setting matmul strategy to custom_transform_strategy
 // CODEGEN-PRINTER:       translation_info = #iree_codegen.translation_info<TransformDialectCodegen codegen_spec = @custom_transform_strategy>
 // CODEGEN-PRINTER:     IR printer: Setting reduce strategy to base vectorize top-level
-// CODEGEN-PRINTER:       translation_info = #iree_codegen.translation_info<SPIRVBaseVectorize>, workgroup_size = [16 : index, 1 : index, 1 : index]
+// CODEGEN-PRINTER:       translation_info = #iree_codegen.translation_info<SPIRVBaseVectorize workgroup_size = [16, 1, 1]>
 
 /// Then test with threading to make sure it runs
 // RUN: iree-compile %s --iree-hal-target-backends=vulkan \
@@ -135,9 +135,6 @@
 // RUN:   --mlir-disable-threading | \
 // RUN: FileCheck %s --check-prefixes=CODEGEN
 
-// CODEGEN: Ran custom_transform_strategy
 // CODEGEN: spirv.func @example_module_dispatch_0_generic_80_f32
-// CODEGEN: hal.executable private @example_module_dispatch_1
-// CODEGEN:   #iree_codegen.translation_info<TransformDialectCodegen codegen_spec = @custom_transform_strategy>
-// CODEGEN:     spirv.func @example_module_dispatch_1_matmul_16x16x5_f32
+// CODEGEN: spirv.func @example_module_dispatch_1_matmul_16x16x5_f32
 // CODEGEN: spirv.func @example_module_dispatch_2_generic_16x16_f32
diff --git a/samples/transform_dialect/transform_library.mlir b/samples/transform_dialect/transform_library.mlir
index 8b17af7..21349fa 100644
--- a/samples/transform_dialect/transform_library.mlir
+++ b/samples/transform_dialect/transform_library.mlir
@@ -3,7 +3,7 @@
   // the name of this strategy down below before strategy selection, overriding
   // default IREE codegen.
   transform.named_sequence @custom_transform_strategy(
-      %variant_op: !transform.any_op {transform.consumed}) {
+      %variant_op: !transform.any_op) {
     // Step 1. Re-match the matmul
     // ===========================================================================
     %matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op
@@ -40,19 +40,18 @@
     transform.apply_patterns to %func_1 {
       transform.apply_patterns.linalg.erase_unnecessary_inputs
     } : !transform.any_op
-    %variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op : (!transform.any_op) -> (!transform.any_op)
-    %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
+    %memref_func = transform.iree.bufferize { target_gpu } %func_1 : (!transform.any_op) -> (!transform.any_op)
 
     // Step 6. Post-bufferization vector distribution
     // ===========================================================================
-    %func_7 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
+    %func_7 = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
     transform.iree.forall_to_workgroup %func_7 : (!transform.any_op) -> ()
     transform.iree.map_nested_forall_to_gpu_threads %func_7
         workgroup_dims = [4, 8, 1] : (!transform.any_op) -> ()
 
     // Step 7. Do layout analysis and lower to mma
     // ===========================================================================
-    %func_10 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
+    %func_10 = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
     %func_11 = transform.iree.layout_analysis_and_distribution %func_10 : (!transform.any_op) -> (!transform.any_op)
     transform.print {name = "Ran custom_transform_strategy"}
     transform.yield
@@ -61,10 +60,10 @@
   // Send it down a custom transform dialect pipeline.
   transform.named_sequence @custom_matmul(%matmul: !transform.any_op {transform.readonly}) {
     %variant_op = transform.get_parent_op %matmul {op_name = "hal.executable.variant"} : (!transform.any_op) -> !transform.any_op
-    %exports = transform.structured.match ops{["hal.executable.export"]} in %variant_op : (!transform.any_op) -> !transform.any_op
+    %funcs = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
     %subgroup_reduce = transform.param.constant #iree_codegen.translation_info<TransformDialectCodegen
                                                                                codegen_spec = @custom_transform_strategy> -> !transform.any_param
-    transform.annotate %exports "translation_info" = %subgroup_reduce : !transform.any_op, !transform.any_param
+    transform.annotate %funcs "translation_info" = %subgroup_reduce : !transform.any_op, !transform.any_param
     transform.print {name = "Setting matmul strategy to custom_transform_strategy"}
     transform.yield
   }
@@ -74,11 +73,9 @@
     %variant_op = transform.get_parent_op %reduce {op_name = "hal.executable.variant"} : (!transform.any_op) -> !transform.any_op
     %lowering_config = transform.param.constant #iree_codegen.lowering_config<tile_sizes = [[8, 0], [1, 0], [0, 0, 4]]> -> !transform.any_param
     transform.annotate %reduce "lowering_config" = %lowering_config : !transform.any_op, !transform.any_param
-    %exports = transform.structured.match ops{["hal.executable.export"]} in %variant_op : (!transform.any_op) -> !transform.any_op
-    %subgroup_reduce = transform.param.constant #iree_codegen.translation_info<SPIRVBaseVectorize> -> !transform.any_param
-    %workgroup_size = transform.param.constant [16 : index, 1 : index, 1 : index] -> !transform.any_param
-    transform.annotate %exports "translation_info" = %subgroup_reduce : !transform.any_op, !transform.any_param
-    transform.annotate %exports "workgroup_size" = %workgroup_size : !transform.any_op, !transform.any_param
+    %funcs = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
+    %subgroup_reduce = transform.param.constant #iree_codegen.translation_info<SPIRVBaseVectorize workgroup_size = [16, 1, 1]> -> !transform.any_param
+    transform.annotate %funcs "translation_info" = %subgroup_reduce : !transform.any_op, !transform.any_param
     transform.print {name = "Setting reduce strategy to base vectorize"}
     transform.yield
   }