module attributes { transform.with_named_sequence } {
  // Example of a custom matmul strategy. The target matmul's dispatch is
  // annotated with the name of this strategy (see @custom_matmul below)
  // before strategy selection, overriding the default IREE codegen.
  transform.named_sequence @custom_transform_strategy(
      %variant_op: !transform.any_op) {
    // Step 1. Re-match the matmul (matcher handles are not forwarded into
    // the strategy).
    // ===========================================================================
    %matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op

    // Step 2. Tile to grid
    // ===========================================================================
    %tiled_matmul, %forall_grid =
        transform.structured.tile_using_forall %matmul tile_sizes [16, 16]
        (mapping = [#gpu.block<x>, #gpu.block<y>])
        : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
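    // Use the grid-level scf.forall produced above to fill in the workgroup
    // count region of the enclosing hal.executable.export op.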
    transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall_grid : (!transform.any_op) -> ()

    // Step 3. Vectorize
    // ===========================================================================
    %func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
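    // Simplify the IR before vectorization: fold reshapes into the HAL
    // interface bindings, drop unit-extent dims, and cast away leading unit
    // vector dimensions.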
    transform.apply_patterns to %func {
      transform.apply_patterns.iree.fold_reshape_into_tensor_hal_interface
      transform.apply_patterns.linalg.fold_unit_extent_dims_via_slices
      transform.apply_patterns.vector.cast_away_vector_leading_one_dim
    } : !transform.any_op
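    // Vectorize all vectorizable ops nested inside the function and run the
    // usual post-vectorization cleanup patterns.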
    %func_1 = transform.structured.vectorize_children_and_apply_patterns %func : (!transform.any_op) -> !transform.any_op

    // Step 4. Bufferize
    // ===========================================================================
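    // Canonicalize, fold, and CSE first so that one-shot bufferization sees
    // cleaner IR and introduces fewer allocations.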
    transform.apply_patterns to %func_1 {
      transform.apply_patterns.iree.fold_fill_into_pad
      transform.apply_patterns.linalg.tiling_canonicalization
      transform.apply_patterns.scf.for_loop_canonicalization
    } : !transform.any_op
    transform.apply_patterns to %func_1 {
      transform.apply_patterns.tensor.reassociative_reshape_folding
      transform.apply_patterns.canonicalization
    } : !transform.any_op
    transform.apply_cse to %func_1 : !transform.any_op
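    // Replace tensor.empty ops with destinations where possible so they do
    // not turn into allocations during bufferization.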
    transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()
    transform.apply_patterns to %func_1 {
      transform.apply_patterns.linalg.erase_unnecessary_inputs
    } : !transform.any_op
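    // One-shot bufferize the whole function; target_gpu selects GPU-suitable
    // allocation handling.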
    %memref_func = transform.iree.bufferize { target_gpu } %func_1 : (!transform.any_op) -> (!transform.any_op)

    // Step 5. Post-bufferization vector distribution
    // ===========================================================================
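    // Re-match the bufferized function, lower the grid-level forall into the
    // implicit workgroup grid, and distribute the remaining foralls onto a
    // 4x8x1 thread block.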
    %func_2 = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
    transform.iree.forall_to_workgroup %func_2 : (!transform.any_op) -> ()
    transform.iree.map_nested_forall_to_gpu_threads %func_2
        workgroup_dims = [4, 8, 1] : (!transform.any_op) -> ()

    transform.print {name = "Ran custom_transform_strategy"}
    transform.yield
  }

  // Send the matmul down a custom transform dialect pipeline.
  transform.named_sequence @custom_matmul(%matmul: !transform.any_op {transform.readonly}) {
    %variant_op = transform.get_parent_op %matmul {op_name = "hal.executable.variant"} : (!transform.any_op) -> !transform.any_op
    %funcs = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
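    // Build a translation_info attribute pointing at the strategy above and
    // attach it to the dispatch functions; backend kernel config leaves a
    // user-provided configuration in place.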
    %custom_strategy = transform.param.constant #iree_codegen.translation_info<pipeline = TransformDialectCodegen
        codegen_spec = @custom_transform_strategy> -> !transform.any_param
    transform.annotate %funcs "translation_info" = %custom_strategy : !transform.any_op, !transform.any_param
    transform.print {name = "Setting matmul strategy to custom_transform_strategy"}
    transform.yield
  }

  // Send the reduction down SPIRVBaseVectorize with a custom tiling
  // configuration.
  transform.named_sequence @use_base_vectorize(%reduce: !transform.any_op {transform.readonly}) {
    %variant_op = transform.get_parent_op %reduce {op_name = "hal.executable.variant"} : (!transform.any_op) -> !transform.any_op
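    // Attach an op-specific lowering_config carrying the tile sizes for the
    // tiling levels consumed by the SPIRVBaseVectorize pipeline.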
    %lowering_config = transform.param.constant #iree_codegen.lowering_config<tile_sizes = [[8, 0], [1, 0], [0, 0, 4]]> -> !transform.any_param
    transform.annotate %reduce "lowering_config" = %lowering_config : !transform.any_op, !transform.any_param
    %funcs = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
    %base_vectorize = transform.param.constant #iree_codegen.translation_info<pipeline = SPIRVBaseVectorize workgroup_size = [16, 1, 1]> -> !transform.any_param
    transform.annotate %funcs "translation_info" = %base_vectorize : !transform.any_op, !transform.any_param
    transform.print {name = "Setting reduce strategy to base vectorize"}
    transform.yield
  }

  //===------------------------------------------------------===
  // Matchers
  //===------------------------------------------------------===
  transform.named_sequence @match_matmul(%matmul: !transform.any_op {transform.readonly}) -> (!transform.any_op) {
    transform.match.operation_name %matmul ["linalg.matmul"] : !transform.any_op
    transform.yield %matmul : !transform.any_op
  }

  transform.named_sequence @match_reduce(%reduce: !transform.any_op {transform.readonly}) -> (!transform.any_op) {
    transform.match.operation_name %reduce ["linalg.generic"] : !transform.any_op
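    // Structurally require a rank-2 linalg.generic whose innermost dimension
    // is a reduction; anything else fails the match.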
    %matched = transform.match.structured failures(propagate) %reduce : (!transform.any_op) -> (!transform.any_op) {
    ^bb1(%arg1: !transform.any_op):
      %c2 = transform.param.constant 2 : i64 -> !transform.param<i64>
      %rank = transform.match.structured.rank %arg1 : (!transform.any_op) -> !transform.param<i64>
      transform.match.param.cmpi eq %rank, %c2 : !transform.param<i64>

      transform.match.structured.dim %arg1[-1] {reduction} : !transform.any_op
      transform.match.structured.yield %arg1 : !transform.any_op
    }
    transform.yield %matched : !transform.any_op
  }

  // An example of a custom transform dialect based kernel config. Note that
  // because of the way `transform.foreach_match` works, the callback cannot
  // manipulate IR beyond the op *given* to the matcher, as foreach_match will
  // attempt to keep walking the IR even after a successful match. The
  // expected flow for a strategy like this is as follows:
  //
  // Author an entry point (here @kernel_config) that walks the IR and
  // attempts to annotate the dispatch with the codegen strategy to use, i.e.
  //   transform.foreach_match in %variant_op
  //       @matcher_0 -> @annotator_0,
  //       @matcher_1 -> @annotator_1,
  //       ...
  //
  // The annotators should attach an #iree_codegen.translation_info attribute
  // to the dispatch functions within the variant (as @custom_matmul and
  // @use_base_vectorize do above), as well as any relevant op-specific tile
  // sizes (and other important attributes such as workgroup_size and
  // subgroup_size, where relevant). This then gets handed off to the
  // backend-specific kernel config, which lets these user configs pass
  // through unperturbed.
  //
  // To couple this with a transform dialect based codegen strategy, the
  // target codegen strategy can be included inline in this library, and the
  // relevant ops can be annotated with `TransformDialectCodegen` as the
  // lowering pipeline, with a reference to the strategy to use (see
  // @custom_transform_strategy above).
  transform.named_sequence @kernel_config(%variant_op: !transform.any_op {transform.consumed}) {
    transform.foreach_match in %variant_op
        @match_matmul -> @custom_matmul,
        @match_reduce -> @use_base_vectorize
      : (!transform.any_op) -> (!transform.any_op)
    transform.yield
  }
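
  // A library like this is typically handed to the compiler via a flag along
  // the lines of `--iree-codegen-transform-dialect-library=` (an assumption
  // here; the exact flag and entry-point lookup can vary across IREE
  // versions).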
}