module attributes { transform.with_named_sequence } {
// Example of a custom matmul strategy. The target matmul is annotated with
// the name of this strategy in @custom_matmul below, before strategy
// selection, overriding the default IREE codegen.
transform.named_sequence @custom_transform_strategy(
%variant_op: !transform.any_op) {
// Step 1. Re-match the matmul
// ===========================================================================
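// The strategy only receives a handle to the variant op, so the matmul must
// be recovered here by re-matching inside the variant.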
%matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op
// Step 2. Tile to grid
// ===========================================================================
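// Tile the matmul to a 16x16 workgroup tile, mapping the resulting
// scf.forall to GPU blocks along x and y.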
%grid_reduction, %forall_grid =
transform.structured.tile_using_forall %matmul tile_sizes [16, 16] ( mapping = [#gpu.block<x>, #gpu.block<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
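// Derive the dispatch's workgroup count region from the scf.forall created above.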
transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall_grid : (!transform.any_op) -> ()
// Step 3. Vectorize
// ===========================================================================
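// Fold reshapes and unit dims on the surrounding function, then vectorize
// the remaining linalg ops.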
%func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func {
transform.apply_patterns.iree.fold_reshape_into_tensor_hal_interface
transform.apply_patterns.linalg.fold_unit_extent_dims_via_slices
transform.apply_patterns.vector.cast_away_vector_leading_one_dim
} : !transform.any_op
%func_1 = transform.structured.vectorize_children_and_apply_patterns %func : (!transform.any_op) -> !transform.any_op
// Step 4. Bufferize
// ===========================================================================
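// Canonicalize, fold, and CSE so bufferization sees a clean function;
// eliminate empty tensors, then bufferize with the GPU target option.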
transform.apply_patterns to %func_1 {
transform.apply_patterns.iree.fold_fill_into_pad
transform.apply_patterns.linalg.tiling_canonicalization
transform.apply_patterns.scf.for_loop_canonicalization
} : !transform.any_op
transform.apply_patterns to %func_1 {
transform.apply_patterns.tensor.reassociative_reshape_folding
transform.apply_patterns.canonicalization
} : !transform.any_op
transform.apply_cse to %func_1 : !transform.any_op
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()
transform.apply_patterns to %func_1 {
transform.apply_patterns.linalg.erase_unnecessary_inputs
} : !transform.any_op
%memref_func = transform.iree.bufferize { target_gpu } %func_1 : (!transform.any_op) -> (!transform.any_op)
// Step 5. Post-bufferization mapping to workgroups and threads
// ===========================================================================
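// Lower the workgroup-level scf.forall to the workgroup grid and map the
// remaining nested foralls onto a [4, 8, 1] thread block.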
%func_7 = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.iree.forall_to_workgroup %func_7 : (!transform.any_op) -> ()
transform.iree.map_nested_forall_to_gpu_threads %func_7
workgroup_dims = [4, 8, 1] : (!transform.any_op) -> ()
transform.print {name = "Ran custom_transform_strategy"}
transform.yield
}
// Send the matmul down the custom transform dialect pipeline defined above.
transform.named_sequence @custom_matmul(%matmul: !transform.any_op {transform.readonly}) {
%variant_op = transform.get_parent_op %matmul {op_name = "hal.executable.variant"} : (!transform.any_op) -> !transform.any_op
%funcs = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
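// Point codegen at the custom strategy by attaching a TransformDialectCodegen
// translation_info that references @custom_transform_strategy.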
%translation_info = transform.param.constant #iree_codegen.translation_info<pipeline = TransformDialectCodegen
codegen_spec = @custom_transform_strategy> -> !transform.any_param
transform.annotate %funcs "translation_info" = %translation_info : !transform.any_op, !transform.any_param
transform.print {name = "Setting matmul strategy to custom_transform_strategy"}
transform.yield
}
// Send the reduction down the SPIRVBaseVectorize pipeline with a custom tiling configuration.
transform.named_sequence @use_base_vectorize(%reduce: !transform.any_op {transform.readonly}) {
%variant_op = transform.get_parent_op %reduce {op_name = "hal.executable.variant"} : (!transform.any_op) -> !transform.any_op
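// Attach an op-specific tiling configuration; the nested lists correspond to
// successive tiling levels (assumed here to be workgroup, thread, and
// reduction tile sizes for this pipeline).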
%lowering_config = transform.param.constant #iree_codegen.lowering_config<tile_sizes = [[8, 0], [1, 0], [0, 0, 4]]> -> !transform.any_param
transform.annotate %reduce "lowering_config" = %lowering_config : !transform.any_op, !transform.any_param
%funcs = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%translation_info = transform.param.constant #iree_codegen.translation_info<pipeline = SPIRVBaseVectorize workgroup_size = [16, 1, 1]> -> !transform.any_param
transform.annotate %funcs "translation_info" = %translation_info : !transform.any_op, !transform.any_param
transform.print {name = "Setting reduce strategy to base vectorize"}
transform.yield
}
//===------------------------------------------------------===
// Matchers
//===------------------------------------------------------===
transform.named_sequence @match_matmul(%matmul: !transform.any_op {transform.readonly}) -> (!transform.any_op) {
transform.match.operation_name %matmul ["linalg.matmul"] : !transform.any_op
transform.yield %matmul : !transform.any_op
}
transform.named_sequence @match_reduce(%reduce: !transform.any_op {transform.readonly}) -> (!transform.any_op) {
transform.match.operation_name %reduce ["linalg.generic"] : !transform.any_op
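// Require a rank-2 structured op whose innermost dimension is a reduction.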
%matched = transform.match.structured failures(propagate) %reduce : (!transform.any_op) -> (!transform.any_op) {
^bb1(%arg1: !transform.any_op):
%c2 = transform.param.constant 2 : i64 -> !transform.param<i64>
%rank = transform.match.structured.rank %arg1 : (!transform.any_op) -> !transform.param<i64>
transform.match.param.cmpi eq %rank, %c2 : !transform.param<i64>
transform.match.structured.dim %arg1[-1] {reduction} : !transform.any_op
transform.match.structured.yield %arg1 : !transform.any_op
}
transform.yield %matched : !transform.any_op
}
// An example of a custom transform dialect based kernel config. Note that
// because of the way `transform.foreach_match` works, the callback cannot
// manipulate IR beyond the op *given* to the matcher, as foreach_match will
// attempt to keep walking the IR even after a successful match. The expected
// flow for a strategy like this is as follows:
//
// Author an entry point like this one (@kernel_config) that walks the IR and
// attempts to annotate each dispatch with the codegen strategy to use, e.g.
//
//   transform.foreach_match in %variant_op
//       @matcher_0 -> @annotator_0,
//       @matcher_1 -> @annotator_1,
//       ...
//
// The annotators should attach an #iree_codegen.translation_info attribute
// to the `func.func` ops within the variant (as done above), as well as any
// relevant op-specific tile sizes (and other important attributes such as
// workgroup_size and subgroup_size, where relevant). This then gets handed
// off to backend-specific kernel config, which lets these user configs pass
// through unperturbed.
//
// To couple this with a transform dialect based codegen strategy, the target
// codegen strategy can be included inline with this library and relevant ops
// can be annotated with `TransformDialectCodegen` as the lowering pipeline,
// with a reference to the strategy to use (see an example above).
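//
// A library like this is typically handed to the compiler as an external
// transform spec; assuming current IREE flag names, the invocation looks
// something like:
//   --iree-codegen-transform-dialect-library=<this_file>.mlir@kernel_config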
transform.named_sequence @kernel_config(%variant_op: !transform.any_op {transform.consumed}) {
%res = transform.foreach_match in %variant_op
@match_matmul -> @custom_matmul,
@match_reduce -> @use_base_vectorize
: (!transform.any_op) -> (!transform.any_op)
transform.yield
}
}