blob: aefe3b30538045c02524de4cbf8e9e5610c9fe1c [file] [log] [blame]
transform.sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
// Step 1. Find three linalg.generics and tile to GPU thread blocks.
// ===========================================================================
%generics = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
transform.iree.tile_to_forall_and_workgroup_count_region %generics
tile_sizes [5, 3] ( mapping = [#gpu.block<z>, #gpu.block<x>])
// Step 2. Rank reduce and bufferize and drop HAL decriptor from memref ops.
// ===========================================================================
%func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
transform.iree.apply_patterns %func { rank_reducing_linalg, rank_reducing_vector } : (!pdl.operation) -> ()
transform.iree.eliminate_empty_tensors %variant_op : (!pdl.operation) -> ()
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op : (!pdl.operation) -> !pdl.operation
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func : (!pdl.operation) -> ()
// Step 3. Map to GPU thread blocks.
// ===========================================================================
transform.iree.forall_to_workgroup %memref_func : (!pdl.operation) -> ()
}