// RUN: iree-dialects-opt %s -linalg-interp-transforms --split-input-file | FileCheck %s
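
// This test exercises the PDL-driven linalg transform interpreter: the
// pdl.pattern and iree_linalg_transform.sequence at the bottom of the module
// select the `iree_linalg_ext.tile` op below and rewrite it into an
// `iree_linalg_ext.in_parallel` op, which the CHECK lines verify.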

// CHECK-DAG: #[[$CEIL_MAP:.*]] = affine_map<()[s0, s1] -> (s1 ceildiv s0)>
// CHECK-DAG: #[[$MUL_MAP:.*]] = affine_map<(d0)[s0] -> (d0 * s0)>
// CHECK-DAG: #[[$SUB_MAP:.*]] = affine_map<(d0)[s0, s1] -> (-(d0 * s0) + s1, s0)>
// CHECK-DAG: #[[$ID1_MAP:.*]] = affine_map<(d0) -> (d0)>

module {
  // CHECK-LABEL: func @static_tile
  // CHECK-SAME: %[[CHUNK_SIZE:[0-9a-z]+]]: index
  // CHECK-SAME: %[[IN:[0-9a-z]+]]: tensor<?xf32>
  // CHECK-SAME: %[[OUT:[0-9a-z]+]]: tensor<?xf32>
  func @static_tile(%chunk_size: index, %in: tensor<?xf32>, %out: tensor<?xf32>) -> (tensor<?xf32>) {
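    // Tile the 1-D elementwise computation `out[i] = in[i] * 42.0` in chunks of
    // %chunk_size elements; the rewrite is expected to map each chunk to one
    // thread of the resulting `in_parallel` op.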
    %c0 = arith.constant 0: index

    // CHECK: %[[M:.*]] = tensor.dim %{{.*}}, %{{.*}} : tensor<?xf32>
    // CHECK: %[[CEIL:.*]] = affine.apply #[[$CEIL_MAP]]()[%[[CHUNK_SIZE]], %[[M]]]
    // CHECK: iree_linalg_ext.in_parallel %[[CEIL]] -> (tensor<?xf32>) {
    %0 = iree_linalg_ext.tile %chunk_size outs(%out: tensor<?xf32>) -> (tensor<?xf32>) {

      // CHECK: ^bb0(%[[TIDX:.*]]: index):
      // CHECK: %[[OFFSET:.*]] = affine.apply #[[$MUL_MAP]](%[[TIDX]])[%[[CHUNK_SIZE]]]
      // CHECK: %[[SIZE:.*]] = affine.min #[[$SUB_MAP]](%[[TIDX]])[%[[CHUNK_SIZE]], %[[M]]]
      // CHECK: %[[O:.*]] = tensor.extract_slice %[[OUT]][%[[OFFSET]]] [%[[SIZE]]] [{{.*}}] : tensor<?xf32> to tensor<?xf32>

      // TODO: should there be one offset and one size per operand tensor?
      // If they are not necessary in the dense strided-array world, what about other representations?
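      // The region is invoked with the current chunk's offset and size plus the
      // matching slice of the `outs` tensor; after the rewrite these are expected
      // to be produced by affine.apply/affine.min and tensor.extract_slice, as
      // the CHECK lines above verify.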
    ^bb0(%offset: index, %size: index, %st1: tensor<?xf32>):

      // TODO: at the moment this is just a 1-1 mapping: out chunk size -> in size.
      // CHECK: %[[I:.*]] = tensor.extract_slice %[[IN]][%[[OFFSET]]] [%[[SIZE]]] [{{.*}}] : tensor<?xf32> to tensor<?xf32>
      %1 = tensor.extract_slice %in[%offset][%size][1] : tensor<?xf32> to tensor<?xf32>

      // CHECK: %[[R:.*]] = linalg.generic {{.*}} ins(%[[I]] : tensor<?xf32>) outs(%[[O]] : tensor<?xf32>)
      %3 = linalg.generic {
          indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
          iterator_types = ["parallel"]}
          ins(%1: tensor<?xf32>) outs(%st1: tensor<?xf32>) {
      ^bb0(%a: f32, %b: f32):
        %f42 = arith.constant 42.0: f32
        %tmp = arith.mulf %a, %f42: f32
        linalg.yield %tmp: f32
      } -> tensor<?xf32>

      // CHECK: iree_linalg_ext.perform_concurrently {
      // CHECK: iree_linalg_ext.parallel_insert_slice %[[R]] into %[[OUT]][%[[OFFSET]]] [%[[SIZE]]] [{{.*}}] : tensor<?xf32> into tensor<?xf32>
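      // The value yielded below is the per-chunk result; the rewrite is expected
      // to write it back into %[[OUT]] through a parallel_insert_slice nested in
      // perform_concurrently, as checked above.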
      iree_linalg_ext.tile_yield %3: tensor<?xf32>
    }
    return %0: tensor<?xf32>
  }

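  // Match any `iree_linalg_ext.tile` op, regardless of operand or result types;
  // the transform sequence below refers to this pattern to select the ops it
  // rewrites.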
  pdl.pattern @match_iree_linalg_ext_tile : benefit(1) {
    %0 = operands
    %1 = types
    %2 = operation "iree_linalg_ext.tile"(%0 : !pdl.range<value>) -> (%1 : !pdl.range<type>)
    rewrite %2 with "iree_linalg_transform.apply"
  }
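  // Apply the tile-to-in_parallel rewrite to every op selected by the pattern
  // above; this is the transformation the CHECK lines at the top verify.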
  iree_linalg_transform.sequence {
    %0 = match @match_iree_linalg_ext_tile
    %1 = rewrite_iree_linalg_ext_tile_to_in_parallel %0
  }
}