tests/e2e/regression/large_reduction.mlir - 3p/openxla/iree - Git at Google

 func.func @reduction_aligned() {
   %in = util.unfoldable_constant dense<1.0> : tensor<128x384xf32>
   %cst = arith.constant 0.0 : f32
   %init = linalg.init_tensor [128] : tensor<128xf32>
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<128xf32>) -> tensor<128xf32>
   %result = linalg.generic {indexing_maps = [
     affine_map<(d0, d1) -> (d0, d1)>,affine_map<(d0, d1) -> (d0)>],
     iterator_types = ["parallel", "reduction"]}
     ins(%in : tensor<128x384xf32>) outs(%fill : tensor<128xf32>) {
     ^bb0(%arg3: f32, %arg4: f32):  // no predecessors
       %2 = arith.addf %arg3, %arg4 : f32
       linalg.yield %2 : f32
     } -> tensor<128xf32>
   check.expect_eq_const(%result, dense<384.0> : tensor<128xf32>) : tensor<128xf32>
   return
 }

 func.func @reduction_unaligned() {
   %in = util.unfoldable_constant dense<1.0> : tensor<129x384xf32>
   %cst = arith.constant 0.0 : f32
   %init = linalg.init_tensor [129] : tensor<129xf32>
   %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<129xf32>) -> tensor<129xf32>
   %result = linalg.generic {indexing_maps = [
     affine_map<(d0, d1) -> (d0, d1)>,affine_map<(d0, d1) -> (d0)>],
     iterator_types = ["parallel", "reduction"]}
     ins(%in : tensor<129x384xf32>) outs(%fill : tensor<129xf32>) {
     ^bb0(%arg3: f32, %arg4: f32):  // no predecessors
       %2 = arith.addf %arg3, %arg4 : f32
       linalg.yield %2 : f32
     } -> tensor<129xf32>
   check.expect_eq_const(%result, dense<384.0> : tensor<129xf32>) : tensor<129xf32>
   return
 }
	func.func @reduction_aligned() {
	%in = util.unfoldable_constant dense<1.0> : tensor<128x384xf32>
	%cst = arith.constant 0.0 : f32
	%init = linalg.init_tensor [128] : tensor<128xf32>
	%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<128xf32>) -> tensor<128xf32>
	%result = linalg.generic {indexing_maps = [
	affine_map<(d0, d1) -> (d0, d1)>,affine_map<(d0, d1) -> (d0)>],
	iterator_types = ["parallel", "reduction"]}
	ins(%in : tensor<128x384xf32>) outs(%fill : tensor<128xf32>) {
	^bb0(%arg3: f32, %arg4: f32): // no predecessors
	%2 = arith.addf %arg3, %arg4 : f32
	linalg.yield %2 : f32
	} -> tensor<128xf32>
	check.expect_eq_const(%result, dense<384.0> : tensor<128xf32>) : tensor<128xf32>
	return
	}

	func.func @reduction_unaligned() {
	%in = util.unfoldable_constant dense<1.0> : tensor<129x384xf32>
	%cst = arith.constant 0.0 : f32
	%init = linalg.init_tensor [129] : tensor<129xf32>
	%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<129xf32>) -> tensor<129xf32>
	%result = linalg.generic {indexing_maps = [
	affine_map<(d0, d1) -> (d0, d1)>,affine_map<(d0, d1) -> (d0)>],
	iterator_types = ["parallel", "reduction"]}
	ins(%in : tensor<129x384xf32>) outs(%fill : tensor<129xf32>) {
	^bb0(%arg3: f32, %arg4: f32): // no predecessors
	%2 = arith.addf %arg3, %arg4 : f32
	linalg.yield %2 : f32
	} -> tensor<129xf32>
	check.expect_eq_const(%result, dense<384.0> : tensor<129xf32>) : tensor<129xf32>
	return
	}