blob: aea8843d2602dcecdc9303a0bfe4cbf2e8fdc5be [file] [log] [blame]
func.func @reduction_aligned() {
%in = util.unfoldable_constant dense<1.0> : tensor<128x384xf32>
%cst = arith.constant 0.0 : f32
%init = linalg.init_tensor [128] : tensor<128xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<128xf32>) -> tensor<128xf32>
%result = linalg.generic {indexing_maps = [
affine_map<(d0, d1) -> (d0, d1)>,affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
ins(%in : tensor<128x384xf32>) outs(%fill : tensor<128xf32>) {
^bb0(%arg3: f32, %arg4: f32): // no predecessors
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<128xf32>
check.expect_eq_const(%result, dense<384.0> : tensor<128xf32>) : tensor<128xf32>
return
}
func.func @reduction_unaligned() {
%in = util.unfoldable_constant dense<1.0> : tensor<129x384xf32>
%cst = arith.constant 0.0 : f32
%init = linalg.init_tensor [129] : tensor<129xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<129xf32>) -> tensor<129xf32>
%result = linalg.generic {indexing_maps = [
affine_map<(d0, d1) -> (d0, d1)>,affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
ins(%in : tensor<129x384xf32>) outs(%fill : tensor<129xf32>) {
^bb0(%arg3: f32, %arg4: f32): // no predecessors
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<129xf32>
check.expect_eq_const(%result, dense<384.0> : tensor<129xf32>) : tensor<129xf32>
return
}