// blob: 0acd8c77808734e25383d0ee94a8bb0c7b58e091 [file] [log] [blame]
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
module {
  // Adds a zero-filled 16x128 tensor, broadcast along the innermost dimension,
  // to every element of the 16x128x128 input and returns the result after
  // passing it through an optimization barrier.
  //
  // NOTE(review): despite the name `@softmax`, the visible body performs only
  // an elementwise add of a broadcast zero tensor; presumably this is a
  // reduced test case -- confirm against the test that consumes this file.
  func.func @softmax(%arg0: tensor<16x128x128xf32>) -> tensor<16x128x128xf32> {
    %cst = arith.constant 0.000000e+00 : f32

    // Build the 16x128 zero tensor used as the broadcast operand.
    %0 = tensor.empty() : tensor<16x128xf32>
    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x128xf32>) -> tensor<16x128xf32>
    // NOTE(review): this second fill writes the same constant into the tensor
    // that was just filled. It is kept unchanged in case the input IR shape is
    // intentional -- confirm before removing.
    %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<16x128xf32>) -> tensor<16x128xf32>

    // All three loops are parallel; #map1 drops d2, so %2 is broadcast along
    // the innermost dimension while %arg0 and the output use the identity map.
    %3 = tensor.empty() : tensor<16x128x128xf32>
    %4 = linalg.generic {
      indexing_maps = [#map, #map1, #map],
      iterator_types = ["parallel", "parallel", "parallel"]
    } ins(%arg0, %2 : tensor<16x128x128xf32>, tensor<16x128xf32>)
      outs(%3 : tensor<16x128x128xf32>) {
    ^bb0(%in: f32, %in_0: f32, %out: f32):
      %6 = arith.addf %in, %in_0 : f32
      linalg.yield %6 : f32
    } -> tensor<16x128x128xf32>

    // Return the barrier's result (not %4 directly) so the returned value
    // actually flows through the optimization barrier instead of leaving the
    // barrier's result unused.
    %5 = util.optimization_barrier %4 : tensor<16x128x128xf32>
    return %5 : tensor<16x128x128xf32>
  }
}