blob: ac383472495bc692a6d61a996ffca6e44cf3c971 [file] [log] [blame]
// Test large aligned linalg matmul to make sure we go through the optimized
// path for GPUs.
// Problem size : 2048x512x1024
// Input type : F32
// Accumulation type : F32
func.func @matmul_2048x512x1024_f32_f32() {
  // Scalar zero used to initialize the accumulator.
  %zero = arith.constant 0.0 : f32
  // Inputs are unfoldable so the compiler cannot constant-fold the matmul away.
  %A = util.unfoldable_constant dense<1.0> : tensor<2048x1024xf32>
  %B = util.unfoldable_constant dense<0.4> : tensor<1024x512xf32>
  %empty = tensor.empty() : tensor<2048x512xf32>
  %acc = linalg.fill ins(%zero : f32) outs(%empty : tensor<2048x512xf32>) -> tensor<2048x512xf32>
  %result = linalg.matmul ins(%A, %B : tensor<2048x1024xf32>, tensor<1024x512xf32>)
                          outs(%acc : tensor<2048x512xf32>) -> tensor<2048x512xf32>
  // Every element is the sum of 1024 products of 1.0 * 0.4.
  // NOTE(review): expected 409.596 rather than exact 409.6 — presumably accounts
  // for f32 rounding in the accumulation; confirm against the tolerance of
  // check.expect_almost_eq_const before changing.
  check.expect_almost_eq_const(%result, dense<409.596> : tensor<2048x512xf32>) : tensor<2048x512xf32>
  return
}
// Problem size : 3456x1024x2048
// Input type : F16
// Accumulation type : F16
func.func @matmul_3456x1024x2048_f16_f16() {
  // Scalar zero used to initialize the accumulator.
  %zero = arith.constant 0.0 : f16
  // Inputs are unfoldable so the compiler cannot constant-fold the matmul away.
  %A = util.unfoldable_constant dense<1.00> : tensor<3456x2048xf16>
  %B = util.unfoldable_constant dense<0.01> : tensor<2048x1024xf16>
  %empty = tensor.empty() : tensor<3456x1024xf16>
  %acc = linalg.fill ins(%zero : f16) outs(%empty : tensor<3456x1024xf16>) -> tensor<3456x1024xf16>
  %result = linalg.matmul ins(%A, %B : tensor<3456x2048xf16>, tensor<2048x1024xf16>)
                          outs(%acc : tensor<3456x1024xf16>) -> tensor<3456x1024xf16>
  // Every element is the sum of 2048 products of 1.00 * 0.01.
  // NOTE(review): expected 20.2812 rather than exact 20.48 — presumably reflects
  // f16 precision limits of the constant 0.01 and the accumulation; confirm
  // against the tolerance of check.expect_almost_eq_const before changing.
  check.expect_almost_eq_const(%result, dense<20.2812> : tensor<3456x1024xf16>) : tensor<3456x1024xf16>
  return
}