# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# End-to-end matrix multiplication tests.
load("//build_tools/bazel:iree_e2e_generated_runner_test.bzl", "iree_generated_e2e_runner_test")
package(
features = ["layering_check"],
licenses = ["notice"], # Apache 2.0
)
py_binary(
name = "generate_e2e_matmul_tests",
srcs = ["generate_e2e_matmul_tests.py"],
)
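
# For local debugging, the generator can also be run by hand. A representative
# invocation (the output flag names are assumed from the script, not guaranteed):
#   python generate_e2e_matmul_tests.py --lhs_rhs_type=f32 --acc_type=f32 \
#     --output_matmuls_mlir=matmuls.mlir --output_calls_mlir=calls.mlir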
###########################################################################
##
## LLVMCPU backend
##
###########################################################################
# LLVMCPU, non-data-tiling, no microkernels
[iree_generated_e2e_runner_test(
name = "e2e_matmul_cpu_nondt_%s_%s" % (lhs_rhs_type, acc_type),
compiler_flags = [
"--iree-opt-data-tiling=false",
"--iree-llvmcpu-enable-ukernels=none",
"--iree-llvmcpu-enable-scalable-vectorization",
"--iree-llvmcpu-target-triple=aarch64-unknown-unknown",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
],
tags = [
# f16/bf16 trigger internal LLVM assertion errors on riscv and wasm.
"noriscv",
"nowasm",
] if (lhs_rhs_type == "f16" or lhs_rhs_type == "bf16") else [],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = ["generic"] +
# Widening matmuls fail to lower for SVE.
(["arm_64:sve:+sve"] if lhs_rhs_type == acc_type else []),
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for (lhs_rhs_type, acc_type) in [
# ("i8", "i32"), # TODO(#15800): enable once compile time is reasonable
# ("f32", "f32"), # TODO(#15800): enable once compile time is reasonable
# ("f16", "f16"), # TODO(#15800): enable once compile time is reasonable
# ("f16", "f32"), # TODO(#15800): enable once compile time is reasonable
# TODO(#15258): enable bf16 tests when that bug is fixed.
# ("bf16", "bf16"),
# ("bf16", "f32"),
]]
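
# NOTE: every (lhs_rhs_type, acc_type) pair above is currently commented out,
# so this comprehension generates no targets today. If ("f32", "f32") were
# re-enabled, the resulting target would be named e2e_matmul_cpu_nondt_f32_f32.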
PREPROCESSING_TRANSPOSE_LHS = "--iree-preprocessing-pass-pipeline=builtin.module\\(util.func\\(iree-preprocessing-transpose-matmul-pass{input=lhs}\\)\\)"
PREPROCESSING_PEEL = "--iree-llvmcpu-vector-pproc-strategy=peel"
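
# The escaped \\( ... \\) in PREPROCESSING_TRANSPOSE_LHS survive flag
# tokenization in the generated test command; the compiler ultimately sees:
#   --iree-preprocessing-pass-pipeline=builtin.module(util.func(iree-preprocessing-transpose-matmul-pass{input=lhs}))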
# LLVMCPU, non-data-tiling, no microkernels, ArmSME
[iree_generated_e2e_runner_test(
name = "e2e_matmul_cpu_arm_sme_nondt_%s_%s%s" % (
dtype,
"_transpose_lhs" if transpose_lhs else "",
"_peel" if peel else "",
),
compiler_flags = [
"--iree-opt-data-tiling=false",
"--iree-llvmcpu-enable-scalable-vectorization",
"--iree-llvmcpu-target-triple=aarch64-unknown-unknown",
] + ([PREPROCESSING_TRANSPOSE_LHS] if transpose_lhs else []) +
([PREPROCESSING_PEEL] if peel else []),
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % dtype,
"--acc_type=%s" % dtype,
],
tags = [
"requires-arm-sme",
],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = ["arm_64:sme:+sve,+sme"],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for dtype in [
"f32",
# f64 disabled because it wasn't supported by the test generator at the time
# this was added. When adding it in the future, consider passing
# --iree-input-demote-f64-to-f32=false to the compiler.
# "f64"
] for transpose_lhs in [
True,
False,
] for peel in [
True,
False,
]]
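
# Example expanded names from the comprehension above:
#   e2e_matmul_cpu_arm_sme_nondt_f32
#   e2e_matmul_cpu_arm_sme_nondt_f32_transpose_lhs
#   e2e_matmul_cpu_arm_sme_nondt_f32_peel
#   e2e_matmul_cpu_arm_sme_nondt_f32_transpose_lhs_peel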
X86_64_AVX2 = [
"+avx",
"+avx2",
"+fma",
"+f16c",
]
X86_64_AVX512 = X86_64_AVX2 + [
"+avx512f",
"+avx512vl",
"+avx512cd",
"+avx512bw",
"+avx512dq",
]
X86_64_AVX512_VNNI = X86_64_AVX512 + [
"+avx512vnni",
]
X86_64_AVX512_BF16 = X86_64_AVX512 + [
"+avx512bf16",
]
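
# Each target_cpu_features_variants entry below follows the
# "<arch>:<variant-name>:<comma-separated target features>" convention used by
# the test rule ("generic" is the featureless baseline), e.g.:
#   "x86_64:avx2:" + ",".join(X86_64_AVX2)  # -> "x86_64:avx2:+avx,+avx2,+fma,+f16c"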
# LLVMCPU, data-tiling, data-tiling + ukernels.
[iree_generated_e2e_runner_test(
name = "e2e_matmul_cpu_dt%s_%s_%s" % (
("_uk" if use_uk else ""),
lhs_rhs_type,
acc_type,
),
    compiler_flags = [
        "--iree-opt-data-tiling",
        "--iree-llvmcpu-enable-ukernels=%s" % ("all" if use_uk else "none"),
    ] + (["--iree-input-demote-f64-to-f32=false"] if acc_type == "f64" else []),
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
] + (["--shapes=small"] if acc_type == "f64" else []),
tags = ([
# f16/bf16 trigger internal LLVM assertion errors on riscv and wasm.
"noriscv",
"nowasm",
] if (lhs_rhs_type == "f16" or lhs_rhs_type == "bf16") else []),
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = ["generic"] +
([
"arm_64:dotprod:+dotprod",
"arm_64:i8mm:+i8mm",
"x86_64:avx512vnni:" + ",".join(X86_64_AVX512_VNNI),
] if lhs_rhs_type == "i8" and acc_type == "i32" else [
"x86_64:avx2:" + ",".join(X86_64_AVX2),
"x86_64:avx512:" + ",".join(X86_64_AVX512),
] if lhs_rhs_type == "f32" and acc_type == "f32" else [
"x86_64:avx2:" + ",".join(X86_64_AVX2),
"x86_64:avx512:" + ",".join(X86_64_AVX512),
"arm_64:fullfp16:+fullfp16",
] if lhs_rhs_type == "f16" and acc_type == "f16" else [
"x86_64:avx2:" + ",".join(X86_64_AVX2),
"x86_64:avx512:" + ",".join(X86_64_AVX512),
"arm_64:fp16fml:+fp16fml",
] if lhs_rhs_type == "f16" and acc_type == "f32" else [
"x86_64:avx2:" + ",".join(X86_64_AVX2),
"x86_64:avx512:" + ",".join(X86_64_AVX512),
"x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16),
"arm_64:bf16:+bf16",
] if lhs_rhs_type == "bf16" and acc_type == "bf16" else [
"x86_64:avx2:" + ",".join(X86_64_AVX2),
"x86_64:avx512:" + ",".join(X86_64_AVX512),
"x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16),
"arm_64:bf16:+bf16",
] if lhs_rhs_type == "bf16" and acc_type == "f32" else []),
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for use_uk in [
False,
True,
] for (lhs_rhs_type, acc_type) in (
[
("i8", "i32"),
("f32", "f32"),
("f64", "f64"),
("f16", "f16"),
("f16", "f32"),
("bf16", "bf16"),
("bf16", "f32"),
]
)]
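
# Example expanded names: e2e_matmul_cpu_dt_f32_f32 (data-tiling, no
# microkernels) and e2e_matmul_cpu_dt_uk_f32_f32 (data-tiling + microkernels).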
# LLVMCPU, data-tiling, data-tiling + ukernels + late materialization.
[iree_generated_e2e_runner_test(
name = "e2e_matmul_cpu_experimental_dt%s_%s_%s" % (
("_uk" if use_uk else ""),
lhs_rhs_type,
acc_type,
),
    compiler_flags = [
        "--iree-opt-data-tiling",
        "--iree-global-opt-enable-early-materialization=false",
        "--iree-llvmcpu-enable-ukernels=%s" % ("all" if use_uk else "none"),
    ],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
],
tags = ([
# f16/bf16 trigger internal LLVM assertion errors on riscv and wasm.
"noriscv",
"nowasm",
] if (lhs_rhs_type == "f16" or lhs_rhs_type == "bf16") else []),
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = ["generic"] +
([
"arm_64:dotprod:+dotprod",
"arm_64:i8mm:+i8mm",
"x86_64:avx512vnni:" + ",".join(X86_64_AVX512_VNNI),
] if lhs_rhs_type == "i8" and acc_type == "i32" else [
"x86_64:avx2:" + ",".join(X86_64_AVX2),
"x86_64:avx512:" + ",".join(X86_64_AVX512),
] if lhs_rhs_type == "f32" and acc_type == "f32" else [
"x86_64:avx2:" + ",".join(X86_64_AVX2),
"x86_64:avx512:" + ",".join(X86_64_AVX512),
"arm_64:fullfp16:+fullfp16",
] if lhs_rhs_type == "f16" and acc_type == "f16" else [
"x86_64:avx2:" + ",".join(X86_64_AVX2),
"x86_64:avx512:" + ",".join(X86_64_AVX512),
"arm_64:fp16fml:+fp16fml",
] if lhs_rhs_type == "f16" and acc_type == "f32" else [
"x86_64:avx2:" + ",".join(X86_64_AVX2),
"x86_64:avx512:" + ",".join(X86_64_AVX512),
"x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16),
"arm_64:bf16:+bf16",
] if lhs_rhs_type == "bf16" and acc_type == "bf16" else [
"x86_64:avx2:" + ",".join(X86_64_AVX2),
"x86_64:avx512:" + ",".join(X86_64_AVX512),
"x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16),
"arm_64:bf16:+bf16",
] if lhs_rhs_type == "bf16" and acc_type == "f32" else []),
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for use_uk in [
False,
True,
] for (lhs_rhs_type, acc_type) in (
[
("i8", "i32"),
("f32", "f32"),
("f16", "f16"),
("f16", "f32"),
("bf16", "bf16"),
("bf16", "f32"),
]
)]
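
# VMVX, data-tiling + late materialization.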
[iree_generated_e2e_runner_test(
name = "e2e_matmul_vmvx_experimental_dt%s_%s_%s" % (
("_uk" if use_uk else ""),
lhs_rhs_type,
acc_type,
),
    compiler_flags = [
        "--iree-opt-data-tiling",
        "--iree-global-opt-enable-early-materialization=false",
    ] + (["--iree-vmvx-enable-microkernels"] if use_uk else []),
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
"--shapes=small",
],
tags = [],
target_backends_and_drivers = [
("vmvx", "local-task"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for use_uk in [
False,
True,
] for (lhs_rhs_type, acc_type) in (
[
("f32", "f32"),
]
)]
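
# CUDA, data-tiling + late materialization.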
[iree_generated_e2e_runner_test(
name = "e2e_matmul_cuda_experimental_dt_%s_%s" % (
lhs_rhs_type,
acc_type,
),
compiler_flags = [
"--iree-opt-data-tiling",
"--iree-global-opt-enable-early-materialization=false",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
"--shapes=small",
],
tags = [],
target_backends_and_drivers = [
("cuda", "cuda"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for (lhs_rhs_type, acc_type) in (
[
("f32", "f32"),
]
)]
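
# SPIR-V, data-tiling + late materialization.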
[iree_generated_e2e_runner_test(
name = "e2e_matmul_spirv_experimental_dt_%s_%s" % (
lhs_rhs_type,
acc_type,
),
compiler_flags = [
"--iree-opt-data-tiling",
"--iree-global-opt-enable-early-materialization=false",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
"--shapes=small",
],
tags = [],
target_backends_and_drivers = [
("vulkan-spirv", "vulkan"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for (lhs_rhs_type, acc_type) in (
[
("f32", "f32"),
]
)]
###########################################################################
##
## VMVX backend
##
###########################################################################
# VMVX, data-tiling + microkernels.
[iree_generated_e2e_runner_test(
name = "e2e_matmul_vmvx_dt_uk_%s_small" % lhs_rhs_type,
compiler_flags = [
"--iree-vmvx-enable-microkernels",
"--iree-opt-data-tiling",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
"--shapes=small",
],
target_backends_and_drivers = [
("vmvx", "local-task"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for (lhs_rhs_type, acc_type) in [
("i8", "i32"),
("f32", "f32"),
]]
###########################################################################
##
## CUDA backend
##
###########################################################################
iree_generated_e2e_runner_test(
name = "e2e_matmul_cuda_f32_large_simt",
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
"--acc_type=f32",
"--shapes=easy_large_static",
"--compilation_info=LLVMGPUMatmulSimt",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-nvidia",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
)
# Testing Ampere + TensorCore path.
# WMMA TensorCore(F32): wmma.161616.f32.tf32
iree_generated_e2e_runner_test(
name = "e2e_matmul_cuda_f32_large_tensorcore",
compiler_flags = [
"--iree-cuda-target=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
"--acc_type=f32",
"--shapes=easy_large_static",
"--compilation_info=LLVMGPUMatmulTensorCore",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
)
iree_generated_e2e_runner_test(
name = "e2e_matmul_cuda_f32",
compiler_flags = [
"--iree-cuda-target=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
"--acc_type=f32",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
)
iree_generated_e2e_runner_test(
name = "e2e_matmul_cuda_f16",
compiler_flags = [
"--iree-cuda-target=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
"--acc_type=f32",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
)
# MMA.SYNC TensorCore(F32): mma.sync.1688.f32.tf32
iree_generated_e2e_runner_test(
name = "e2e_matmul_cuda_f32_large_mma_sync",
compiler_flags = [
"--iree-cuda-target=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
"--acc_type=f32",
"--shapes=easy_large_static",
"--compilation_info=LLVMGPUMatmulTensorCoreMmaSync",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
)
# WMMA TensorCore(F16): wmma.161616.f16.f16
iree_generated_e2e_runner_test(
name = "e2e_matmul_cuda_f16_large_tensorcore",
compiler_flags = [
"--iree-cuda-target=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
"--acc_type=f32",
"--shapes=easy_large_static",
"--compilation_info=LLVMGPUMatmulTensorCore",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
)
# MMA.SYNC TensorCore(F16): mma.sync.16816.f16.f16
iree_generated_e2e_runner_test(
name = "e2e_matmul_cuda_f16_large_mma_sync",
compiler_flags = [
"--iree-cuda-target=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
"--acc_type=f32",
"--shapes=easy_large_static",
"--compilation_info=LLVMGPUMatmulTensorCoreMmaSync",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
)
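
# Split-K: --iree-dispatch-creation-split-matmul-reduction=4 splits the K
# (reduction) dimension of each matmul into 4 partial reductions that are
# combined afterwards, exercising the split-reduction path.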
[iree_generated_e2e_runner_test(
name = "e2e_matmul_cuda_%s_splitk" % lhs_rhs_type,
compiler_flags = [
"--iree-dispatch-creation-split-matmul-reduction=4",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-nvidia",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for (lhs_rhs_type, acc_type) in [
("f32", "f32"),
]]
###########################################################################
##
## Vulkan backend
##
###########################################################################
[iree_generated_e2e_runner_test(
name = "e2e_matmul_vulkan_{0}_large_valhall".format(lhs_rhs_type),
compiler_flags = [
"--iree-vulkan-target=valhall",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
"--shapes=easy_large_static",
"--compilation_info=SPIRVVectorizeMali",
],
tags = [
        # Nvidia GPUs support a superset of Valhall features.
"requires-gpu-nvidia",
"vulkan_uses_vk_khr_shader_float16_int8",
],
target_backends_and_drivers = [
("vulkan-spirv", "vulkan"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for (lhs_rhs_type, acc_type) in [
("i8", "i32"),
("f16", "f32"),
("f32", "f32"),
]]
[iree_generated_e2e_runner_test(
name = "e2e_matmul_vulkan_{0}_large_ampere".format(lhs_rhs_type),
compiler_flags = [
"--iree-vulkan-target=ampere",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--acc_type=%s" % acc_type,
"--shapes=easy_large_static",
"--compilation_info=SPIRVVectorizeNVIDIA",
],
tags = [
"requires-gpu-sm80",
"vulkan_uses_vk_khr_shader_float16_int8",
],
target_backends_and_drivers = [
("vulkan-spirv", "vulkan"),
],
test_runner = "//tools/testing/e2e:iree-e2e-matmul-test",
test_type = "matmul",
) for (lhs_rhs_type, acc_type) in [
("i8", "i32"),
("f16", "f32"),
("f32", "f32"),
]]
# TODO(#19465): add large matmul tests for rdna3.