blob: 6c76c3580657c132f87ae4e19d5e810d3270a8a8 [file] [log] [blame]
# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# End-to-end matrix multiplication tests.
load("//build_tools/bazel:iree_trace_runner_test.bzl", "iree_generated_trace_runner_test")
package(
features = ["layering_check"],
licenses = ["notice"], # Apache 2.0
)
py_binary(
name = "generate_e2e_matmul_tests",
srcs = ["generate_e2e_matmul_tests.py"],
)
[iree_generated_trace_runner_test(
name = "e2e_matmul_direct_%s_small" % lhs_rhs_type,
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=small",
],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"i8",
"f32",
]]
# Test asm
[iree_generated_trace_runner_test(
name = "e2e_matmul_mmt4d_%s_small" % lhs_rhs_type,
compiler_flags = [
"--iree-opt-data-tiling",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=small",
],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = ["default"] +
([
"arm_64:dotprod:+dotprod",
"arm_64:i8mm:+i8mm",
] if lhs_rhs_type == "i8" else []),
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"i8",
"f32",
]]
[iree_generated_trace_runner_test(
name = "e2e_matmul_mmt4d_%s_large" % lhs_rhs_type,
compiler_flags = [
"--iree-opt-data-tiling",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=large",
],
tags = [
# "--shapes=large" can cause timeouts on riscv emulator and sanitizers.
"noriscv",
"noasan",
"notsan",
],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = ["default"] +
([
"arm_64:dotprod:+dotprod",
"arm_64:i8mm:+i8mm",
] if lhs_rhs_type == "i8" else []),
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"i8",
"f32",
]]
# Test intrinsics. No need to run vmvx again, since it isn't affected by this
# codegen flag. No need to run "large" sizes, since this only differs from other
# tests in ways that are orthogonal to problem sizes.
[iree_generated_trace_runner_test(
name = "e2e_matmul_mmt4d_%s_intrinsics_%s" % (lhs_rhs_type, size),
compiler_flags = [
"--iree-codegen-mmt4d-use-intrinsics",
"--iree-opt-data-tiling",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=%s" % size,
],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = ["default"] +
([
"arm_64:dotprod:+dotprod",
"arm_64:i8mm:+i8mm",
] if lhs_rhs_type == "i8" else []),
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"i8",
"f32",
] for size in [
"small",
]]
# Test VMVX+ukernel, direct (not mmt4d)
[iree_generated_trace_runner_test(
name = "e2e_matmul_direct_%s_small_ukernel" % lhs_rhs_type,
compiler_flags = [
"--iree-vmvx-enable-microkernels",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=small",
],
target_backends_and_drivers = [
("vmvx", "local-task"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"i8",
"f32",
]]
# Test VMVX+ukernel, mmt4d, with target CPU features variants relevant to each
# lhs_rhs_type.
[iree_generated_trace_runner_test(
name = "e2e_matmul_mmt4d_%s_small_vmvx_ukernel" % lhs_rhs_type,
compiler_flags = [
"--iree-vmvx-enable-microkernels",
"--iree-opt-data-tiling",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=small",
],
target_backends_and_drivers = [
("vmvx", "local-task"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"i8",
"f32",
]]
X86_64_AVX2_FMA = [
"+avx",
"+avx2",
"+fma",
]
X86_64_AVX512_BASE = X86_64_AVX2_FMA + [
"+avx512f",
"+avx512vl",
"+avx512cd",
"+avx512bw",
"+avx512dq",
]
X86_64_AVX512_VNNI = X86_64_AVX512_BASE + [
"+avx512vnni",
]
# Test mmt4d with --iree-llvmcpu-enable-microkernels.
[iree_generated_trace_runner_test(
name = "e2e_matmul_mmt4d_%s_%s_ukernel" % (lhs_rhs_type, size),
compiler_flags = [
"--iree-llvmcpu-enable-microkernels",
"--iree-opt-data-tiling",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=%s" % size,
],
tags = [
# "--shapes=large" can cause timeouts on riscv emulator and sanitizers.
"noriscv",
"noasan",
"notsan",
] if size == "large" else [],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
target_cpu_features_variants = [
"default",
"x86_64:avx2_fma:" + ",".join(X86_64_AVX2_FMA),
"x86_64:avx512_base:" + ",".join(X86_64_AVX512_BASE),
] + ([
"x86_64:avx512_vnni:" + ",".join(X86_64_AVX512_VNNI),
"arm_64:dotprod:+dotprod",
"arm_64:i8mm:+i8mm",
] if lhs_rhs_type == "i8" else []),
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"i8",
"f32",
] for size in [
"small",
"large",
]]
iree_generated_trace_runner_test(
name = "e2e_matmul_direct_f32_gpu_large_LLVMGPUMatmulSimt",
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
"--shapes=gpu_large_aligned",
"--compilation_info=LLVMGPUMatmulSimt",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-nvidia",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
)
# Testing Ampere + TensorCore path.
# WMMA TensorCore(F32): wmma.161616.f32.tf32
iree_generated_trace_runner_test(
name = "e2e_matmul_direct_f32_gpu_large_LLVMGPUMatmulTensorCore",
compiler_flags = [
"--iree-hal-cuda-llvm-target-arch=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
"--shapes=gpu_large_aligned",
"--compilation_info=LLVMGPUMatmulTensorCore",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
)
iree_generated_trace_runner_test(
name = "e2e_matmul_direct_f32_gpu_large_unaligned",
compiler_flags = [
"--iree-hal-cuda-llvm-target-arch=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
"--shapes=gpu_large",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
)
iree_generated_trace_runner_test(
name = "e2e_matmul_direct_f16_gpu_large_unaligned",
compiler_flags = [
"--iree-hal-cuda-llvm-target-arch=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
"--shapes=gpu_large",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
)
# MMA.SYNC TensorCore(F32): mma.sync.1688.f32.t32
iree_generated_trace_runner_test(
name = "e2e_matmul_direct_f32_gpu_large_mma_sync_LLVMGPUMatmulTensorCoreMmaSync",
compiler_flags = [
"--iree-hal-cuda-llvm-target-arch=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
"--shapes=gpu_large_aligned",
"--compilation_info=LLVMGPUMatmulTensorCoreMmaSync",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
)
# WMMA TensorCore(F16): wmma.161616.f16.f16
iree_generated_trace_runner_test(
name = "e2e_matmul_direct_f16_gpu_large_LLVMGPUMatmulTensorCore",
compiler_flags = [
"--iree-hal-cuda-llvm-target-arch=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
"--shapes=gpu_large_aligned",
"--compilation_info=LLVMGPUMatmulTensorCore",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
)
# MMA.SYNC TensorCore(F16): mma.sync.161616.f16.f16
iree_generated_trace_runner_test(
name = "e2e_matmul_direct_f16_gpu_large_mma_sync_LLVMGPUMatmulTensorCoreMmaSync",
compiler_flags = [
"--iree-hal-cuda-llvm-target-arch=sm_80",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
"--shapes=gpu_large_aligned",
"--compilation_info=LLVMGPUMatmulTensorCoreMmaSync",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-sm80",
],
target_backends_and_drivers = [
("cuda", "cuda"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
)
[iree_generated_trace_runner_test(
name = "e2e_matmul_direct_%s_large_split_k" % lhs_rhs_type,
compiler_flags = [
"--iree-flow-split-matmul-reduction=4",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=large",
],
tags = [
# CUDA cuInit fails with sanitizer on.
"noasan",
"nomsan",
"notsan",
"noubsan",
"requires-gpu-nvidia",
# "--shapes=large" can cause timeouts on riscv emulator.
"noriscv",
],
target_backends_and_drivers = [
("cuda", "cuda"),
("llvm-cpu", "local-task"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"f32",
]]
[iree_generated_trace_runner_test(
name = "e2e_matmul_direct_{0}_gpu_large_valhall".format(lhs_rhs_type),
compiler_flags = [
"--iree-vulkan-target-triple=valhall-unknown-android31",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=gpu_large_aligned",
"--compilation_info=SPIRVVectorizeMali",
],
tags = [
# Nvidia GPUs support a superset of Valhall features
"requires-gpu-nvidia",
"vulkan_uses_vk_khr_shader_float16_int8",
],
target_backends_and_drivers = [
("vulkan-spirv", "vulkan"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"i8",
"f16",
"f32",
]]
[iree_generated_trace_runner_test(
name = "e2e_matmul_direct_{0}_gpu_large_ampere".format(lhs_rhs_type),
compiler_flags = [
"--iree-vulkan-target-triple=ampere-unknown-linux",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=%s" % lhs_rhs_type,
"--shapes=gpu_large_aligned",
"--compilation_info=SPIRVVectorizeNVIDIA",
],
tags = [
"requires-gpu-sm80",
"vulkan_uses_vk_khr_shader_float16_int8",
],
target_backends_and_drivers = [
("vulkan-spirv", "vulkan"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
) for lhs_rhs_type in [
"i8",
"f16",
"f32",
]]
iree_generated_trace_runner_test(
name = "e2e_matmul_direct_f32_small_no_padding",
compiler_flags = [
"--iree-codegen-enable-vector-padding=false",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f32",
"--shapes=small",
],
target_backends_and_drivers = [
("llvm-cpu", "local-task"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
)
iree_generated_trace_runner_test(
name = "e2e_matmul_direct_f16_gpu_large_rdna3",
compiler_flags = [
"--iree-vulkan-target-triple=rdna3-unknown-linux",
],
generator = ":generate_e2e_matmul_tests",
generator_args = [
"--lhs_rhs_type=f16",
"--shapes=gpu_large_aligned",
"--compilation_info=SPIRVCooperativeMatrixVectorize",
],
runner_args = [
"--require_exact_results=false",
],
tags = [
"requires-gpu",
"requires-gpu-rdna3",
"vulkan_uses_vk_khr_shader_float16_int8",
],
target_backends_and_drivers = [
("vulkan-spirv", "vulkan"),
],
trace_runner = "//tools:iree-e2e-matmul-test",
)