| # Copyright 2022 The IREE Authors |
| # |
| # Licensed under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| # End-to-end matrix multiplication tests. |
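| # |
| # Each test target below is declared via iree_generated_e2e_runner_test |
| # (build_tools/bazel/iree_e2e_generated_runner_test.bzl). In short: the |
| # generate_e2e_matmul_tests.py generator emits matmul test cases for the |
| # requested element types and shapes, those cases are compiled for each |
| # listed (backend, driver) pair with the given compiler_flags (and once per |
| # entry in target_cpu_features_variants, where present), and the result is |
| # run by the iree-e2e-matmul-test runner. Target names encode the backend |
| # and variant, e.g. e2e_matmul_cpu_dt_uk_i8_i32 is the data-tiled ("dt"), |
| # microkernel-enabled ("uk") i8 x i8 -> i32 CPU test. |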
| |
| load("//build_tools/bazel:iree_e2e_generated_runner_test.bzl", "iree_generated_e2e_runner_test") |
| |
| package( |
| features = ["layering_check"], |
| licenses = ["notice"], # Apache 2.0 |
| ) |
| |
| py_binary( |
| name = "generate_e2e_matmul_tests", |
| srcs = ["generate_e2e_matmul_tests.py"], |
| ) |
| |
| ########################################################################### |
| ## |
| ## LLVMCPU backend |
| ## |
| ########################################################################### |
| |
| # LLVMCPU, non-data-tiling, no microkernels |
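| # NOTE: every element-type pair in the comprehension below is currently |
| # commented out, so no targets are generated here until one is re-enabled. |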
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cpu_nondt_%s_%s" % (lhs_rhs_type, acc_type), |
| compiler_flags = [ |
| "--iree-opt-data-tiling=false", |
| "--iree-llvmcpu-enable-ukernels=none", |
| "--iree-llvmcpu-enable-scalable-vectorization", |
| "--iree-llvmcpu-target-triple=aarch64-unknown-unknown", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % lhs_rhs_type, |
| "--acc_type=%s" % acc_type, |
| ], |
| tags = [ |
| # f16/bf16 trigger internal LLVM assertion errors on riscv and wasm. |
| "noriscv", |
| "nowasm", |
| ] if (lhs_rhs_type == "f16" or lhs_rhs_type == "bf16") else [], |
| target_backends_and_drivers = [ |
| ("llvm-cpu", "local-task"), |
| ], |
| target_cpu_features_variants = ["generic"] + |
| # Widening matmuls fail to lower for SVE. |
| (["arm_64:sve:+sve"] if lhs_rhs_type == acc_type else []), |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for (lhs_rhs_type, acc_type) in [ |
| # ("i8", "i32"), # TODO(#15800): enable once compile time is reasonable |
| # ("f32", "f32"), # TODO(#15800): enable once compile time is reasonable |
| # ("f16", "f16"), # TODO(#15800): enable once compile time is reasonable |
| # ("f16", "f32"), # TODO(#15800): enable once compile time is reasonable |
| # TODO(#15258): enable bf16 tests when that bug is fixed. |
| # ("bf16", "bf16"), |
| # ("bf16", "f32"), |
| ]] |
| |
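| # Preprocessing flag that transposes the matmul LHS before compilation |
| # (iree-preprocessing-transpose-matmul-pass with input=lhs). The parentheses |
| # of the nested pass pipeline are backslash-escaped so the flag survives |
| # being quoted on the generated test command line. |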
| PREPROCESSING_TRANSPOSE_LHS = "--iree-preprocessing-pass-pipeline=builtin.module\\(util.func\\(iree-preprocessing-transpose-matmul-pass{input=lhs}\\)\\)" |
| |
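| # Selects the "peel" vector pre-processing strategy in the LLVMCPU backend: |
| # leftover iterations are peeled into a remainder loop so the main loop can |
| # be fully vectorized. |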
| PREPROCESSING_PEEL = "--iree-llvmcpu-vector-pproc-strategy=peel" |
| |
| # LLVMCPU, non-data-tiling, no microkernels, ArmSME |
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cpu_arm_sme_nondt_%s%s%s" % ( |
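| # Placeholder: no content inserted here. |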
| dtype, |
| "_transpose_lhs" if transpose_lhs else "", |
| "_peel" if peel else "", |
| ), |
| compiler_flags = [ |
| "--iree-opt-data-tiling=false", |
| "--iree-llvmcpu-enable-scalable-vectorization", |
| "--iree-llvmcpu-target-triple=aarch64-unknown-unknown", |
| ] + ([PREPROCESSING_TRANSPOSE_LHS] if transpose_lhs else []) + |
| ([PREPROCESSING_PEEL] if peel else []), |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % dtype, |
| "--acc_type=%s" % dtype, |
| ], |
| tags = [ |
| "requires-arm-sme", |
| ], |
| target_backends_and_drivers = [ |
| ("llvm-cpu", "local-task"), |
| ], |
| target_cpu_features_variants = ["arm_64:sme:+sve,+sme"], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for dtype in [ |
| "f32", |
| # f64 disabled because it wasn't supported by the test generator at the time |
| # this was added. When adding it in the future, consider passing |
| # --iree-input-demote-f64-to-f32=false to the compiler. |
| # "f64" |
| ] for transpose_lhs in [ |
| True, |
| False, |
| ] for peel in [ |
| True, |
| False, |
| ]] |
| |
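| # CPU feature sets reused in target_cpu_features_variants below. Each |
| # variant entry has the form |
| # "<arch>:<variant name>:<comma-separated LLVM target features>". |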
| X86_64_AVX2 = [ |
| "+avx", |
| "+avx2", |
| "+fma", |
| "+f16c", |
| ] |
| |
| X86_64_AVX512 = X86_64_AVX2 + [ |
| "+avx512f", |
| "+avx512vl", |
| "+avx512cd", |
| "+avx512bw", |
| "+avx512dq", |
| ] |
| |
| X86_64_AVX512_VNNI = X86_64_AVX512 + [ |
| "+avx512vnni", |
| ] |
| |
| X86_64_AVX512_BF16 = X86_64_AVX512 + [ |
| "+avx512bf16", |
| ] |
| |
| # LLVMCPU, data-tiling, with and without microkernels. |
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cpu_dt%s_%s_%s" % ( |
| ("_uk" if use_uk else ""), |
| lhs_rhs_type, |
| acc_type, |
| ), |
| compiler_flags = [ |
| "--iree-opt-data-tiling", |
| ] + [ |
| "--iree-llvmcpu-enable-ukernels=%s" % ("all" if use_uk else "none"), |
| ] + (["--iree-input-demote-f64-to-f32=false"] if acc_type == "f64" else []), |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % lhs_rhs_type, |
| "--acc_type=%s" % acc_type, |
| ] + (["--shapes=small"] if acc_type == "f64" else []), |
| tags = ([ |
| # f16/bf16 trigger internal LLVM assertion errors on riscv and wasm. |
| "noriscv", |
| "nowasm", |
| ] if (lhs_rhs_type == "f16" or lhs_rhs_type == "bf16") else []), |
| target_backends_and_drivers = [ |
| ("llvm-cpu", "local-task"), |
| ], |
| target_cpu_features_variants = ["generic"] + |
| ([ |
| "arm_64:dotprod:+dotprod", |
| "arm_64:i8mm:+i8mm", |
| "x86_64:avx512vnni:" + ",".join(X86_64_AVX512_VNNI), |
| ] if lhs_rhs_type == "i8" and acc_type == "i32" else [ |
| "x86_64:avx2:" + ",".join(X86_64_AVX2), |
| "x86_64:avx512:" + ",".join(X86_64_AVX512), |
| ] if lhs_rhs_type == "f32" and acc_type == "f32" else [ |
| "x86_64:avx2:" + ",".join(X86_64_AVX2), |
| "x86_64:avx512:" + ",".join(X86_64_AVX512), |
| "arm_64:fullfp16:+fullfp16", |
| ] if lhs_rhs_type == "f16" and acc_type == "f16" else [ |
| "x86_64:avx2:" + ",".join(X86_64_AVX2), |
| "x86_64:avx512:" + ",".join(X86_64_AVX512), |
| "arm_64:fp16fml:+fp16fml", |
| ] if lhs_rhs_type == "f16" and acc_type == "f32" else [ |
| "x86_64:avx2:" + ",".join(X86_64_AVX2), |
| "x86_64:avx512:" + ",".join(X86_64_AVX512), |
| "x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16), |
| "arm_64:bf16:+bf16", |
| ] if lhs_rhs_type == "bf16" and acc_type == "bf16" else [ |
| "x86_64:avx2:" + ",".join(X86_64_AVX2), |
| "x86_64:avx512:" + ",".join(X86_64_AVX512), |
| "x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16), |
| "arm_64:bf16:+bf16", |
| ] if lhs_rhs_type == "bf16" and acc_type == "f32" else []), |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for use_uk in [ |
| False, |
| True, |
| ] for (lhs_rhs_type, acc_type) in ( |
| [ |
| ("i8", "i32"), |
| ("f32", "f32"), |
| ("f64", "f64"), |
| ("f16", "f16"), |
| ("f16", "f32"), |
| ("bf16", "bf16"), |
| ("bf16", "f32"), |
| ] |
| )] |
| |
| # LLVMCPU, data-tiling + late materialization, with and without microkernels. |
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cpu_experimental_dt%s_%s_%s" % ( |
| ("_uk" if use_uk else ""), |
| lhs_rhs_type, |
| acc_type, |
| ), |
| compiler_flags = [ |
| "--iree-opt-data-tiling", |
| "--iree-global-opt-enable-early-materialization=false", |
| ] + ["--iree-llvmcpu-enable-ukernels=%s" % ("all" if use_uk else "none")], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % lhs_rhs_type, |
| "--acc_type=%s" % acc_type, |
| ], |
| tags = ([ |
| # f16/bf16 trigger internal LLVM assertion errors on riscv and wasm. |
| "noriscv", |
| "nowasm", |
| ] if (lhs_rhs_type == "f16" or lhs_rhs_type == "bf16") else []), |
| target_backends_and_drivers = [ |
| ("llvm-cpu", "local-task"), |
| ], |
| target_cpu_features_variants = ["generic"] + |
| ([ |
| "arm_64:dotprod:+dotprod", |
| "arm_64:i8mm:+i8mm", |
| "x86_64:avx512vnni:" + ",".join(X86_64_AVX512_VNNI), |
| ] if lhs_rhs_type == "i8" and acc_type == "i32" else [ |
| "x86_64:avx2:" + ",".join(X86_64_AVX2), |
| "x86_64:avx512:" + ",".join(X86_64_AVX512), |
| ] if lhs_rhs_type == "f32" and acc_type == "f32" else [ |
| "x86_64:avx2:" + ",".join(X86_64_AVX2), |
| "x86_64:avx512:" + ",".join(X86_64_AVX512), |
| "arm_64:fullfp16:+fullfp16", |
| ] if lhs_rhs_type == "f16" and acc_type == "f16" else [ |
| "x86_64:avx2:" + ",".join(X86_64_AVX2), |
| "x86_64:avx512:" + ",".join(X86_64_AVX512), |
| "arm_64:fp16fml:+fp16fml", |
| ] if lhs_rhs_type == "f16" and acc_type == "f32" else [ |
| "x86_64:avx2:" + ",".join(X86_64_AVX2), |
| "x86_64:avx512:" + ",".join(X86_64_AVX512), |
| "x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16), |
| "arm_64:bf16:+bf16", |
| ] if lhs_rhs_type == "bf16" and acc_type == "bf16" else [ |
| "x86_64:avx2:" + ",".join(X86_64_AVX2), |
| "x86_64:avx512:" + ",".join(X86_64_AVX512), |
| "x86_64:avx512bf16:" + ",".join(X86_64_AVX512_BF16), |
| "arm_64:bf16:+bf16", |
| ] if lhs_rhs_type == "bf16" and acc_type == "f32" else []), |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for use_uk in [ |
| False, |
| True, |
| ] for (lhs_rhs_type, acc_type) in ( |
| [ |
| ("i8", "i32"), |
| ("f32", "f32"), |
| ("f16", "f16"), |
| ("f16", "f32"), |
| ("bf16", "bf16"), |
| ("bf16", "f32"), |
| ] |
| )] |
| |
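| # VMVX, data-tiling + late materialization. |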
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_vmvx_experimental_dt%s_%s_%s" % ( |
| ("_uk" if use_uk else ""), |
| lhs_rhs_type, |
| acc_type, |
| ), |
| compiler_flags = [ |
| "--iree-opt-data-tiling", |
| "--iree-global-opt-enable-early-materialization=false", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % lhs_rhs_type, |
| "--acc_type=%s" % acc_type, |
| "--shapes=small", |
| ], |
| tags = [], |
| target_backends_and_drivers = [ |
| ("vmvx", "local-task"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for use_uk in [ |
| False, |
| True, |
| ] for (lhs_rhs_type, acc_type) in ( |
| [ |
| ("f32", "f32"), |
| ] |
| )] |
| |
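| # CUDA, data-tiling + late materialization. |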
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cuda_experimental_dt_%s_%s" % ( |
| lhs_rhs_type, |
| acc_type, |
| ), |
| compiler_flags = [ |
| "--iree-opt-data-tiling", |
| "--iree-global-opt-enable-early-materialization=false", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % lhs_rhs_type, |
| "--acc_type=%s" % acc_type, |
| "--shapes=small", |
| ], |
| tags = [], |
| target_backends_and_drivers = [ |
| ("cuda", "cuda"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for (lhs_rhs_type, acc_type) in ( |
| [ |
| ("f32", "f32"), |
| ] |
| )] |
| |
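| # Vulkan SPIR-V, data-tiling + late materialization. |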
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_spirv_experimental_dt_%s_%s" % ( |
| lhs_rhs_type, |
| acc_type, |
| ), |
| compiler_flags = [ |
| "--iree-opt-data-tiling", |
| "--iree-global-opt-enable-early-materialization=false", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % lhs_rhs_type, |
| "--acc_type=%s" % acc_type, |
| "--shapes=small", |
| ], |
| tags = [], |
| target_backends_and_drivers = [ |
| ("vulkan-spirv", "vulkan"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for (lhs_rhs_type, acc_type) in ( |
| [ |
| ("f32", "f32"), |
| ] |
| )] |
| |
| ########################################################################### |
| ## |
| ## VMVX backend |
| ## |
| ########################################################################### |
| |
| # VMVX, data-tiling + microkernels. |
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_vmvx_dt_uk_%s_small" % lhs_rhs_type, |
| compiler_flags = [ |
| "--iree-vmvx-enable-microkernels", |
| "--iree-opt-data-tiling", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % lhs_rhs_type, |
| "--acc_type=%s" % acc_type, |
| "--shapes=small", |
| ], |
| target_backends_and_drivers = [ |
| ("vmvx", "local-task"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for (lhs_rhs_type, acc_type) in [ |
| ("i8", "i32"), |
| ("f32", "f32"), |
| ]] |
| |
| ########################################################################### |
| ## |
| ## CUDA backend |
| ## |
| ########################################################################### |
| |
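| # Testing the CUDA SIMT CodeGen path (LLVMGPUMatmulSimt pipeline). |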
| iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cuda_f32_large_simt", |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=f32", |
| "--acc_type=f32", |
| "--shapes=easy_large_static", |
| "--compilation_info=LLVMGPUMatmulSimt", |
| ], |
| tags = [ |
| # CUDA cuInit fails with sanitizer on. |
| "noasan", |
| "nomsan", |
| "notsan", |
| "noubsan", |
| "requires-gpu-nvidia", |
| ], |
| target_backends_and_drivers = [ |
| ("cuda", "cuda"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) |
| |
| # Testing Ampere + TensorCore path. |
| # WMMA TensorCore(F32): wmma.161616.f32.tf32 |
| iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cuda_f32_large_tensorcore", |
| compiler_flags = [ |
| "--iree-cuda-target=sm_80", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=f32", |
| "--acc_type=f32", |
| "--shapes=easy_large_static", |
| "--compilation_info=LLVMGPUMatmulTensorCore", |
| ], |
| tags = [ |
| # CUDA cuInit fails with sanitizer on. |
| "noasan", |
| "nomsan", |
| "notsan", |
| "noubsan", |
| "requires-gpu-sm80", |
| ], |
| target_backends_and_drivers = [ |
| ("cuda", "cuda"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) |
| |
| iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cuda_f32", |
| compiler_flags = [ |
| "--iree-cuda-target=sm_80", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=f32", |
| "--acc_type=f32", |
| ], |
| tags = [ |
| # CUDA cuInit fails with sanitizer on. |
| "noasan", |
| "nomsan", |
| "notsan", |
| "noubsan", |
| "requires-gpu-sm80", |
| ], |
| target_backends_and_drivers = [ |
| ("cuda", "cuda"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) |
| |
| iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cuda_f16", |
| compiler_flags = [ |
| "--iree-cuda-target=sm_80", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=f16", |
| "--acc_type=f32", |
| ], |
| tags = [ |
| # CUDA cuInit fails with sanitizer on. |
| "noasan", |
| "nomsan", |
| "notsan", |
| "noubsan", |
| "requires-gpu-sm80", |
| ], |
| target_backends_and_drivers = [ |
| ("cuda", "cuda"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) |
| |
| # MMA.SYNC TensorCore(F32): mma.sync.1688.f32.tf32 |
| iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cuda_f32_large_mma_sync", |
| compiler_flags = [ |
| "--iree-cuda-target=sm_80", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=f32", |
| "--acc_type=f32", |
| "--shapes=easy_large_static", |
| "--compilation_info=LLVMGPUMatmulTensorCoreMmaSync", |
| ], |
| tags = [ |
| # CUDA cuInit fails with sanitizer on. |
| "noasan", |
| "nomsan", |
| "notsan", |
| "noubsan", |
| "requires-gpu-sm80", |
| ], |
| target_backends_and_drivers = [ |
| ("cuda", "cuda"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) |
| |
| # WMMA TensorCore(F16): wmma.161616.f16.f16 |
| iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cuda_f16_large_tensorcore", |
| compiler_flags = [ |
| "--iree-cuda-target=sm_80", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=f16", |
| "--acc_type=f32", |
| "--shapes=easy_large_static", |
| "--compilation_info=LLVMGPUMatmulTensorCore", |
| ], |
| tags = [ |
| # CUDA cuInit fails with sanitizer on. |
| "noasan", |
| "nomsan", |
| "notsan", |
| "noubsan", |
| "requires-gpu-sm80", |
| ], |
| target_backends_and_drivers = [ |
| ("cuda", "cuda"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) |
| |
| # MMA.SYNC TensorCore(F16): mma.sync.161616.f16.f16 |
| iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cuda_f16_large_mma_sync", |
| compiler_flags = [ |
| "--iree-cuda-target=sm_80", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=f16", |
| "--acc_type=f32", |
| "--shapes=easy_large_static", |
| "--compilation_info=LLVMGPUMatmulTensorCoreMmaSync", |
| ], |
| tags = [ |
| # CUDA cuInit fails with sanitizer on. |
| "noasan", |
| "nomsan", |
| "notsan", |
| "noubsan", |
| "requires-gpu-sm80", |
| ], |
| target_backends_and_drivers = [ |
| ("cuda", "cuda"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) |
| |
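| # Split-K: --iree-dispatch-creation-split-matmul-reduction=4 splits the |
| # matmul reduction (K) dimension into 4 partial reductions. |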
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_cuda_%s_splitk" % lhs_rhs_type, |
| compiler_flags = [ |
| "--iree-dispatch-creation-split-matmul-reduction=4", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % lhs_rhs_type, |
| "--acc_type=%s" % acc_type, |
| ], |
| tags = [ |
| # CUDA cuInit fails with sanitizer on. |
| "noasan", |
| "nomsan", |
| "notsan", |
| "noubsan", |
| "requires-gpu-nvidia", |
| ], |
| target_backends_and_drivers = [ |
| ("cuda", "cuda"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for (lhs_rhs_type, acc_type) in [ |
| ("f32", "f32"), |
| ]] |
| |
| ########################################################################### |
| ## |
| ## Vulkan backend |
| ## |
| ########################################################################### |
| |
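| # Mali Valhall target, using the SPIRVVectorizeMali pipeline. |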
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_vulkan_{0}_large_valhall".format(lhs_rhs_type), |
| compiler_flags = [ |
| "--iree-vulkan-target=valhall", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % lhs_rhs_type, |
| "--acc_type=%s" % acc_type, |
| "--shapes=easy_large_static", |
| "--compilation_info=SPIRVVectorizeMali", |
| ], |
| tags = [ |
| # Nvidia GPUs support a superset of Valhall features. |
| "requires-gpu-nvidia", |
| "vulkan_uses_vk_khr_shader_float16_int8", |
| ], |
| target_backends_and_drivers = [ |
| ("vulkan-spirv", "vulkan"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for (lhs_rhs_type, acc_type) in [ |
| ("i8", "i32"), |
| ("f16", "f32"), |
| ("f32", "f32"), |
| ]] |
| |
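| # NVIDIA Ampere target, using the SPIRVVectorizeNVIDIA pipeline. |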
| [iree_generated_e2e_runner_test( |
| name = "e2e_matmul_vulkan_{0}_large_ampere".format(lhs_rhs_type), |
| compiler_flags = [ |
| "--iree-vulkan-target=ampere", |
| ], |
| generator = ":generate_e2e_matmul_tests", |
| generator_args = [ |
| "--lhs_rhs_type=%s" % lhs_rhs_type, |
| "--acc_type=%s" % acc_type, |
| "--shapes=easy_large_static", |
| "--compilation_info=SPIRVVectorizeNVIDIA", |
| ], |
| tags = [ |
| "requires-gpu-sm80", |
| "vulkan_uses_vk_khr_shader_float16_int8", |
| ], |
| target_backends_and_drivers = [ |
| ("vulkan-spirv", "vulkan"), |
| ], |
| test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", |
| test_type = "matmul", |
| ) for (lhs_rhs_type, acc_type) in [ |
| ("i8", "i32"), |
| ("f16", "f32"), |
| ("f32", "f32"), |
| ]] |
| |
| # TODO(#19465): add large matmul tests for rdna3. |