| ## Copyright 2022 The IREE Authors |
| # |
| # Licensed under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| """Defines IREE CUDA benchmarks.""" |
| |
| from typing import List, Tuple, Sequence |
| from benchmark_suites.iree import module_execution_configs |
| from e2e_test_framework import unique_ids |
| from e2e_test_framework.definitions import common_definitions, iree_definitions |
| from e2e_test_framework.device_specs import device_collections |
| from e2e_test_framework.models import model_groups |
| import benchmark_suites.iree.utils |
| |
| |
| class Linux_CUDA_Benchmarks(object): |
| """Benchmarks on CUDA Linux devices.""" |
| |
| SM_80_GPU_TARGET = iree_definitions.CompileTarget( |
| target_architecture=common_definitions.DeviceArchitecture.CUDA_SM80, |
| target_backend=iree_definitions.TargetBackend.CUDA, |
| target_abi=iree_definitions.TargetABI.LINUX_GNU) |
| SM_80_COMPILE_CONFIG = iree_definitions.CompileConfig.build( |
| id=unique_ids.IREE_COMPILE_CONFIG_LINUX_CUDA_SM80_DEFAULTS, |
| tags=["default-flags"], |
| compile_targets=[SM_80_GPU_TARGET]) |
| SM_80_UBENCH_MATMUL_COMPILE_CONFIG = iree_definitions.CompileConfig.build( |
| id=unique_ids.IREE_COMPILE_CONFIG_LINUX_CUDA_SM80_MATMUL_UBENCH, |
| tags=["ukernel", "matmul"], |
| compile_targets=[SM_80_GPU_TARGET], |
| extra_flags=["--iree-hal-benchmark-dispatch-repeat-count=100"]) |
| SM_80_UBENCH_MATMUL_SPLITK_COMPILE_CONFIG = iree_definitions.CompileConfig.build( |
| id=unique_ids.IREE_COMPILE_CONFIG_LINUX_CUDA_SM80_MATMUL_SPLITK_UBENCH, |
| tags=["ukernel", "matmul", "splitk"], |
| compile_targets=[SM_80_GPU_TARGET], |
| extra_flags=[ |
| "--iree-hal-benchmark-dispatch-repeat-count=100", |
| "--iree-flow-split-matmul-reduction=4", |
| "--iree-codegen-llvmgpu-use-wmma" |
| ]) |
| |
| def _generate_configs( |
| self, |
| models: Sequence[common_definitions.Model], |
| compile_config: iree_definitions.CompileConfig, |
| execution_config: iree_definitions. |
| ModuleExecutionConfig = module_execution_configs.CUDA_CONFIG, |
| run_tags: Sequence[str] = [], |
| ) -> Tuple[List[iree_definitions.ModuleGenerationConfig], |
| List[iree_definitions.E2EModelRunConfig]]: |
| gen_configs = [ |
| iree_definitions.ModuleGenerationConfig.build( |
| compile_config=compile_config, |
| imported_model=iree_definitions.ImportedModel.from_model(model)) |
| for model in models |
| ] |
| sm80_devices = device_collections.DEFAULT_DEVICE_COLLECTION.query_device_specs( |
| architecture=common_definitions.DeviceArchitecture.NVIDIA_AMPERE, |
| host_environment=common_definitions.HostEnvironment.LINUX_X86_64) |
| run_module_configs = benchmark_suites.iree.utils.generate_e2e_model_run_configs( |
| module_generation_configs=gen_configs, |
| module_execution_configs=[execution_config], |
| device_specs=sm80_devices, |
| tags=run_tags) |
| |
| return (gen_configs, run_module_configs) |
| |
| def generate( |
| self |
| ) -> Tuple[List[iree_definitions.ModuleGenerationConfig], |
| List[iree_definitions.E2EModelRunConfig]]: |
| """Generates IREE compile and run configs.""" |
| gen_configs, run_configs = self._generate_configs(model_groups.CUDA_MODELS, |
| self.SM_80_COMPILE_CONFIG) |
| # The `cuda` tag is required to put them into the CUDA benchmark preset. |
| ubench_gen_configs, ubench_run_configs = self._generate_configs( |
| model_groups.MICRO_MATMUL, |
| self.SM_80_UBENCH_MATMUL_COMPILE_CONFIG, |
| execution_config=module_execution_configs.CUDA_BATCH_SIZE_100_CONFIG, |
| run_tags=["cuda"]) |
| ubench_splitk_gen_configs, ubench_splitk_run_configs = self._generate_configs( |
| model_groups.MICRO_MATMUL_SPLITK, |
| self.SM_80_UBENCH_MATMUL_SPLITK_COMPILE_CONFIG, |
| execution_config=module_execution_configs.CUDA_BATCH_SIZE_100_CONFIG, |
| run_tags=["cuda"]) |
| long_running_gen_configs, long_running_module_configs = self._generate_configs( |
| model_groups.CUDA_MODELS_LONG, |
| self.SM_80_COMPILE_CONFIG, |
| run_tags=["cuda", "long-running"]) |
| return (gen_configs + ubench_gen_configs + ubench_splitk_gen_configs + |
| long_running_gen_configs, run_configs + ubench_run_configs + |
| ubench_splitk_run_configs + long_running_module_configs) |