blob: 256930c79f1f1d0c2eec8dbac343d3f1d7649359 [file] [log] [blame]
## Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Defines IREE CUDA benchmarks."""
from typing import List, Sequence
from benchmark_suites.iree import benchmark_presets, module_execution_configs, utils
from e2e_test_framework import unique_ids
from e2e_test_framework.definitions import common_definitions, iree_definitions
from e2e_test_framework.device_specs import device_collections
from e2e_test_framework.models import model_groups
class Linux_CUDA_Benchmarks(object):
    """Benchmarks on CUDA Linux devices.

    The class-level constants describe the compile target and the compile
    configurations; `generate()` combines them with model groups, execution
    configs and benchmark presets to produce the end-to-end run configs.
    """

    # Compile target: CUDA backend, SM80 architecture, Linux GNU ABI.
    SM_80_GPU_TARGET = iree_definitions.CompileTarget(
        target_architecture=common_definitions.DeviceArchitecture.CUDA_SM80,
        target_backend=iree_definitions.TargetBackend.CUDA,
        target_abi=iree_definitions.TargetABI.LINUX_GNU,
    )

    # Compile config for the SM80 target with no extra compiler flags.
    SM_80_COMPILE_CONFIG = iree_definitions.CompileConfig.build(
        id=unique_ids.IREE_COMPILE_CONFIG_LINUX_CUDA_SM80_DEFAULTS,
        tags=["default-flags"],
        compile_targets=[SM_80_GPU_TARGET],
    )

    # Compile config for the matmul microbenchmarks.
    # NOTE(review): the repeat count of 100 presumably has to stay in sync with
    # the batch size of `CUDA_BATCH_SIZE_100_CONFIG` used in `generate()` --
    # confirm before changing either value.
    SM_80_UBENCH_MATMUL_COMPILE_CONFIG = iree_definitions.CompileConfig.build(
        id=unique_ids.IREE_COMPILE_CONFIG_LINUX_CUDA_SM80_MATMUL_UBENCH,
        tags=["ukernel", "matmul"],
        compile_targets=[SM_80_GPU_TARGET],
        extra_flags=["--iree-hal-benchmark-dispatch-repeat-count=100"],
    )

    # Compile config for the split-K matmul microbenchmarks: same repeat count
    # as above, plus the split-reduction and WMMA flags.
    SM_80_UBENCH_MATMUL_SPLITK_COMPILE_CONFIG = iree_definitions.CompileConfig.build(
        id=unique_ids.IREE_COMPILE_CONFIG_LINUX_CUDA_SM80_MATMUL_SPLITK_UBENCH,
        tags=["ukernel", "matmul", "splitk"],
        compile_targets=[SM_80_GPU_TARGET],
        extra_flags=[
            "--iree-hal-benchmark-dispatch-repeat-count=100",
            "--iree-flow-split-matmul-reduction=4",
            "--iree-codegen-llvmgpu-use-wmma",
        ],
    )

    def _generate_configs(
        self,
        models: Sequence[common_definitions.Model],
        compile_config: iree_definitions.CompileConfig,
        execution_config: iree_definitions.ModuleExecutionConfig,
        presets: Sequence[str],
    ) -> List[iree_definitions.E2EModelRunConfig]:
        """Builds the run configs for one group of models.

        Args:
            models: models to benchmark; each one is imported and paired with
                `compile_config` to form a module generation config.
            compile_config: compile configuration applied to every model.
            execution_config: the single execution configuration used for
                every generated module.
            presets: benchmark presets forwarded to the run-config generator.

        Returns:
            The run configs produced by `utils.generate_e2e_model_run_configs`
            for the given models on the queried devices.
        """
        # One generation config per model, in the order the models were given.
        module_gen_configs = []
        for model in models:
            imported = iree_definitions.ImportedModel.from_model(model)
            module_gen_configs.append(
                iree_definitions.ModuleGenerationConfig.build(
                    compile_config=compile_config,
                    imported_model=imported,
                )
            )

        # Device specs from the default collection: NVIDIA Ampere architecture
        # on Linux x86_64 hosts.
        target_devices = device_collections.DEFAULT_DEVICE_COLLECTION.query_device_specs(
            architecture=common_definitions.DeviceArchitecture.NVIDIA_AMPERE,
            host_environment=common_definitions.HostEnvironment.LINUX_X86_64,
        )

        return utils.generate_e2e_model_run_configs(
            module_generation_configs=module_gen_configs,
            module_execution_configs=[execution_config],
            device_specs=target_devices,
            presets=presets,
        )

    def generate(
        self,
    ) -> List[iree_definitions.E2EModelRunConfig]:
        """Generates IREE compile and run configs.

        Returns:
            The run configs of four groups concatenated in this order: the
            CUDA models, the matmul microbenchmarks, the split-K matmul
            microbenchmarks, and the long-running CUDA models.
        """
        # CUDA models compiled with the default flags; CUDA preset.
        default_configs = self._generate_configs(
            models=model_groups.CUDA_MODELS,
            compile_config=self.SM_80_COMPILE_CONFIG,
            execution_config=module_execution_configs.CUDA_CONFIG,
            presets=[benchmark_presets.CUDA],
        )

        # Matmul microbenchmarks; CUDA preset.
        matmul_configs = self._generate_configs(
            models=model_groups.MICRO_MATMUL,
            compile_config=self.SM_80_UBENCH_MATMUL_COMPILE_CONFIG,
            execution_config=module_execution_configs.CUDA_BATCH_SIZE_100_CONFIG,
            presets=[benchmark_presets.CUDA],
        )

        # Split-K matmul microbenchmarks; CUDA preset.
        matmul_splitk_configs = self._generate_configs(
            models=model_groups.MICRO_MATMUL_SPLITK,
            compile_config=self.SM_80_UBENCH_MATMUL_SPLITK_COMPILE_CONFIG,
            execution_config=module_execution_configs.CUDA_BATCH_SIZE_100_CONFIG,
            presets=[benchmark_presets.CUDA],
        )

        # Long-running models go into the separate CUDA_LARGE preset.
        long_model_configs = self._generate_configs(
            models=model_groups.CUDA_MODELS_LONG,
            compile_config=self.SM_80_COMPILE_CONFIG,
            execution_config=module_execution_configs.CUDA_CONFIG,
            presets=[benchmark_presets.CUDA_LARGE],
        )

        return (
            default_configs
            + matmul_configs
            + matmul_splitk_configs
            + long_model_configs
        )