# blob: be90f40ab3ff255e734bdd4889e2fa6aaedac754 [file] [log] [blame]
# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Defines IREE Mali GPU benchmarks."""
from typing import List, Sequence
from benchmark_suites.iree import benchmark_presets, module_execution_configs, utils
from e2e_test_framework import unique_ids
from e2e_test_framework.definitions import common_definitions, iree_definitions
from e2e_test_framework.models import tflite_models, tf_models
from e2e_test_framework.device_specs import device_collections
class Android_Mali_Benchmarks:
    """Benchmarks on Android devices with Mali GPU."""

    # Vulkan SPIR-V compile target used by every compile config in this class.
    ARM_VALHALL_GPU_TARGET = iree_definitions.CompileTarget(
        target_backend=iree_definitions.TargetBackend.VULKAN_SPIRV,
        target_architecture=common_definitions.DeviceArchitecture.ARM_VALHALL,
        target_abi=iree_definitions.TargetABI.VULKAN_ANDROID31,
    )

    # Compile config with no extra compiler flags.
    DEFAULT_COMPILE_CONFIG = iree_definitions.CompileConfig.build(
        id=unique_ids.IREE_COMPILE_CONFIG_ANDROID_ARM_VALHALL_DEFAULTS,
        tags=["default-flags"],
        compile_targets=[ARM_VALHALL_GPU_TARGET],
    )

    # Compile config that adds the padding-fusion and max-concurrency flags.
    EXPERIMENTAL_COMPILE_CONFIG = iree_definitions.CompileConfig.build(
        id=unique_ids.IREE_COMPILE_CONFIG_ANDROID_ARM_VALHALL_EXPERIMENTAL,
        tags=["experimental-flags", "fuse-padding", "max-concurrency"],
        compile_targets=[ARM_VALHALL_GPU_TARGET],
        extra_flags=[
            "--iree-flow-enable-fuse-padding-into-linalg-consumer-ops",
            "--iree-stream-partitioning-favor=max-concurrency",
        ],
    )

    # Kernel execution
    # Note that for kernel-execution benchmarks, batch_size/repeat-count need
    # to be low enough that the whole dispatch completes within an OS-specific
    # timeout. Otherwise you'll get an error like:
    # ```
    # INTERNAL; VK_ERROR_DEVICE_LOST; vkQueueSubmit; while invoking native function
    # hal.fence.await; while calling import;
    # ```
    EXPERIMENTAL_REPEATED_KERNEL_COMPILE_CONFIG = iree_definitions.CompileConfig.build(
        id=unique_ids.IREE_COMPILE_CONFIG_ANDROID_ARM_VALHALL_EXPERIMENTAL_REPEATED_KERNEL,
        tags=[
            "experimental-flags",
            "fuse-padding",
            "max-concurrency",
            "repeated-kernel",
        ],
        compile_targets=[ARM_VALHALL_GPU_TARGET],
        # Same flags as the experimental config, plus the dispatch repeat count.
        extra_flags=EXPERIMENTAL_COMPILE_CONFIG.extra_flags
        + ["--iree-hal-benchmark-dispatch-repeat-count=32"],
    )
    EXPERIMENTAL_REPEATED_KERNEL_RUN_FLAGS = ["--batch_size=32"]

    # Models to benchmark, grouped by numeric precision.
    FP32_MODELS = [
        tflite_models.MOBILEBERT_FP32,
    ]
    FP16_MODELS = [tflite_models.MOBILEBERT_FP16]
    QUANT_MODELS = [
        tflite_models.MOBILEBERT_INT8,
    ]

    def generate(
        self,
    ) -> List[iree_definitions.E2EModelRunConfig]:
        """Build the E2E model run configs for the Mali GPU benchmarks.

        Module generation configs are produced for the default, experimental,
        and experimental repeated-kernel compile configs over all model groups.
        Default and experimental modules are paired with `VULKAN_CONFIG`; the
        repeated-kernel modules are paired with `VULKAN_BATCH_SIZE_32_CONFIG`.
        Both sets target the ARM Valhall device specs queried from the default
        device collection and use the `ANDROID_GPU` preset.

        Returns:
          The run configs for the default/experimental modules followed by the
          run configs for the repeated-kernel modules.
        """

        def modules_for(config: iree_definitions.CompileConfig):
            # Every compile config is applied to the same three model groups.
            return self._get_module_generation_configs(
                compile_config=config,
                fp32_models=self.FP32_MODELS,
                fp16_models=self.FP16_MODELS,
                quant_models=self.QUANT_MODELS,
            )

        standard_modules = modules_for(self.DEFAULT_COMPILE_CONFIG) + modules_for(
            self.EXPERIMENTAL_COMPILE_CONFIG
        )
        repeated_kernel_modules = modules_for(
            self.EXPERIMENTAL_REPEATED_KERNEL_COMPILE_CONFIG
        )

        valhall_devices = (
            device_collections.DEFAULT_DEVICE_COLLECTION.query_device_specs(
                architecture=common_definitions.DeviceArchitecture.ARM_VALHALL,
                host_environment=common_definitions.HostEnvironment.ANDROID_ARMV8_2_A,
            )
        )

        all_runs = utils.generate_e2e_model_run_configs(
            module_generation_configs=standard_modules,
            module_execution_configs=[module_execution_configs.VULKAN_CONFIG],
            device_specs=valhall_devices,
            presets=[benchmark_presets.ANDROID_GPU],
        )
        # Repeated-kernel modules use the batch-size-32 execution config.
        all_runs += utils.generate_e2e_model_run_configs(
            module_generation_configs=repeated_kernel_modules,
            module_execution_configs=[
                module_execution_configs.VULKAN_BATCH_SIZE_32_CONFIG
            ],
            device_specs=valhall_devices,
            presets=[benchmark_presets.ANDROID_GPU],
        )
        return all_runs

    def _get_module_generation_configs(
        self,
        compile_config: iree_definitions.CompileConfig,
        fp32_models: Sequence[common_definitions.Model],
        fp16_models: Sequence[common_definitions.Model],
        quant_models: Sequence[common_definitions.Model],
    ) -> List[iree_definitions.ModuleGenerationConfig]:
        """Create module generation configs for each model group.

        Args:
          compile_config: compile config used as-is for the fp32 and quantized
            models, and as the base of the derived config for the fp16 models.
          fp32_models: models compiled with `compile_config`.
          fp16_models: models compiled with a config derived from
            `compile_config` that appends the `--iree-opt-demote-f32-to-f16`
            flag and the `demote-f32-to-f16` tag.
          quant_models: models compiled with `compile_config`.

        Returns:
          Module generation configs ordered as fp32, then fp16, then quantized.
        """
        f16_demotion_config = iree_definitions.CompileConfig.build(
            id=compile_config.id + "-demote-f32-to-16",
            tags=compile_config.tags + ["demote-f32-to-f16"],
            compile_targets=compile_config.compile_targets,
            extra_flags=compile_config.extra_flags + ["--iree-opt-demote-f32-to-f16"],
        )

        def build_modules(config, models):
            # One module generation config per model, all sharing `config`.
            return [
                iree_definitions.ModuleGenerationConfig.build(
                    compile_config=config,
                    imported_model=iree_definitions.ImportedModel.from_model(model),
                )
                for model in models
            ]

        fp32_modules = build_modules(compile_config, fp32_models)
        fp16_modules = build_modules(f16_demotion_config, fp16_models)
        quant_modules = build_modules(compile_config, quant_models)
        return fp32_modules + fp16_modules + quant_modules