# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
from library import *
from matmul import ReferenceMatmulOp
from batch_matmul import ReferenceBatchMatmulOp
from pathlib import Path
import re
import subprocess


class IreeToolsLauncher:
"""Launcher for IREE tools."""
def __init__(self, args, operation):
self.operation = operation
self.generated_path = Path(args.generated_dir, 'generated',
args.mlir_dialect)
self.args = args
    # Repeat each dispatch `batch_size` times inside the compiled module and
    # pass the same value as the benchmark batch size, so reported times are
    # amortized per dispatch.
    self.benchmark_dispatch_repeat_count = args.batch_size
    self.batch_size = args.batch_size
# paths to source dispatch mlir, compiled vmfb, and logs.
self.operation_path = self.generated_path.joinpath(
OperationKindNames[operation.operation_kind], operation.name())
self.source_mlir_file = self.operation_path.joinpath(
operation.name()).with_suffix(".mlir")
    # path to cached numpy reference input and expected output files.
self.op_reference_cache_path = Path(args.generated_dir, 'generated',
'reference_cache', operation.name())
    self.op_reference_cache_path.mkdir(parents=True, exist_ok=True)
# path to iree-compile tool. (for compiling the input mlir file to vmfb)
self.iree_compile_path = Path(args.iree_bin_dir, 'iree-compile')
# path to iree-benchmark-module tool. (for performance benchmarking and profiling)
self.iree_benchmark_module_path = Path(args.iree_bin_dir,
'iree-benchmark-module')
# path to iree-run-module tool. (for verification)
self.iree_run_module_path = Path(args.iree_bin_dir, 'iree-run-module')
# output vmfb files for verification and profiling.
vmfb_filename = f"{operation.name()}"
if operation.operation_kind == OperationKind.SplitkMatmul:
split_k_suffix = "_".join(
['split_k_slice', str(operation.split_k_slices)])
vmfb_filename = f"{vmfb_filename}_{split_k_suffix}"
self.vmfb_verify_filepath = self.operation_path.joinpath(
self.operation.name()).with_name(f"{vmfb_filename}_verify.vmfb")
self.vmfb_profile_filepath = self.operation_path.joinpath(
self.operation.name()).with_name(f"{vmfb_filename}_profile.vmfb")
# reference implementation for the operation_kind.
self.reference_impl_map = {
OperationKind.Matmul: ReferenceMatmulOp,
OperationKind.SplitkMatmul: ReferenceMatmulOp,
OperationKind.BatchMatmul: ReferenceBatchMatmulOp,
}
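  # For orientation, the constructor above lays out a per-operation tree of
  # the following shape (a sketch; actual names depend on the generator's
  # `--generated_dir` and `--mlir_dialect` flags):
  #
  #   <generated_dir>/generated/<dialect>/<kind>/<op>/<op>.mlir
  #   <generated_dir>/generated/<dialect>/<kind>/<op>/<op>[_split_k_slice_<n>]_verify.vmfb
  #   <generated_dir>/generated/<dialect>/<kind>/<op>/<op>[_split_k_slice_<n>]_profile.vmfb
  #   <generated_dir>/generated/reference_cache/<op>/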
def iree_compile(self, compilation_mode):
"""Compiles the input mlir file to vmfb file."""
    is_profile = compilation_mode == CompilationMode.Profile
    benchmark_dispatch_repeat_count = (self.benchmark_dispatch_repeat_count
                                       if is_profile else 1)
    vmfb_filepath = (self.vmfb_profile_filepath
                     if is_profile else self.vmfb_verify_filepath)
# Base iree-compile commandline
cmd = [
f'{self.iree_compile_path}',
f'{self.source_mlir_file}',
"-o",
f'{vmfb_filepath}',
]
# General compilation options
cmd += [f"--iree-hal-target-backends={self.args.device}"]
if self.args.device == "cuda":
cmd += [f"--iree-hal-cuda-llvm-target-arch={self.args.cuda_arch}"]
if self.operation.operation_kind == OperationKind.SplitkMatmul:
cmd += [
f"--iree-flow-split-matmul-reduction={self.operation.split_k_slices}"
]
    if self.args.use_mma_sync:
      cmd += ["--iree-codegen-llvmgpu-use-mma-sync"]
    if self.args.use_wmma:
      cmd += ["--iree-codegen-llvmgpu-use-wmma"]
# Compilation options for profiling
cmd += [
f"--iree-hal-benchmark-dispatch-repeat-count={benchmark_dispatch_repeat_count}"
]
# Appends print ir options at the end of the command line.
    if self.args.mlir_print_ir_after_all:
      cmd += ["--mlir-print-ir-after-all"]
if not vmfb_filepath.exists() or self.args.force_compile:
      compile_mode_str = CompilationModeNames[compilation_mode]
      print(f"[Compiling ({compile_mode_str})] {' '.join(cmd)}")
      # IR dumps from `--mlir-print-ir-after-all` and other diagnostics are
      # emitted on stderr, so redirect stderr into the log file.
      iree_compile_stdout_filepath = self.operation_path.joinpath(
          'iree_compile_cmd_stdout.mlir')
      with open(iree_compile_stdout_filepath, "w") as fp:
        subprocess.run(cmd, stderr=fp)
elif self.args.verbose:
print(
f"Skipping compilation of operation: {vmfb_filepath} since it already exists."
)
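  # For illustration, a CUDA profiling compile assembled above ends up looking
  # roughly like the following (the operation name and `sm_80` architecture
  # are hypothetical):
  #
  #   iree-compile <op>.mlir -o <op>_profile.vmfb \
  #     --iree-hal-target-backends=cuda --iree-hal-cuda-llvm-target-arch=sm_80 \
  #     --iree-hal-benchmark-dispatch-repeat-count=100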
def verify(self, configuration):
"""Verifies the operation with a given configuration."""
# First compile the operation to a vmfb file.
self.iree_compile(CompilationMode.Verify)
# Verify using random data distribution.
reference_run = self.reference_impl_map[self.operation.operation_kind](
self.operation, self.op_reference_cache_path, Distribution.Random,
Distribution.Random)
if not reference_run.is_cached():
reference_run()
# Commandline `iree-run-module` for verification.
cmd = [
f'{self.iree_run_module_path}', f'--module={self.vmfb_verify_filepath}',
f'--device={self.args.device}'
]
# Operation-specific verification command-line.
cmd.append(f'--function={self.operation.name()}_{configuration.name()}')
for input_file_path in reference_run.get_input_filepaths():
cmd.append(f'--input=@{input_file_path}')
for output_file_path in reference_run.get_output_filepaths():
cmd.append(f'--expected_output=@{output_file_path}')
# Print the command if verbose.
if self.args.verbose:
print(f"[Verification] {' '.join(cmd)}")
# Launch verification.
cmd_output = subprocess.check_output(cmd, text=True)
# Save the verification command and the output, only if requested
# (file writing could slow down the verification).
if self.args.save_cmds:
filepath = self.operation_path.joinpath("iree_run_module.stdout")
with open(filepath, "w") as fp:
fp.write(f"[Command] $ {' '.join(cmd)}\n")
fp.write(cmd_output)
# Parse the verification output.
m = re.search(r"\[(?P<verification_result>[a-zA-Z]+)\]", cmd_output)
if m is None:
raise ValueError(
f"Failed to parse verification output by iree-run-module: {cmd_output}"
)
verification_result = m.group('verification_result')
if self.args.verbose or verification_result != "SUCCESS":
print(cmd_output)
return verification_result
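  # A sketch of the resulting invocation (all names hypothetical):
  #
  #   iree-run-module --module=<op>_verify.vmfb --device=cuda \
  #     --function=<op>_<config> \
  #     --input=@<lhs>.npy --input=@<rhs>.npy --expected_output=@<result>.npy
  #
  # iree-run-module reports a bracketed status such as [SUCCESS], which the
  # regex above extracts and verify() returns.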
def profile(self, configuration):
"""Profiles the operation with a given configuration."""
# First compile the operation to a vmfb file.
self.iree_compile(CompilationMode.Profile)
# Commandline `iree-benchmark-module` for profiling.
cmd = [
f'{self.iree_benchmark_module_path}',
f'--module={self.vmfb_profile_filepath}', f'--device={self.args.device}'
]
# Profiling specific flags.
cmd += [f'--benchmark_repetitions={self.args.benchmark_repetitions}']
cmd += [f'--batch_size={self.batch_size}']
# Operation-specific profiling command-line.
cmd += [f'--function={self.operation.name()}_{configuration.name()}']
cmd += [f'--input={self.operation.lhs_npy_shape()}']
cmd += [f'--input={self.operation.rhs_npy_shape()}']
# Print the command if verbose.
if self.args.verbose:
print(f"[Profiling] {' '.join(cmd)}")
# Launch profiling.
cmd_output = subprocess.check_output(cmd,
text=True,
stderr=subprocess.STDOUT)
# Save the profiling command and the output, only if requested
# (file writing could slow down the profiling).
if self.args.save_cmds:
filepath = self.operation_path.joinpath("iree_benchmark_module.stdout")
with open(filepath, "w") as fp:
fp.write(f"[Command] $ {' '.join(cmd)}\n")
fp.write(cmd_output)
# Parse the profiling output.
m = re.search(r"real_time_median\s+(?P<runtime>\d+.\d+)\s+ms", cmd_output)
if m is None:
raise ValueError(
f"Failed to parse runtime from benchmark result: {cmd_output}")
runtime_in_ms = float(m.group('runtime'))
return runtime_in_ms
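# A minimal usage sketch, assuming a parsed-args namespace with the fields
# read above (generated_dir, mlir_dialect, iree_bin_dir, device, batch_size,
# benchmark_repetitions, force_compile, verbose, save_cmds, ...) and an
# `operation`/`configuration` pair from the generator; all names here are
# hypothetical:
#
#   launcher = IreeToolsLauncher(args, operation)
#   if launcher.verify(configuration) == "SUCCESS":
#     runtime_ms = launcher.profile(configuration)
#     print(f"{operation.name()}_{configuration.name()}: {runtime_ms:.3f} ms")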