# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from library import *
from matmul import ReferenceMatmulOp
from batch_matmul import ReferenceBatchMatmulOp
from pathlib import Path
import re
import subprocess


class IreeToolsLauncher:
    """Launcher for the IREE tools: iree-compile, iree-run-module, and
    iree-benchmark-module."""

    def __init__(self, args, operation):
self.operation = operation
self.generated_path = Path(args.generated_dir, "generated", args.mlir_dialect)
self.args = args
self.benchmark_dispatch_repeat_count = args.batch_size
self.batch_size = args.batch_size
# paths to source dispatch mlir, compiled vmfb, and logs.
self.operation_path = self.generated_path.joinpath(
OperationKindNames[operation.operation_kind], operation.name()
)
self.source_mlir_file = self.operation_path.joinpath(
operation.name()
).with_suffix(".mlir")
        # path to cached numpy reference inputs and expected output files.
        self.op_reference_cache_path = Path(
            args.generated_dir, "generated", "reference_cache", operation.name()
        )
        self.op_reference_cache_path.mkdir(parents=True, exist_ok=True)
# path to iree-compile tool. (for compiling the input mlir file to vmfb)
self.iree_compile_path = Path(args.iree_bin_dir, "iree-compile")
# path to iree-benchmark-module tool. (for performance benchmarking and profiling)
self.iree_benchmark_module_path = Path(
args.iree_bin_dir, "iree-benchmark-module"
)
# path to iree-run-module tool. (for verification)
self.iree_run_module_path = Path(args.iree_bin_dir, "iree-run-module")
# output vmfb files for verification and profiling.
        vmfb_filename = operation.name()
if operation.operation_kind == OperationKind.SplitkMatmul:
split_k_suffix = "_".join(["split_k_slice", str(operation.split_k_slices)])
vmfb_filename = f"{vmfb_filename}_{split_k_suffix}"
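        # e.g. with 4 split-k slices the suffix is "split_k_slice_4", giving a
        # vmfb stem like "<operation_name>_split_k_slice_4" (illustrative; the
        # exact stem comes from operation.name()).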
        self.vmfb_verify_filepath = self.operation_path.joinpath(
            f"{vmfb_filename}_verify.vmfb"
        )
        self.vmfb_profile_filepath = self.operation_path.joinpath(
            f"{vmfb_filename}_profile.vmfb"
        )
# reference implementation for the operation_kind.
self.reference_impl_map = {
OperationKind.Matmul: ReferenceMatmulOp,
OperationKind.SplitkMatmul: ReferenceMatmulOp,
OperationKind.BatchMatmul: ReferenceBatchMatmulOp,
}
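        # Note: split-k matmul reuses the plain matmul reference, since the
        # split-k transformation changes only how the reduction is scheduled,
        # not the mathematical result.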

    def iree_compile(self, compilation_mode):
        """Compiles the input MLIR file into a VMFB module."""
benchmark_dispatch_repeat_count = (
self.benchmark_dispatch_repeat_count
if compilation_mode == CompilationMode.Profile
else 1
)
vmfb_filepath = (
self.vmfb_profile_filepath
if compilation_mode == CompilationMode.Profile
else self.vmfb_verify_filepath
)
# Base iree-compile commandline
cmd = [
f"{self.iree_compile_path}",
f"{self.source_mlir_file}",
"-o",
f"{vmfb_filepath}",
]
# General compilation options
cmd += [f"--iree-hal-target-backends={self.args.device}"]
if self.args.device == "cuda":
cmd += [f"--iree-hal-cuda-llvm-target-arch={self.args.cuda_arch}"]
if self.operation.operation_kind == OperationKind.SplitkMatmul:
cmd += [
f"--iree-flow-split-matmul-reduction={self.operation.split_k_slices}"
]
        if self.args.use_mma_sync:
            cmd += ["--iree-codegen-llvmgpu-use-mma-sync"]
        if self.args.use_wmma:
            cmd += ["--iree-codegen-llvmgpu-use-wmma"]
# Compilation options for profiling
cmd += [
f"--iree-hal-benchmark-dispatch-repeat-count={benchmark_dispatch_repeat_count}"
]
        # Append IR-printing options at the end of the command line.
        if self.args.mlir_print_ir_after_all:
            cmd += ["--mlir-print-ir-after-all"]
if not vmfb_filepath.exists() or self.args.force_compile:
            compile_mode_str = CompilationModeNames[compilation_mode]
            print(f"[Compiling ({compile_mode_str})] {' '.join(cmd)}")
iree_compile_stdout_filepath = self.operation_path.joinpath(
"iree_compile_cmd_stdout.mlir"
)
with open(iree_compile_stdout_filepath, "w") as fp:
subprocess.run(cmd, stderr=fp)
        elif self.args.verbose:
            print(
                f"Skipping compilation: {vmfb_filepath} already exists."
            )

    def verify(self, configuration):
        """Verifies the operation with a given configuration."""
# First compile the operation to a vmfb file.
self.iree_compile(CompilationMode.Verify)
# Verify using random data distribution.
reference_run = self.reference_impl_map[self.operation.operation_kind](
self.operation,
self.op_reference_cache_path,
Distribution.Random,
Distribution.Random,
)
if not reference_run.is_cached():
reference_run()
        # Base `iree-run-module` command line for verification.
cmd = [
f"{self.iree_run_module_path}",
f"--module={self.vmfb_verify_filepath}",
f"--device={self.args.device}",
]
# Operation-specific verification command-line.
cmd.append(f"--function={self.operation.name()}_{configuration.name()}")
for input_file_path in reference_run.get_input_filepaths():
cmd.append(f"--input=@{input_file_path}")
for output_file_path in reference_run.get_output_filepaths():
cmd.append(f"--expected_output=@{output_file_path}")
# Print the command if verbose.
if self.args.verbose:
print(f"[Verification] {' '.join(cmd)}")
# Launch verification.
cmd_output = subprocess.check_output(cmd, text=True)
# Save the verification command and the output, only if requested
# (file writing could slow down the verification).
if self.args.save_cmds:
filepath = self.operation_path.joinpath("iree_run_module.stdout")
with open(filepath, "w") as fp:
fp.write(f"[Command] $ {' '.join(cmd)}\n")
fp.write(cmd_output)
# Parse the verification output.
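        # iree-run-module is expected to print a bracketed status token such as
        # "[SUCCESS]" when outputs match (assumption: the surrounding text may
        # vary across IREE versions; only the bracketed token is matched).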
m = re.search(r"\[(?P<verification_result>[a-zA-Z]+)\]", cmd_output)
if m is None:
raise ValueError(
f"Failed to parse verification output by iree-run-module: {cmd_output}"
)
verification_result = m.group("verification_result")
if self.args.verbose or verification_result != "SUCCESS":
print(cmd_output)
return verification_result

    def profile(self, configuration):
        """Profiles the operation with a given configuration."""
# First compile the operation to a vmfb file.
self.iree_compile(CompilationMode.Profile)
        # Base `iree-benchmark-module` command line for profiling.
cmd = [
f"{self.iree_benchmark_module_path}",
f"--module={self.vmfb_profile_filepath}",
f"--device={self.args.device}",
]
# Profiling specific flags.
cmd += [f"--benchmark_repetitions={self.args.benchmark_repetitions}"]
cmd += [f"--batch_size={self.batch_size}"]
# Operation-specific profiling command-line.
cmd += [f"--function={self.operation.name()}_{configuration.name()}"]
cmd += [f"--input={self.operation.lhs_npy_shape()}"]
cmd += [f"--input={self.operation.rhs_npy_shape()}"]
# Print the command if verbose.
if self.args.verbose:
print(f"[Profiling] {' '.join(cmd)}")
# Launch profiling.
cmd_output = subprocess.check_output(cmd, text=True, stderr=subprocess.STDOUT)
# Save the profiling command and the output, only if requested
# (file writing could slow down the profiling).
if self.args.save_cmds:
filepath = self.operation_path.joinpath("iree_benchmark_module.stdout")
with open(filepath, "w") as fp:
fp.write(f"[Command] $ {' '.join(cmd)}\n")
fp.write(cmd_output)
# Parse the profiling output.
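        # With --benchmark_repetitions set, iree-benchmark-module emits Google
        # Benchmark aggregate rows; the one of interest looks roughly like
        # "... real_time_median    0.123 ms ..." (illustrative formatting).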
m = re.search(r"real_time_median\s+(?P<runtime>\d+.\d+)\s+ms", cmd_output)
if m is None:
raise ValueError(
f"Failed to parse runtime from benchmark result: {cmd_output}"
)
runtime_in_ms = float(m.group("runtime"))
return runtime_in_ms
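

# Illustrative usage sketch (hypothetical values; in this tool suite the `args`
# namespace comes from the profiler's command-line parser, and `operation` /
# `configuration` come from the operation generators):
#
#   launcher = IreeToolsLauncher(args, operation)
#   result = launcher.verify(configuration)      # returns e.g. "SUCCESS"
#   runtime_ms = launcher.profile(configuration) # median runtime in ms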