| # Copyright 2023 The IREE Authors |
| # |
| # Licensed under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| import enum, re |
| from enum import auto |
| import numpy as np |
| from abc import ABC, abstractmethod |
| from collections import namedtuple |
| |
| ################################################################################################### |
# This file contains a library of enumerations and classes used to build operation descriptions.
| # The operation descriptions are used to generate MLIR source files, performance tuning configuration, |
| # reference implementations, and numpy input/output files. |
| |
| # The file is organized as follows: |
# 1. Enumerated `Type`s grouped together by category, e.g. [Arch]Type, [Data]Type etc.
# 2. Dictionaries `Names` mapping the enumeration values to their string names.
| # For e.g. [Arch]TypeNames, [Data]TypeNames etc. |
| # 3. `Tags` for each enumeration value to be used in the generated MLIR source files. |
| # For e.g. [TranslationInfo]Tags |
| ################################################################################################### |
| |
| |
# Architecture types
| ################################################################################################### |
class ArchType(enum.Enum):
    """Target architecture category (CPU vs. GPU)."""

    Cpu = auto()
    Gpu = auto()


# Lower-case string spelling for each architecture type.
ArchTypeNames = {arch: arch.name.lower() for arch in ArchType}
| |
| |
class GpuArchType(enum.Enum):
    """Supported GPU backend architectures."""

    nvptx = auto()
    rocm = auto()
    spirv = auto()


# String spelling for each GPU architecture (identical to the member name).
GpuArchTypeNames = {gpu_arch: gpu_arch.name for gpu_arch in GpuArchType}
| |
| |
| # Operation kinds |
| ################################################################################################### |
class OperationKind(enum.Enum):
    """Kinds of operations that can be described and generated."""

    Matmul = auto()
    BatchMatmul = auto()
    SplitkMatmul = auto()
    Conv2d = auto()


# Human-readable name for each operation kind.
OperationKindNames = {
    OperationKind.Matmul: "matmul",
    OperationKind.BatchMatmul: "batch_matmul",
    OperationKind.SplitkMatmul: "matmul_splitk",
    OperationKind.Conv2d: "conv2d",
}
| |
| |
| # MLIR dialects |
| ################################################################################################### |
class MlirDialect(enum.Enum):
    """MLIR input dialects an operation can be emitted in."""

    Linalg = auto()
    Mhlo = auto()


# Lower-case string spelling for each dialect.
MlirDialectNames = {dialect: dialect.name.lower() for dialect in MlirDialect}
| |
| |
| # Compilation modes (verification or benchmarking/profiling) |
| ################################################################################################### |
class CompilationMode(enum.Enum):
    """Whether a compiled artifact targets verification or profiling."""

    Verify = auto()
    Profile = auto()


# Lower-case string spelling for each compilation mode.
CompilationModeNames = {mode: mode.name.lower() for mode in CompilationMode}
| |
| |
class CompilationConfigType(enum.Enum):
    """Source of the compilation configuration (built-in default vs. user-supplied)."""

    Default = auto()
    Custom = auto()


# Lower-case string spelling for each configuration type.
CompilationConfigTypeName = {cfg: cfg.name.lower() for cfg in CompilationConfigType}
| |
| |
| # Enumerations for data types and layouts |
| ################################################################################################### |
class DataType(enum.Enum):
    """Element data types (b = bit, u = unsigned int, s = signed int,
    e/f/bf/tf = floating-point variants)."""

    b1 = auto()
    u4 = auto()
    u8 = auto()
    u16 = auto()
    u32 = auto()
    u64 = auto()
    s4 = auto()
    s8 = auto()
    s16 = auto()
    s32 = auto()
    s64 = auto()
    e4m3 = auto()
    e5m2 = auto()
    f16 = auto()
    bf16 = auto()
    f32 = auto()
    tf32 = auto()
    f64 = auto()
    invalid = auto()


# String spelling for every valid data type (identical to the member name).
# `invalid` is deliberately excluded from all the tables below.
DataTypeName = {dt: dt.name for dt in DataType if dt is not DataType.invalid}

# NumPy dtype for the data types that have a direct NumPy equivalent.
DataTypeNumPyTag = {
    DataType.f16: np.float16,
    DataType.f32: np.float32,
}

# Storage size in bits of each valid data type.
DataTypeSizeInBits = {
    DataType.b1: 1,
    DataType.u4: 4,
    DataType.u8: 8,
    DataType.u16: 16,
    DataType.u32: 32,
    DataType.u64: 64,
    DataType.s4: 4,
    DataType.s8: 8,
    DataType.s16: 16,
    DataType.s32: 32,
    DataType.s64: 64,
    DataType.e4m3: 8,
    DataType.e5m2: 8,
    DataType.f16: 16,
    DataType.bf16: 16,
    DataType.f32: 32,
    DataType.tf32: 32,
    DataType.f64: 64,
}
| |
| |
class LayoutType(enum.Enum):
    """Tensor layouts for matrix and convolution operands."""

    ColumnMajor = auto()
    RowMajor = auto()
    NHWC = auto()
    NCWH = auto()


# Short layout names following the cuBLAS/cuDNN naming convention
# ("n" = non-transposed/column-major, "t" = transposed/row-major):
# https://docs.nvidia.com/cuda/cublas/index.html#cublasoperation-t
ShortLayoutTypeName = {
    LayoutType.ColumnMajor: "n",
    LayoutType.RowMajor: "t",
    LayoutType.NHWC: "nhwc",
    LayoutType.NCWH: "ncwh",
}
| |
| |
| # Compilation pipelines/translation info. |
| ################################################################################################### |
class TranslationInfo(enum.Enum):
    """Compilation pipelines (translation info) for GPU matmul code generation."""

    LLVMGPUMatmulSIMT = auto()
    LLVMGPUMatmulTensorCore = auto()
    LLVMGPUMatmulTensorCoreMmaSync = auto()


# Tag emitted into generated MLIR source (identical to the member name).
TranslationInfoTag = {info: info.name for info in TranslationInfo}

# Short human-readable name for each pipeline.
TranslationInfoName = {
    TranslationInfo.LLVMGPUMatmulSIMT: "simt_ffma",
    TranslationInfo.LLVMGPUMatmulTensorCore: "tensorcore_wmma",
    TranslationInfo.LLVMGPUMatmulTensorCoreMmaSync: "tensorcore_mmasync",
}
| |
| |
| # Distribution of values in a tensor. |
| ################################################################################################### |
class Distribution(enum.Enum):
    """Distribution of the values used to initialize a tensor."""

    Empty = auto()
    Zeros = auto()
    Ones = auto()
    Sequential = auto()
    Identity = auto()
    Random = auto()


# Short string spelling for each distribution. All names are the lower-cased
# member name, except Sequential which is abbreviated to "seq".
DistributionName = {d: d.name.lower() for d in Distribution}
DistributionName[Distribution.Sequential] = "seq"
| |
| ################################################################################################### |
# The next part of this file contains the data structures for describing tensors, tiles, etc. that
# are built using the above enumerations. These data structures are used to compose bigger
# data structures that describe an operation or a sequence of operations, along with a compilation
# pipeline, to form a collection of dispatches to be profiled.
| ################################################################################################### |
| |
| |
class TensorDescription:
    """Describes a tensor by its element data type and memory layout."""

    def __init__(self, datatype, layout):
        # Element data type (a `DataType` member).
        self.datatype = datatype
        # Memory layout (a `LayoutType` member).
        self.layout = layout

    def name(self):
        """Returns the concatenated datatype+layout name, e.g. "f16t"."""
        datatype_name = DataTypeName[self.datatype]
        layout_name = ShortLayoutTypeName[self.layout]
        return f"{datatype_name}{layout_name}"
| |
| |
class TileDescription:
    """Describes a threadblock tile and its software pipeline depth."""

    def __init__(self, threadblock_shape, stages, block_dim):
        # Tile extent in number of elements, as [M, N, K].
        self.threadblock_shape = threadblock_shape
        # Number of shared-memory pipeline stages along the K dimension.
        self.stages = stages
        # Threadblock size in number of threads, as [x, y, z].
        self.block_dim = block_dim

    def name(self):
        """Returns the tile name as "MxN_KxStages", e.g. "128x128_32x3"."""
        tile_m, tile_n, tile_k = self.threadblock_shape
        return f"{tile_m}x{tile_n}_{tile_k}x{self.stages}"
| |
| |
| ################################################################################################### |
# The following part contains utility functions which are used by the profiler tool.
# These functions may be moved as the need to create a proper structure for the
# functionality they provide becomes apparent and necessary as we move forward.
| ################################################################################################### |
def get_np_array(tensor_description, shape, dist):
    """Returns a numpy array of `shape` initialized per the requested distribution.

    Args:
      tensor_description: a `TensorDescription`; only its `datatype` is read,
        and only for `Distribution.Random` (to pick an integer value range).
      shape: sequence of array dimensions.
      dist: a `Distribution` member selecting how values are generated.

    Returns:
      A numpy array. NOTE(review): the arrays keep numpy's default dtype
      (e.g. int64 for the Random/Sequential cases) rather than the dtype
      implied by `tensor_description` — callers appear to cast downstream.

    Raises:
      ValueError: if `dist` is unsupported, or for `Distribution.Random` if
        the datatype has no defined value range. (Previously these cases
        silently returned None.)
    """
    # Fix the seed for reproducibility.
    np.random.seed(42)

    # Generate the numpy array based on the distribution.
    if dist == Distribution.Empty:
        return np.empty(shape)
    elif dist == Distribution.Zeros:
        return np.zeros(shape)
    elif dist == Distribution.Ones:
        return np.ones(shape)
    elif dist == Distribution.Sequential:
        return np.arange(np.prod(shape)).reshape(shape)
    elif dist == Distribution.Identity:
        return np.eye(shape[0], shape[1])
    elif dist == Distribution.Random:
        # Narrow integer ranges keep products/accumulations exactly
        # representable in the low-precision target types.
        datatype = tensor_description.datatype
        if datatype == DataType.s8:
            return np.random.randint(-2, 3, shape)
        elif datatype == DataType.u8:
            return np.random.randint(0, 4, shape)
        elif datatype in (DataType.f16, DataType.bf16):
            return np.random.randint(-3, 4, shape)
        elif datatype == DataType.f32:
            return np.random.randint(-7, 8, shape)
        raise ValueError(
            "get_np_array: unsupported datatype for random distribution: %s" % datatype
        )
    raise ValueError("get_np_array: unsupported distribution: %s" % dist)
| |
| |
| ################################################################################################### |
def SubstituteTemplate(template, values):
    """Returns `template` with every "${key}" placeholder replaced by its value.

    Args:
      template: string containing zero or more "${key}" placeholders.
      values: dict mapping placeholder names to replacement strings.

    Plain `str.replace` is used instead of `re.sub` because `re.sub` treats
    backslashes and "\\g<...>" sequences in the *replacement* string specially,
    which corrupted (or raised on) values containing backslashes. The
    placeholder pattern is a fixed literal, so no regex is needed.
    """
    text = template
    for key, value in values.items():
        text = text.replace("${%s}" % key, value)
    return text
| |
| |
| ################################################################################################### |
class ReferenceOpInterface(ABC):
    """Interface implemented by reference (golden) operation runners."""

    @abstractmethod
    def get_input_filepaths(self):
        """Returns the list of filepaths of the reference inputs."""
        pass

    @abstractmethod
    def get_output_filepaths(self):
        """Returns the list of filepaths of the reference outputs."""
        pass

    @abstractmethod
    def __call__(self):
        """Runs the reference implementation."""
        pass

    def is_cached(self):
        """Returns True iff every reference input and output file already exists."""
        inputs_present = all(
            filepath.exists() for filepath in self.get_input_filepaths()
        )
        outputs_present = all(
            filepath.exists() for filepath in self.get_output_filepaths()
        )
        return inputs_present and outputs_present
| |
| ################################################################################################### |