blob: 1f780764f83c9cff01c180c2fcf0c7f6ef65fca9 [file] [log] [blame]
# Copyright 2024 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
load("//build_tools/bazel:build_defs.oss.bzl", "iree_cmake_extra_content", "iree_runtime_cc_library")
load("//build_tools/bazel:iree_bitcode_library.bzl", "iree_amdgpu_bitcode_library")
load("//build_tools/embed_data:build_defs.bzl", "iree_c_embed_data")
# Package-wide defaults applied to every target declared in this file:
# public visibility, the "layering_check" feature, and a "notice" license.
package(
    default_visibility = ["//visibility:public"],
    features = ["layering_check"],
    licenses = ["notice"],  # Apache 2.0
)
# Extra CMake content for this package. The snippet is a guard that returns
# early whenever IREE_TARGET_BACKEND_ROCM is not enabled.
# NOTE(review): presumably this text is spliced into the generated
# CMakeLists.txt by the Bazel-to-CMake tooling -- confirm against
# build_defs.oss.bzl.
iree_cmake_extra_content(
    content = """
if(NOT IREE_TARGET_BACKEND_ROCM)
return()
endif()
""",
    inline = True,
)
# AMDGPU processor names that ukernels in this package are built for.
# Reference: https://llvm.org/docs/AMDGPUUsage.html#processors
#
# A ukernel is not required to support every arch in this list. Supporting a
# single arch is fine, and is expected to be the common case for "multi_mma"
# ukernels, since those are written entirely against the matrix intrinsics
# available on one specific arch.
gpu_archs = [
    "gfx90a",
    "gfx942",
    "gfx1030",
    "gfx1100",
]
# Element type combinations that the argmax ukernel is provided for. Each
# entry is substituted into the argmax source file name
# ("iree_uk_amdgpu_argmax_<entry>.c") and into the generated target names.
# NOTE(review): entries look like <input type><index type> pairs (e.g. f16
# values with i32 indices) -- confirm against the ukernel sources.
argmax_types = [
    "bf16i32",
    "bf16i64",
    "f16i32",
    "f16i64",
    "f32i32",
    "f32i64",
]
# Declare one argmax bitcode library for every (element type, GPU arch) pair,
# i.e. len(argmax_types) * len(gpu_archs) targets in total. Each target takes
# "common.h" plus the type-specific C source and produces an output named
# "iree_uk_amdgpu_argmax_<type>.<arch>.bc".
# This must remain a list comprehension: BUILD files do not permit top-level
# `for` statements.
[
    iree_amdgpu_bitcode_library(
        name = "iree_uk_amdgpu_argmax_%s_%s" % (elem_type, arch),
        srcs = [
            "common.h",
            "iree_uk_amdgpu_argmax_%s.c" % elem_type,
        ],
        out = "iree_uk_amdgpu_argmax_%s.%s.bc" % (elem_type, arch),
        gpu_arch = arch,
    )
    for elem_type in argmax_types
    for arch in gpu_archs
]
# Labels of every argmax bitcode output, one per (element type, GPU arch)
# pair. The file-name pattern must stay in sync with the `out` attribute of
# the argmax bitcode library targets declared in this file.
argmax_bc_files = [
    ":iree_uk_amdgpu_argmax_%s.%s.bc" % (elem_type, arch)
    for elem_type in argmax_types
    for arch in gpu_archs
]
# Bitcode library for the multi_mma ukernel named after the
# mfma_i32_16x16x32_i8 intrinsic. Unlike the argmax ukernels it is declared
# for a single arch only: gfx942.
iree_amdgpu_bitcode_library(
    name = "iree_uk_amdgpu_multi_mma_mfma_i32_16x16x32_i8_gfx942",
    srcs = [
        "common.h",
        "iree_uk_amdgpu_multi_mma_mfma_i32_16x16x32_i8.c",
    ],
    out = "iree_uk_amdgpu_multi_mma_mfma_i32_16x16x32_i8.gfx942.bc",
    gpu_arch = "gfx942",
)
# Embed all AMDGPU ukernel bitcode files -- every argmax variant plus the
# gfx942 multi_mma bitcode -- into a single target, with the generated C source
# and header named "iree_uk_amdgpu_bitcode.c" / "iree_uk_amdgpu_bitcode.h".
# NOTE(review): `flatten = True` presumably strips directory prefixes from the
# embedded file names -- confirm against embed_data/build_defs.bzl.
iree_c_embed_data(
    name = "iree_uk_amdgpu_bitcode",
    srcs = argmax_bc_files + [
        "iree_uk_amdgpu_multi_mma_mfma_i32_16x16x32_i8.gfx942.bc",
    ],
    c_file_output = "iree_uk_amdgpu_bitcode.c",
    flatten = True,
    h_file_output = "iree_uk_amdgpu_bitcode.h",
)