Adding 'amdgpu' target device and flatbuffer for HAL executables. (#18933)
The schema may change as the branch gets closer to merging but the
refactoring in the compiler for serializing multiple ABIs will remain
the same.
diff --git a/compiler/plugins/target/ROCM/BUILD.bazel b/compiler/plugins/target/ROCM/BUILD.bazel
index 7962cf8..9692d1a 100644
--- a/compiler/plugins/target/ROCM/BUILD.bazel
+++ b/compiler/plugins/target/ROCM/BUILD.bazel
@@ -39,6 +39,7 @@
"//compiler/src/iree/compiler/Dialect/HAL/Utils:LLVMLinkerUtils",
"//compiler/src/iree/compiler/PluginAPI",
"//compiler/src/iree/compiler/Utils",
+ "//runtime/src/iree/schemas:amdgpu_executable_def_c_fbs",
"//runtime/src/iree/schemas:executable_debug_info_c_fbs",
"//runtime/src/iree/schemas:hip_executable_def_c_fbs",
"@llvm-project//llvm:AMDGPUCodeGen",
diff --git a/compiler/plugins/target/ROCM/CMakeLists.txt b/compiler/plugins/target/ROCM/CMakeLists.txt
index 9430dca..938261a 100644
--- a/compiler/plugins/target/ROCM/CMakeLists.txt
+++ b/compiler/plugins/target/ROCM/CMakeLists.txt
@@ -64,6 +64,7 @@
iree::compiler::Dialect::HAL::Utils::LLVMLinkerUtils
iree::compiler::PluginAPI
iree::compiler::Utils
+ iree::schemas::amdgpu_executable_def_c_fbs
iree::schemas::executable_debug_info_c_fbs
iree::schemas::hip_executable_def_c_fbs
PUBLIC
diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp
index e384dd7..c860b63 100644
--- a/compiler/plugins/target/ROCM/ROCMTarget.cpp
+++ b/compiler/plugins/target/ROCM/ROCMTarget.cpp
@@ -23,6 +23,7 @@
#include "iree/compiler/PluginAPI/Client.h"
#include "iree/compiler/Utils/FlatbufferUtils.h"
#include "iree/compiler/Utils/ToolUtils.h"
+#include "iree/schemas/amdgpu_executable_def_builder.h"
#include "iree/schemas/hip_executable_def_builder.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -54,7 +55,9 @@
namespace {
-struct ROCmOptions {
+// TODO(#18792): rename flags back to iree-rocm- as they are not HIP-specific.
+// Only iree-hip-legacy-sync applies uniquely to HIP.
+struct ROCMOptions {
std::string target = "";
std::string targetFeatures = "";
std::string bitcodeDirectory = getDefaultBitcodeDirectory();
@@ -196,45 +199,9 @@
}
} // namespace
-class ROCMTargetDevice final : public TargetDevice {
-public:
- ROCMTargetDevice(const ROCmOptions &options) : options(options) {}
-
- IREE::HAL::DeviceTargetAttr
- getDefaultDeviceTarget(MLIRContext *context,
- const TargetRegistry &targetRegistry) const override {
- Builder b(context);
-
- SmallVector<NamedAttribute> deviceConfigAttrs;
- if (options.legacySync) {
- // Indicates that the runtime HAL driver operates only in the legacy
- // synchronous mode.
- deviceConfigAttrs.emplace_back(b.getStringAttr("legacy_sync"),
- b.getUnitAttr());
- }
- auto deviceConfigAttr = b.getDictionaryAttr(deviceConfigAttrs);
-
- SmallVector<NamedAttribute> executableConfigAttrs;
- auto executableConfigAttr = b.getDictionaryAttr(executableConfigAttrs);
-
- // If we had multiple target environments we would generate one target attr
- // per environment, with each setting its own environment attribute.
- SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
- targetRegistry.getTargetBackend("rocm")->getDefaultExecutableTargets(
- context, "rocm", executableConfigAttr, executableTargetAttrs);
-
- return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("hip"),
- deviceConfigAttr,
- executableTargetAttrs);
- }
-
-private:
- const ROCmOptions &options;
-};
-
class ROCMTargetBackend final : public TargetBackend {
public:
- ROCMTargetBackend(const ROCmOptions &options) : options(options) {}
+ ROCMTargetBackend(const ROCMOptions &options) : options(options) {}
std::string getLegacyDefaultDeviceID() const override { return "hip"; }
@@ -242,31 +209,43 @@
MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
SmallVectorImpl<IREE::HAL::ExecutableTargetAttr> &executableTargetAttrs)
const override {
- if (auto target = getExecutableTarget(context))
+ if (auto target = getExecutableTarget(deviceID, context)) {
executableTargetAttrs.push_back(target);
+ }
}
IREE::HAL::ExecutableTargetAttr
- getExecutableTarget(MLIRContext *context) const {
+ getExecutableTarget(StringRef deviceID, MLIRContext *context) const {
Builder b(context);
SmallVector<NamedAttribute> configItems;
auto addConfig = [&](StringRef name, Attribute value) {
configItems.emplace_back(b.getStringAttr(name), value);
};
- if (failed(options.verify(b)))
+ if (failed(options.verify(b))) {
return nullptr;
+ }
- if (auto target = GPU::getHIPTargetDetails(options.target,
- options.targetFeatures, context))
+ addConfig("abi", b.getStringAttr(deviceID));
+ std::string format;
+ if (deviceID == "amdgpu") {
+ format = options.target;
+ } else {
+ format = "rocm-hsaco-fb"; // legacy HIP
+ }
+
+ if (auto target = GPU::getHIPTargetDetails(
+ options.target, options.targetFeatures, context)) {
addConfig("iree.gpu.target", target);
+ }
addConfig("ukernels", b.getStringAttr(options.enableROCMUkernels));
- if (options.wavesPerEu > 0)
+ if (options.wavesPerEu > 0) {
addConfig("waves_per_eu", b.getI64IntegerAttr(options.wavesPerEu));
+ }
return b.getAttr<IREE::HAL::ExecutableTargetAttr>(
- b.getStringAttr("rocm"), b.getStringAttr("rocm-hsaco-fb"),
+ b.getStringAttr("rocm"), b.getStringAttr(format),
b.getDictionaryAttr(configItems));
}
@@ -356,9 +335,10 @@
return success();
}
- LogicalResult serializeExecutable(const SerializationOptions &serOptions,
- IREE::HAL::ExecutableVariantOp variantOp,
- OpBuilder &executableBuilder) override {
+ LogicalResult
+ serializeExecutable(const SerializationOptions &serializationOptions,
+ IREE::HAL::ExecutableVariantOp variantOp,
+ OpBuilder &executableBuilder) override {
ModuleOp innerModuleOp = variantOp.getInnerModule();
auto targetAttr = variantOp.getTargetAttr();
StringRef targetArch = options.target;
@@ -552,18 +532,18 @@
return failure();
}
- if (!serOptions.dumpIntermediatesPath.empty()) {
- dumpModuleToPath(serOptions.dumpIntermediatesPath,
- serOptions.dumpBaseName, variantOp.getName(),
+ if (!serializationOptions.dumpIntermediatesPath.empty()) {
+ dumpModuleToPath(serializationOptions.dumpIntermediatesPath,
+ serializationOptions.dumpBaseName, variantOp.getName(),
".linked.ll", *llvmModule);
}
// Run LLVM optimization passes.
optimizeModule(*llvmModule, *targetMachine, options.passPlugins,
options.slpVectorization);
- if (!serOptions.dumpIntermediatesPath.empty()) {
- dumpModuleToPath(serOptions.dumpIntermediatesPath,
- serOptions.dumpBaseName, variantOp.getName(),
+ if (!serializationOptions.dumpIntermediatesPath.empty()) {
+ dumpModuleToPath(serializationOptions.dumpIntermediatesPath,
+ serializationOptions.dumpBaseName, variantOp.getName(),
".optimized.ll", *llvmModule);
}
@@ -572,7 +552,7 @@
}
// Dump the assembly output.
- if (!serOptions.dumpIntermediatesPath.empty()) {
+ if (!serializationOptions.dumpIntermediatesPath.empty()) {
auto moduleCopy = llvm::CloneModule(*llvmModule);
if (!moduleCopy) {
llvm::errs() << "Error: cloning LLVM IR failed\n";
@@ -580,9 +560,9 @@
}
std::string targetISA =
translateModuleToISA(*moduleCopy.get(), *targetMachine);
- dumpDataToPath(serOptions.dumpIntermediatesPath,
- serOptions.dumpBaseName, variantOp.getName(), ".rocmasm",
- targetISA);
+ dumpDataToPath(serializationOptions.dumpIntermediatesPath,
+ serializationOptions.dumpBaseName, variantOp.getName(),
+ ".rocmasm", targetISA);
}
// Serialize hsaco kernel into the binary that we will embed in the
@@ -593,23 +573,136 @@
return failure();
}
- if (!serOptions.dumpBinariesPath.empty()) {
- dumpDataToPath(serOptions.dumpBinariesPath, serOptions.dumpBaseName,
- variantOp.getName(), ".hsaco", targetHSACO);
+ if (!serializationOptions.dumpBinariesPath.empty()) {
+ dumpDataToPath(serializationOptions.dumpBinariesPath,
+ serializationOptions.dumpBaseName, variantOp.getName(),
+ ".hsaco", targetHSACO);
}
+ // Wrap the HSACO ELF binary in a Flatbuffers container.
+ FailureOr<DenseIntElementsAttr> binaryContainer;
+ if (targetAttr.getConfiguration() &&
+ targetAttr.getConfiguration().getAs<StringAttr>("abi") == "amdgpu") {
+ binaryContainer = serializeAMDGPUBinaryContainer(
+ serializationOptions, variantOp, exportOps, targetHSACO);
+ } else {
+ binaryContainer = serializeHIPBinaryContainer(
+ serializationOptions, variantOp, exportOps, targetHSACO);
+ }
+ if (failed(binaryContainer) || !binaryContainer.value()) {
+ return failure();
+ }
+
+ // Add the binary data to the target executable.
+ executableBuilder.create<iree_compiler::IREE::HAL::ExecutableBinaryOp>(
+ variantOp.getLoc(), variantOp.getSymName(),
+ variantOp.getTarget().getFormat(), binaryContainer.value());
+
+ return success();
+ }
+
+protected:
+ FailureOr<DenseIntElementsAttr> serializeAMDGPUBinaryContainer(
+ const SerializationOptions &serializationOptions,
+ IREE::HAL::ExecutableVariantOp variantOp,
+ ArrayRef<IREE::HAL::ExecutableExportOp> exportOps,
+ StringRef hsacoModule) {
+ iree_compiler::FlatbufferBuilder builder;
+ iree_hal_amdgpu_ExecutableDef_start_as_root(builder);
+
+ // Attach embedded source file contents.
+ auto sourceFilesRef = createSourceFilesVec(
+ serializationOptions.debugLevel, variantOp.getSourcesAttr(), builder);
+
+ // Only a single module today.
+ SmallVector<iree_hal_amdgpu_ModuleDef_ref_t> moduleRefs;
+ {
+ auto hsacoImageRef = flatbuffers_string_create(
+ builder, hsacoModule.data(), hsacoModule.size());
+ moduleRefs.push_back(
+ iree_hal_amdgpu_ModuleDef_create(builder, hsacoImageRef));
+ }
+ auto modulesRef = builder.createOffsetVecDestructive(moduleRefs);
+
+ // Generate optional per-export debug information.
+ // May be empty if no debug information was requested.
+ auto exportDebugInfos =
+ createExportDefs(serializationOptions.debugLevel, exportOps, builder);
+
+ SmallVector<iree_hal_amdgpu_ExportDef_ref_t> exportRefs;
+ exportRefs.resize(exportOps.size(), 0);
+ for (auto exportOp : exportOps) {
+ auto ordinalAttr = exportOp.getOrdinalAttr();
+ if (!ordinalAttr) {
+ return mlir::emitError(exportOp.getLoc())
+ << "could not compile rocm binary: export op is missing ordinal";
+ }
+ int64_t ordinal = ordinalAttr.getInt();
+
+ auto symbolNameRef = builder.createString(exportOp.getName());
+
+ iree_hal_amdgpu_Dims_t workgroupSize = {0};
+ if (auto workgroupSizeAttr = exportOp.getWorkgroupSize()) {
+ auto workgroupSizeDims = workgroupSizeAttr->getValue();
+ workgroupSize.x = cast<IntegerAttr>(workgroupSizeDims[0]).getInt();
+ workgroupSize.y = cast<IntegerAttr>(workgroupSizeDims[1]).getInt();
+ workgroupSize.z = cast<IntegerAttr>(workgroupSizeDims[2]).getInt();
+ }
+
+ auto layoutAttr = exportOp.getLayoutAttr();
+ uint32_t constantCount = static_cast<uint32_t>(layoutAttr.getConstants());
+ SmallVector<iree_hal_amdgpu_BindingBits_enum_t> bindingFlags;
+ for (auto bindingAttr : layoutAttr.getBindings()) {
+ iree_hal_amdgpu_BindingBits_enum_t flags = 0;
+ if (allEnumBitsSet(bindingAttr.getFlags(),
+ IREE::HAL::DescriptorFlags::ReadOnly)) {
+ flags |= iree_hal_amdgpu_BindingBits_READ_ONLY;
+ }
+ if (allEnumBitsSet(bindingAttr.getFlags(),
+ IREE::HAL::DescriptorFlags::Indirect)) {
+ flags |= iree_hal_amdgpu_BindingBits_INDIRECT;
+ }
+ bindingFlags.push_back(flags);
+ }
+ auto bindingFlagsRef = iree_hal_amdgpu_BindingBits_vec_create(
+ builder, bindingFlags.data(), bindingFlags.size());
+
+ iree_hal_amdgpu_ExportDef_start(builder);
+ iree_hal_amdgpu_ExportDef_symbol_name_add(builder, symbolNameRef);
+ iree_hal_amdgpu_ExportDef_workgroup_size_add(builder, &workgroupSize);
+ iree_hal_amdgpu_ExportDef_constant_count_add(builder, constantCount);
+ iree_hal_amdgpu_ExportDef_binding_flags_add(builder, bindingFlagsRef);
+ iree_hal_amdgpu_ExportDef_debug_info_add(builder,
+ exportDebugInfos[ordinal]);
+ exportRefs[ordinal] = iree_hal_amdgpu_ExportDef_end(builder);
+ }
+ auto exportsRef = builder.createOffsetVecDestructive(exportRefs);
+
+ iree_hal_amdgpu_ExecutableDef_exports_add(builder, exportsRef);
+ iree_hal_amdgpu_ExecutableDef_modules_add(builder, modulesRef);
+ iree_hal_amdgpu_ExecutableDef_source_files_add(builder, sourceFilesRef);
+ iree_hal_amdgpu_ExecutableDef_end_as_root(builder);
+
+ return builder.getBufferAttr(variantOp.getContext());
+ }
+
+ FailureOr<DenseIntElementsAttr>
+ serializeHIPBinaryContainer(const SerializationOptions &serializationOptions,
+ IREE::HAL::ExecutableVariantOp variantOp,
+ ArrayRef<IREE::HAL::ExecutableExportOp> exportOps,
+ StringRef hsacoModule) {
iree_compiler::FlatbufferBuilder builder;
iree_hal_hip_ExecutableDef_start_as_root(builder);
// Attach embedded source file contents.
auto sourceFilesRef = createSourceFilesVec(
- serOptions.debugLevel, variantOp.getSourcesAttr(), builder);
+ serializationOptions.debugLevel, variantOp.getSourcesAttr(), builder);
// Only a single module today.
SmallVector<iree_hal_hip_ModuleDef_ref_t> moduleRefs;
{
auto hsacoImageRef = flatbuffers_string_create(
- builder, targetHSACO.c_str(), targetHSACO.size());
+ builder, hsacoModule.data(), hsacoModule.size());
moduleRefs.push_back(
iree_hal_hip_ModuleDef_create(builder, hsacoImageRef));
}
@@ -618,7 +711,7 @@
// Generate optional per-export debug information.
// May be empty if no debug information was requested.
auto exportDebugInfos =
- createExportDefs(serOptions.debugLevel, exportOps, builder);
+ createExportDefs(serializationOptions.debugLevel, exportOps, builder);
SmallVector<iree_hal_hip_ExportDef_ref_t> exportRefs;
exportRefs.resize(exportOps.size(), 0);
@@ -682,27 +775,91 @@
iree_hal_hip_ExecutableDef_source_files_add(builder, sourceFilesRef);
iree_hal_hip_ExecutableDef_end_as_root(builder);
- // Add the binary data to the target executable.
- executableBuilder.create<iree_compiler::IREE::HAL::ExecutableBinaryOp>(
- variantOp.getLoc(), variantOp.getSymName(),
- variantOp.getTarget().getFormat(),
- builder.getBufferAttr(executableBuilder.getContext()));
-
- return success();
+ return builder.getBufferAttr(variantOp.getContext());
}
private:
- const ROCmOptions &options;
+ const ROCMOptions &options;
+};
+
+class AMDGPUTargetDevice final : public TargetDevice {
+public:
+ AMDGPUTargetDevice(const ROCMOptions &options) : options(options) {}
+
+ IREE::HAL::DeviceTargetAttr
+ getDefaultDeviceTarget(MLIRContext *context,
+ const TargetRegistry &targetRegistry) const override {
+ Builder b(context);
+
+ SmallVector<NamedAttribute> deviceConfigAttrs;
+ auto deviceConfigAttr = b.getDictionaryAttr(deviceConfigAttrs);
+
+ SmallVector<NamedAttribute> executableConfigAttrs;
+ auto executableConfigAttr = b.getDictionaryAttr(executableConfigAttrs);
+
+ // If we had multiple target environments we would generate one target attr
+ // per environment, with each setting its own environment attribute.
+ SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+ targetRegistry.getTargetBackend("rocm")->getDefaultExecutableTargets(
+ context, "amdgpu", executableConfigAttr, executableTargetAttrs);
+
+ return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("amdgpu"),
+ deviceConfigAttr,
+ executableTargetAttrs);
+ }
+
+private:
+ const ROCMOptions &options;
+};
+
+class HIPTargetDevice final : public TargetDevice {
+public:
+ HIPTargetDevice(const ROCMOptions &options) : options(options) {}
+
+ IREE::HAL::DeviceTargetAttr
+ getDefaultDeviceTarget(MLIRContext *context,
+ const TargetRegistry &targetRegistry) const override {
+ Builder b(context);
+
+ SmallVector<NamedAttribute> deviceConfigAttrs;
+ if (options.legacySync) {
+ // Indicates that the runtime HAL driver operates only in the legacy
+ // synchronous mode.
+ deviceConfigAttrs.emplace_back(b.getStringAttr("legacy_sync"),
+ b.getUnitAttr());
+ }
+ auto deviceConfigAttr = b.getDictionaryAttr(deviceConfigAttrs);
+
+ SmallVector<NamedAttribute> executableConfigAttrs;
+ auto executableConfigAttr = b.getDictionaryAttr(executableConfigAttrs);
+
+ // If we had multiple target environments we would generate one target attr
+ // per environment, with each setting its own environment attribute.
+ SmallVector<IREE::HAL::ExecutableTargetAttr> executableTargetAttrs;
+ targetRegistry.getTargetBackend("rocm")->getDefaultExecutableTargets(
+ context, "hip", executableConfigAttr, executableTargetAttrs);
+
+ return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("hip"),
+ deviceConfigAttr,
+ executableTargetAttrs);
+ }
+
+private:
+ const ROCMOptions &options;
};
namespace {
struct ROCMSession final
- : PluginSession<ROCMSession, ROCmOptions,
+ : PluginSession<ROCMSession, ROCMOptions,
PluginActivationPolicy::DefaultActivated> {
void populateHALTargetDevices(IREE::HAL::TargetDeviceList &targets) {
+ // #hal.device.target<"amdgpu", ...
+ targets.add("amdgpu", [&]() {
+ return std::make_shared<AMDGPUTargetDevice>(options);
+ });
// #hal.device.target<"hip", ...
targets.add("hip",
- [&]() { return std::make_shared<ROCMTargetDevice>(options); });
+ [&]() { return std::make_shared<HIPTargetDevice>(options); });
}
void populateHALTargetBackends(IREE::HAL::TargetBackendList &targets) {
// #hal.executable.target<"rocm", ...
@@ -728,4 +885,4 @@
return true;
}
-IREE_DEFINE_COMPILER_OPTION_FLAGS(mlir::iree_compiler::IREE::HAL::ROCmOptions);
+IREE_DEFINE_COMPILER_OPTION_FLAGS(mlir::iree_compiler::IREE::HAL::ROCMOptions);
diff --git a/compiler/plugins/target/ROCM/ROCMTargetUtils.cpp b/compiler/plugins/target/ROCM/ROCMTargetUtils.cpp
index 7453af7..a1757af 100644
--- a/compiler/plugins/target/ROCM/ROCMTargetUtils.cpp
+++ b/compiler/plugins/target/ROCM/ROCMTargetUtils.cpp
@@ -35,7 +35,7 @@
diagnostic, *llvm_context));
if (!module) {
- mlir::emitError(loc) << "error loading HIP LLVM module: "
+ mlir::emitError(loc) << "error loading ROCM LLVM module: "
<< diagnostic.getFilename().str() << ":"
<< diagnostic.getLineNo() << ":"
<< diagnostic.getColumnNo() << ": "
@@ -90,7 +90,7 @@
auto setAlwaysInline = [&](llvm::Module &module) {
if (targetMachine.getTargetCPU().contains("gfx10") ||
targetMachine.getTargetCPU().contains("gfx11")) {
- // some ROCM/HIP functions for gfx10 or gfx11 has accuracy issue if
+ // Some ROCM/HIP functions for gfx10 or gfx11 has accuracy issue if
// inlined.
return;
}
diff --git a/runtime/src/iree/schemas/BUILD.bazel b/runtime/src/iree/schemas/BUILD.bazel
index a8fbfca..e98a425 100644
--- a/runtime/src/iree/schemas/BUILD.bazel
+++ b/runtime/src/iree/schemas/BUILD.bazel
@@ -21,6 +21,13 @@
]
iree_flatbuffer_c_library(
+ name = "amdgpu_executable_def_c_fbs",
+ srcs = ["amdgpu_executable_def.fbs"],
+ flatcc_args = FLATCC_ARGS,
+ includes = ["executable_debug_info.fbs"],
+)
+
+iree_flatbuffer_c_library(
name = "bytecode_module_def_c_fbs",
srcs = ["bytecode_module_def.fbs"],
flatcc_args = FLATCC_ARGS,
@@ -70,6 +77,7 @@
iree_build_test(
name = "schema_build_test",
targets = [
+ ":amdgpu_executable_def_c_fbs",
":bytecode_module_def_c_fbs",
":cuda_executable_def_c_fbs",
":executable_debug_info_c_fbs",
diff --git a/runtime/src/iree/schemas/CMakeLists.txt b/runtime/src/iree/schemas/CMakeLists.txt
index 574b2ca..f30430d 100644
--- a/runtime/src/iree/schemas/CMakeLists.txt
+++ b/runtime/src/iree/schemas/CMakeLists.txt
@@ -12,6 +12,21 @@
flatbuffer_c_library(
NAME
+ amdgpu_executable_def_c_fbs
+ SRCS
+ "amdgpu_executable_def.fbs"
+ FLATCC_ARGS
+ "--reader"
+ "--builder"
+ "--verifier"
+ "--json"
+ INCLUDES
+ "executable_debug_info.fbs"
+ PUBLIC
+)
+
+flatbuffer_c_library(
+ NAME
bytecode_module_def_c_fbs
SRCS
"bytecode_module_def.fbs"
diff --git a/runtime/src/iree/schemas/amdgpu_executable_def.fbs b/runtime/src/iree/schemas/amdgpu_executable_def.fbs
new file mode 100644
index 0000000..43efdb0
--- /dev/null
+++ b/runtime/src/iree/schemas/amdgpu_executable_def.fbs
@@ -0,0 +1,63 @@
+// Copyright 2024 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+include "iree/schemas/executable_debug_info.fbs";
+
+namespace iree.hal.amdgpu;
+
+// 'AMDGPU v1 Executable'.
+file_identifier "AMD1";
+file_extension "amd1";
+
+// A struct for the kernel block size along each dimension.
+struct Dims {
+ x:uint32;
+ y:uint32;
+ z:uint32;
+}
+
+// Describes the behavior of each binding.
+enum BindingBits:uint64 (bit_flags) {
+ READ_ONLY = 0, // 1u << 0
+ INDIRECT = 1, // 1u << 1
+}
+
+// Information about an exported function on the executable.
+table ExportDef {
+ // String name of the exported function symbol in the module.
+ symbol_name:string;
+
+ // Workgroup size for the export.
+ workgroup_size:Dims;
+
+ // Total number of 32-bit push constants used by the export.
+ constant_count:uint32;
+
+ // Binding count and flags for each binding.
+ binding_flags:[BindingBits];
+
+ // Optional debug information related to the export.
+ debug_info:iree.hal.debug.ExportDef;
+}
+
+// A library containing one or more exported functions.
+table ModuleDef {
+ // AMD ELF image for loading an hsa_executable_t.
+ image:string;
+}
+
+table ExecutableDef {
+ // Exported functions in canonical executable entry point order.
+ exports:[ExportDef];
+
+ // Modules containing executable code.
+ modules:[ModuleDef];
+
+ // Embedded source files sorted ascending by path.
+ source_files:[iree.hal.debug.SourceFileDef];
+}
+
+root_type ExecutableDef;