Adding an executable environment and plumbing through processor info.
This adds an iree_hal_executable_environment_*_t struct to wrap up
the existing import table, a new processor info struct, and a new
reserved spot for specialization constants.
diff --git a/experimental/web/sample_static/device_multithreaded.c b/experimental/web/sample_static/device_multithreaded.c
index d4ffab5..bde6cc2 100644
--- a/experimental/web/sample_static/device_multithreaded.c
+++ b/experimental/web/sample_static/device_multithreaded.c
@@ -16,13 +16,10 @@
iree_hal_task_device_params_t params;
iree_hal_task_device_params_initialize(¶ms);
- // Load the statically embedded library.
- const iree_hal_executable_library_header_t** static_library =
- mnist_linked_llvm_library_query(
- IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
- /*reserved=*/NULL);
- const iree_hal_executable_library_header_t** libraries[1] = {static_library};
-
+ // Register the statically linked executable library.
+ const iree_hal_executable_library_query_fn_t* libraries[] = {
+ mnist_linked_llvm_library_query,
+ };
iree_hal_executable_loader_t* library_loader = NULL;
iree_status_t status = iree_hal_static_library_loader_create(
IREE_ARRAYSIZE(libraries), libraries,
diff --git a/experimental/web/sample_static/device_sync.c b/experimental/web/sample_static/device_sync.c
index 82e291d..082c730 100644
--- a/experimental/web/sample_static/device_sync.c
+++ b/experimental/web/sample_static/device_sync.c
@@ -13,13 +13,10 @@
iree_hal_sync_device_params_t params;
iree_hal_sync_device_params_initialize(¶ms);
- // Load the statically embedded library.
- const iree_hal_executable_library_header_t** static_library =
- mnist_linked_llvm_library_query(
- IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
- /*reserved=*/NULL);
- const iree_hal_executable_library_header_t** libraries[1] = {static_library};
-
+ // Register the statically linked executable library.
+ const iree_hal_executable_library_query_fn_t* libraries[] = {
+ mnist_linked_llvm_library_query,
+ };
iree_hal_executable_loader_t* library_loader = NULL;
iree_status_t status = iree_hal_static_library_loader_create(
IREE_ARRAYSIZE(libraries), libraries,
diff --git a/iree/base/internal/BUILD b/iree/base/internal/BUILD
index 7368255..ab122af 100644
--- a/iree/base/internal/BUILD
+++ b/iree/base/internal/BUILD
@@ -103,6 +103,16 @@
)
cc_library(
+ name = "cpu",
+ srcs = ["cpu.c"],
+ hdrs = ["cpu.h"],
+ deps = [
+ "//iree/base",
+ "//iree/base:core_headers",
+ ],
+)
+
+cc_library(
name = "dynamic_library",
srcs = [
"dynamic_library_posix.c",
diff --git a/iree/base/internal/CMakeLists.txt b/iree/base/internal/CMakeLists.txt
index ca24dce..08c0e3b 100644
--- a/iree/base/internal/CMakeLists.txt
+++ b/iree/base/internal/CMakeLists.txt
@@ -94,6 +94,19 @@
iree_cc_library(
NAME
+ cpu
+ HDRS
+ "cpu.h"
+ SRCS
+ "cpu.c"
+ DEPS
+ iree::base
+ iree::base::core_headers
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
dynamic_library
HDRS
"dynamic_library.h"
diff --git a/iree/base/internal/cpu.c b/iree/base/internal/cpu.c
new file mode 100644
index 0000000..3e22abc
--- /dev/null
+++ b/iree/base/internal/cpu.c
@@ -0,0 +1,65 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/cpu.h"
+
+#include "iree/base/target_platform.h"
+
+//===----------------------------------------------------------------------===//
+// iree_cpu_*
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_EMSCRIPTEN) || \
+ defined(IREE_PLATFORM_LINUX)
+
+#include <sched.h>
+
+extern __attribute__((weak)) int sched_getcpu(void) {
+ // TODO(benvanik): emulate with syscall/vdso/etc.
+ errno = ENOSYS;
+ return -1;
+}
+
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void) {
+ // This path is relatively portable and should work on linux/bsd/etc-likes.
+ // We may want to use getcpu when available so that we can get the group ID.
+ // https://man7.org/linux/man-pages/man3/sched_getcpu.3.html
+ //
+ // libc implementations can use vDSO and other fun stuff to make this really
+ // cheap: http://git.musl-libc.org/cgit/musl/tree/src/sched/sched_getcpu.c
+ int id = sched_getcpu();
+ return id != -1 ? id : 0;
+}
+
+#elif defined(IREE_PLATFORM_WINDOWS)
+
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void) {
+ PROCESSOR_NUMBER pn;
+ GetCurrentProcessorNumberEx(&pn);
+ return 64 * pn.Group + pn.Number;
+}
+
+#else
+
+// No implementation.
+// We could allow an iree/base/config.h override to externalize this.
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void) { return 0; }
+
+#endif // IREE_PLATFORM_*
+
+void iree_cpu_requery_processor_id(iree_cpu_processor_tag_t* IREE_RESTRICT tag,
+ iree_cpu_processor_id_t* IREE_RESTRICT
+ processor_id) {
+ IREE_ASSERT_ARGUMENT(tag);
+ IREE_ASSERT_ARGUMENT(processor_id);
+
+ // TODO(benvanik): set a frequency for this and use a coarse timer
+ // (CLOCK_MONOTONIC_COARSE) to do a ~4-10Hz refresh. We can store the last
+ // query time and the last processor ID in the tag and only perform the query
+ // if it has changed.
+
+ *processor_id = iree_cpu_query_processor_id();
+}
diff --git a/iree/base/internal/cpu.h b/iree/base/internal/cpu.h
new file mode 100644
index 0000000..914f39d
--- /dev/null
+++ b/iree/base/internal/cpu.h
@@ -0,0 +1,40 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_CPU_H_
+#define IREE_BASE_INTERNAL_CPU_H_
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_cpu_*
+//===----------------------------------------------------------------------===//
+
+typedef uint32_t iree_cpu_processor_id_t;
+typedef uint32_t iree_cpu_processor_tag_t;
+
+// Returns the ID of the logical processor executing this code.
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void);
+
+// Returns the ID of the logical processor executing this code, using |tag| to
+// memoize the query in cases where it does not change frequently.
+// |tag| must be initialized to 0 on first call and may be reset to 0 by the
+// caller at any time to invalidate the cached result.
+void iree_cpu_requery_processor_id(iree_cpu_processor_tag_t* IREE_RESTRICT tag,
+ iree_cpu_processor_id_t* IREE_RESTRICT
+ processor_id);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_BASE_INTERNAL_ARENA_H_
diff --git a/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp b/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
index 94031f7..ac16485 100644
--- a/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
@@ -63,6 +63,81 @@
// versions in the same compiled output.
class HALDispatchABI {
public:
+ // Matches the field order in iree_hal_processor_v0_t.
+ enum class ProcessorField {
+ data = 0,
+ };
+
+ // Matches IREE_HAL_PROCESSOR_DATA_CAPACITY_V0.
+ static constexpr int ProcessorDataCapacity = 8;
+
+ // Returns a Type representing iree_hal_processor_v0_t.
+ static LLVM::LLVMStructType getProcessorType(
+ MLIRContext *context, LLVMTypeConverter *typeConverter) {
+ auto structType =
+ LLVM::LLVMStructType::getIdentified(context, "iree_hal_processor_v0_t");
+ if (structType.isInitialized()) return structType;
+
+ auto uint64Type = IntegerType::get(context, 64);
+ SmallVector<Type> fieldTypes;
+
+ // uint64_t data[IREE_HAL_PROCESSOR_DATA_CAPACITY_V0];
+ fieldTypes.push_back(
+ LLVM::LLVMArrayType::get(uint64Type, ProcessorDataCapacity));
+
+ LogicalResult bodySet = structType.setBody(fieldTypes, /*isPacked=*/false);
+ assert(succeeded(bodySet) &&
+ "could not set the body of an identified struct");
+ (void)bodySet;
+
+ return structType;
+ }
+
+ // Matches the field order in iree_hal_executable_environment_v0_t.
+ enum class EnvironmentField {
+ constants = 0,
+ import_thunk = 1,
+ imports = 2,
+ processor = 3,
+ };
+
+ // Returns a Type representing iree_hal_executable_environment_v0_t.
+ static LLVM::LLVMStructType getEnvironmentType(
+ MLIRContext *context, LLVMTypeConverter *typeConverter,
+ LLVM::LLVMStructType processorType) {
+ auto structType = LLVM::LLVMStructType::getIdentified(
+ context, "iree_hal_executable_environment_v0_t");
+ if (structType.isInitialized()) return structType;
+
+ auto int8Type = IntegerType::get(context, 8);
+ auto uint32Type = IntegerType::get(context, 32);
+ auto int8PtrType = LLVM::LLVMPointerType::get(int8Type);
+ auto uint32PtrType = LLVM::LLVMPointerType::get(uint32Type);
+ SmallVector<Type, 4> fieldTypes;
+
+ // const uint32_t* constants;
+ fieldTypes.push_back(uint32PtrType);
+
+ // iree_hal_executable_import_thunk_v0_t import_thunk;
+ // const iree_hal_executable_import_v0_t* imports;
+ auto importType = LLVM::LLVMFunctionType::get(uint32Type, int8PtrType);
+ auto importPtrType = LLVM::LLVMPointerType::get(importType);
+ auto importThunkType =
+ LLVM::LLVMFunctionType::get(uint32Type, {importPtrType, int8PtrType});
+ fieldTypes.push_back(LLVM::LLVMPointerType::get(importThunkType));
+ fieldTypes.push_back(LLVM::LLVMPointerType::get(importPtrType));
+
+ // iree_hal_processor_v0_t processor;
+ fieldTypes.push_back(processorType);
+
+ LogicalResult bodySet = structType.setBody(fieldTypes, /*isPacked=*/false);
+ assert(succeeded(bodySet) &&
+ "could not set the body of an identified struct");
+ (void)bodySet;
+
+ return structType;
+ }
+
// Returns a Type representing iree_hal_vec3_t.
static Type getVec3Type(MLIRContext *context) {
auto uint32Type = IntegerType::get(context, 32);
@@ -70,7 +145,7 @@
}
// Matches the field order in iree_hal_executable_dispatch_state_v0_t.
- enum class Field {
+ enum class StateField {
workgroup_count = 0,
workgroup_size = 1,
push_constant_count = 2,
@@ -78,13 +153,14 @@
binding_count = 4,
binding_ptrs = 5,
binding_lengths = 6,
- import_thunk = 7,
- imports = 8,
+ processor_id = 7,
+ environment = 8,
};
// Returns a Type representing iree_hal_executable_dispatch_state_v0_t.
static LLVM::LLVMStructType getDispatchStateType(
- MLIRContext *context, LLVMTypeConverter *typeConverter) {
+ MLIRContext *context, LLVMTypeConverter *typeConverter,
+ LLVM::LLVMStructType environmentType) {
auto structType = LLVM::LLVMStructType::getIdentified(
context, "iree_hal_executable_dispatch_state_v0_t");
if (structType.isInitialized()) return structType;
@@ -114,14 +190,11 @@
fieldTypes.push_back(LLVM::LLVMPointerType::get(int8PtrType));
fieldTypes.push_back(LLVM::LLVMPointerType::get(indexType));
- // iree_hal_executable_import_thunk_v0_t import_thunk;
- // const iree_hal_executable_import_v0_t* imports;
- auto importType = LLVM::LLVMFunctionType::get(uint32Type, int8PtrType);
- auto importPtrType = LLVM::LLVMPointerType::get(importType);
- auto importThunkType =
- LLVM::LLVMFunctionType::get(uint32Type, {importPtrType, int8PtrType});
- fieldTypes.push_back(LLVM::LLVMPointerType::get(importThunkType));
- fieldTypes.push_back(LLVM::LLVMPointerType::get(importPtrType));
+ // uint32_t processor_id;
+ fieldTypes.push_back(uint32Type);
+
+ // const iree_hal_executable_environment_v0_t* environment;
+ fieldTypes.push_back(LLVM::LLVMPointerType::get(environmentType));
LogicalResult bodySet = structType.setBody(fieldTypes, /*isPacked=*/false);
assert(succeeded(bodySet) &&
@@ -136,11 +209,14 @@
// `iree/hal/local/executable_library.h`.
static SmallVector<Type, 5> getInputTypes(MLIRContext *context,
LLVMTypeConverter *typeConverter) {
+ auto dispatchStateType = LLVM::LLVMStructType::getIdentified(
+ context, "iree_hal_executable_dispatch_state_v0_t");
+ assert(dispatchStateType &&
+ "dispatch state type must be defined by ConvertToLLVM");
return SmallVector<Type, 5>{
// const iree_hal_executable_dispatch_state_v0_t* IREE_RESTRICT
// dispatch_state
- LLVM::LLVMPointerType::get(
- getDispatchStateType(context, typeConverter)),
+ LLVM::LLVMPointerType::get(dispatchStateType),
// const iree_hal_vec3_t* IREE_RESTRICT workgroup_id
LLVM::LLVMPointerType::get(getVec3Type(context)),
// void* IREE_RESTRICT local_memory
@@ -152,8 +228,11 @@
LLVMTypeConverter *typeConverter)
: funcOp(funcOp),
typeConverter(typeConverter),
- dispatchStateType(
- getDispatchStateType(funcOp.getContext(), typeConverter)) {}
+ processorType(getProcessorType(funcOp.getContext(), typeConverter)),
+ environmentType(getEnvironmentType(funcOp.getContext(), typeConverter,
+ processorType)),
+ dispatchStateType(getDispatchStateType(
+ funcOp.getContext(), typeConverter, environmentType)) {}
LLVM::LLVMFuncOp getFuncOp() { return funcOp; }
@@ -173,7 +252,7 @@
Value loadWorkgroupCount(Location loc, int32_t dim, Type resultType,
OpBuilder &builder) {
auto workgroupCountValue =
- loadFieldValue(loc, Field::workgroup_count, builder);
+ loadFieldValue(loc, StateField::workgroup_count, builder);
auto dimValue = builder.createOrFold<LLVM::ExtractValueOp>(
loc, builder.getIntegerType(32), workgroupCountValue,
builder.getI64ArrayAttr(dim));
@@ -184,7 +263,7 @@
Value loadWorkgroupSize(Location loc, int32_t dim, Type resultType,
OpBuilder &builder) {
auto workgroupSizeValue =
- loadFieldValue(loc, Field::workgroup_size, builder);
+ loadFieldValue(loc, StateField::workgroup_size, builder);
auto dimValue = builder.createOrFold<LLVM::ExtractValueOp>(
loc, builder.getIntegerType(32), workgroupSizeValue,
builder.getI64ArrayAttr(dim));
@@ -199,7 +278,7 @@
// Returns the total push constant count as an index-converted type.
Value loadPushConstantCount(Location loc, OpBuilder &builder) {
- auto value = loadFieldValue(loc, Field::push_constant_count, builder);
+ auto value = loadFieldValue(loc, StateField::push_constant_count, builder);
return castValueToType(loc, value,
typeConverter->convertType(builder.getIndexType()),
builder);
@@ -209,7 +288,7 @@
Value loadPushConstant(Location loc, int64_t offset, Type resultType,
OpBuilder &builder) {
auto constantsPtrValue =
- loadFieldValue(loc, Field::push_constants, builder);
+ loadFieldValue(loc, StateField::push_constants, builder);
auto offsetValue = getIndexValue(loc, offset, builder);
Value constantPtrValue = builder.create<LLVM::GEPOp>(
loc, constantsPtrValue.getType(), constantsPtrValue, offsetValue);
@@ -219,7 +298,7 @@
// Returns the total binding count as an index-converted type.
Value loadBindingCount(Location loc, OpBuilder &builder) {
- auto value = loadFieldValue(loc, Field::binding_count, builder);
+ auto value = loadFieldValue(loc, StateField::binding_count, builder);
return castValueToType(loc, value,
typeConverter->convertType(builder.getIndexType()),
builder);
@@ -229,7 +308,7 @@
// Equivalent to:
// int8_t** base_ptr = &state->binding_ptrs[ordinal];
Value loadBindingPtr(Location loc, int64_t ordinal, OpBuilder &builder) {
- auto ptrsPtrValue = loadFieldValue(loc, Field::binding_ptrs, builder);
+ auto ptrsPtrValue = loadFieldValue(loc, StateField::binding_ptrs, builder);
auto ordinalValue = getIndexValue(loc, ordinal, builder);
auto elementPtrValue = builder.createOrFold<LLVM::GEPOp>(
loc, ptrsPtrValue.getType(), ptrsPtrValue, ordinalValue);
@@ -238,7 +317,8 @@
// Loads the byte length of the binding |ordinal| as an index-converted type.
Value loadBindingLength(Location loc, int64_t ordinal, OpBuilder &builder) {
- auto lengthsPtrValue = loadFieldValue(loc, Field::binding_lengths, builder);
+ auto lengthsPtrValue =
+ loadFieldValue(loc, StateField::binding_lengths, builder);
auto ordinalValue = getIndexValue(loc, ordinal, builder);
auto elementPtrValue = builder.createOrFold<LLVM::GEPOp>(
loc, lengthsPtrValue.getType(), lengthsPtrValue, ordinalValue);
@@ -313,11 +393,43 @@
}
}
+ // Loads the processor ID the code is (most likely) being run on.
+ // Equivalent to:
+ // uint32_t processor_id = state->processor_id;
+ Value loadProcessorID(Location loc, OpBuilder &builder) {
+ return loadFieldValue(loc, StateField::processor_id, builder);
+ }
+
+ // Loads a processor information data field at the given index.
+ // May be 0 if the field is not available.
+ Value loadProcessorData(Location loc, int64_t index, OpBuilder &builder) {
+ // Load the value; it should always be in bounds.
+ Value dataArrayValue = loadFieldValue(loc, ProcessorField::data, builder);
+ Type elementType =
+ dataArrayValue.getType().cast<LLVM::LLVMArrayType>().getElementType();
+ Value dataValue = builder.create<LLVM::ExtractValueOp>(
+ loc, elementType, dataArrayValue, builder.getI64ArrayAttr(index));
+ return dataValue;
+ }
+
+ // Loads an executable constant at |index| and casts it to |resultType|.
+ Value loadExecutableConstant(Location loc, int64_t index, Type resultType,
+ OpBuilder &builder) {
+ auto constantsPtrValue =
+ loadFieldValue(loc, EnvironmentField::constants, builder);
+ auto indexValue = getIndexValue(loc, index, builder);
+ Value constantPtrValue = builder.create<LLVM::GEPOp>(
+ loc, constantsPtrValue.getType(), constantsPtrValue, indexValue);
+ Value constantValue = builder.create<LLVM::LoadOp>(loc, constantPtrValue);
+ return castValueToType(loc, constantValue, resultType, builder);
+ }
+
// Loads the import function pointer of the import |ordinal|.
// Equivalent to:
// iree_hal_executable_import_v0_t func_ptr = state->imports[ordinal];
Value loadImportFuncPtr(Location loc, int64_t ordinal, OpBuilder &builder) {
- auto importsPtrValue = loadFieldValue(loc, Field::imports, builder);
+ auto importsPtrValue =
+ loadFieldValue(loc, EnvironmentField::imports, builder);
auto ordinalValue = getIndexValue(loc, ordinal, builder);
auto elementPtrValue = builder.createOrFold<LLVM::GEPOp>(
loc, importsPtrValue.getType(), importsPtrValue, ordinalValue);
@@ -343,7 +455,8 @@
// Returns 0 on success and non-zero otherwise.
Value callImport(Location loc, unsigned importOrdinal, Value params,
OpBuilder &builder) {
- auto thunkPtrValue = loadFieldValue(loc, Field::import_thunk, builder);
+ auto thunkPtrValue =
+ loadFieldValue(loc, EnvironmentField::import_thunk, builder);
auto importPtrValue = loadImportFuncPtr(loc, importOrdinal, builder);
auto callOp =
builder.create<LLVM::CallOp>(loc, TypeRange{builder.getI32Type()},
@@ -356,14 +469,33 @@
}
private:
- Value loadFieldValue(Location loc, Field field, OpBuilder &builder) {
- auto statePtrValue = funcOp.getArgument(0);
- auto stateValue = builder.createOrFold<LLVM::LoadOp>(loc, statePtrValue);
- auto fieldType = dispatchStateType.getBody()[(int)field];
+ Value loadFieldValue(Location loc, StateField field, OpBuilder &builder) {
+ Value statePtrValue = funcOp.getArgument(0);
+ Value stateValue = builder.createOrFold<LLVM::LoadOp>(loc, statePtrValue);
+ Type fieldType = dispatchStateType.getBody()[(int)field];
return builder.createOrFold<LLVM::ExtractValueOp>(
loc, fieldType, stateValue, builder.getI64ArrayAttr((int)field));
}
+ Value loadFieldValue(Location loc, EnvironmentField field,
+ OpBuilder &builder) {
+ Value environmentPtrValue =
+ loadFieldValue(loc, StateField::environment, builder);
+ Value environmentValue =
+ builder.create<LLVM::LoadOp>(loc, environmentPtrValue);
+ Type fieldType = environmentType.getBody()[(int)field];
+ return builder.createOrFold<LLVM::ExtractValueOp>(
+ loc, fieldType, environmentValue, builder.getI64ArrayAttr((int)field));
+ }
+
+ Value loadFieldValue(Location loc, ProcessorField field, OpBuilder &builder) {
+ Value processorValue =
+ loadFieldValue(loc, EnvironmentField::processor, builder);
+ Type fieldType = processorType.getBody()[(int)field];
+ return builder.createOrFold<LLVM::ExtractValueOp>(
+ loc, fieldType, processorValue, builder.getI64ArrayAttr((int)field));
+ }
+
Value getIndexValue(Location loc, int64_t value, OpBuilder &builder) {
return builder.createOrFold<LLVM::ConstantOp>(
loc, typeConverter->convertType(builder.getIndexType()),
@@ -379,6 +511,8 @@
LLVM::LLVMFuncOp funcOp;
LLVMTypeConverter *typeConverter;
+ LLVM::LLVMStructType processorType;
+ LLVM::LLVMStructType environmentType;
LLVM::LLVMStructType dispatchStateType;
};
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
index 19b1565..f92986a 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
@@ -55,6 +55,16 @@
return type;
}
+// %struct.iree_hal_executable_environment_v0_t = type {
+// ...
+// }
+static llvm::StructType *makeEnvironmentType(llvm::LLVMContext &context) {
+ auto *type = llvm::StructType::getTypeByName(
+ context, "iree_hal_executable_environment_v0_t");
+ assert(type && "environment type must be defined by ConvertToLLVM");
+ return type;
+}
+
// %struct.anon = type { i32, i32, i32 }
// %union.iree_hal_vec3_t = type { %struct.anon }
static llvm::StructType *makeVec3Type(llvm::LLVMContext &context) {
@@ -75,14 +85,7 @@
}
// %struct.iree_hal_executable_dispatch_state_v0_t = type {
-// %union.iree_hal_vec3_t,
-// %union.iree_hal_vec3_t,
-// i64,
-// i32*,
-// i64,
-// i8**,
-// i64*,
-// %struct.iree_hal_executable_import_table_v0_t*
+// ...
// }
static llvm::StructType *makeDispatchStateType(llvm::LLVMContext &context) {
auto *type = llvm::StructType::getTypeByName(
@@ -241,16 +244,16 @@
llvm::Function *LibraryBuilder::build(StringRef queryFuncName) {
auto &context = module->getContext();
auto *i32Type = llvm::IntegerType::getInt32Ty(context);
- auto *ptrType = llvm::Type::getInt8PtrTy(context);
+ auto *environmentType = makeEnvironmentType(context)->getPointerTo();
auto *libraryHeaderType = makeLibraryHeaderType(context);
// %struct.iree_hal_executable_library_header_t**
- // @iree_hal_library_query(i32, void*)
+ // @iree_hal_library_query(i32, %struct.iree_hal_executable_environment_v0_t*)
auto *queryFuncType =
llvm::FunctionType::get(libraryHeaderType->getPointerTo(),
{
i32Type,
- ptrType,
+ environmentType,
},
/*isVarArg=*/false);
auto *func =
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/StaticLibraryGenerator.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/StaticLibraryGenerator.cpp
index 871e5f6..bc6bd70 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/StaticLibraryGenerator.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/StaticLibraryGenerator.cpp
@@ -54,7 +54,8 @@
const std::string &query_function_name) {
os << "const iree_hal_executable_library_header_t**\n"
<< query_function_name << "(\n"
- << "iree_hal_executable_library_version_t max_version, void* reserved);\n";
+ << "iree_hal_executable_library_version_t max_version, const "
+ "iree_hal_executable_environment_v0_t* environment);\n";
}
static void generateSuffix(llvm::raw_ostream &os,
diff --git a/iree/hal/local/BUILD b/iree/hal/local/BUILD
index 1d1b786..9f98151 100644
--- a/iree/hal/local/BUILD
+++ b/iree/hal/local/BUILD
@@ -17,6 +17,19 @@
)
cc_library(
+ name = "executable_environment",
+ srcs = ["executable_environment.c"],
+ hdrs = ["executable_environment.h"],
+ deps = [
+ ":executable_library",
+ "//iree/base",
+ "//iree/base:tracing",
+ "//iree/base/internal:cpu",
+ "//iree/hal",
+ ],
+)
+
+cc_library(
name = "executable_library",
hdrs = ["executable_library.h"],
)
@@ -25,6 +38,7 @@
name = "executable_library_benchmark",
srcs = ["executable_library_benchmark.c"],
deps = [
+ ":executable_environment",
":executable_library",
":local",
"//iree/base",
@@ -44,9 +58,10 @@
"executable_library_test.c",
],
deps = [
+ ":executable_environment",
+ ":executable_library",
"//iree/base",
"//iree/base:core_headers",
- "//iree/hal/local:executable_library",
],
)
@@ -71,6 +86,7 @@
"local_executable_layout.h",
],
deps = [
+ ":executable_environment",
":executable_library",
"//iree/base",
"//iree/base:core_headers",
@@ -145,6 +161,7 @@
"task_semaphore.h",
],
deps = [
+ ":executable_environment",
":executable_library",
":local",
"//iree/base",
diff --git a/iree/hal/local/CMakeLists.txt b/iree/hal/local/CMakeLists.txt
index 0befb9c..ef600b2 100644
--- a/iree/hal/local/CMakeLists.txt
+++ b/iree/hal/local/CMakeLists.txt
@@ -12,6 +12,22 @@
iree_cc_library(
NAME
+ executable_environment
+ HDRS
+ "executable_environment.h"
+ SRCS
+ "executable_environment.c"
+ DEPS
+ ::executable_library
+ iree::base
+ iree::base::internal::cpu
+ iree::base::tracing
+ iree::hal
+ PUBLIC
+)
+
+iree_cc_library(
+ NAME
executable_library
HDRS
"executable_library.h"
@@ -24,6 +40,7 @@
SRCS
"executable_library_benchmark.c"
DEPS
+ ::executable_environment
::executable_library
::local
iree::base
@@ -43,9 +60,10 @@
"executable_library_demo.h"
"executable_library_test.c"
DEPS
+ ::executable_environment
+ ::executable_library
iree::base
iree::base::core_headers
- iree::hal::local::executable_library
)
iree_cc_library(
@@ -68,6 +86,7 @@
"local_executable_cache.c"
"local_executable_layout.c"
DEPS
+ ::executable_environment
::executable_library
iree::base
iree::base::core_headers
@@ -131,6 +150,7 @@
"task_queue_state.c"
"task_semaphore.c"
DEPS
+ ::executable_environment
::executable_library
::local
iree::base
diff --git a/iree/hal/local/elf/BUILD b/iree/hal/local/elf/BUILD
index aacd5f3..a55763b 100644
--- a/iree/hal/local/elf/BUILD
+++ b/iree/hal/local/elf/BUILD
@@ -41,6 +41,7 @@
":elf_module",
"//iree/base",
"//iree/base:core_headers",
+ "//iree/hal/local:executable_environment",
"//iree/hal/local:executable_library",
"//iree/hal/local/elf/testdata:elementwise_mul",
],
diff --git a/iree/hal/local/elf/CMakeLists.txt b/iree/hal/local/elf/CMakeLists.txt
index 1c5e254..1350aa3 100644
--- a/iree/hal/local/elf/CMakeLists.txt
+++ b/iree/hal/local/elf/CMakeLists.txt
@@ -37,6 +37,7 @@
iree::base
iree::base::core_headers
iree::hal::local::elf::testdata::elementwise_mul
+ iree::hal::local::executable_environment
iree::hal::local::executable_library
)
diff --git a/iree/hal/local/elf/elf_module_test_main.c b/iree/hal/local/elf/elf_module_test_main.c
index 8e87150..d420a94 100644
--- a/iree/hal/local/elf/elf_module_test_main.c
+++ b/iree/hal/local/elf/elf_module_test_main.c
@@ -7,6 +7,7 @@
#include "iree/base/api.h"
#include "iree/base/target_platform.h"
#include "iree/hal/local/elf/elf_module.h"
+#include "iree/hal/local/executable_environment.h"
#include "iree/hal/local/executable_library.h"
// ELF modules for various platforms embedded in the binary:
@@ -60,6 +61,10 @@
IREE_RETURN_IF_ERROR(iree_elf_module_initialize_from_memory(
file_data, &import_table, iree_allocator_system(), &module));
+ iree_hal_executable_environment_v0_t environment;
+ iree_hal_executable_environment_initialize(iree_allocator_system(),
+ &environment);
+
void* query_fn_ptr = NULL;
IREE_RETURN_IF_ERROR(iree_elf_module_lookup_export(
&module, IREE_HAL_EXECUTABLE_LIBRARY_EXPORT_NAME, &query_fn_ptr));
@@ -71,7 +76,7 @@
library.header =
(const iree_hal_executable_library_header_t**)iree_elf_call_p_ip(
query_fn_ptr, IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
- /*reserved=*/NULL);
+ &environment);
if (library.header == NULL) {
return iree_make_status(IREE_STATUS_NOT_FOUND, "library header is empty");
}
@@ -117,6 +122,8 @@
dispatch_state.binding_count = 1;
dispatch_state.binding_lengths = binding_lengths;
dispatch_state.binding_ptrs = binding_ptrs;
+ dispatch_state.processor_id = iree_cpu_query_processor_id();
+ dispatch_state.environment = &environment;
iree_hal_vec3_t workgroup_id = {{0, 0, 0}};
void* local_memory = NULL;
int ret = iree_elf_call_i_ppp((const void*)library.v0->exports.ptrs[0],
diff --git a/iree/hal/local/executable_environment.c b/iree/hal/local/executable_environment.c
new file mode 100644
index 0000000..cebe4e2
--- /dev/null
+++ b/iree/hal/local/executable_environment.c
@@ -0,0 +1,40 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/executable_environment.h"
+
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_processor_*_t
+//===----------------------------------------------------------------------===//
+
+void iree_hal_processor_query(iree_allocator_t temp_allocator,
+ iree_hal_processor_v0_t* out_processor) {
+ IREE_ASSERT_ARGUMENT(out_processor);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ memset(out_processor, 0, sizeof(*out_processor));
+
+ // TODO(benvanik): define processor features we want to query for each arch.
+ // This needs to be baked into the executable library API and made consistent
+ // with the compiler side producing the executables that access it.
+
+ IREE_TRACE_ZONE_END(z0);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_environment_*_t
+//===----------------------------------------------------------------------===//
+
+void iree_hal_executable_environment_initialize(
+ iree_allocator_t temp_allocator,
+ iree_hal_executable_environment_v0_t* out_environment) {
+ IREE_ASSERT_ARGUMENT(out_environment);
+ IREE_TRACE_ZONE_BEGIN(z0);
+ memset(out_environment, 0, sizeof(*out_environment));
+ iree_hal_processor_query(temp_allocator, &out_environment->processor);
+ IREE_TRACE_ZONE_END(z0);
+}
diff --git a/iree/hal/local/executable_environment.h b/iree/hal/local/executable_environment.h
new file mode 100644
index 0000000..b4d23ca
--- /dev/null
+++ b/iree/hal/local/executable_environment.h
@@ -0,0 +1,47 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_EXECUTABLE_ENVIRONMENT_H_
+#define IREE_HAL_LOCAL_EXECUTABLE_ENVIRONMENT_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/cpu.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_library.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_hal_processor_*_t
+//===----------------------------------------------------------------------===//
+
+// Queries the current processor information and writes it to |out_processor|.
+// |temp_allocator| may be used for temporary allocations required while
+// querying. If the processor cannot be queried then |out_processor| will be
+// zeroed.
+void iree_hal_processor_query(iree_allocator_t temp_allocator,
+ iree_hal_processor_v0_t* out_processor);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_environment_*_t
+//===----------------------------------------------------------------------===//
+
+// Initializes |out_environment| to the default empty environment.
+// No imports will be available unless overridden during loading.
+// |temp_allocator| may be used for temporary allocations during initialization.
+void iree_hal_executable_environment_initialize(
+ iree_allocator_t temp_allocator,
+ iree_hal_executable_environment_v0_t* out_environment);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_LOCAL_EXECUTABLE_ENVIRONMENT_H_
diff --git a/iree/hal/local/executable_library.h b/iree/hal/local/executable_library.h
index 86c4a4e..bc82857 100644
--- a/iree/hal/local/executable_library.h
+++ b/iree/hal/local/executable_library.h
@@ -8,7 +8,7 @@
#define IREE_HAL_LOCAL_EXECUTABLE_LIBRARY_H_
// NOTE: this file is designed to be a standalone header: it is embedded in the
-// compiler and must not take any dependences on the runtime HAL code.
+// compiler and must not take any dependencies on the runtime HAL code.
// Changes here will require changes to the compiler and must be versioned as if
// this was a schema: backwards-incompatible changes require version bumps or
// the ability to feature-detect at runtime.
@@ -75,6 +75,9 @@
// Versioning and interface querying
//===----------------------------------------------------------------------===//
+typedef struct iree_hal_executable_environment_v0_t
+ iree_hal_executable_environment_v0_t;
+
// Known valid version values.
typedef enum iree_hal_executable_library_version_e {
// iree_hal_executable_library_v0_t is used as the API communication
@@ -110,12 +113,21 @@
} iree_hal_executable_library_header_t;
// Exported function from dynamic libraries for querying library information.
+//
// The provided |max_version| is the maximum version the caller supports;
// callees must return NULL if their lowest available version is greater
// than the max version supported by the caller.
+//
+// The provided |environment| field contains information about the hosting
+// execution environment that the executable may use to specialize its
+// implementation, such as using specific imports or exporting
+// architecture-specific dispatch routines. Some environmental properties may
+// change per-invocation such as the CPU info when performing dispatches on
+// heterogenous processors that may change over the lifetime of the program.
typedef const iree_hal_executable_library_header_t** (
*iree_hal_executable_library_query_fn_t)(
- iree_hal_executable_library_version_t max_version, void* reserved);
+ iree_hal_executable_library_version_t max_version,
+ const iree_hal_executable_environment_v0_t* environment);
// Function name exported from dynamic libraries (pass to dlsym).
#define IREE_HAL_EXECUTABLE_LIBRARY_EXPORT_NAME \
@@ -194,6 +206,57 @@
const char* const* symbols;
} iree_hal_executable_import_table_v0_t;
+// Maximum number of data fields in iree_hal_processor_v0_t.
+#define IREE_HAL_PROCESSOR_DATA_CAPACITY_V0 8
+
+// Architecture-specific CPU information available to executables.
+// This encodes zero or more fields of opaque processor data.
+// The intent is that this structure can be put in .rodata when there are no
+// runtime features that need to be queried.
+//
+// The format of the data is architecture-specific as by construction no value
+// will ever be used in a compiled binary from another architecture. This
+// allows us to simplify this interface as we can't for example load the same
+// executable library for both aarch64 on riscv32 and don't need to normalize
+// any of the fields across them both.
+typedef struct iree_hal_processor_v0_t {
+ // Opaque architecture-specific encoding in 64-bit words.
+ // This may represent a fixed-length data structure, a series of hardware
+ // registers, or key-value pairs.
+ //
+ // The contents are opaque here as to support out-of-tree architectures. The
+ // runtime code deriving the identifier/flags and providing it here is losely
+ // coupled with the compiler code emitting checks based on the identifier and
+ // only those two places ever need to change.
+ uint64_t data[IREE_HAL_PROCESSOR_DATA_CAPACITY_V0];
+} iree_hal_processor_v0_t;
+static_assert(sizeof(iree_hal_processor_v0_t) % sizeof(uint64_t) == 0,
+ "8-byte alignment required");
+
+// Defines the environment in which the executable is being used.
+// Executables only have access to the information in this structure and must
+// make all decisions based on it; this ensures executables are portable across
+// operating environments (Linux, Mac, bare-metal, web, etc) by not having
+// platform-specific syscalls and register query emulation.
+typedef struct iree_hal_executable_environment_v0_t {
+ // Specialization constants available to the executable, if any.
+ // Contains as many as declared in the library header.
+ const uint32_t* constants;
+
+ // Thunk function for calling imports. All calls must be made through this.
+ iree_hal_executable_import_thunk_v0_t import_thunk;
+ // Optional imported functions available for use within the executable.
+ // Contains one entry per imported function. If an import was marked as weak
+ // then the corresponding entry may be NULL.
+ const iree_hal_executable_import_v0_t* imports;
+
+ // Optional architecture-specific CPU information.
+ // In heterogenous processors this may represent any of the subarchitecture
+ // types as it is derived from the core the calling thread is scheduled on.
+ // Will be all zeros if unavailable.
+ iree_hal_processor_v0_t processor;
+} iree_hal_executable_environment_v0_t;
+
typedef union iree_hal_vec3_t {
struct {
uint32_t x;
@@ -227,12 +290,12 @@
// The length of each binding in bytes, 1:1 with |binding_ptrs|.
const size_t* binding_lengths;
- // Thunk function for calling imports. All calls must be made through this.
- iree_hal_executable_import_thunk_v0_t import_thunk;
- // Optional imported functions available for use within the executable.
- // Contains one entry per imported function. If an import was marked as weak
- // then the corresponding entry may be NULL.
- const iree_hal_executable_import_v0_t* imports;
+ // Logical processor identifier used to index into processor info fields.
+ // Depending on the implementation this may be an ordinal, a bitfield, or an
+ // opaque unique identifier.
+ uint32_t processor_id;
+ // Optional executable environment information.
+ const iree_hal_executable_environment_v0_t* environment;
} iree_hal_executable_dispatch_state_v0_t;
// Function signature of exported executable entry points.
diff --git a/iree/hal/local/executable_library_benchmark.c b/iree/hal/local/executable_library_benchmark.c
index 1002f4a..aecb746 100644
--- a/iree/hal/local/executable_library_benchmark.c
+++ b/iree/hal/local/executable_library_benchmark.c
@@ -300,8 +300,7 @@
.binding_count = dispatch_params.binding_count,
.binding_ptrs = binding_ptrs,
.binding_lengths = binding_lengths,
- .import_thunk = NULL, // not yet implemented
- .imports = NULL, // not yet implemented
+ .environment = &local_executable->environment,
};
// Execute benchmark the workgroup invocation.
diff --git a/iree/hal/local/executable_library_demo.c b/iree/hal/local/executable_library_demo.c
index baa9cd7..eee0c20 100644
--- a/iree/hal/local/executable_library_demo.c
+++ b/iree/hal/local/executable_library_demo.c
@@ -105,7 +105,8 @@
// example, an executable may want to swap out a few entry points to an
// architecture-specific version.
const iree_hal_executable_library_header_t** demo_executable_library_query(
- iree_hal_executable_library_version_t max_version, void* reserved) {
+ iree_hal_executable_library_version_t max_version,
+ const iree_hal_executable_environment_v0_t* environment) {
return max_version <= 0
? (const iree_hal_executable_library_header_t**)&library
: NULL;
diff --git a/iree/hal/local/executable_library_demo.h b/iree/hal/local/executable_library_demo.h
index ce99ab1..f458768 100644
--- a/iree/hal/local/executable_library_demo.h
+++ b/iree/hal/local/executable_library_demo.h
@@ -43,7 +43,8 @@
// bindings: 0
//
const iree_hal_executable_library_header_t** demo_executable_library_query(
- iree_hal_executable_library_version_t max_version, void* reserved);
+ iree_hal_executable_library_version_t max_version,
+ const iree_hal_executable_environment_v0_t* environment);
#ifdef __cplusplus
} // extern "C"
diff --git a/iree/hal/local/executable_library_test.c b/iree/hal/local/executable_library_test.c
index 8c3cf4a..e15e94a 100644
--- a/iree/hal/local/executable_library_test.c
+++ b/iree/hal/local/executable_library_test.c
@@ -10,6 +10,7 @@
#include <string.h>
#include "iree/base/api.h"
+#include "iree/hal/local/executable_environment.h"
#include "iree/hal/local/executable_library_demo.h"
// Demonstration of the HAL-side of the iree_hal_executable_library_t ABI.
@@ -27,6 +28,11 @@
//
// See iree/hal/local/executable_library.h for more information.
int main(int argc, char** argv) {
+ // Default environment.
+ iree_hal_executable_environment_v0_t environment;
+ iree_hal_executable_environment_initialize(iree_allocator_system(),
+ &environment);
+
// Query the library header at the requested version.
// The query call in this example is going into the handwritten demo code
// but could be targeted at generated files or runtime-loaded shared objects.
@@ -35,7 +41,7 @@
const iree_hal_executable_library_v0_t* v0;
} library;
library.header = demo_executable_library_query(
- IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION, /*reserved=*/NULL);
+ IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION, &environment);
const iree_hal_executable_library_header_t* header = *library.header;
IREE_ASSERT_NE(header, NULL, "version may not have matched");
IREE_ASSERT_LE(
@@ -82,8 +88,8 @@
.binding_count = IREE_ARRAYSIZE(binding_ptrs),
.binding_ptrs = binding_ptrs,
.binding_lengths = binding_lengths,
- .import_thunk = NULL, // not yet implemented
- .imports = NULL, // not yet implemented
+ .processor_id = iree_cpu_query_processor_id(),
+ .environment = &environment,
};
for (uint32_t z = 0; z < dispatch_state.workgroup_count.z; ++z) {
for (uint32_t y = 0; y < dispatch_state.workgroup_count.y; ++y) {
diff --git a/iree/hal/local/executable_loader.c b/iree/hal/local/executable_loader.c
index 1630b0c..0d90ec1 100644
--- a/iree/hal/local/executable_loader.c
+++ b/iree/hal/local/executable_loader.c
@@ -6,8 +6,6 @@
#include "iree/hal/local/executable_loader.h"
-#include "iree/base/api.h"
-
iree_status_t iree_hal_executable_import_provider_resolve(
const iree_hal_executable_import_provider_t import_provider,
iree_string_view_t symbol_name, void** out_fn_ptr) {
diff --git a/iree/hal/local/executable_loader.h b/iree/hal/local/executable_loader.h
index b370c06..6092948 100644
--- a/iree/hal/local/executable_loader.h
+++ b/iree/hal/local/executable_loader.h
@@ -107,12 +107,12 @@
iree_hal_executable_caching_mode_t caching_mode,
iree_string_view_t executable_format);
-// Tries loading the |executable_data| provided in the given
-// |executable_format|. May fail even if the executable is valid if it requires
-// features not supported by the current host or runtime (such as available
-// architectures, imports, etc).
+// Tries loading the executable data provided in the given format.
+// May fail even if the executable is valid if it requires features not
+// supported by the current host or runtime (such as available architectures,
+// imports, etc).
//
-// Depending on loader ability the |caching_mode| is used to enable certain
+// Depending on loader ability the caching_mode is used to enable certain
// features such as instrumented profiling. Not all formats support these
// features and cooperation of both the compiler producing the executables and
// the runtime loader and system are required.
diff --git a/iree/hal/local/inline_command_buffer.c b/iree/hal/local/inline_command_buffer.c
index 07d95c5..a64ef51 100644
--- a/iree/hal/local/inline_command_buffer.c
+++ b/iree/hal/local/inline_command_buffer.c
@@ -14,6 +14,7 @@
#include "iree/base/internal/fpu_state.h"
#include "iree/base/internal/math.h"
#include "iree/base/tracing.h"
+#include "iree/hal/local/executable_environment.h"
#include "iree/hal/local/executable_library.h"
#include "iree/hal/local/local_descriptor_set_layout.h"
#include "iree/hal/local/local_executable.h"
@@ -56,6 +57,9 @@
// Individual dispatches must populate the dynamically changing fields like
// push_constant_count and binding_count.
iree_hal_executable_dispatch_state_v0_t dispatch_state;
+
+ // An opaque tag used to reduce the cost of processor ID queries.
+ iree_cpu_processor_tag_t processor_tag;
} state;
} iree_hal_inline_command_buffer_t;
@@ -153,11 +157,23 @@
static iree_status_t iree_hal_inline_command_buffer_flush_tasks(
iree_hal_inline_command_buffer_t* command_buffer);
+// Updates the cached processor ID field in the command buffer.
+static void iree_hal_inline_command_buffer_update_processor_id(
+ iree_hal_inline_command_buffer_t* command_buffer) {
+ iree_cpu_requery_processor_id(
+ &command_buffer->state.processor_tag,
+ &command_buffer->state.dispatch_state.processor_id);
+}
+
static iree_status_t iree_hal_inline_command_buffer_begin(
iree_hal_command_buffer_t* base_command_buffer) {
iree_hal_inline_command_buffer_t* command_buffer =
iree_hal_inline_command_buffer_cast(base_command_buffer);
iree_hal_inline_command_buffer_reset(command_buffer);
+
+ // Query the processor ID we start out on. We may update it during execution.
+ iree_hal_inline_command_buffer_update_processor_id(command_buffer);
+
return iree_ok_status();
}
@@ -395,10 +411,16 @@
IREE_HAL_WORKGROUP_LOCAL_MEMORY_PAGE_SIZE
: 0;
+ // Update the ID of the processor we are running on.
+ // We don't know how much time has passed since we last updated as we are
+ // running inline with the user program; if we knew we were going to be
+ // handling a batch of dispatches we could reduce the amount of times we call
+ // this - but that's what the task system is for.
+ iree_hal_inline_command_buffer_update_processor_id(command_buffer);
+
iree_hal_executable_dispatch_state_v0_t* dispatch_state =
&command_buffer->state.dispatch_state;
- dispatch_state->import_thunk = local_executable->import_thunk;
- dispatch_state->imports = local_executable->imports;
+ dispatch_state->environment = &local_executable->environment;
// TODO(benvanik): expose on API or keep fixed on executable.
dispatch_state->workgroup_size.x = 1;
diff --git a/iree/hal/local/loaders/BUILD b/iree/hal/local/loaders/BUILD
index 5ed7cb3..a780c38 100644
--- a/iree/hal/local/loaders/BUILD
+++ b/iree/hal/local/loaders/BUILD
@@ -45,6 +45,7 @@
"//iree/base:tracing",
"//iree/hal",
"//iree/hal/local",
+ "//iree/hal/local:executable_environment",
"//iree/hal/local:executable_library",
],
)
diff --git a/iree/hal/local/loaders/CMakeLists.txt b/iree/hal/local/loaders/CMakeLists.txt
index 49b3214..a2a1c3d 100644
--- a/iree/hal/local/loaders/CMakeLists.txt
+++ b/iree/hal/local/loaders/CMakeLists.txt
@@ -42,6 +42,7 @@
iree::base::tracing
iree::hal
iree::hal::local
+ iree::hal::local::executable_environment
iree::hal::local::executable_library
DEFINES
"IREE_HAL_HAVE_STATIC_LIBRARY_LOADER=1"
diff --git a/iree/hal/local/loaders/embedded_library_loader.c b/iree/hal/local/loaders/embedded_library_loader.c
index 689a0ad..bdffc70 100644
--- a/iree/hal/local/loaders/embedded_library_loader.c
+++ b/iree/hal/local/loaders/embedded_library_loader.c
@@ -53,7 +53,7 @@
executable->library.header =
(const iree_hal_executable_library_header_t**)iree_elf_call_p_ip(
query_fn, IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
- /*reserved=*/NULL);
+ &executable->base.environment);
if (!executable->library.header) {
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
@@ -98,15 +98,16 @@
// All calls from the loaded ELF route through our thunk function so that we
// can adapt to ABI differences.
- executable->base.import_thunk =
+ executable->base.environment.import_thunk =
(iree_hal_executable_import_thunk_v0_t)iree_elf_thunk_i_p;
// Allocate storage for the imports.
IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, iree_allocator_malloc(
- executable->base.host_allocator,
- import_table->count * sizeof(*executable->base.imports),
- (void**)&executable->base.imports));
+ z0,
+ iree_allocator_malloc(
+ executable->base.host_allocator,
+ import_table->count * sizeof(*executable->base.environment.imports),
+ (void**)&executable->base.environment.imports));
// Try to resolve each import.
// NOTE: imports are sorted alphabetically and if we cared we could use this
@@ -117,7 +118,7 @@
z0,
iree_hal_executable_import_provider_resolve(
import_provider, iree_make_cstring_view(import_table->symbols[i]),
- (void**)&executable->base.imports[i]));
+ (void**)&executable->base.environment.imports[i]));
}
IREE_TRACE_ZONE_END(z0);
@@ -198,8 +199,9 @@
iree_elf_module_deinitialize(&executable->module);
- if (executable->base.imports != NULL) {
- iree_allocator_free(host_allocator, (void*)executable->base.imports);
+ if (executable->base.environment.imports != NULL) {
+ iree_allocator_free(host_allocator,
+ (void*)executable->base.environment.imports);
}
iree_hal_local_executable_deinitialize(
diff --git a/iree/hal/local/loaders/static_library_loader.c b/iree/hal/local/loaders/static_library_loader.c
index 1e7819c..34f934b 100644
--- a/iree/hal/local/loaders/static_library_loader.c
+++ b/iree/hal/local/loaders/static_library_loader.c
@@ -13,6 +13,7 @@
#include "iree/base/tracing.h"
#include "iree/hal/api.h"
+#include "iree/hal/local/executable_environment.h"
#include "iree/hal/local/local_executable.h"
#include "iree/hal/local/local_executable_layout.h"
@@ -166,30 +167,15 @@
iree_status_t iree_hal_static_library_loader_create(
iree_host_size_t library_count,
- const iree_hal_executable_library_header_t** const* libraries,
+ const iree_hal_executable_library_query_fn_t* library_query_fns,
iree_hal_executable_import_provider_t import_provider,
iree_allocator_t host_allocator,
iree_hal_executable_loader_t** out_executable_loader) {
+ IREE_ASSERT_ARGUMENT(!library_count || library_query_fns);
IREE_ASSERT_ARGUMENT(out_executable_loader);
*out_executable_loader = NULL;
IREE_TRACE_ZONE_BEGIN(z0);
- // Verify the libraries provided all match our expected version.
- // It's rare they won't, however static libraries generated with a newer
- // version of the IREE compiler that are then linked with an older version of
- // the runtime are difficult to spot otherwise.
- for (iree_host_size_t i = 0; i < library_count; ++i) {
- const iree_hal_executable_library_header_t* header = *libraries[i];
- if (header->version > IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION) {
- IREE_TRACE_ZONE_END(z0);
- return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
- "executable does not support this version of the "
- "runtime (executable: %d, runtime: %d)",
- header->version,
- IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION);
- }
- }
-
iree_hal_static_library_loader_t* executable_loader = NULL;
iree_host_size_t total_size =
sizeof(*executable_loader) +
@@ -202,9 +188,45 @@
&executable_loader->base);
executable_loader->host_allocator = host_allocator;
executable_loader->library_count = library_count;
- memcpy((void*)executable_loader->libraries, libraries,
- sizeof(libraries[0]) * library_count);
+
+ // Default environment to enable initialization.
+ iree_hal_executable_environment_v0_t environment;
+ iree_hal_executable_environment_initialize(host_allocator, &environment);
+
+ // Query and verify the libraries provided all match our expected version.
+ // It's rare they won't, however static libraries generated with a newer
+ // version of the IREE compiler that are then linked with an older version
+ // of the runtime are difficult to spot otherwise.
+ for (iree_host_size_t i = 0; i < library_count; ++i) {
+ const iree_hal_executable_library_header_t* const* header_ptr =
+ library_query_fns[i](IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
+ &environment);
+ if (!header_ptr) {
+ status = iree_make_status(
+ IREE_STATUS_UNAVAILABLE,
+ "failed to query library header for runtime version %d",
+ IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION);
+ break;
+ }
+ const iree_hal_executable_library_header_t* header = *header_ptr;
+ IREE_TRACE_ZONE_APPEND_TEXT(z0, header->name);
+ if (header->version > IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION) {
+ status = iree_make_status(
+ IREE_STATUS_FAILED_PRECONDITION,
+ "executable does not support this version of the "
+ "runtime (executable: %d, runtime: %d)",
+ header->version, IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION);
+ break;
+ }
+ memcpy((void*)&executable_loader->libraries[i], &header_ptr,
+ sizeof(header_ptr));
+ }
+ }
+
+ if (iree_status_is_ok(status)) {
*out_executable_loader = (iree_hal_executable_loader_t*)executable_loader;
+ } else {
+ iree_allocator_free(host_allocator, executable_loader);
}
IREE_TRACE_ZONE_END(z0);
diff --git a/iree/hal/local/loaders/static_library_loader.h b/iree/hal/local/loaders/static_library_loader.h
index 36487d8..12648e9 100644
--- a/iree/hal/local/loaders/static_library_loader.h
+++ b/iree/hal/local/loaders/static_library_loader.h
@@ -35,7 +35,7 @@
// within and across loaders will result in undefined behavior.
iree_status_t iree_hal_static_library_loader_create(
iree_host_size_t library_count,
- const iree_hal_executable_library_header_t** const* libraries,
+ const iree_hal_executable_library_query_fn_t* library_query_fns,
iree_hal_executable_import_provider_t import_provider,
iree_allocator_t host_allocator,
iree_hal_executable_loader_t** out_executable_loader);
diff --git a/iree/hal/local/loaders/system_library_loader.c b/iree/hal/local/loaders/system_library_loader.c
index d327eaa..f77f432 100644
--- a/iree/hal/local/loaders/system_library_loader.c
+++ b/iree/hal/local/loaders/system_library_loader.c
@@ -147,7 +147,8 @@
// Query for a compatible version of the library.
executable->library.header =
- query_fn(IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION, /*reserved=*/NULL);
+ query_fn(IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
+ &executable->base.environment);
if (!executable->library.header) {
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
@@ -209,14 +210,16 @@
IREE_TRACE_ZONE_BEGIN(z0);
// Pass all imports right through.
- executable->base.import_thunk = iree_hal_system_executable_import_thunk_v0;
+ executable->base.environment.import_thunk =
+ iree_hal_system_executable_import_thunk_v0;
// Allocate storage for the imports.
IREE_RETURN_AND_END_ZONE_IF_ERROR(
- z0, iree_allocator_malloc(
- executable->base.host_allocator,
- import_table->count * sizeof(*executable->base.imports),
- (void**)&executable->base.imports));
+ z0,
+ iree_allocator_malloc(
+ executable->base.host_allocator,
+ import_table->count * sizeof(*executable->base.environment.imports),
+ (void**)&executable->base.environment.imports));
// Try to resolve each import.
// NOTE: imports are sorted alphabetically and if we cared we could use this
@@ -227,7 +230,7 @@
z0,
iree_hal_executable_import_provider_resolve(
import_provider, iree_make_cstring_view(import_table->symbols[i]),
- (void**)&executable->base.imports[i]));
+ (void**)&executable->base.environment.imports[i]));
}
IREE_TRACE_ZONE_END(z0);
@@ -302,6 +305,11 @@
iree_dynamic_library_release(executable->handle);
+ if (executable->base.environment.imports != NULL) {
+ iree_allocator_free(host_allocator,
+ (void*)executable->base.environment.imports);
+ }
+
iree_hal_local_executable_deinitialize(
(iree_hal_local_executable_t*)base_executable);
iree_allocator_free(host_allocator, executable);
diff --git a/iree/hal/local/local_executable.c b/iree/hal/local/local_executable.c
index c98cbc2..cdcf828 100644
--- a/iree/hal/local/local_executable.c
+++ b/iree/hal/local/local_executable.c
@@ -7,6 +7,7 @@
#include "iree/hal/local/local_executable.h"
#include "iree/base/tracing.h"
+#include "iree/hal/local/executable_environment.h"
void iree_hal_local_executable_initialize(
const iree_hal_local_executable_vtable_t* vtable,
@@ -29,9 +30,9 @@
// Function attributes are optional and populated by the parent type.
out_base_executable->dispatch_attrs = NULL;
- // Imports will be provided by the parent type, if needed.
- out_base_executable->import_thunk = NULL;
- out_base_executable->imports = NULL;
+ // Default environment with no imports assigned.
+ iree_hal_executable_environment_initialize(host_allocator,
+ &out_base_executable->environment);
}
void iree_hal_local_executable_deinitialize(
diff --git a/iree/hal/local/local_executable.h b/iree/hal/local/local_executable.h
index c55c867..7993295 100644
--- a/iree/hal/local/local_executable.h
+++ b/iree/hal/local/local_executable.h
@@ -28,12 +28,8 @@
// of memory required by the function.
const iree_hal_executable_dispatch_attrs_v0_t* dispatch_attrs;
- // Thunk function for calling imports. All calls must be made through this.
- iree_hal_executable_import_thunk_v0_t import_thunk;
- // Optional imported functions available for use within the executable.
- // Contains one entry per imported function. If an import was marked as weak
- // then the corresponding entry may be NULL.
- const iree_hal_executable_import_v0_t* imports;
+ // Execution environment.
+ iree_hal_executable_environment_v0_t environment;
} iree_hal_local_executable_t;
typedef struct iree_hal_local_executable_vtable_t {
diff --git a/iree/hal/local/task_command_buffer.c b/iree/hal/local/task_command_buffer.c
index 010a480..63efb51 100644
--- a/iree/hal/local/task_command_buffer.c
+++ b/iree/hal/local/task_command_buffer.c
@@ -13,6 +13,7 @@
#include "iree/base/api.h"
#include "iree/base/tracing.h"
+#include "iree/hal/local/executable_environment.h"
#include "iree/hal/local/executable_library.h"
#include "iree/hal/local/local_descriptor_set_layout.h"
#include "iree/hal/local/local_executable.h"
@@ -838,11 +839,8 @@
state.binding_lengths = (size_t*)cmd_ptr;
cmd_ptr += cmd->binding_count * sizeof(*state.binding_lengths);
- // When we support imports we can populate those here based on what the
- // executable declared (as each executable may import a unique set of
- // functions).
- state.import_thunk = cmd->executable->import_thunk;
- state.imports = cmd->executable->imports;
+ state.processor_id = tile_context->processor_id;
+ state.environment = &cmd->executable->environment;
iree_status_t status = iree_hal_local_executable_issue_call(
cmd->executable, cmd->ordinal, &state,
diff --git a/iree/samples/static_library/static_library_demo.c b/iree/samples/static_library/static_library_demo.c
index 8e1aeec..d787ab3 100644
--- a/iree/samples/static_library/static_library_demo.c
+++ b/iree/samples/static_library/static_library_demo.c
@@ -14,7 +14,8 @@
extern const iree_hal_executable_library_header_t**
simple_mul_dispatch_0_library_query(
- iree_hal_executable_library_version_t max_version, void* reserved);
+ iree_hal_executable_library_version_t max_version,
+ const iree_hal_executable_environment_v0_t* environment);
// A function to create the bytecode or C module.
extern iree_status_t create_module(iree_vm_module_t** module);
@@ -25,25 +26,19 @@
// released by the caller.
iree_status_t create_device_with_static_loader(iree_allocator_t host_allocator,
iree_hal_device_t** out_device) {
- iree_status_t status = iree_ok_status();
-
// Set paramters for the device created in the next step.
iree_hal_sync_device_params_t params;
iree_hal_sync_device_params_initialize(¶ms);
- // Load the statically embedded library
- const iree_hal_executable_library_header_t** static_library =
- simple_mul_dispatch_0_library_query(
- IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION, /*reserved=*/NULL);
- const iree_hal_executable_library_header_t** libraries[1] = {static_library};
-
+ // Register the statically linked executable library.
+ const iree_hal_executable_library_query_fn_t libraries[] = {
+ simple_mul_dispatch_0_library_query,
+ };
iree_hal_executable_loader_t* library_loader = NULL;
- if (iree_status_is_ok(status)) {
- status = iree_hal_static_library_loader_create(
- IREE_ARRAYSIZE(libraries), libraries,
- iree_hal_executable_import_provider_null(), host_allocator,
- &library_loader);
- }
+ iree_status_t status = iree_hal_static_library_loader_create(
+ IREE_ARRAYSIZE(libraries), libraries,
+ iree_hal_executable_import_provider_null(), host_allocator,
+ &library_loader);
// Use the default host allocator for buffer allocations.
iree_string_view_t identifier = iree_make_cstring_view("sync");
diff --git a/iree/task/BUILD b/iree/task/BUILD
index af417f5..1352b69 100644
--- a/iree/task/BUILD
+++ b/iree/task/BUILD
@@ -73,6 +73,7 @@
"//iree/base:tracing",
"//iree/base/internal",
"//iree/base/internal:atomic_slist",
+ "//iree/base/internal:cpu",
"//iree/base/internal:event_pool",
"//iree/base/internal:fpu_state",
"//iree/base/internal:prng",
diff --git a/iree/task/CMakeLists.txt b/iree/task/CMakeLists.txt
index bdddd8f..1d39c33 100644
--- a/iree/task/CMakeLists.txt
+++ b/iree/task/CMakeLists.txt
@@ -68,6 +68,7 @@
iree::base::core_headers
iree::base::internal
iree::base::internal::atomic_slist
+ iree::base::internal::cpu
iree::base::internal::event_pool
iree::base::internal::fpu_state
iree::base::internal::prng
diff --git a/iree/task/task.c b/iree/task/task.c
index f11bedc..afab9eb 100644
--- a/iree/task/task.c
+++ b/iree/task/task.c
@@ -706,7 +706,8 @@
}
void iree_task_dispatch_shard_execute(
- iree_task_dispatch_shard_t* task, iree_byte_span_t worker_local_memory,
+ iree_task_dispatch_shard_t* task, iree_cpu_processor_id_t processor_id,
+ iree_byte_span_t worker_local_memory,
iree_task_submission_t* pending_submission) {
IREE_TRACE_ZONE_BEGIN(z0);
@@ -751,6 +752,9 @@
memset(&shard_statistics, 0, sizeof(shard_statistics));
tile_context.statistics = &shard_statistics;
+ // Hint as to which processor we are running on.
+ tile_context.processor_id = processor_id;
+
// Loop over all tiles until they are all processed.
const uint32_t tile_count = dispatch_task->tile_count;
const uint32_t tiles_per_reservation = dispatch_task->tiles_per_reservation;
diff --git a/iree/task/task.h b/iree/task/task.h
index 0dc3958..9ae2290 100644
--- a/iree/task/task.h
+++ b/iree/task/task.h
@@ -14,6 +14,7 @@
#include "iree/base/api.h"
#include "iree/base/internal/atomic_slist.h"
#include "iree/base/internal/atomics.h"
+#include "iree/base/internal/cpu.h"
#include "iree/base/internal/synchronization.h"
#include "iree/task/affinity_set.h"
@@ -539,8 +540,9 @@
// Shared statistics counters for the dispatch shard.
iree_task_dispatch_statistics_t* statistics;
- // TODO(benvanik): cpuid uarch.
- // TODO(benvanik): per-tile coroutine storage.
+ // Opaque ID of the processor executing the tile.
+ // May be slightly out of date or 0 if the processor could not be queried.
+ iree_cpu_processor_id_t processor_id;
} iree_task_tile_context_t;
typedef struct iree_task_dispatch_t iree_task_dispatch_t;
diff --git a/iree/task/task_impl.h b/iree/task/task_impl.h
index afb9ac8..ee1b5a3 100644
--- a/iree/task/task_impl.h
+++ b/iree/task/task_impl.h
@@ -111,13 +111,18 @@
// May block the caller for an indeterminate amount of time and should only be
// called from threads owned by or donated to the executor.
//
+// |processor_id| is a guess as to which logical processor the shard is
+// executing on. It may be out of date or 0 if the processor could not be
+// queried.
+//
// |worker_local_memory| is a block of memory exclusively available to the shard
// during execution. Contents are undefined both before and after execution.
//
// Errors are propagated to the parent scope and the dispatch will fail once
// all shards have completed.
void iree_task_dispatch_shard_execute(
- iree_task_dispatch_shard_t* task, iree_byte_span_t worker_local_memory,
+ iree_task_dispatch_shard_t* task, iree_cpu_processor_id_t processor_id,
+ iree_byte_span_t worker_local_memory,
iree_task_submission_t* pending_submission);
#ifdef __cplusplus
diff --git a/iree/task/worker.c b/iree/task/worker.c
index a367043..e0c4ea6 100644
--- a/iree/task/worker.c
+++ b/iree/task/worker.c
@@ -37,6 +37,8 @@
iree_prng_minilcg128_initialize(iree_prng_splitmix64_next(seed_prng),
&out_worker->theft_prng);
out_worker->local_memory = local_memory;
+ out_worker->processor_id = 0;
+ out_worker->processor_tag = 0;
iree_task_worker_state_t initial_state = IREE_TASK_WORKER_STATE_RUNNING;
if (executor->scheduling_mode &
@@ -186,9 +188,9 @@
break;
}
case IREE_TASK_TYPE_DISPATCH_SHARD: {
- iree_task_dispatch_shard_execute((iree_task_dispatch_shard_t*)task,
- worker->local_memory,
- pending_submission);
+ iree_task_dispatch_shard_execute(
+ (iree_task_dispatch_shard_t*)task, worker->processor_id,
+ worker->local_memory, pending_submission);
break;
}
default:
@@ -251,10 +253,20 @@
return true; // try again
}
+// Updates the cached processor ID field in the worker.
+static void iree_task_worker_update_processor_id(iree_task_worker_t* worker) {
+ iree_cpu_requery_processor_id(&worker->processor_tag, &worker->processor_id);
+}
+
// Alternates between pumping ready tasks in the worker queue and waiting
// for more tasks to arrive. Only returns when the worker has been asked by
// the executor to exit.
static void iree_task_worker_pump_until_exit(iree_task_worker_t* worker) {
+ // Initial processor ID assignment. We normally refresh this upon waking from
+ // a wait but it's possible that there's already work pending and we want to
+ // be able to process it with the proper processor ID immediately.
+ iree_task_worker_update_processor_id(worker);
+
// Pump the thread loop to process more tasks.
while (true) {
// If we fail to find any work to do we'll wait at the end of this loop.
@@ -277,6 +289,9 @@
break;
}
+ // TODO(benvanik): we could try to update the processor ID here before we
+ // begin a new batch of work - assuming it's not too expensive.
+
iree_task_submission_t pending_submission;
iree_task_submission_initialize(&pending_submission);
@@ -321,6 +336,10 @@
iree_notification_commit_wait(&worker->wake_notification, wait_token,
IREE_TIME_INFINITE_FUTURE);
IREE_TRACE_ZONE_END(z_wait);
+
+ // Woke from a wait - query the processor ID in case we migrated during
+ // the sleep.
+ iree_task_worker_update_processor_id(worker);
}
// Wait completed.
diff --git a/iree/task/worker.h b/iree/task/worker.h
index 1cc2d0f..6e6c435 100644
--- a/iree/task/worker.h
+++ b/iree/task/worker.h
@@ -110,6 +110,15 @@
// remain valid so that the executor can query its state.
iree_thread_t* thread;
+ // Guess at the current processor ID.
+ // This is updated infrequently as it can be semi-expensive to determine
+ // (on some platforms at least 1 syscall involved). We always update it upon
+ // waking as idle waits are the most likely place the worker will be migrated
+ // across processors.
+ iree_cpu_processor_id_t processor_id;
+ // An opaque tag used to reduce the cost of processor ID queries.
+ iree_cpu_processor_tag_t processor_tag;
+
// Destructive interference padding between the mailbox and local task queue
// to ensure that the worker - who is pounding on local_task_queue - doesn't
// contend with submissions or coordinators dropping new tasks in the mailbox.