Adding an executable environment and plumbing through processor info.
This adds an iree_hal_executable_environment_*_t struct to wrap up
the existing import table, a new processor info struct, and a new
reserved spot for specialization constants.
diff --git a/experimental/web/sample_static/device_multithreaded.c b/experimental/web/sample_static/device_multithreaded.c
index d4ffab5..bde6cc2 100644
--- a/experimental/web/sample_static/device_multithreaded.c
+++ b/experimental/web/sample_static/device_multithreaded.c
@@ -16,13 +16,10 @@
   iree_hal_task_device_params_t params;
   iree_hal_task_device_params_initialize(&params);
 
-  // Load the statically embedded library.
-  const iree_hal_executable_library_header_t** static_library =
-      mnist_linked_llvm_library_query(
-          IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
-          /*reserved=*/NULL);
-  const iree_hal_executable_library_header_t** libraries[1] = {static_library};
-
+  // Register the statically linked executable library.
+  const iree_hal_executable_library_query_fn_t* libraries[] = {
+      mnist_linked_llvm_library_query,
+  };
   iree_hal_executable_loader_t* library_loader = NULL;
   iree_status_t status = iree_hal_static_library_loader_create(
       IREE_ARRAYSIZE(libraries), libraries,
diff --git a/experimental/web/sample_static/device_sync.c b/experimental/web/sample_static/device_sync.c
index 82e291d..082c730 100644
--- a/experimental/web/sample_static/device_sync.c
+++ b/experimental/web/sample_static/device_sync.c
@@ -13,13 +13,10 @@
   iree_hal_sync_device_params_t params;
   iree_hal_sync_device_params_initialize(&params);
 
-  // Load the statically embedded library.
-  const iree_hal_executable_library_header_t** static_library =
-      mnist_linked_llvm_library_query(
-          IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
-          /*reserved=*/NULL);
-  const iree_hal_executable_library_header_t** libraries[1] = {static_library};
-
+  // Register the statically linked executable library.
+  const iree_hal_executable_library_query_fn_t* libraries[] = {
+      mnist_linked_llvm_library_query,
+  };
   iree_hal_executable_loader_t* library_loader = NULL;
   iree_status_t status = iree_hal_static_library_loader_create(
       IREE_ARRAYSIZE(libraries), libraries,
diff --git a/iree/base/internal/BUILD b/iree/base/internal/BUILD
index 7368255..ab122af 100644
--- a/iree/base/internal/BUILD
+++ b/iree/base/internal/BUILD
@@ -103,6 +103,16 @@
 )
 
 cc_library(
+    name = "cpu",
+    srcs = ["cpu.c"],
+    hdrs = ["cpu.h"],
+    deps = [
+        "//iree/base",
+        "//iree/base:core_headers",
+    ],
+)
+
+cc_library(
     name = "dynamic_library",
     srcs = [
         "dynamic_library_posix.c",
diff --git a/iree/base/internal/CMakeLists.txt b/iree/base/internal/CMakeLists.txt
index ca24dce..08c0e3b 100644
--- a/iree/base/internal/CMakeLists.txt
+++ b/iree/base/internal/CMakeLists.txt
@@ -94,6 +94,19 @@
 
 iree_cc_library(
   NAME
+    cpu
+  HDRS
+    "cpu.h"
+  SRCS
+    "cpu.c"
+  DEPS
+    iree::base
+    iree::base::core_headers
+  PUBLIC
+)
+
+iree_cc_library(
+  NAME
     dynamic_library
   HDRS
     "dynamic_library.h"
diff --git a/iree/base/internal/cpu.c b/iree/base/internal/cpu.c
new file mode 100644
index 0000000..3e22abc
--- /dev/null
+++ b/iree/base/internal/cpu.c
@@ -0,0 +1,65 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/internal/cpu.h"
+
+#include "iree/base/target_platform.h"
+
+//===----------------------------------------------------------------------===//
+// iree_cpu_*
+//===----------------------------------------------------------------------===//
+
+#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_EMSCRIPTEN) || \
+    defined(IREE_PLATFORM_LINUX)
+
+#include <sched.h>
+
+extern __attribute__((weak)) int sched_getcpu(void) {
+  // TODO(benvanik): emulate with syscall/vdso/etc.
+  errno = ENOSYS;
+  return -1;
+}
+
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void) {
+  // This path is relatively portable and should work on linux/bsd/etc-likes.
+  // We may want to use getcpu when available so that we can get the group ID.
+  // https://man7.org/linux/man-pages/man3/sched_getcpu.3.html
+  //
+  // libc implementations can use vDSO and other fun stuff to make this really
+  // cheap: http://git.musl-libc.org/cgit/musl/tree/src/sched/sched_getcpu.c
+  int id = sched_getcpu();
+  return id != -1 ? id : 0;
+}
+
+#elif defined(IREE_PLATFORM_WINDOWS)
+
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void) {
+  PROCESSOR_NUMBER pn;
+  GetCurrentProcessorNumberEx(&pn);
+  return 64 * pn.Group + pn.Number;
+}
+
+#else
+
+// No implementation.
+// We could allow an iree/base/config.h override to externalize this.
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void) { return 0; }
+
+#endif  // IREE_PLATFORM_*
+
+void iree_cpu_requery_processor_id(iree_cpu_processor_tag_t* IREE_RESTRICT tag,
+                                   iree_cpu_processor_id_t* IREE_RESTRICT
+                                       processor_id) {
+  IREE_ASSERT_ARGUMENT(tag);
+  IREE_ASSERT_ARGUMENT(processor_id);
+
+  // TODO(benvanik): set a frequency for this and use a coarse timer
+  // (CLOCK_MONOTONIC_COARSE) to do a ~4-10Hz refresh. We can store the last
+  // query time and the last processor ID in the tag and only perform the query
+  // if it has changed.
+
+  *processor_id = iree_cpu_query_processor_id();
+}
diff --git a/iree/base/internal/cpu.h b/iree/base/internal/cpu.h
new file mode 100644
index 0000000..914f39d
--- /dev/null
+++ b/iree/base/internal/cpu.h
@@ -0,0 +1,40 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_BASE_INTERNAL_CPU_H_
+#define IREE_BASE_INTERNAL_CPU_H_
+
+#include <stddef.h>
+
+#include "iree/base/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_cpu_*
+//===----------------------------------------------------------------------===//
+
+typedef uint32_t iree_cpu_processor_id_t;
+typedef uint32_t iree_cpu_processor_tag_t;
+
+// Returns the ID of the logical processor executing this code.
+iree_cpu_processor_id_t iree_cpu_query_processor_id(void);
+
+// Returns the ID of the logical processor executing this code, using |tag| to
+// memoize the query in cases where it does not change frequently.
+// |tag| must be initialized to 0 on first call and may be reset to 0 by the
+// caller at any time to invalidate the cached result.
+void iree_cpu_requery_processor_id(iree_cpu_processor_tag_t* IREE_RESTRICT tag,
+                                   iree_cpu_processor_id_t* IREE_RESTRICT
+                                       processor_id);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_BASE_INTERNAL_ARENA_H_
diff --git a/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp b/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
index 94031f7..ac16485 100644
--- a/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
@@ -63,6 +63,81 @@
 // versions in the same compiled output.
 class HALDispatchABI {
  public:
+  // Matches the field order in iree_hal_processor_v0_t.
+  enum class ProcessorField {
+    data = 0,
+  };
+
+  // Matches IREE_HAL_PROCESSOR_DATA_CAPACITY_V0.
+  static constexpr int ProcessorDataCapacity = 8;
+
+  // Returns a Type representing iree_hal_processor_v0_t.
+  static LLVM::LLVMStructType getProcessorType(
+      MLIRContext *context, LLVMTypeConverter *typeConverter) {
+    auto structType =
+        LLVM::LLVMStructType::getIdentified(context, "iree_hal_processor_v0_t");
+    if (structType.isInitialized()) return structType;
+
+    auto uint64Type = IntegerType::get(context, 64);
+    SmallVector<Type> fieldTypes;
+
+    // uint64_t data[IREE_HAL_PROCESSOR_DATA_CAPACITY_V0];
+    fieldTypes.push_back(
+        LLVM::LLVMArrayType::get(uint64Type, ProcessorDataCapacity));
+
+    LogicalResult bodySet = structType.setBody(fieldTypes, /*isPacked=*/false);
+    assert(succeeded(bodySet) &&
+           "could not set the body of an identified struct");
+    (void)bodySet;
+
+    return structType;
+  }
+
+  // Matches the field order in iree_hal_executable_environment_v0_t.
+  enum class EnvironmentField {
+    constants = 0,
+    import_thunk = 1,
+    imports = 2,
+    processor = 3,
+  };
+
+  // Returns a Type representing iree_hal_executable_environment_v0_t.
+  static LLVM::LLVMStructType getEnvironmentType(
+      MLIRContext *context, LLVMTypeConverter *typeConverter,
+      LLVM::LLVMStructType processorType) {
+    auto structType = LLVM::LLVMStructType::getIdentified(
+        context, "iree_hal_executable_environment_v0_t");
+    if (structType.isInitialized()) return structType;
+
+    auto int8Type = IntegerType::get(context, 8);
+    auto uint32Type = IntegerType::get(context, 32);
+    auto int8PtrType = LLVM::LLVMPointerType::get(int8Type);
+    auto uint32PtrType = LLVM::LLVMPointerType::get(uint32Type);
+    SmallVector<Type, 4> fieldTypes;
+
+    // const uint32_t* constants;
+    fieldTypes.push_back(uint32PtrType);
+
+    // iree_hal_executable_import_thunk_v0_t import_thunk;
+    // const iree_hal_executable_import_v0_t* imports;
+    auto importType = LLVM::LLVMFunctionType::get(uint32Type, int8PtrType);
+    auto importPtrType = LLVM::LLVMPointerType::get(importType);
+    auto importThunkType =
+        LLVM::LLVMFunctionType::get(uint32Type, {importPtrType, int8PtrType});
+    fieldTypes.push_back(LLVM::LLVMPointerType::get(importThunkType));
+    fieldTypes.push_back(LLVM::LLVMPointerType::get(importPtrType));
+
+    // iree_hal_processor_v0_t processor;
+    fieldTypes.push_back(processorType);
+
+    LogicalResult bodySet = structType.setBody(fieldTypes, /*isPacked=*/false);
+    assert(succeeded(bodySet) &&
+           "could not set the body of an identified struct");
+    (void)bodySet;
+
+    return structType;
+  }
+
   // Returns a Type representing iree_hal_vec3_t.
   static Type getVec3Type(MLIRContext *context) {
     auto uint32Type = IntegerType::get(context, 32);
@@ -70,7 +145,7 @@
   }
 
   // Matches the field order in iree_hal_executable_dispatch_state_v0_t.
-  enum class Field {
+  enum class StateField {
     workgroup_count = 0,
     workgroup_size = 1,
     push_constant_count = 2,
@@ -78,13 +153,14 @@
     binding_count = 4,
     binding_ptrs = 5,
     binding_lengths = 6,
-    import_thunk = 7,
-    imports = 8,
+    processor_id = 7,
+    environment = 8,
   };
 
   // Returns a Type representing iree_hal_executable_dispatch_state_v0_t.
   static LLVM::LLVMStructType getDispatchStateType(
-      MLIRContext *context, LLVMTypeConverter *typeConverter) {
+      MLIRContext *context, LLVMTypeConverter *typeConverter,
+      LLVM::LLVMStructType environmentType) {
     auto structType = LLVM::LLVMStructType::getIdentified(
         context, "iree_hal_executable_dispatch_state_v0_t");
     if (structType.isInitialized()) return structType;
@@ -114,14 +190,11 @@
     fieldTypes.push_back(LLVM::LLVMPointerType::get(int8PtrType));
     fieldTypes.push_back(LLVM::LLVMPointerType::get(indexType));
 
-    // iree_hal_executable_import_thunk_v0_t import_thunk;
-    // const iree_hal_executable_import_v0_t* imports;
-    auto importType = LLVM::LLVMFunctionType::get(uint32Type, int8PtrType);
-    auto importPtrType = LLVM::LLVMPointerType::get(importType);
-    auto importThunkType =
-        LLVM::LLVMFunctionType::get(uint32Type, {importPtrType, int8PtrType});
-    fieldTypes.push_back(LLVM::LLVMPointerType::get(importThunkType));
-    fieldTypes.push_back(LLVM::LLVMPointerType::get(importPtrType));
+    // uint32_t processor_id;
+    fieldTypes.push_back(uint32Type);
+
+    // const iree_hal_executable_environment_v0_t* environment;
+    fieldTypes.push_back(LLVM::LLVMPointerType::get(environmentType));
 
     LogicalResult bodySet = structType.setBody(fieldTypes, /*isPacked=*/false);
     assert(succeeded(bodySet) &&
@@ -136,11 +209,14 @@
   // `iree/hal/local/executable_library.h`.
   static SmallVector<Type, 5> getInputTypes(MLIRContext *context,
                                             LLVMTypeConverter *typeConverter) {
+    auto dispatchStateType = LLVM::LLVMStructType::getIdentified(
+        context, "iree_hal_executable_dispatch_state_v0_t");
+    assert(dispatchStateType &&
+           "dispatch state type must be defined by ConvertToLLVM");
     return SmallVector<Type, 5>{
         // const iree_hal_executable_dispatch_state_v0_t* IREE_RESTRICT
         //   dispatch_state
-        LLVM::LLVMPointerType::get(
-            getDispatchStateType(context, typeConverter)),
+        LLVM::LLVMPointerType::get(dispatchStateType),
         // const iree_hal_vec3_t* IREE_RESTRICT workgroup_id
         LLVM::LLVMPointerType::get(getVec3Type(context)),
         // void* IREE_RESTRICT local_memory
@@ -152,8 +228,11 @@
                           LLVMTypeConverter *typeConverter)
       : funcOp(funcOp),
         typeConverter(typeConverter),
-        dispatchStateType(
-            getDispatchStateType(funcOp.getContext(), typeConverter)) {}
+        processorType(getProcessorType(funcOp.getContext(), typeConverter)),
+        environmentType(getEnvironmentType(funcOp.getContext(), typeConverter,
+                                           processorType)),
+        dispatchStateType(getDispatchStateType(
+            funcOp.getContext(), typeConverter, environmentType)) {}
 
   LLVM::LLVMFuncOp getFuncOp() { return funcOp; }
 
@@ -173,7 +252,7 @@
   Value loadWorkgroupCount(Location loc, int32_t dim, Type resultType,
                            OpBuilder &builder) {
     auto workgroupCountValue =
-        loadFieldValue(loc, Field::workgroup_count, builder);
+        loadFieldValue(loc, StateField::workgroup_count, builder);
     auto dimValue = builder.createOrFold<LLVM::ExtractValueOp>(
         loc, builder.getIntegerType(32), workgroupCountValue,
         builder.getI64ArrayAttr(dim));
@@ -184,7 +263,7 @@
   Value loadWorkgroupSize(Location loc, int32_t dim, Type resultType,
                           OpBuilder &builder) {
     auto workgroupSizeValue =
-        loadFieldValue(loc, Field::workgroup_size, builder);
+        loadFieldValue(loc, StateField::workgroup_size, builder);
     auto dimValue = builder.createOrFold<LLVM::ExtractValueOp>(
         loc, builder.getIntegerType(32), workgroupSizeValue,
         builder.getI64ArrayAttr(dim));
@@ -199,7 +278,7 @@
 
   // Returns the total push constant count as an index-converted type.
   Value loadPushConstantCount(Location loc, OpBuilder &builder) {
-    auto value = loadFieldValue(loc, Field::push_constant_count, builder);
+    auto value = loadFieldValue(loc, StateField::push_constant_count, builder);
     return castValueToType(loc, value,
                            typeConverter->convertType(builder.getIndexType()),
                            builder);
@@ -209,7 +288,7 @@
   Value loadPushConstant(Location loc, int64_t offset, Type resultType,
                          OpBuilder &builder) {
     auto constantsPtrValue =
-        loadFieldValue(loc, Field::push_constants, builder);
+        loadFieldValue(loc, StateField::push_constants, builder);
     auto offsetValue = getIndexValue(loc, offset, builder);
     Value constantPtrValue = builder.create<LLVM::GEPOp>(
         loc, constantsPtrValue.getType(), constantsPtrValue, offsetValue);
@@ -219,7 +298,7 @@
 
   // Returns the total binding count as an index-converted type.
   Value loadBindingCount(Location loc, OpBuilder &builder) {
-    auto value = loadFieldValue(loc, Field::binding_count, builder);
+    auto value = loadFieldValue(loc, StateField::binding_count, builder);
     return castValueToType(loc, value,
                            typeConverter->convertType(builder.getIndexType()),
                            builder);
@@ -229,7 +308,7 @@
   // Equivalent to:
   //   int8_t** base_ptr = &state->binding_ptrs[ordinal];
   Value loadBindingPtr(Location loc, int64_t ordinal, OpBuilder &builder) {
-    auto ptrsPtrValue = loadFieldValue(loc, Field::binding_ptrs, builder);
+    auto ptrsPtrValue = loadFieldValue(loc, StateField::binding_ptrs, builder);
     auto ordinalValue = getIndexValue(loc, ordinal, builder);
     auto elementPtrValue = builder.createOrFold<LLVM::GEPOp>(
         loc, ptrsPtrValue.getType(), ptrsPtrValue, ordinalValue);
@@ -238,7 +317,8 @@
 
   // Loads the byte length of the binding |ordinal| as an index-converted type.
   Value loadBindingLength(Location loc, int64_t ordinal, OpBuilder &builder) {
-    auto lengthsPtrValue = loadFieldValue(loc, Field::binding_lengths, builder);
+    auto lengthsPtrValue =
+        loadFieldValue(loc, StateField::binding_lengths, builder);
     auto ordinalValue = getIndexValue(loc, ordinal, builder);
     auto elementPtrValue = builder.createOrFold<LLVM::GEPOp>(
         loc, lengthsPtrValue.getType(), lengthsPtrValue, ordinalValue);
@@ -313,11 +393,43 @@
     }
   }
 
+  // Loads the processor ID the code is (most likely) being run on.
+  // Equivalent to:
+  //   uint32_t processor_id = state->processor_id;
+  Value loadProcessorID(Location loc, OpBuilder &builder) {
+    return loadFieldValue(loc, StateField::processor_id, builder);
+  }
+
+  // Loads a processor information data field at the given index.
+  // May be 0 if the field is not available.
+  Value loadProcessorData(Location loc, int64_t index, OpBuilder &builder) {
+    // Load the value; it should always be in bounds.
+    Value dataArrayValue = loadFieldValue(loc, ProcessorField::data, builder);
+    Type elementType =
+        dataArrayValue.getType().cast<LLVM::LLVMArrayType>().getElementType();
+    Value dataValue = builder.create<LLVM::ExtractValueOp>(
+        loc, elementType, dataArrayValue, builder.getI64ArrayAttr(index));
+    return dataValue;
+  }
+
+  // Loads an executable constant at |index| and casts it to |resultType|.
+  Value loadExecutableConstant(Location loc, int64_t index, Type resultType,
+                               OpBuilder &builder) {
+    auto constantsPtrValue =
+        loadFieldValue(loc, EnvironmentField::constants, builder);
+    auto indexValue = getIndexValue(loc, index, builder);
+    Value constantPtrValue = builder.create<LLVM::GEPOp>(
+        loc, constantsPtrValue.getType(), constantsPtrValue, indexValue);
+    Value constantValue = builder.create<LLVM::LoadOp>(loc, constantPtrValue);
+    return castValueToType(loc, constantValue, resultType, builder);
+  }
+
   // Loads the import function pointer of the import |ordinal|.
   // Equivalent to:
   //   iree_hal_executable_import_v0_t func_ptr = state->imports[ordinal];
   Value loadImportFuncPtr(Location loc, int64_t ordinal, OpBuilder &builder) {
-    auto importsPtrValue = loadFieldValue(loc, Field::imports, builder);
+    auto importsPtrValue =
+        loadFieldValue(loc, EnvironmentField::imports, builder);
     auto ordinalValue = getIndexValue(loc, ordinal, builder);
     auto elementPtrValue = builder.createOrFold<LLVM::GEPOp>(
         loc, importsPtrValue.getType(), importsPtrValue, ordinalValue);
@@ -343,7 +455,8 @@
   // Returns 0 on success and non-zero otherwise.
   Value callImport(Location loc, unsigned importOrdinal, Value params,
                    OpBuilder &builder) {
-    auto thunkPtrValue = loadFieldValue(loc, Field::import_thunk, builder);
+    auto thunkPtrValue =
+        loadFieldValue(loc, EnvironmentField::import_thunk, builder);
     auto importPtrValue = loadImportFuncPtr(loc, importOrdinal, builder);
     auto callOp =
         builder.create<LLVM::CallOp>(loc, TypeRange{builder.getI32Type()},
@@ -356,14 +469,33 @@
   }
 
  private:
-  Value loadFieldValue(Location loc, Field field, OpBuilder &builder) {
-    auto statePtrValue = funcOp.getArgument(0);
-    auto stateValue = builder.createOrFold<LLVM::LoadOp>(loc, statePtrValue);
-    auto fieldType = dispatchStateType.getBody()[(int)field];
+  Value loadFieldValue(Location loc, StateField field, OpBuilder &builder) {
+    Value statePtrValue = funcOp.getArgument(0);
+    Value stateValue = builder.createOrFold<LLVM::LoadOp>(loc, statePtrValue);
+    Type fieldType = dispatchStateType.getBody()[(int)field];
     return builder.createOrFold<LLVM::ExtractValueOp>(
         loc, fieldType, stateValue, builder.getI64ArrayAttr((int)field));
   }
 
+  Value loadFieldValue(Location loc, EnvironmentField field,
+                       OpBuilder &builder) {
+    Value environmentPtrValue =
+        loadFieldValue(loc, StateField::environment, builder);
+    Value environmentValue =
+        builder.create<LLVM::LoadOp>(loc, environmentPtrValue);
+    Type fieldType = environmentType.getBody()[(int)field];
+    return builder.createOrFold<LLVM::ExtractValueOp>(
+        loc, fieldType, environmentValue, builder.getI64ArrayAttr((int)field));
+  }
+
+  Value loadFieldValue(Location loc, ProcessorField field, OpBuilder &builder) {
+    Value processorValue =
+        loadFieldValue(loc, EnvironmentField::processor, builder);
+    Type fieldType = processorType.getBody()[(int)field];
+    return builder.createOrFold<LLVM::ExtractValueOp>(
+        loc, fieldType, processorValue, builder.getI64ArrayAttr((int)field));
+  }
+
   Value getIndexValue(Location loc, int64_t value, OpBuilder &builder) {
     return builder.createOrFold<LLVM::ConstantOp>(
         loc, typeConverter->convertType(builder.getIndexType()),
@@ -379,6 +511,8 @@
 
   LLVM::LLVMFuncOp funcOp;
   LLVMTypeConverter *typeConverter;
+  LLVM::LLVMStructType processorType;
+  LLVM::LLVMStructType environmentType;
   LLVM::LLVMStructType dispatchStateType;
 };
 
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
index 19b1565..f92986a 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
@@ -55,6 +55,16 @@
   return type;
 }
 
+// %struct.iree_hal_executable_environment_v0_t = type {
+//   ...
+// }
+static llvm::StructType *makeEnvironmentType(llvm::LLVMContext &context) {
+  auto *type = llvm::StructType::getTypeByName(
+      context, "iree_hal_executable_environment_v0_t");
+  assert(type && "environment type must be defined by ConvertToLLVM");
+  return type;
+}
+
 // %struct.anon = type { i32, i32, i32 }
 // %union.iree_hal_vec3_t = type { %struct.anon }
 static llvm::StructType *makeVec3Type(llvm::LLVMContext &context) {
@@ -75,14 +85,7 @@
 }
 
 // %struct.iree_hal_executable_dispatch_state_v0_t = type {
-//   %union.iree_hal_vec3_t,
-//   %union.iree_hal_vec3_t,
-//   i64,
-//   i32*,
-//   i64,
-//   i8**,
-//   i64*,
-//   %struct.iree_hal_executable_import_table_v0_t*
+//   ...
 // }
 static llvm::StructType *makeDispatchStateType(llvm::LLVMContext &context) {
   auto *type = llvm::StructType::getTypeByName(
@@ -241,16 +244,16 @@
 llvm::Function *LibraryBuilder::build(StringRef queryFuncName) {
   auto &context = module->getContext();
   auto *i32Type = llvm::IntegerType::getInt32Ty(context);
-  auto *ptrType = llvm::Type::getInt8PtrTy(context);
+  auto *environmentType = makeEnvironmentType(context)->getPointerTo();
   auto *libraryHeaderType = makeLibraryHeaderType(context);
 
   // %struct.iree_hal_executable_library_header_t**
-  // @iree_hal_library_query(i32, void*)
+  // @iree_hal_library_query(i32, %struct.iree_hal_executable_environment_v0_t*)
   auto *queryFuncType =
       llvm::FunctionType::get(libraryHeaderType->getPointerTo(),
                               {
                                   i32Type,
-                                  ptrType,
+                                  environmentType,
                               },
                               /*isVarArg=*/false);
   auto *func =
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/StaticLibraryGenerator.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/StaticLibraryGenerator.cpp
index 871e5f6..bc6bd70 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/StaticLibraryGenerator.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/StaticLibraryGenerator.cpp
@@ -54,7 +54,8 @@
                                   const std::string &query_function_name) {
   os << "const iree_hal_executable_library_header_t**\n"
      << query_function_name << "(\n"
-     << "iree_hal_executable_library_version_t max_version, void* reserved);\n";
+     << "iree_hal_executable_library_version_t max_version, const "
+        "iree_hal_executable_environment_v0_t* environment);\n";
 }
 
 static void generateSuffix(llvm::raw_ostream &os,
diff --git a/iree/hal/local/BUILD b/iree/hal/local/BUILD
index 1d1b786..9f98151 100644
--- a/iree/hal/local/BUILD
+++ b/iree/hal/local/BUILD
@@ -17,6 +17,19 @@
 )
 
 cc_library(
+    name = "executable_environment",
+    srcs = ["executable_environment.c"],
+    hdrs = ["executable_environment.h"],
+    deps = [
+        ":executable_library",
+        "//iree/base",
+        "//iree/base:tracing",
+        "//iree/base/internal:cpu",
+        "//iree/hal",
+    ],
+)
+
+cc_library(
     name = "executable_library",
     hdrs = ["executable_library.h"],
 )
@@ -25,6 +38,7 @@
     name = "executable_library_benchmark",
     srcs = ["executable_library_benchmark.c"],
     deps = [
+        ":executable_environment",
         ":executable_library",
         ":local",
         "//iree/base",
@@ -44,9 +58,10 @@
         "executable_library_test.c",
     ],
     deps = [
+        ":executable_environment",
+        ":executable_library",
         "//iree/base",
         "//iree/base:core_headers",
-        "//iree/hal/local:executable_library",
     ],
 )
 
@@ -71,6 +86,7 @@
         "local_executable_layout.h",
     ],
     deps = [
+        ":executable_environment",
         ":executable_library",
         "//iree/base",
         "//iree/base:core_headers",
@@ -145,6 +161,7 @@
         "task_semaphore.h",
     ],
     deps = [
+        ":executable_environment",
         ":executable_library",
         ":local",
         "//iree/base",
diff --git a/iree/hal/local/CMakeLists.txt b/iree/hal/local/CMakeLists.txt
index 0befb9c..ef600b2 100644
--- a/iree/hal/local/CMakeLists.txt
+++ b/iree/hal/local/CMakeLists.txt
@@ -12,6 +12,22 @@
 
 iree_cc_library(
   NAME
+    executable_environment
+  HDRS
+    "executable_environment.h"
+  SRCS
+    "executable_environment.c"
+  DEPS
+    ::executable_library
+    iree::base
+    iree::base::internal::cpu
+    iree::base::tracing
+    iree::hal
+  PUBLIC
+)
+
+iree_cc_library(
+  NAME
     executable_library
   HDRS
     "executable_library.h"
@@ -24,6 +40,7 @@
   SRCS
     "executable_library_benchmark.c"
   DEPS
+    ::executable_environment
     ::executable_library
     ::local
     iree::base
@@ -43,9 +60,10 @@
     "executable_library_demo.h"
     "executable_library_test.c"
   DEPS
+    ::executable_environment
+    ::executable_library
     iree::base
     iree::base::core_headers
-    iree::hal::local::executable_library
 )
 
 iree_cc_library(
@@ -68,6 +86,7 @@
     "local_executable_cache.c"
     "local_executable_layout.c"
   DEPS
+    ::executable_environment
     ::executable_library
     iree::base
     iree::base::core_headers
@@ -131,6 +150,7 @@
     "task_queue_state.c"
     "task_semaphore.c"
   DEPS
+    ::executable_environment
     ::executable_library
     ::local
     iree::base
diff --git a/iree/hal/local/elf/BUILD b/iree/hal/local/elf/BUILD
index aacd5f3..a55763b 100644
--- a/iree/hal/local/elf/BUILD
+++ b/iree/hal/local/elf/BUILD
@@ -41,6 +41,7 @@
         ":elf_module",
         "//iree/base",
         "//iree/base:core_headers",
+        "//iree/hal/local:executable_environment",
         "//iree/hal/local:executable_library",
         "//iree/hal/local/elf/testdata:elementwise_mul",
     ],
diff --git a/iree/hal/local/elf/CMakeLists.txt b/iree/hal/local/elf/CMakeLists.txt
index 1c5e254..1350aa3 100644
--- a/iree/hal/local/elf/CMakeLists.txt
+++ b/iree/hal/local/elf/CMakeLists.txt
@@ -37,6 +37,7 @@
     iree::base
     iree::base::core_headers
     iree::hal::local::elf::testdata::elementwise_mul
+    iree::hal::local::executable_environment
     iree::hal::local::executable_library
 )
 
diff --git a/iree/hal/local/elf/elf_module_test_main.c b/iree/hal/local/elf/elf_module_test_main.c
index 8e87150..d420a94 100644
--- a/iree/hal/local/elf/elf_module_test_main.c
+++ b/iree/hal/local/elf/elf_module_test_main.c
@@ -7,6 +7,7 @@
 #include "iree/base/api.h"
 #include "iree/base/target_platform.h"
 #include "iree/hal/local/elf/elf_module.h"
+#include "iree/hal/local/executable_environment.h"
 #include "iree/hal/local/executable_library.h"
 
 // ELF modules for various platforms embedded in the binary:
@@ -60,6 +61,10 @@
   IREE_RETURN_IF_ERROR(iree_elf_module_initialize_from_memory(
       file_data, &import_table, iree_allocator_system(), &module));
 
+  iree_hal_executable_environment_v0_t environment;
+  iree_hal_executable_environment_initialize(iree_allocator_system(),
+                                             &environment);
+
   void* query_fn_ptr = NULL;
   IREE_RETURN_IF_ERROR(iree_elf_module_lookup_export(
       &module, IREE_HAL_EXECUTABLE_LIBRARY_EXPORT_NAME, &query_fn_ptr));
@@ -71,7 +76,7 @@
   library.header =
       (const iree_hal_executable_library_header_t**)iree_elf_call_p_ip(
           query_fn_ptr, IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
-          /*reserved=*/NULL);
+          &environment);
   if (library.header == NULL) {
     return iree_make_status(IREE_STATUS_NOT_FOUND, "library header is empty");
   }
@@ -117,6 +122,8 @@
   dispatch_state.binding_count = 1;
   dispatch_state.binding_lengths = binding_lengths;
   dispatch_state.binding_ptrs = binding_ptrs;
+  dispatch_state.processor_id = iree_cpu_query_processor_id();
+  dispatch_state.environment = &environment;
   iree_hal_vec3_t workgroup_id = {{0, 0, 0}};
   void* local_memory = NULL;
   int ret = iree_elf_call_i_ppp((const void*)library.v0->exports.ptrs[0],
diff --git a/iree/hal/local/executable_environment.c b/iree/hal/local/executable_environment.c
new file mode 100644
index 0000000..cebe4e2
--- /dev/null
+++ b/iree/hal/local/executable_environment.c
@@ -0,0 +1,40 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/local/executable_environment.h"
+
+#include "iree/base/tracing.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_processor_*_t
+//===----------------------------------------------------------------------===//
+
+void iree_hal_processor_query(iree_allocator_t temp_allocator,
+                              iree_hal_processor_v0_t* out_processor) {
+  IREE_ASSERT_ARGUMENT(out_processor);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  memset(out_processor, 0, sizeof(*out_processor));
+
+  // TODO(benvanik): define processor features we want to query for each arch.
+  // This needs to be baked into the executable library API and made consistent
+  // with the compiler side producing the executables that access it.
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_environment_*_t
+//===----------------------------------------------------------------------===//
+
+void iree_hal_executable_environment_initialize(
+    iree_allocator_t temp_allocator,
+    iree_hal_executable_environment_v0_t* out_environment) {
+  IREE_ASSERT_ARGUMENT(out_environment);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  memset(out_environment, 0, sizeof(*out_environment));
+  iree_hal_processor_query(temp_allocator, &out_environment->processor);
+  IREE_TRACE_ZONE_END(z0);
+}
diff --git a/iree/hal/local/executable_environment.h b/iree/hal/local/executable_environment.h
new file mode 100644
index 0000000..b4d23ca
--- /dev/null
+++ b/iree/hal/local/executable_environment.h
@@ -0,0 +1,47 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_LOCAL_EXECUTABLE_ENVIRONMENT_H_
+#define IREE_HAL_LOCAL_EXECUTABLE_ENVIRONMENT_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/internal/cpu.h"
+#include "iree/hal/api.h"
+#include "iree/hal/local/executable_library.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+//===----------------------------------------------------------------------===//
+// iree_hal_processor_*_t
+//===----------------------------------------------------------------------===//
+
+// Queries the current processor information and writes it to |out_processor|.
+// |temp_allocator| may be used for temporary allocations required while
+// querying. If the processor cannot be queried then |out_processor| will be
+// zeroed.
+void iree_hal_processor_query(iree_allocator_t temp_allocator,
+                              iree_hal_processor_v0_t* out_processor);
+
+//===----------------------------------------------------------------------===//
+// iree_hal_executable_environment_*_t
+//===----------------------------------------------------------------------===//
+
+// Initializes |out_environment| to the default empty environment.
+// No imports will be available unless overridden during loading.
+// |temp_allocator| may be used for temporary allocations during initialization.
+void iree_hal_executable_environment_initialize(
+    iree_allocator_t temp_allocator,
+    iree_hal_executable_environment_v0_t* out_environment);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_HAL_LOCAL_EXECUTABLE_ENVIRONMENT_H_
diff --git a/iree/hal/local/executable_library.h b/iree/hal/local/executable_library.h
index 86c4a4e..bc82857 100644
--- a/iree/hal/local/executable_library.h
+++ b/iree/hal/local/executable_library.h
@@ -8,7 +8,7 @@
 #define IREE_HAL_LOCAL_EXECUTABLE_LIBRARY_H_
 
 // NOTE: this file is designed to be a standalone header: it is embedded in the
-// compiler and must not take any dependences on the runtime HAL code.
+// compiler and must not take any dependencies on the runtime HAL code.
 // Changes here will require changes to the compiler and must be versioned as if
 // this was a schema: backwards-incompatible changes require version bumps or
 // the ability to feature-detect at runtime.
@@ -75,6 +75,9 @@
 // Versioning and interface querying
 //===----------------------------------------------------------------------===//
 
+typedef struct iree_hal_executable_environment_v0_t
+    iree_hal_executable_environment_v0_t;
+
 // Known valid version values.
 typedef enum iree_hal_executable_library_version_e {
   // iree_hal_executable_library_v0_t is used as the API communication
@@ -110,12 +113,21 @@
 } iree_hal_executable_library_header_t;
 
 // Exported function from dynamic libraries for querying library information.
+//
 // The provided |max_version| is the maximum version the caller supports;
 // callees must return NULL if their lowest available version is greater
 // than the max version supported by the caller.
+//
+// The provided |environment| field contains information about the hosting
+// execution environment that the executable may use to specialize its
+// implementation, such as using specific imports or exporting
+// architecture-specific dispatch routines. Some environmental properties may
+// change per-invocation such as the CPU info when performing dispatches on
+// heterogenous processors that may change over the lifetime of the program.
 typedef const iree_hal_executable_library_header_t** (
     *iree_hal_executable_library_query_fn_t)(
-    iree_hal_executable_library_version_t max_version, void* reserved);
+    iree_hal_executable_library_version_t max_version,
+    const iree_hal_executable_environment_v0_t* environment);
 
 // Function name exported from dynamic libraries (pass to dlsym).
 #define IREE_HAL_EXECUTABLE_LIBRARY_EXPORT_NAME \
@@ -194,6 +206,57 @@
   const char* const* symbols;
 } iree_hal_executable_import_table_v0_t;
 
+// Maximum number of data fields in iree_hal_processor_v0_t.
+#define IREE_HAL_PROCESSOR_DATA_CAPACITY_V0 8
+
+// Architecture-specific CPU information available to executables.
+// This encodes zero or more fields of opaque processor data.
+// The intent is that this structure can be put in .rodata when there are no
+// runtime features that need to be queried.
+//
+// The format of the data is architecture-specific as by construction no value
+// will ever be used in a compiled binary from another architecture. This
+// allows us to simplify this interface as we can't for example load the same
+// executable library for both aarch64 on riscv32 and don't need to normalize
+// any of the fields across them both.
+typedef struct iree_hal_processor_v0_t {
+  // Opaque architecture-specific encoding in 64-bit words.
+  // This may represent a fixed-length data structure, a series of hardware
+  // registers, or key-value pairs.
+  //
+  // The contents are opaque here as to support out-of-tree architectures. The
+  // runtime code deriving the identifier/flags and providing it here is losely
+  // coupled with the compiler code emitting checks based on the identifier and
+  // only those two places ever need to change.
+  uint64_t data[IREE_HAL_PROCESSOR_DATA_CAPACITY_V0];
+} iree_hal_processor_v0_t;
+static_assert(sizeof(iree_hal_processor_v0_t) % sizeof(uint64_t) == 0,
+              "8-byte alignment required");
+
+// Defines the environment in which the executable is being used.
+// Executables only have access to the information in this structure and must
+// make all decisions based on it; this ensures executables are portable across
+// operating environments (Linux, Mac, bare-metal, web, etc) by not having
+// platform-specific syscalls and register query emulation.
+typedef struct iree_hal_executable_environment_v0_t {
+  // Specialization constants available to the executable, if any.
+  // Contains as many as declared in the library header.
+  const uint32_t* constants;
+
+  // Thunk function for calling imports. All calls must be made through this.
+  iree_hal_executable_import_thunk_v0_t import_thunk;
+  // Optional imported functions available for use within the executable.
+  // Contains one entry per imported function. If an import was marked as weak
+  // then the corresponding entry may be NULL.
+  const iree_hal_executable_import_v0_t* imports;
+
+  // Optional architecture-specific CPU information.
+  // In heterogenous processors this may represent any of the subarchitecture
+  // types as it is derived from the core the calling thread is scheduled on.
+  // Will be all zeros if unavailable.
+  iree_hal_processor_v0_t processor;
+} iree_hal_executable_environment_v0_t;
+
 typedef union iree_hal_vec3_t {
   struct {
     uint32_t x;
@@ -227,12 +290,12 @@
   // The length of each binding in bytes, 1:1 with |binding_ptrs|.
   const size_t* binding_lengths;
 
-  // Thunk function for calling imports. All calls must be made through this.
-  iree_hal_executable_import_thunk_v0_t import_thunk;
-  // Optional imported functions available for use within the executable.
-  // Contains one entry per imported function. If an import was marked as weak
-  // then the corresponding entry may be NULL.
-  const iree_hal_executable_import_v0_t* imports;
+  // Logical processor identifier used to index into processor info fields.
+  // Depending on the implementation this may be an ordinal, a bitfield, or an
+  // opaque unique identifier.
+  uint32_t processor_id;
+  // Optional executable environment information.
+  const iree_hal_executable_environment_v0_t* environment;
 } iree_hal_executable_dispatch_state_v0_t;
 
 // Function signature of exported executable entry points.
diff --git a/iree/hal/local/executable_library_benchmark.c b/iree/hal/local/executable_library_benchmark.c
index 1002f4a..aecb746 100644
--- a/iree/hal/local/executable_library_benchmark.c
+++ b/iree/hal/local/executable_library_benchmark.c
@@ -300,8 +300,7 @@
       .binding_count = dispatch_params.binding_count,
       .binding_ptrs = binding_ptrs,
       .binding_lengths = binding_lengths,
-      .import_thunk = NULL,  // not yet implemented
-      .imports = NULL,       // not yet implemented
+      .environment = &local_executable->environment,
   };
 
   // Execute benchmark the workgroup invocation.
diff --git a/iree/hal/local/executable_library_demo.c b/iree/hal/local/executable_library_demo.c
index baa9cd7..eee0c20 100644
--- a/iree/hal/local/executable_library_demo.c
+++ b/iree/hal/local/executable_library_demo.c
@@ -105,7 +105,8 @@
 // example, an executable may want to swap out a few entry points to an
 // architecture-specific version.
 const iree_hal_executable_library_header_t** demo_executable_library_query(
-    iree_hal_executable_library_version_t max_version, void* reserved) {
+    iree_hal_executable_library_version_t max_version,
+    const iree_hal_executable_environment_v0_t* environment) {
   return max_version <= 0
              ? (const iree_hal_executable_library_header_t**)&library
              : NULL;
diff --git a/iree/hal/local/executable_library_demo.h b/iree/hal/local/executable_library_demo.h
index ce99ab1..f458768 100644
--- a/iree/hal/local/executable_library_demo.h
+++ b/iree/hal/local/executable_library_demo.h
@@ -43,7 +43,8 @@
 //       bindings: 0
 //
 const iree_hal_executable_library_header_t** demo_executable_library_query(
-    iree_hal_executable_library_version_t max_version, void* reserved);
+    iree_hal_executable_library_version_t max_version,
+    const iree_hal_executable_environment_v0_t* environment);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/iree/hal/local/executable_library_test.c b/iree/hal/local/executable_library_test.c
index 8c3cf4a..e15e94a 100644
--- a/iree/hal/local/executable_library_test.c
+++ b/iree/hal/local/executable_library_test.c
@@ -10,6 +10,7 @@
 #include <string.h>
 
 #include "iree/base/api.h"
+#include "iree/hal/local/executable_environment.h"
 #include "iree/hal/local/executable_library_demo.h"
 
 // Demonstration of the HAL-side of the iree_hal_executable_library_t ABI.
@@ -27,6 +28,11 @@
 //
 // See iree/hal/local/executable_library.h for more information.
 int main(int argc, char** argv) {
+  // Default environment.
+  iree_hal_executable_environment_v0_t environment;
+  iree_hal_executable_environment_initialize(iree_allocator_system(),
+                                             &environment);
+
   // Query the library header at the requested version.
   // The query call in this example is going into the handwritten demo code
   // but could be targeted at generated files or runtime-loaded shared objects.
@@ -35,7 +41,7 @@
     const iree_hal_executable_library_v0_t* v0;
   } library;
   library.header = demo_executable_library_query(
-      IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION, /*reserved=*/NULL);
+      IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION, &environment);
   const iree_hal_executable_library_header_t* header = *library.header;
   IREE_ASSERT_NE(header, NULL, "version may not have matched");
   IREE_ASSERT_LE(
@@ -82,8 +88,8 @@
       .binding_count = IREE_ARRAYSIZE(binding_ptrs),
       .binding_ptrs = binding_ptrs,
       .binding_lengths = binding_lengths,
-      .import_thunk = NULL,  // not yet implemented
-      .imports = NULL,       // not yet implemented
+      .processor_id = iree_cpu_query_processor_id(),
+      .environment = &environment,
   };
   for (uint32_t z = 0; z < dispatch_state.workgroup_count.z; ++z) {
     for (uint32_t y = 0; y < dispatch_state.workgroup_count.y; ++y) {
diff --git a/iree/hal/local/executable_loader.c b/iree/hal/local/executable_loader.c
index 1630b0c..0d90ec1 100644
--- a/iree/hal/local/executable_loader.c
+++ b/iree/hal/local/executable_loader.c
@@ -6,8 +6,6 @@
 
 #include "iree/hal/local/executable_loader.h"
 
-#include "iree/base/api.h"
-
 iree_status_t iree_hal_executable_import_provider_resolve(
     const iree_hal_executable_import_provider_t import_provider,
     iree_string_view_t symbol_name, void** out_fn_ptr) {
diff --git a/iree/hal/local/executable_loader.h b/iree/hal/local/executable_loader.h
index b370c06..6092948 100644
--- a/iree/hal/local/executable_loader.h
+++ b/iree/hal/local/executable_loader.h
@@ -107,12 +107,12 @@
     iree_hal_executable_caching_mode_t caching_mode,
     iree_string_view_t executable_format);
 
-// Tries loading the |executable_data| provided in the given
-// |executable_format|. May fail even if the executable is valid if it requires
-// features not supported by the current host or runtime (such as available
-// architectures, imports, etc).
+// Tries loading the executable data provided in the given format.
+// May fail even if the executable is valid if it requires features not
+// supported by the current host or runtime (such as available architectures,
+// imports, etc).
 //
-// Depending on loader ability the |caching_mode| is used to enable certain
+// Depending on loader ability the caching_mode is used to enable certain
 // features such as instrumented profiling. Not all formats support these
 // features and cooperation of both the compiler producing the executables and
 // the runtime loader and system are required.
diff --git a/iree/hal/local/inline_command_buffer.c b/iree/hal/local/inline_command_buffer.c
index 07d95c5..a64ef51 100644
--- a/iree/hal/local/inline_command_buffer.c
+++ b/iree/hal/local/inline_command_buffer.c
@@ -14,6 +14,7 @@
 #include "iree/base/internal/fpu_state.h"
 #include "iree/base/internal/math.h"
 #include "iree/base/tracing.h"
+#include "iree/hal/local/executable_environment.h"
 #include "iree/hal/local/executable_library.h"
 #include "iree/hal/local/local_descriptor_set_layout.h"
 #include "iree/hal/local/local_executable.h"
@@ -56,6 +57,9 @@
     // Individual dispatches must populate the dynamically changing fields like
     // push_constant_count and binding_count.
     iree_hal_executable_dispatch_state_v0_t dispatch_state;
+
+    // An opaque tag used to reduce the cost of processor ID queries.
+    iree_cpu_processor_tag_t processor_tag;
   } state;
 } iree_hal_inline_command_buffer_t;
 
@@ -153,11 +157,23 @@
 static iree_status_t iree_hal_inline_command_buffer_flush_tasks(
     iree_hal_inline_command_buffer_t* command_buffer);
 
+// Updates the cached processor ID field in the command buffer.
+static void iree_hal_inline_command_buffer_update_processor_id(
+    iree_hal_inline_command_buffer_t* command_buffer) {
+  iree_cpu_requery_processor_id(
+      &command_buffer->state.processor_tag,
+      &command_buffer->state.dispatch_state.processor_id);
+}
+
 static iree_status_t iree_hal_inline_command_buffer_begin(
     iree_hal_command_buffer_t* base_command_buffer) {
   iree_hal_inline_command_buffer_t* command_buffer =
       iree_hal_inline_command_buffer_cast(base_command_buffer);
   iree_hal_inline_command_buffer_reset(command_buffer);
+
+  // Query the processor ID we start out on. We may update it during execution.
+  iree_hal_inline_command_buffer_update_processor_id(command_buffer);
+
   return iree_ok_status();
 }
 
@@ -395,10 +411,16 @@
                 IREE_HAL_WORKGROUP_LOCAL_MEMORY_PAGE_SIZE
           : 0;
 
+  // Update the ID of the processor we are running on.
+  // We don't know how much time has passed since we last updated as we are
+  // running inline with the user program; if we knew we were going to be
+  // handling a batch of dispatches we could reduce the amount of times we call
+  // this - but that's what the task system is for.
+  iree_hal_inline_command_buffer_update_processor_id(command_buffer);
+
   iree_hal_executable_dispatch_state_v0_t* dispatch_state =
       &command_buffer->state.dispatch_state;
-  dispatch_state->import_thunk = local_executable->import_thunk;
-  dispatch_state->imports = local_executable->imports;
+  dispatch_state->environment = &local_executable->environment;
 
   // TODO(benvanik): expose on API or keep fixed on executable.
   dispatch_state->workgroup_size.x = 1;
diff --git a/iree/hal/local/loaders/BUILD b/iree/hal/local/loaders/BUILD
index 5ed7cb3..a780c38 100644
--- a/iree/hal/local/loaders/BUILD
+++ b/iree/hal/local/loaders/BUILD
@@ -45,6 +45,7 @@
         "//iree/base:tracing",
         "//iree/hal",
         "//iree/hal/local",
+        "//iree/hal/local:executable_environment",
         "//iree/hal/local:executable_library",
     ],
 )
diff --git a/iree/hal/local/loaders/CMakeLists.txt b/iree/hal/local/loaders/CMakeLists.txt
index 49b3214..a2a1c3d 100644
--- a/iree/hal/local/loaders/CMakeLists.txt
+++ b/iree/hal/local/loaders/CMakeLists.txt
@@ -42,6 +42,7 @@
     iree::base::tracing
     iree::hal
     iree::hal::local
+    iree::hal::local::executable_environment
     iree::hal::local::executable_library
   DEFINES
     "IREE_HAL_HAVE_STATIC_LIBRARY_LOADER=1"
diff --git a/iree/hal/local/loaders/embedded_library_loader.c b/iree/hal/local/loaders/embedded_library_loader.c
index 689a0ad..bdffc70 100644
--- a/iree/hal/local/loaders/embedded_library_loader.c
+++ b/iree/hal/local/loaders/embedded_library_loader.c
@@ -53,7 +53,7 @@
   executable->library.header =
       (const iree_hal_executable_library_header_t**)iree_elf_call_p_ip(
           query_fn, IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
-          /*reserved=*/NULL);
+          &executable->base.environment);
   if (!executable->library.header) {
     return iree_make_status(
         IREE_STATUS_FAILED_PRECONDITION,
@@ -98,15 +98,16 @@
 
   // All calls from the loaded ELF route through our thunk function so that we
   // can adapt to ABI differences.
-  executable->base.import_thunk =
+  executable->base.environment.import_thunk =
       (iree_hal_executable_import_thunk_v0_t)iree_elf_thunk_i_p;
 
   // Allocate storage for the imports.
   IREE_RETURN_AND_END_ZONE_IF_ERROR(
-      z0, iree_allocator_malloc(
-              executable->base.host_allocator,
-              import_table->count * sizeof(*executable->base.imports),
-              (void**)&executable->base.imports));
+      z0,
+      iree_allocator_malloc(
+          executable->base.host_allocator,
+          import_table->count * sizeof(*executable->base.environment.imports),
+          (void**)&executable->base.environment.imports));
 
   // Try to resolve each import.
   // NOTE: imports are sorted alphabetically and if we cared we could use this
@@ -117,7 +118,7 @@
         z0,
         iree_hal_executable_import_provider_resolve(
             import_provider, iree_make_cstring_view(import_table->symbols[i]),
-            (void**)&executable->base.imports[i]));
+            (void**)&executable->base.environment.imports[i]));
   }
 
   IREE_TRACE_ZONE_END(z0);
@@ -198,8 +199,9 @@
 
   iree_elf_module_deinitialize(&executable->module);
 
-  if (executable->base.imports != NULL) {
-    iree_allocator_free(host_allocator, (void*)executable->base.imports);
+  if (executable->base.environment.imports != NULL) {
+    iree_allocator_free(host_allocator,
+                        (void*)executable->base.environment.imports);
   }
 
   iree_hal_local_executable_deinitialize(
diff --git a/iree/hal/local/loaders/static_library_loader.c b/iree/hal/local/loaders/static_library_loader.c
index 1e7819c..34f934b 100644
--- a/iree/hal/local/loaders/static_library_loader.c
+++ b/iree/hal/local/loaders/static_library_loader.c
@@ -13,6 +13,7 @@
 
 #include "iree/base/tracing.h"
 #include "iree/hal/api.h"
+#include "iree/hal/local/executable_environment.h"
 #include "iree/hal/local/local_executable.h"
 #include "iree/hal/local/local_executable_layout.h"
 
@@ -166,30 +167,15 @@
 
 iree_status_t iree_hal_static_library_loader_create(
     iree_host_size_t library_count,
-    const iree_hal_executable_library_header_t** const* libraries,
+    const iree_hal_executable_library_query_fn_t* library_query_fns,
     iree_hal_executable_import_provider_t import_provider,
     iree_allocator_t host_allocator,
     iree_hal_executable_loader_t** out_executable_loader) {
+  IREE_ASSERT_ARGUMENT(!library_count || library_query_fns);
   IREE_ASSERT_ARGUMENT(out_executable_loader);
   *out_executable_loader = NULL;
   IREE_TRACE_ZONE_BEGIN(z0);
 
-  // Verify the libraries provided all match our expected version.
-  // It's rare they won't, however static libraries generated with a newer
-  // version of the IREE compiler that are then linked with an older version of
-  // the runtime are difficult to spot otherwise.
-  for (iree_host_size_t i = 0; i < library_count; ++i) {
-    const iree_hal_executable_library_header_t* header = *libraries[i];
-    if (header->version > IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION) {
-      IREE_TRACE_ZONE_END(z0);
-      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
-                              "executable does not support this version of the "
-                              "runtime (executable: %d, runtime: %d)",
-                              header->version,
-                              IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION);
-    }
-  }
-
   iree_hal_static_library_loader_t* executable_loader = NULL;
   iree_host_size_t total_size =
       sizeof(*executable_loader) +
@@ -202,9 +188,45 @@
         &executable_loader->base);
     executable_loader->host_allocator = host_allocator;
     executable_loader->library_count = library_count;
-    memcpy((void*)executable_loader->libraries, libraries,
-           sizeof(libraries[0]) * library_count);
+
+    // Default environment to enable initialization.
+    iree_hal_executable_environment_v0_t environment;
+    iree_hal_executable_environment_initialize(host_allocator, &environment);
+
+    // Query and verify the libraries provided all match our expected version.
+    // It's rare they won't, however static libraries generated with a newer
+    // version of the IREE compiler that are then linked with an older version
+    // of the runtime are difficult to spot otherwise.
+    for (iree_host_size_t i = 0; i < library_count; ++i) {
+      const iree_hal_executable_library_header_t* const* header_ptr =
+          library_query_fns[i](IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
+                               &environment);
+      if (!header_ptr) {
+        status = iree_make_status(
+            IREE_STATUS_UNAVAILABLE,
+            "failed to query library header for runtime version %d",
+            IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION);
+        break;
+      }
+      const iree_hal_executable_library_header_t* header = *header_ptr;
+      IREE_TRACE_ZONE_APPEND_TEXT(z0, header->name);
+      if (header->version > IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION) {
+        status = iree_make_status(
+            IREE_STATUS_FAILED_PRECONDITION,
+            "executable does not support this version of the "
+            "runtime (executable: %d, runtime: %d)",
+            header->version, IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION);
+        break;
+      }
+      memcpy((void*)&executable_loader->libraries[i], &header_ptr,
+             sizeof(header_ptr));
+    }
+  }
+
+  if (iree_status_is_ok(status)) {
     *out_executable_loader = (iree_hal_executable_loader_t*)executable_loader;
+  } else {
+    iree_allocator_free(host_allocator, executable_loader);
   }
 
   IREE_TRACE_ZONE_END(z0);
diff --git a/iree/hal/local/loaders/static_library_loader.h b/iree/hal/local/loaders/static_library_loader.h
index 36487d8..12648e9 100644
--- a/iree/hal/local/loaders/static_library_loader.h
+++ b/iree/hal/local/loaders/static_library_loader.h
@@ -35,7 +35,7 @@
 // within and across loaders will result in undefined behavior.
 iree_status_t iree_hal_static_library_loader_create(
     iree_host_size_t library_count,
-    const iree_hal_executable_library_header_t** const* libraries,
+    const iree_hal_executable_library_query_fn_t* library_query_fns,
     iree_hal_executable_import_provider_t import_provider,
     iree_allocator_t host_allocator,
     iree_hal_executable_loader_t** out_executable_loader);
diff --git a/iree/hal/local/loaders/system_library_loader.c b/iree/hal/local/loaders/system_library_loader.c
index d327eaa..f77f432 100644
--- a/iree/hal/local/loaders/system_library_loader.c
+++ b/iree/hal/local/loaders/system_library_loader.c
@@ -147,7 +147,8 @@
 
   // Query for a compatible version of the library.
   executable->library.header =
-      query_fn(IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION, /*reserved=*/NULL);
+      query_fn(IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION,
+               &executable->base.environment);
   if (!executable->library.header) {
     return iree_make_status(
         IREE_STATUS_FAILED_PRECONDITION,
@@ -209,14 +210,16 @@
   IREE_TRACE_ZONE_BEGIN(z0);
 
   // Pass all imports right through.
-  executable->base.import_thunk = iree_hal_system_executable_import_thunk_v0;
+  executable->base.environment.import_thunk =
+      iree_hal_system_executable_import_thunk_v0;
 
   // Allocate storage for the imports.
   IREE_RETURN_AND_END_ZONE_IF_ERROR(
-      z0, iree_allocator_malloc(
-              executable->base.host_allocator,
-              import_table->count * sizeof(*executable->base.imports),
-              (void**)&executable->base.imports));
+      z0,
+      iree_allocator_malloc(
+          executable->base.host_allocator,
+          import_table->count * sizeof(*executable->base.environment.imports),
+          (void**)&executable->base.environment.imports));
 
   // Try to resolve each import.
   // NOTE: imports are sorted alphabetically and if we cared we could use this
@@ -227,7 +230,7 @@
         z0,
         iree_hal_executable_import_provider_resolve(
             import_provider, iree_make_cstring_view(import_table->symbols[i]),
-            (void**)&executable->base.imports[i]));
+            (void**)&executable->base.environment.imports[i]));
   }
 
   IREE_TRACE_ZONE_END(z0);
@@ -302,6 +305,11 @@
 
   iree_dynamic_library_release(executable->handle);
 
+  if (executable->base.environment.imports != NULL) {
+    iree_allocator_free(host_allocator,
+                        (void*)executable->base.environment.imports);
+  }
+
   iree_hal_local_executable_deinitialize(
       (iree_hal_local_executable_t*)base_executable);
   iree_allocator_free(host_allocator, executable);
diff --git a/iree/hal/local/local_executable.c b/iree/hal/local/local_executable.c
index c98cbc2..cdcf828 100644
--- a/iree/hal/local/local_executable.c
+++ b/iree/hal/local/local_executable.c
@@ -7,6 +7,7 @@
 #include "iree/hal/local/local_executable.h"
 
 #include "iree/base/tracing.h"
+#include "iree/hal/local/executable_environment.h"
 
 void iree_hal_local_executable_initialize(
     const iree_hal_local_executable_vtable_t* vtable,
@@ -29,9 +30,9 @@
   // Function attributes are optional and populated by the parent type.
   out_base_executable->dispatch_attrs = NULL;
 
-  // Imports will be provided by the parent type, if needed.
-  out_base_executable->import_thunk = NULL;
-  out_base_executable->imports = NULL;
+  // Default environment with no imports assigned.
+  iree_hal_executable_environment_initialize(host_allocator,
+                                             &out_base_executable->environment);
 }
 
 void iree_hal_local_executable_deinitialize(
diff --git a/iree/hal/local/local_executable.h b/iree/hal/local/local_executable.h
index c55c867..7993295 100644
--- a/iree/hal/local/local_executable.h
+++ b/iree/hal/local/local_executable.h
@@ -28,12 +28,8 @@
   // of memory required by the function.
   const iree_hal_executable_dispatch_attrs_v0_t* dispatch_attrs;
 
-  // Thunk function for calling imports. All calls must be made through this.
-  iree_hal_executable_import_thunk_v0_t import_thunk;
-  // Optional imported functions available for use within the executable.
-  // Contains one entry per imported function. If an import was marked as weak
-  // then the corresponding entry may be NULL.
-  const iree_hal_executable_import_v0_t* imports;
+  // Execution environment.
+  iree_hal_executable_environment_v0_t environment;
 } iree_hal_local_executable_t;
 
 typedef struct iree_hal_local_executable_vtable_t {
diff --git a/iree/hal/local/task_command_buffer.c b/iree/hal/local/task_command_buffer.c
index 010a480..63efb51 100644
--- a/iree/hal/local/task_command_buffer.c
+++ b/iree/hal/local/task_command_buffer.c
@@ -13,6 +13,7 @@
 
 #include "iree/base/api.h"
 #include "iree/base/tracing.h"
+#include "iree/hal/local/executable_environment.h"
 #include "iree/hal/local/executable_library.h"
 #include "iree/hal/local/local_descriptor_set_layout.h"
 #include "iree/hal/local/local_executable.h"
@@ -838,11 +839,8 @@
   state.binding_lengths = (size_t*)cmd_ptr;
   cmd_ptr += cmd->binding_count * sizeof(*state.binding_lengths);
 
-  // When we support imports we can populate those here based on what the
-  // executable declared (as each executable may import a unique set of
-  // functions).
-  state.import_thunk = cmd->executable->import_thunk;
-  state.imports = cmd->executable->imports;
+  state.processor_id = tile_context->processor_id;
+  state.environment = &cmd->executable->environment;
 
   iree_status_t status = iree_hal_local_executable_issue_call(
       cmd->executable, cmd->ordinal, &state,
diff --git a/iree/samples/static_library/static_library_demo.c b/iree/samples/static_library/static_library_demo.c
index 8e1aeec..d787ab3 100644
--- a/iree/samples/static_library/static_library_demo.c
+++ b/iree/samples/static_library/static_library_demo.c
@@ -14,7 +14,8 @@
 
 extern const iree_hal_executable_library_header_t**
 simple_mul_dispatch_0_library_query(
-    iree_hal_executable_library_version_t max_version, void* reserved);
+    iree_hal_executable_library_version_t max_version,
+    const iree_hal_executable_environment_v0_t* environment);
 // A function to create the bytecode or C module.
 extern iree_status_t create_module(iree_vm_module_t** module);
 
@@ -25,25 +26,19 @@
 // released by the caller.
 iree_status_t create_device_with_static_loader(iree_allocator_t host_allocator,
                                                iree_hal_device_t** out_device) {
-  iree_status_t status = iree_ok_status();
-
   // Set paramters for the device created in the next step.
   iree_hal_sync_device_params_t params;
   iree_hal_sync_device_params_initialize(&params);
 
-  // Load the statically embedded library
-  const iree_hal_executable_library_header_t** static_library =
-      simple_mul_dispatch_0_library_query(
-          IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION, /*reserved=*/NULL);
-  const iree_hal_executable_library_header_t** libraries[1] = {static_library};
-
+  // Register the statically linked executable library.
+  const iree_hal_executable_library_query_fn_t libraries[] = {
+      simple_mul_dispatch_0_library_query,
+  };
   iree_hal_executable_loader_t* library_loader = NULL;
-  if (iree_status_is_ok(status)) {
-    status = iree_hal_static_library_loader_create(
-        IREE_ARRAYSIZE(libraries), libraries,
-        iree_hal_executable_import_provider_null(), host_allocator,
-        &library_loader);
-  }
+  iree_status_t status = iree_hal_static_library_loader_create(
+      IREE_ARRAYSIZE(libraries), libraries,
+      iree_hal_executable_import_provider_null(), host_allocator,
+      &library_loader);
 
   // Use the default host allocator for buffer allocations.
   iree_string_view_t identifier = iree_make_cstring_view("sync");
diff --git a/iree/task/BUILD b/iree/task/BUILD
index af417f5..1352b69 100644
--- a/iree/task/BUILD
+++ b/iree/task/BUILD
@@ -73,6 +73,7 @@
         "//iree/base:tracing",
         "//iree/base/internal",
         "//iree/base/internal:atomic_slist",
+        "//iree/base/internal:cpu",
         "//iree/base/internal:event_pool",
         "//iree/base/internal:fpu_state",
         "//iree/base/internal:prng",
diff --git a/iree/task/CMakeLists.txt b/iree/task/CMakeLists.txt
index bdddd8f..1d39c33 100644
--- a/iree/task/CMakeLists.txt
+++ b/iree/task/CMakeLists.txt
@@ -68,6 +68,7 @@
     iree::base::core_headers
     iree::base::internal
     iree::base::internal::atomic_slist
+    iree::base::internal::cpu
     iree::base::internal::event_pool
     iree::base::internal::fpu_state
     iree::base::internal::prng
diff --git a/iree/task/task.c b/iree/task/task.c
index f11bedc..afab9eb 100644
--- a/iree/task/task.c
+++ b/iree/task/task.c
@@ -706,7 +706,8 @@
 }
 
 void iree_task_dispatch_shard_execute(
-    iree_task_dispatch_shard_t* task, iree_byte_span_t worker_local_memory,
+    iree_task_dispatch_shard_t* task, iree_cpu_processor_id_t processor_id,
+    iree_byte_span_t worker_local_memory,
     iree_task_submission_t* pending_submission) {
   IREE_TRACE_ZONE_BEGIN(z0);
 
@@ -751,6 +752,9 @@
   memset(&shard_statistics, 0, sizeof(shard_statistics));
   tile_context.statistics = &shard_statistics;
 
+  // Hint as to which processor we are running on.
+  tile_context.processor_id = processor_id;
+
   // Loop over all tiles until they are all processed.
   const uint32_t tile_count = dispatch_task->tile_count;
   const uint32_t tiles_per_reservation = dispatch_task->tiles_per_reservation;
diff --git a/iree/task/task.h b/iree/task/task.h
index 0dc3958..9ae2290 100644
--- a/iree/task/task.h
+++ b/iree/task/task.h
@@ -14,6 +14,7 @@
 #include "iree/base/api.h"
 #include "iree/base/internal/atomic_slist.h"
 #include "iree/base/internal/atomics.h"
+#include "iree/base/internal/cpu.h"
 #include "iree/base/internal/synchronization.h"
 #include "iree/task/affinity_set.h"
 
@@ -539,8 +540,9 @@
   // Shared statistics counters for the dispatch shard.
   iree_task_dispatch_statistics_t* statistics;
 
-  // TODO(benvanik): cpuid uarch.
-  // TODO(benvanik): per-tile coroutine storage.
+  // Opaque ID of the processor executing the tile.
+  // May be slightly out of date or 0 if the processor could not be queried.
+  iree_cpu_processor_id_t processor_id;
 } iree_task_tile_context_t;
 
 typedef struct iree_task_dispatch_t iree_task_dispatch_t;
diff --git a/iree/task/task_impl.h b/iree/task/task_impl.h
index afb9ac8..ee1b5a3 100644
--- a/iree/task/task_impl.h
+++ b/iree/task/task_impl.h
@@ -111,13 +111,18 @@
 // May block the caller for an indeterminate amount of time and should only be
 // called from threads owned by or donated to the executor.
 //
+// |processor_id| is a guess as to which logical processor the shard is
+// executing on. It may be out of date or 0 if the processor could not be
+// queried.
+//
 // |worker_local_memory| is a block of memory exclusively available to the shard
 // during execution. Contents are undefined both before and after execution.
 //
 // Errors are propagated to the parent scope and the dispatch will fail once
 // all shards have completed.
 void iree_task_dispatch_shard_execute(
-    iree_task_dispatch_shard_t* task, iree_byte_span_t worker_local_memory,
+    iree_task_dispatch_shard_t* task, iree_cpu_processor_id_t processor_id,
+    iree_byte_span_t worker_local_memory,
     iree_task_submission_t* pending_submission);
 
 #ifdef __cplusplus
diff --git a/iree/task/worker.c b/iree/task/worker.c
index a367043..e0c4ea6 100644
--- a/iree/task/worker.c
+++ b/iree/task/worker.c
@@ -37,6 +37,8 @@
   iree_prng_minilcg128_initialize(iree_prng_splitmix64_next(seed_prng),
                                   &out_worker->theft_prng);
   out_worker->local_memory = local_memory;
+  out_worker->processor_id = 0;
+  out_worker->processor_tag = 0;
 
   iree_task_worker_state_t initial_state = IREE_TASK_WORKER_STATE_RUNNING;
   if (executor->scheduling_mode &
@@ -186,9 +188,9 @@
       break;
     }
     case IREE_TASK_TYPE_DISPATCH_SHARD: {
-      iree_task_dispatch_shard_execute((iree_task_dispatch_shard_t*)task,
-                                       worker->local_memory,
-                                       pending_submission);
+      iree_task_dispatch_shard_execute(
+          (iree_task_dispatch_shard_t*)task, worker->processor_id,
+          worker->local_memory, pending_submission);
       break;
     }
     default:
@@ -251,10 +253,20 @@
   return true;  // try again
 }
 
+// Updates the cached processor ID field in the worker.
+static void iree_task_worker_update_processor_id(iree_task_worker_t* worker) {
+  iree_cpu_requery_processor_id(&worker->processor_tag, &worker->processor_id);
+}
+
 // Alternates between pumping ready tasks in the worker queue and waiting
 // for more tasks to arrive. Only returns when the worker has been asked by
 // the executor to exit.
 static void iree_task_worker_pump_until_exit(iree_task_worker_t* worker) {
+  // Initial processor ID assignment. We normally refresh this upon waking from
+  // a wait but it's possible that there's already work pending and we want to
+  // be able to process it with the proper processor ID immediately.
+  iree_task_worker_update_processor_id(worker);
+
   // Pump the thread loop to process more tasks.
   while (true) {
     // If we fail to find any work to do we'll wait at the end of this loop.
@@ -277,6 +289,9 @@
       break;
     }
 
+    // TODO(benvanik): we could try to update the processor ID here before we
+    // begin a new batch of work - assuming it's not too expensive.
+
     iree_task_submission_t pending_submission;
     iree_task_submission_initialize(&pending_submission);
 
@@ -321,6 +336,10 @@
       iree_notification_commit_wait(&worker->wake_notification, wait_token,
                                     IREE_TIME_INFINITE_FUTURE);
       IREE_TRACE_ZONE_END(z_wait);
+
+      // Woke from a wait - query the processor ID in case we migrated during
+      // the sleep.
+      iree_task_worker_update_processor_id(worker);
     }
 
     // Wait completed.
diff --git a/iree/task/worker.h b/iree/task/worker.h
index 1cc2d0f..6e6c435 100644
--- a/iree/task/worker.h
+++ b/iree/task/worker.h
@@ -110,6 +110,15 @@
   // remain valid so that the executor can query its state.
   iree_thread_t* thread;
 
+  // Guess at the current processor ID.
+  // This is updated infrequently as it can be semi-expensive to determine
+  // (on some platforms at least 1 syscall involved). We always update it upon
+  // waking as idle waits are the most likely place the worker will be migrated
+  // across processors.
+  iree_cpu_processor_id_t processor_id;
+  // An opaque tag used to reduce the cost of processor ID queries.
+  iree_cpu_processor_tag_t processor_tag;
+
   // Destructive interference padding between the mailbox and local task queue
   // to ensure that the worker - who is pounding on local_task_queue - doesn't
   // contend with submissions or coordinators dropping new tasks in the mailbox.