Adding --task_worker_spin_us= flag to spin for a bit. The flag allows iree-* tools to override the spin value for benchmarking. The new iree_task_executor_options_t struct consolidates the existing options and exposes the setting for use by hosting applications. We default to not spinning because in general it's not a good thing to do.
diff --git a/experimental/web/sample_static/device_multithreaded.c b/experimental/web/sample_static/device_multithreaded.c index 24ad2f3..63854ee 100644 --- a/experimental/web/sample_static/device_multithreaded.c +++ b/experimental/web/sample_static/device_multithreaded.c
@@ -27,9 +27,9 @@ &library_loader); // Create a task executor. - iree_task_executor_t* executor = NULL; - iree_task_scheduling_mode_t scheduling_mode = 0; - iree_host_size_t worker_local_memory = 0; + iree_task_executor_options_t options; + iree_task_executor_options_initialize(&options); + options.worker_local_memory_size = 0; iree_task_topology_t topology; iree_task_topology_initialize(&topology); iree_task_topology_initialize_from_group_count( @@ -39,9 +39,9 @@ // INITIAL_MEMORY, or setting Emscripten's ALLOW_MEMORY_GROWTH. // iree_task_topology_initialize_from_group_count( // /*group_count=*/emscripten_num_logical_cores(), &topology); + iree_task_executor_t* executor = NULL; if (iree_status_is_ok(status)) { - status = iree_task_executor_create(scheduling_mode, &topology, - worker_local_memory, host_allocator, + status = iree_task_executor_create(options, &topology, host_allocator, &executor); } iree_task_topology_deinitialize(&topology);
diff --git a/runtime/src/iree/base/internal/synchronization.c b/runtime/src/iree/base/internal/synchronization.c index 08518d3..ac0a9d9 100644 --- a/runtime/src/iree/base/internal/synchronization.c +++ b/runtime/src/iree/base/internal/synchronization.c
@@ -612,6 +612,7 @@ bool iree_notification_commit_wait(iree_notification_t* notification, iree_wait_token_t wait_token, + iree_duration_t spin_ns, iree_time_t deadline_ns) { return true; } @@ -667,6 +668,7 @@ bool iree_notification_commit_wait(iree_notification_t* notification, iree_wait_token_t wait_token, + iree_duration_t spin_ns, iree_time_t deadline_ns) { struct timespec abs_ts = { .tv_sec = (time_t)(deadline_ns / 1000000000ull), @@ -769,21 +771,59 @@ return (iree_wait_token_t)(previous_value >> IREE_NOTIFICATION_EPOCH_SHIFT); } +typedef enum iree_notification_result_e { + IREE_NOTIFICATION_RESULT_UNRESOLVED = 0, + IREE_NOTIFICATION_RESULT_RESOLVED, + IREE_NOTIFICATION_RESULT_REJECTED, +} iree_notification_result_t; + +static iree_notification_result_t iree_notification_test_wait_condition( + iree_notification_t* notification, iree_wait_token_t wait_token) { + return (iree_atomic_load_int64(¬ification->value, + iree_memory_order_acquire) >> + IREE_NOTIFICATION_EPOCH_SHIFT) != wait_token + ? IREE_NOTIFICATION_RESULT_RESOLVED + : IREE_NOTIFICATION_RESULT_UNRESOLVED; +} + bool iree_notification_commit_wait(iree_notification_t* notification, iree_wait_token_t wait_token, + iree_duration_t spin_ns, iree_time_t deadline_ns) { - bool result = true; + // Quick check to see if the wait has already succeeded (the epoch advances + // from when it was captured in iree_notification_prepare_wait). + iree_notification_result_t result = + iree_notification_test_wait_condition(notification, wait_token); - // Spin until notified and the epoch increments from what we captured during - // iree_notification_prepare_wait. - while ((iree_atomic_load_int64(¬ification->value, - iree_memory_order_acquire) >> - IREE_NOTIFICATION_EPOCH_SHIFT) == wait_token) { - iree_status_code_t status_code = iree_futex_wait( - iree_notification_epoch_address(notification), wait_token, deadline_ns); - if (status_code != IREE_STATUS_OK) { - result = false; - break; + // If not already reached and spinning is enabled then we'll try that first. + if (result == IREE_NOTIFICATION_RESULT_UNRESOLVED && + spin_ns != IREE_DURATION_ZERO) { + // If spinning we need to compute the absolute deadline that we'll spin + // until (as we may be descheduled while spinning and time may drift). + const iree_time_t spin_deadline_ns = iree_time_now() + spin_ns; + IREE_TRACE_ZONE_BEGIN_NAMED(z0, "iree_notification_commit_wait_spin"); + do { + // Try to be nice to the processor when using SMT. + iree_processor_yield(); + result = iree_notification_test_wait_condition(notification, wait_token); + } while (result == IREE_NOTIFICATION_RESULT_UNRESOLVED && + iree_time_now() < spin_deadline_ns); + IREE_TRACE_ZONE_END(z0); + } + + // If spinning failed let the kernel do what it does ... okish at. + // We loop until notified and the epoch increments from what we captured + // during iree_notification_prepare_wait. + if (deadline_ns != IREE_TIME_INFINITE_PAST) { + while (result == IREE_NOTIFICATION_RESULT_UNRESOLVED) { + iree_status_code_t status_code = + iree_futex_wait(iree_notification_epoch_address(notification), + wait_token, deadline_ns); + if (status_code != IREE_STATUS_OK) { + result = IREE_NOTIFICATION_RESULT_REJECTED; + break; + } + result = iree_notification_test_wait_condition(notification, wait_token); } } @@ -795,7 +835,7 @@ iree_memory_order_acq_rel); SYNC_ASSERT((previous_value & IREE_NOTIFICATION_WAITER_MASK) != 0); - return result; + return result == IREE_NOTIFICATION_RESULT_RESOLVED; } void iree_notification_cancel_wait(iree_notification_t* notification) { @@ -833,6 +873,7 @@ return true; } else { if (!iree_notification_commit_wait(notification, wait_token, + /*spin_ns=*/IREE_DURATION_ZERO, deadline_ns)) { // Wait hit the deadline before we hit the condition. return false;
diff --git a/runtime/src/iree/base/internal/synchronization.h b/runtime/src/iree/base/internal/synchronization.h index 9ac4235..b6ec44b 100644 --- a/runtime/src/iree/base/internal/synchronization.h +++ b/runtime/src/iree/base/internal/synchronization.h
@@ -401,6 +401,9 @@ // is reached. Returns false if the deadline is reached before a notification is // posted. // +// If |spin_ns| is not IREE_DURATION_ZERO the wait _may_ spin for at least the +// specified duration before entering the system wait API. +// // Acts as (at least) a memory_order_acquire operation on the notification // object. This is meant to be paired with iree_notification_post, which is a // memory_order_release operation. This means the following guarantee: @@ -411,6 +414,7 @@ // memory read or write on thread T1. bool iree_notification_commit_wait(iree_notification_t* notification, iree_wait_token_t wait_token, + iree_duration_t spin_ns, iree_time_t deadline_ns); // Cancels a pending wait operation without blocking.
diff --git a/runtime/src/iree/task/api.c b/runtime/src/iree/task/api.c index 0e79ff7..f3d5c80 100644 --- a/runtime/src/iree/task/api.c +++ b/runtime/src/iree/task/api.c
@@ -17,6 +17,14 @@ // Executor configuration //===----------------------------------------------------------------------===// +IREE_FLAG( + int32_t, task_worker_spin_us, 0, + "Maximum duration in microseconds each worker should spin waiting for\n" + "additional work. In almost all cases this should be 0 as spinning is\n" + "often extremely harmful to system health. Only set to non-zero values\n" + "when latency is the #1 priority (vs. thermals, system-wide scheduling,\n" + "etc)."); + // TODO(benvanik): enable this when we use it - though hopefully we don't! IREE_FLAG( int32_t, task_worker_local_memory, 0, // 64 * 1024, @@ -27,6 +35,20 @@ "only use a specific maximum amount of local memory and the runtime must\n" "be configured to make at least that amount of local memory available."); +iree_status_t iree_task_executor_options_initialize_from_flags( + iree_task_executor_options_t* out_options) { + IREE_ASSERT_ARGUMENT(out_options); + iree_task_executor_options_initialize(out_options); + + out_options->worker_spin_ns = + (iree_duration_t)FLAG_task_worker_spin_us * 1000; + + out_options->worker_local_memory_size = + (iree_host_size_t)FLAG_task_worker_local_memory; + + return iree_ok_status(); +} + //===----------------------------------------------------------------------===// // Topology configuration //===----------------------------------------------------------------------===// @@ -56,6 +78,28 @@ // TODO(benvanik): add --task_topology_dump to dump out the current machine // configuration as seen by the topology utilities. +iree_status_t iree_task_topology_initialize_from_flags( + iree_task_topology_t* out_topology) { + IREE_ASSERT_ARGUMENT(out_topology); + iree_task_topology_initialize(out_topology); + + if (FLAG_task_topology_group_count != 0) { + iree_task_topology_initialize_from_group_count( + FLAG_task_topology_group_count, out_topology); + } else if (strcmp(FLAG_task_topology_mode, "physical_cores") == 0) { + iree_task_topology_initialize_from_physical_cores( + FLAG_task_topology_max_group_count, out_topology); + } else { + return iree_make_status( + IREE_STATUS_INVALID_ARGUMENT, + "one of --task_topology_group_count or --task_topology_mode must be " + "specified and be a valid value; have --task_topology_mode=%s.", + FLAG_task_topology_mode); + } + + return iree_ok_status(); +} + //===----------------------------------------------------------------------===// // Task system factory functions //===----------------------------------------------------------------------===// @@ -66,35 +110,16 @@ *out_executor = NULL; IREE_TRACE_ZONE_BEGIN(z0); - iree_task_scheduling_mode_t scheduling_mode = 0; - - iree_host_size_t worker_local_memory = - (iree_host_size_t)FLAG_task_worker_local_memory; - - iree_status_t status = iree_ok_status(); + iree_task_executor_options_t options; + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, iree_task_executor_options_initialize_from_flags(&options)); iree_task_topology_t topology; - iree_task_topology_initialize(&topology); + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, iree_task_topology_initialize_from_flags(&topology)); - if (FLAG_task_topology_group_count != 0) { - iree_task_topology_initialize_from_group_count( - FLAG_task_topology_group_count, &topology); - } else if (strcmp(FLAG_task_topology_mode, "physical_cores") == 0) { - iree_task_topology_initialize_from_physical_cores( - FLAG_task_topology_max_group_count, &topology); - } else { - status = iree_make_status( - IREE_STATUS_INVALID_ARGUMENT, - "one of --task_topology_group_count or --task_topology_mode must be " - "specified and be a valid value; have --task_topology_mode=%s.", - FLAG_task_topology_mode); - } - - if (iree_status_is_ok(status)) { - status = iree_task_executor_create(scheduling_mode, &topology, - worker_local_memory, host_allocator, - out_executor); - } + iree_status_t status = iree_task_executor_create( + options, &topology, host_allocator, out_executor); iree_task_topology_deinitialize(&topology);
diff --git a/runtime/src/iree/task/api.h b/runtime/src/iree/task/api.h index bebaf57..acd3e7e 100644 --- a/runtime/src/iree/task/api.h +++ b/runtime/src/iree/task/api.h
@@ -16,6 +16,20 @@ #endif // __cplusplus //===----------------------------------------------------------------------===// +// Flag parsing +//===----------------------------------------------------------------------===// + +// Initializes |out_options| from the command line flags. +// Used in place of iree_task_executor_options_initialize. +iree_status_t iree_task_executor_options_initialize_from_flags( + iree_task_executor_options_t* out_options); + +// Initializes |out_topology| from the command line flags. +// Used in place of iree_task_topology_initialize. +iree_status_t iree_task_topology_initialize_from_flags( + iree_task_topology_t* out_topology); + +//===----------------------------------------------------------------------===// // Task system factory functions //===----------------------------------------------------------------------===//
diff --git a/runtime/src/iree/task/executor.c b/runtime/src/iree/task/executor.c index d205291..2f2a6c7 100644 --- a/runtime/src/iree/task/executor.c +++ b/runtime/src/iree/task/executor.c
@@ -24,11 +24,15 @@ static void iree_task_executor_destroy(iree_task_executor_t* executor); -iree_status_t iree_task_executor_create( - iree_task_scheduling_mode_t scheduling_mode, - const iree_task_topology_t* topology, - iree_host_size_t worker_local_memory_size, iree_allocator_t allocator, - iree_task_executor_t** out_executor) { +void iree_task_executor_options_initialize( + iree_task_executor_options_t* out_options) { + memset(out_options, 0, sizeof(*out_options)); +} + +iree_status_t iree_task_executor_create(iree_task_executor_options_t options, + const iree_task_topology_t* topology, + iree_allocator_t allocator, + iree_task_executor_t** out_executor) { iree_host_size_t worker_count = iree_task_topology_group_count(topology); if (worker_count > IREE_TASK_EXECUTOR_MAX_WORKER_COUNT) { return iree_make_status( @@ -52,17 +56,19 @@ // The executor is followed in memory by worker[] + worker_local_memory[]. // The whole point is that we don't want destructive sharing between workers // so ensure we are aligned to at least the destructive interference size. - worker_local_memory_size = iree_host_align( - worker_local_memory_size, iree_hardware_destructive_interference_size); - IREE_TRACE_ZONE_APPEND_VALUE(z0, (int64_t)worker_local_memory_size); + options.worker_local_memory_size = + iree_host_align(options.worker_local_memory_size, + iree_hardware_destructive_interference_size); + IREE_TRACE_ZONE_APPEND_VALUE(z0, (int64_t)options.worker_local_memory_size); iree_host_size_t executor_base_size = iree_host_align(sizeof(iree_task_executor_t), iree_hardware_destructive_interference_size); iree_host_size_t worker_list_size = iree_host_align(worker_count * sizeof(iree_task_worker_t), iree_hardware_destructive_interference_size); - iree_host_size_t executor_size = executor_base_size + worker_list_size + - worker_count * worker_local_memory_size; + iree_host_size_t executor_size = + executor_base_size + worker_list_size + + worker_count * options.worker_local_memory_size; iree_task_executor_t* executor = NULL; IREE_RETURN_AND_END_ZONE_IF_ERROR( @@ -70,7 +76,8 @@ memset(executor, 0, executor_size); iree_atomic_ref_count_init(&executor->ref_count); executor->allocator = allocator; - executor->scheduling_mode = scheduling_mode; + executor->scheduling_mode = options.scheduling_mode; + executor->worker_spin_ns = options.worker_spin_ns; iree_atomic_task_slist_initialize(&executor->incoming_ready_slist); iree_slim_mutex_initialize(&executor->coordinator_mutex); @@ -137,9 +144,10 @@ iree_task_worker_t* worker = &executor->workers[i]; status = iree_task_worker_initialize( executor, i, iree_task_topology_get_group(topology, i), - iree_make_byte_span(worker_local_memory, worker_local_memory_size), + iree_make_byte_span(worker_local_memory, + options.worker_local_memory_size), &seed_prng, worker); - worker_local_memory += worker_local_memory_size; + worker_local_memory += options.worker_local_memory_size; if (!iree_status_is_ok(status)) break; } // The masks are accessed with 'relaxed' order because they are just hints.
diff --git a/runtime/src/iree/task/executor.h b/runtime/src/iree/task/executor.h index 7b1ee31..7ddd46f 100644 --- a/runtime/src/iree/task/executor.h +++ b/runtime/src/iree/task/executor.h
@@ -268,24 +268,45 @@ }; typedef uint32_t iree_task_scheduling_mode_t; +// Options controlling task executor behavior. +typedef struct iree_task_executor_options_t { + // Specifies the schedule mode used for worker and workload balancing. + iree_task_scheduling_mode_t scheduling_mode; + + // TODO(benvanik): add a scope_spin_ns to control wait-idle and other + // scope-related waits coming from outside of the task system. + + // Maximum duration in nanoseconds each worker should spin waiting for + // additional work. In almost all cases this should be IREE_DURATION_ZERO as + // spinning is often extremely harmful to system health. Only set to non-zero + // values when latency is the #1 priority (over thermals, system-wide + // scheduling, and the environment). + iree_duration_t worker_spin_ns; + + // Defines the bytes to be allocated and reserved by each worker to use for + // local memory operations. Will be rounded up to the next power of two. + // Dispatches performed will be able to request up to this amount of memory + // for their invocations and no more. May be 0 if no worker local memory is + // required. + iree_host_size_t worker_local_memory_size; +} iree_task_executor_options_t; + +// Initializes |out_options| to default values. +void iree_task_executor_options_initialize( + iree_task_executor_options_t* out_options); + // Base task system executor interface. typedef struct iree_task_executor_t iree_task_executor_t; // Creates a task executor using the specified topology. -// -// |worker_local_memory_size| defines the bytes to be allocated and reserved for -// each worker to use for local memory operations. Will be rounded up to the -// next power of two. Dispatches performed will be able to request up to this -// amount of memory for their invocations and no more. May be 0 if no worker -// local memory is required. -// +// |options| must be initialized with iree_task_executor_options_initialize by +// callers and then overridden as required. // |topology| is only used during creation and need not live beyond this call. // |out_executor| must be released by the caller. -iree_status_t iree_task_executor_create( - iree_task_scheduling_mode_t scheduling_mode, - const iree_task_topology_t* topology, - iree_host_size_t worker_local_memory_size, iree_allocator_t allocator, - iree_task_executor_t** out_executor); +iree_status_t iree_task_executor_create(iree_task_executor_options_t options, + const iree_task_topology_t* topology, + iree_allocator_t allocator, + iree_task_executor_t** out_executor); // Retains the given |executor| for the caller. void iree_task_executor_retain(iree_task_executor_t* executor);
diff --git a/runtime/src/iree/task/executor_demo.cc b/runtime/src/iree/task/executor_demo.cc index b8869d8..6d3b08a 100644 --- a/runtime/src/iree/task/executor_demo.cc +++ b/runtime/src/iree/task/executor_demo.cc
@@ -51,13 +51,12 @@ iree_task_topology_initialize_from_group_count(/*group_count=*/6, &topology); #endif + iree_task_executor_options_t options; + iree_task_executor_options_initialize(&options); + options.worker_local_memory_size = 0; // 64 * 1024; iree_task_executor_t* executor = NULL; - iree_task_scheduling_mode_t scheduling_mode = - IREE_TASK_SCHEDULING_MODE_RESERVED; - iree_host_size_t worker_local_memory_size = 0; // 64 * 1024; - IREE_CHECK_OK(iree_task_executor_create(scheduling_mode, &topology, - worker_local_memory_size, allocator, - &executor)); + IREE_CHECK_OK( + iree_task_executor_create(options, &topology, allocator, &executor)); iree_task_topology_deinitialize(&topology); //
diff --git a/runtime/src/iree/task/executor_impl.h b/runtime/src/iree/task/executor_impl.h index 35e5419..98b410c 100644 --- a/runtime/src/iree/task/executor_impl.h +++ b/runtime/src/iree/task/executor_impl.h
@@ -34,6 +34,10 @@ // TODO(benvanik): make mutable; currently always the same reserved value. iree_task_scheduling_mode_t scheduling_mode; + // Time each worker should spin before parking itself to wait for more work. + // IREE_DURATION_ZERO is used to disable spinning. + iree_duration_t worker_spin_ns; + // State used by the work-stealing operations performed by donated threads. // This is **NOT SYNCHRONIZED** and relies on the fact that we actually don't // much care about the precise selection of workers enough to mind any tears
diff --git a/runtime/src/iree/task/executor_test.cc b/runtime/src/iree/task/executor_test.cc index 7e96a8e..5a1a791 100644 --- a/runtime/src/iree/task/executor_test.cc +++ b/runtime/src/iree/task/executor_test.cc
@@ -22,13 +22,12 @@ iree_task_topology_initialize_from_group_count(/*group_count=*/4, &topology); for (int i = 0; i < 100; ++i) { + iree_task_executor_options_t options; + iree_task_executor_options_initialize(&options); + options.worker_local_memory_size = 64 * 1024; iree_task_executor_t* executor = NULL; - iree_task_scheduling_mode_t scheduling_mode = - IREE_TASK_SCHEDULING_MODE_RESERVED; - iree_host_size_t worker_local_memory_size = 64 * 1024; IREE_ASSERT_OK(iree_task_executor_create( - scheduling_mode, &topology, worker_local_memory_size, - iree_allocator_system(), &executor)); + options, &topology, iree_allocator_system(), &executor)); // -- idle -- iree_task_executor_release(executor); } @@ -43,13 +42,12 @@ iree_task_topology_initialize_from_group_count(/*group_count=*/4, &topology); for (int i = 0; i < 100; ++i) { + iree_task_executor_options_t options; + iree_task_executor_options_initialize(&options); + options.worker_local_memory_size = 64 * 1024; iree_task_executor_t* executor = NULL; - iree_task_scheduling_mode_t scheduling_mode = - IREE_TASK_SCHEDULING_MODE_RESERVED; - iree_host_size_t worker_local_memory_size = 64 * 1024; IREE_ASSERT_OK(iree_task_executor_create( - scheduling_mode, &topology, worker_local_memory_size, - iree_allocator_system(), &executor)); + options, &topology, iree_allocator_system(), &executor)); iree_task_scope_t scope; iree_task_scope_initialize(iree_make_cstring_view("scope"), &scope); @@ -90,14 +88,13 @@ // Tests heavily serialized submission to an executor. // This puts pressure on the overheads involved in spilling up threads. TEST(ExecutorTest, SubmissionStress) { + iree_task_executor_options_t options; + iree_task_executor_options_initialize(&options); + options.worker_local_memory_size = 64 * 1024; iree_task_topology_t topology; iree_task_topology_initialize_from_group_count(/*group_count=*/4, &topology); iree_task_executor_t* executor = NULL; - iree_task_scheduling_mode_t scheduling_mode = - IREE_TASK_SCHEDULING_MODE_RESERVED; - iree_host_size_t worker_local_memory_size = 64 * 1024; - IREE_ASSERT_OK(iree_task_executor_create(scheduling_mode, &topology, - worker_local_memory_size, + IREE_ASSERT_OK(iree_task_executor_create(options, &topology, iree_allocator_system(), &executor)); iree_task_scope_t scope; iree_task_scope_initialize(iree_make_cstring_view("scope"), &scope);
diff --git a/runtime/src/iree/task/testing/task_test.h b/runtime/src/iree/task/testing/task_test.h index 12068e6..4d3a74c 100644 --- a/runtime/src/iree/task/testing/task_test.h +++ b/runtime/src/iree/task/testing/task_test.h
@@ -22,12 +22,13 @@ class TaskTest : public ::testing::Test { protected: virtual void SetUp() { + iree_task_executor_options_t options; + options.worker_local_memory_size = 64 * 1024; + iree_task_executor_options_initialize(&options); iree_task_topology_t topology; iree_task_topology_initialize_from_group_count(8, &topology); - IREE_ASSERT_OK( - iree_task_executor_create(IREE_TASK_SCHEDULING_MODE_RESERVED, &topology, - /*worker_local_memory_size=*/(64 * 1024), - iree_allocator_system(), &executor_)); + IREE_ASSERT_OK(iree_task_executor_create( + options, &topology, iree_allocator_system(), &executor_)); iree_task_topology_deinitialize(&topology); iree_task_scope_initialize(iree_make_cstring_view("scope"), &scope_);
diff --git a/runtime/src/iree/task/worker.c b/runtime/src/iree/task/worker.c index cda11ad..5252bec 100644 --- a/runtime/src/iree/task/worker.c +++ b/runtime/src/iree/task/worker.c
@@ -326,10 +326,14 @@ // Have more work to do; loop around to try another pump. iree_notification_cancel_wait(&worker->wake_notification); } else { + // Spin/wait in the kernel. We don't care if the condition fails as we're + // just using it as a pulse. IREE_TRACE_ZONE_BEGIN_NAMED(z_wait, "iree_task_worker_main_pump_wake_wait"); - iree_notification_commit_wait(&worker->wake_notification, wait_token, - IREE_TIME_INFINITE_FUTURE); + iree_notification_commit_wait( + &worker->wake_notification, wait_token, + /*spin_ns=*/worker->executor->worker_spin_ns, + /*deadline_ns=*/IREE_TIME_INFINITE_FUTURE); IREE_TRACE_ZONE_END(z_wait); // Woke from a wait - query the processor ID in case we migrated during