Revert "Early-exit from try_steal_task when there are no victims. (#6879)" (#6895)
This reverts commit 84438926446e11696f91022a22f9e2f7db052824.
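For reference, here is the mask arithmetic this revert changes, as a minimal
standalone sketch. Everything below uses hypothetical names (affinity_set_t,
victims_after_revert, victims_with_pending_hint) chosen for illustration only;
the real executor loads these masks from atomics with relaxed ordering rather
than taking them as plain values.

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t affinity_set_t; /* one bit per worker */

    /* Restored behavior: any live, non-idle worker is a candidate
     * victim for task stealing. */
    static affinity_set_t victims_after_revert(affinity_set_t live,
                                               affinity_set_t idle) {
      return live & ~idle;
    }

    /* Reverted behavior: additionally required a non-empty pending
     * queue, and the caller returned NULL early (before opening the
     * trace zone) when the result was zero. */
    static affinity_set_t victims_with_pending_hint(affinity_set_t live,
                                                    affinity_set_t idle,
                                                    affinity_set_t pending) {
      return pending & live & ~idle;
    }

    int main(void) {
      affinity_set_t live = 0xF, idle = 0x1, pending = 0x4;
      printf("restored: %#llx\n",
             (unsigned long long)victims_after_revert(live, idle));
      printf("reverted: %#llx\n",
             (unsigned long long)victims_with_pending_hint(live, idle,
                                                           pending));
      return 0;
    }

With the pending-queue hint gone, a thief may probe workers whose queues turn
out to be empty, but task posting no longer pays for the seq_cst
fetch_or/fetch_and updates to worker_pending_mask on every batch.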
diff --git a/iree/task/executor.c b/iree/task/executor.c
index 5252b35..54de4ed 100644
--- a/iree/task/executor.c
+++ b/iree/task/executor.c
@@ -710,21 +710,15 @@
iree_task_affinity_set_t constructive_sharing_mask,
uint32_t max_theft_attempts, iree_prng_minilcg128_state_t* theft_prng,
iree_task_queue_t* local_task_queue) {
+ IREE_TRACE_ZONE_BEGIN(z0);
+
// Limit the workers we will steal from to the ones that are currently live
// and not idle.
iree_task_affinity_set_t victim_mask =
- iree_atomic_task_affinity_set_load(&executor->worker_pending_mask,
- iree_memory_order_relaxed) &
iree_atomic_task_affinity_set_load(&executor->worker_live_mask,
iree_memory_order_relaxed) &
~iree_atomic_task_affinity_set_load(&executor->worker_idle_mask,
iree_memory_order_relaxed);
- if (!victim_mask) {
- // No live workers to steal from; early-exit.
- return NULL;
- }
-
- IREE_TRACE_ZONE_BEGIN(z0);
// TODO(benvanik): it may be possible to rework this such that we better
// use the prng; for example, instead of all this rotating stuff we could just
diff --git a/iree/task/executor_impl.h b/iree/task/executor_impl.h
index ce00fb9..f15ed56 100644
--- a/iree/task/executor_impl.h
+++ b/iree/task/executor_impl.h
@@ -94,12 +94,6 @@
// on already woken workers.
iree_atomic_task_affinity_set_t worker_idle_mask;
- // A bitset indicating which workers have tasks pending in their queues.
- // This does not include tasks that are in-flight; it's just queue depth > 0.
- // Note that this may not be coherent with any of the other masks and should
- // only be used for probabilistic queries ("_could_ we use worker X?" vs "_can_ we?").
- iree_atomic_task_affinity_set_t worker_pending_mask;
-
// Specifies how many worker threads there are.
// For now this number is fixed per executor; however, if we wanted to enable
// live join/leave behavior we could change this to a registration mechanism.
diff --git a/iree/task/post_batch.c b/iree/task/post_batch.c
index 7201558..05b4e19 100644
--- a/iree/task/post_batch.c
+++ b/iree/task/post_batch.c
@@ -164,7 +164,6 @@
int target_index = worker_index + offset;
worker_index += offset + 1;
worker_mask = iree_shr(worker_mask, offset + 1);
- worker_wake_mask |= iree_task_affinity_for_worker(target_index);
iree_task_worker_t* worker = &post_batch->executor->workers[target_index];
iree_task_list_t* target_pending_lifo =
@@ -178,19 +177,12 @@
target_pending_lifo);
} else {
iree_task_worker_post_tasks(worker, target_pending_lifo);
+ worker_wake_mask |= iree_task_affinity_for_worker(target_index);
}
}
- // Mark workers we are about to wake as having pending work.
- iree_atomic_task_affinity_set_fetch_or(
- &post_batch->executor->worker_pending_mask, worker_wake_mask,
- iree_memory_order_seq_cst);
-
// Wake all workers that now have pending work. If a worker is not already
- // waiting this will be cheap (no syscall). We also don't try to wake the
- // current worker doing the posting.
- worker_wake_mask ^=
- post_batch->current_worker ? post_batch->current_worker->worker_bit : 0;
+ // waiting this will be cheap (no syscall).
if (worker_wake_mask != 0) {
iree_task_post_batch_wake_workers(post_batch, worker_wake_mask);
}
diff --git a/iree/task/queue.c b/iree/task/queue.c
index ce5b618..9a6986b 100644
--- a/iree/task/queue.c
+++ b/iree/task/queue.c
@@ -43,8 +43,7 @@
}
iree_task_t* iree_task_queue_flush_from_lifo_slist(
- iree_task_queue_t* queue, iree_atomic_task_slist_t* source_slist,
- bool* out_empty) {
+ iree_task_queue_t* queue, iree_atomic_task_slist_t* source_slist) {
// Perform the flush and swap outside of the lock; acquiring the list is
// atomic and then we own it exclusively.
iree_task_list_t suffix;
@@ -57,17 +56,14 @@
iree_slim_mutex_lock(&queue->mutex);
if (did_flush) iree_task_list_append(&queue->list, &suffix);
iree_task_t* next_task = iree_task_list_pop_front(&queue->list);
- *out_empty = iree_task_list_is_empty(&queue->list);
iree_slim_mutex_unlock(&queue->mutex);
return next_task;
}
-iree_task_t* iree_task_queue_pop_front(iree_task_queue_t* queue,
- bool* out_empty) {
+iree_task_t* iree_task_queue_pop_front(iree_task_queue_t* queue) {
iree_slim_mutex_lock(&queue->mutex);
iree_task_t* next_task = iree_task_list_pop_front(&queue->list);
- *out_empty = iree_task_list_is_empty(&queue->list);
iree_slim_mutex_unlock(&queue->mutex);
return next_task;
}
diff --git a/iree/task/queue.h b/iree/task/queue.h
index 74ca4ad..917b872 100644
--- a/iree/task/queue.h
+++ b/iree/task/queue.h
@@ -138,19 +138,15 @@
// Flushes the |source_slist| LIFO mailbox into the task queue in FIFO order.
// Returns the first task in the queue upon success; the task may be
// pre-existing or from the newly flushed tasks.
-// |out_empty| is set to true if the queue is empty after the pop.
//
// Must only be called from the owning worker's thread.
iree_task_t* iree_task_queue_flush_from_lifo_slist(
- iree_task_queue_t* queue, iree_atomic_task_slist_t* source_slist,
- bool* out_empty);
+ iree_task_queue_t* queue, iree_atomic_task_slist_t* source_slist);
// Pops a task from the front of the queue if any are available.
-// |out_empty| is set to true if the queue is empty after the pop.
//
// Must only be called from the owning worker's thread.
-iree_task_t* iree_task_queue_pop_front(iree_task_queue_t* queue,
- bool* out_empty);
+iree_task_t* iree_task_queue_pop_front(iree_task_queue_t* queue);
// Tries to steal up to |max_tasks| from the back of the queue.
// Returns NULL if no tasks are available and otherwise up to |max_tasks| tasks
diff --git a/iree/task/queue_test.cc b/iree/task/queue_test.cc
index ea04725..53342fd 100644
--- a/iree/task/queue_test.cc
+++ b/iree/task/queue_test.cc
@@ -17,25 +17,19 @@
}
TEST(QueueTest, Empty) {
- bool empty = false;
-
iree_task_queue_t queue;
iree_task_queue_initialize(&queue);
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
- EXPECT_FALSE(iree_task_queue_pop_front(&queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_FALSE(iree_task_queue_pop_front(&queue));
iree_task_queue_deinitialize(&queue);
}
TEST(QueueTest, PushPop) {
- bool empty = false;
-
iree_task_queue_t queue;
iree_task_queue_initialize(&queue);
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
- EXPECT_FALSE(iree_task_queue_pop_front(&queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_FALSE(iree_task_queue_pop_front(&queue));
iree_task_t task_a = {0};
iree_task_queue_push_front(&queue, &task_a);
@@ -46,15 +40,13 @@
iree_task_queue_push_front(&queue, &task_b);
EXPECT_FALSE(iree_task_queue_is_empty(&queue));
- EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue, &empty));
- EXPECT_FALSE(empty);
+ EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue));
EXPECT_FALSE(iree_task_queue_is_empty(&queue));
- EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue));
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
- EXPECT_FALSE(iree_task_queue_pop_front(&queue, &empty));
+ EXPECT_FALSE(iree_task_queue_pop_front(&queue));
iree_task_queue_deinitialize(&queue);
}
@@ -86,9 +78,7 @@
EXPECT_FALSE(iree_task_queue_is_empty(&queue));
EXPECT_TRUE(iree_task_list_is_empty(&list));
- bool empty = false;
- EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue));
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
iree_task_queue_deinitialize(&queue);
@@ -112,11 +102,8 @@
EXPECT_TRUE(iree_task_list_is_empty(&list));
// Pop list and ensure order: a->b.
- bool empty = false;
- EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue, &empty));
- EXPECT_FALSE(empty);
- EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue));
+ EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue));
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
iree_task_queue_deinitialize(&queue);
@@ -129,10 +116,8 @@
iree_atomic_task_slist_t slist;
iree_atomic_task_slist_initialize(&slist);
- bool empty = false;
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
- EXPECT_FALSE(iree_task_queue_flush_from_lifo_slist(&queue, &slist, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_FALSE(iree_task_queue_flush_from_lifo_slist(&queue, &slist));
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
iree_atomic_task_slist_deinitialize(&slist);
@@ -149,11 +134,8 @@
iree_task_t task_a = {0};
iree_atomic_task_slist_push(&slist, &task_a);
- bool empty = false;
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
- EXPECT_EQ(&task_a,
- iree_task_queue_flush_from_lifo_slist(&queue, &slist, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_a, iree_task_queue_flush_from_lifo_slist(&queue, &slist));
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
iree_atomic_task_slist_deinitialize(&slist);
@@ -177,18 +159,13 @@
// Flush the list to the queue; it should swap LIFO->FIFO and return the
// first task in the queue.
- bool empty = false;
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
- EXPECT_EQ(&task_a,
- iree_task_queue_flush_from_lifo_slist(&queue, &slist, &empty));
- EXPECT_FALSE(empty);
+ EXPECT_EQ(&task_a, iree_task_queue_flush_from_lifo_slist(&queue, &slist));
EXPECT_FALSE(iree_task_queue_is_empty(&queue));
// Pop list and ensure order: [a->]b->c.
- EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue, &empty));
- EXPECT_FALSE(empty);
- EXPECT_EQ(&task_c, iree_task_queue_pop_front(&queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue));
+ EXPECT_EQ(&task_c, iree_task_queue_pop_front(&queue));
EXPECT_TRUE(iree_task_queue_is_empty(&queue));
iree_atomic_task_slist_deinitialize(&slist);
@@ -251,11 +228,8 @@
iree_task_queue_try_steal(&source_queue, &target_queue, 1));
EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
- bool empty = false;
- EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue, &empty));
- EXPECT_FALSE(empty);
- EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
+ EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue));
EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
iree_task_queue_deinitialize(&source_queue);
@@ -279,13 +253,10 @@
EXPECT_EQ(&task_existing,
iree_task_queue_try_steal(&source_queue, &target_queue, 1));
- bool empty = false;
- EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
- EXPECT_EQ(&task_b, iree_task_queue_pop_front(&target_queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_b, iree_task_queue_pop_front(&target_queue));
EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
iree_task_queue_deinitialize(&source_queue);
@@ -307,17 +278,13 @@
iree_task_queue_push_front(&source_queue, &task_b);
iree_task_queue_push_front(&source_queue, &task_a);
- bool empty = false;
EXPECT_EQ(&task_c,
iree_task_queue_try_steal(&source_queue, &target_queue, 2));
- EXPECT_EQ(&task_d, iree_task_queue_pop_front(&target_queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_d, iree_task_queue_pop_front(&target_queue));
EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
- EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue, &empty));
- EXPECT_FALSE(empty);
- EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
+ EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue));
EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
iree_task_queue_deinitialize(&source_queue);
@@ -339,17 +306,13 @@
iree_task_queue_push_front(&source_queue, &task_b);
iree_task_queue_push_front(&source_queue, &task_a);
- bool empty = false;
EXPECT_EQ(&task_c,
iree_task_queue_try_steal(&source_queue, &target_queue, 1000));
- EXPECT_EQ(&task_d, iree_task_queue_pop_front(&target_queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_d, iree_task_queue_pop_front(&target_queue));
EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
- EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue, &empty));
- EXPECT_FALSE(empty);
- EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue, &empty));
- EXPECT_TRUE(empty);
+ EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
+ EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue));
EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
iree_task_queue_deinitialize(&source_queue);
diff --git a/iree/task/worker.c b/iree/task/worker.c
index e203fd8..f383e0e 100644
--- a/iree/task/worker.c
+++ b/iree/task/worker.c
@@ -30,7 +30,7 @@
out_worker->worker_bit = iree_task_affinity_for_worker(worker_index);
out_worker->ideal_thread_affinity = topology_group->ideal_thread_affinity;
out_worker->constructive_sharing_mask =
- topology_group->constructive_sharing_mask ^ out_worker->worker_bit;
+ topology_group->constructive_sharing_mask;
out_worker->max_theft_attempts =
executor->worker_count / IREE_TASK_EXECUTOR_MAX_THEFT_ATTEMPTS_DIVISOR;
iree_prng_minilcg128_initialize(iree_prng_splitmix64_next(seed_prng),
@@ -216,9 +216,7 @@
// Check the local work queue for any work we know we should start
// processing immediately. Other workers may try to steal some of this work
// if we take too long.
- bool queue_empty = false;
- iree_task_t* task =
- iree_task_queue_pop_front(&worker->local_task_queue, &queue_empty);
+ iree_task_t* task = iree_task_queue_pop_front(&worker->local_task_queue);
// Check the mailbox to see if we have incoming work that has been posted.
// We try to greedily move it to our local work list so that we can work
@@ -230,17 +228,8 @@
// first place (large uneven workloads for various workers, bad distribution
// in the face of heterogeneous multi-core architectures where some workers
// complete tasks faster than others, etc).
- task = iree_task_queue_flush_from_lifo_slist(
- &worker->local_task_queue, &worker->mailbox_slist, &queue_empty);
- }
- if (queue_empty) {
- // Queue depth is 0; we may have gotten a task but we know that there's
- // nothing else waiting in the next loop so we can tell other workers not
- // to try to steal tasks from us. New submissions that come in after this
- // (while we are processing the task we may have gotten) will reset the bit.
- iree_atomic_task_affinity_set_fetch_and(
- &worker->executor->worker_pending_mask, ~worker->worker_bit,
- iree_memory_order_seq_cst);
+ task = iree_task_queue_flush_from_lifo_slist(&worker->local_task_queue,
+ &worker->mailbox_slist);
}
// If we ran out of work assigned to this specific worker try to steal some