Revert "Early-exit from try_steal_task when there are no victims. (#6879)" (#6895)

This reverts commit 84438926446e11696f91022a22f9e2f7db052824.
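
This restores the pre-#6879 behavior: the worker_pending_mask field and its
maintenance are removed, try_steal_task opens its trace zone unconditionally
instead of early-exiting first, the wake bit is set only when tasks are
actually posted to a worker, the posting worker's self-exclusion from the
wake mask is dropped, a worker's constructive_sharing_mask no longer masks
out its own bit, and iree_task_queue_pop_front /
iree_task_queue_flush_from_lifo_slist lose the |out_empty| out-parameter in
favor of observing emptiness via a NULL return.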
diff --git a/iree/task/executor.c b/iree/task/executor.c
index 5252b35..54de4ed 100644
--- a/iree/task/executor.c
+++ b/iree/task/executor.c
@@ -710,21 +710,15 @@
     iree_task_affinity_set_t constructive_sharing_mask,
     uint32_t max_theft_attempts, iree_prng_minilcg128_state_t* theft_prng,
     iree_task_queue_t* local_task_queue) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
   // Limit the workers we will steal from to the ones that are currently live
   // and not idle.
   iree_task_affinity_set_t victim_mask =
-      iree_atomic_task_affinity_set_load(&executor->worker_pending_mask,
-                                         iree_memory_order_relaxed) &
       iree_atomic_task_affinity_set_load(&executor->worker_live_mask,
                                          iree_memory_order_relaxed) &
       ~iree_atomic_task_affinity_set_load(&executor->worker_idle_mask,
                                           iree_memory_order_relaxed);
-  if (!victim_mask) {
-    // No live workers to steal from; early-exit.
-    return NULL;
-  }
-
-  IREE_TRACE_ZONE_BEGIN(z0);
 
   // TODO(benvanik): it may be possible to rework this such that we better
   // use the prng; for example, instead of all this rotating stuff we could just
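
With the pending-mask term gone, victim selection reduces to two mask loads:
workers that are live and not idle. A minimal standalone sketch of the same
selection logic, using C11 atomics on a plain uint64_t in place of the
iree_atomic_task_affinity_set_* wrappers (the 64-bit width is an assumption
for illustration):

    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct {
      _Atomic uint64_t worker_live_mask;  // bit i set: worker i is running
      _Atomic uint64_t worker_idle_mask;  // bit i set: worker i is parked
    } executor_masks_t;

    // Live, non-idle workers are the only useful theft victims: dead workers
    // have no queue to steal from and idle workers are (at least momentarily)
    // out of work.
    static uint64_t select_victims(executor_masks_t* m) {
      return atomic_load_explicit(&m->worker_live_mask,
                                  memory_order_relaxed) &
             ~atomic_load_explicit(&m->worker_idle_mask,
                                   memory_order_relaxed);
    }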
diff --git a/iree/task/executor_impl.h b/iree/task/executor_impl.h
index ce00fb9..f15ed56 100644
--- a/iree/task/executor_impl.h
+++ b/iree/task/executor_impl.h
@@ -94,12 +94,6 @@
   // on already woken workers.
   iree_atomic_task_affinity_set_t worker_idle_mask;
 
-  // A bitset indicating which workers have tasks pending in their queues.
-  // This does not include tasks that are in-flight; it's just queue depth > 0.
-  // Note that this may not be coherent with any of the other masks and should
-  // only be used for probabilistic queries ("_could_ we use worker X?" vs can).
-  iree_atomic_task_affinity_set_t worker_pending_mask;
-
   // Specifies how many worker threads there are.
   // For now this number is fixed per executor however if we wanted to enable
   // live join/leave behavior we could change this to a registration mechanism.
diff --git a/iree/task/post_batch.c b/iree/task/post_batch.c
index 7201558..05b4e19 100644
--- a/iree/task/post_batch.c
+++ b/iree/task/post_batch.c
@@ -164,7 +164,6 @@
     int target_index = worker_index + offset;
     worker_index += offset + 1;
     worker_mask = iree_shr(worker_mask, offset + 1);
-    worker_wake_mask |= iree_task_affinity_for_worker(target_index);
 
     iree_task_worker_t* worker = &post_batch->executor->workers[target_index];
     iree_task_list_t* target_pending_lifo =
@@ -178,19 +177,12 @@
                                                    target_pending_lifo);
     } else {
       iree_task_worker_post_tasks(worker, target_pending_lifo);
+      worker_wake_mask |= iree_task_affinity_for_worker(target_index);
     }
   }
 
-  // Mark workers we are about to wake as having pending work.
-  iree_atomic_task_affinity_set_fetch_or(
-      &post_batch->executor->worker_pending_mask, worker_wake_mask,
-      iree_memory_order_seq_cst);
-
   // Wake all workers that now have pending work. If a worker is not already
-  // waiting this will be cheap (no syscall). We also don't try to wake the
-  // current worker doing the posting.
-  worker_wake_mask ^=
-      post_batch->current_worker ? post_batch->current_worker->worker_bit : 0;
+  // waiting this will be cheap (no syscall).
   if (worker_wake_mask != 0) {
     iree_task_post_batch_wake_workers(post_batch, worker_wake_mask);
   }
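
The posting loop above visits each set bit of worker_mask, computing the next
worker index from the bit offset and shifting the remainder down. A sketch of
that iteration pattern in isolation; __builtin_ctzll (GCC/Clang) is an
assumption standing in for however the surrounding code derives |offset|, and
the guarded shift avoids the undefined full-width shift a raw `>>` would hit
when only the top bit remains (presumably why an iree_shr helper is used
rather than a bare operator):

    #include <stdint.h>

    // Visits each set bit of |worker_mask| exactly once, lowest to highest.
    static void for_each_worker(uint64_t worker_mask,
                                void (*visit)(int worker_index)) {
      int worker_index = 0;
      while (worker_mask != 0) {
        int offset = __builtin_ctzll(worker_mask);  // bits to next worker
        visit(worker_index + offset);
        worker_index += offset + 1;
        // Shifting a uint64_t by 64 is undefined; guard the final-bit case.
        worker_mask =
            (offset + 1 < 64) ? (worker_mask >> (offset + 1)) : 0;
      }
    }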
diff --git a/iree/task/queue.c b/iree/task/queue.c
index ce5b618..9a6986b 100644
--- a/iree/task/queue.c
+++ b/iree/task/queue.c
@@ -43,8 +43,7 @@
 }
 
 iree_task_t* iree_task_queue_flush_from_lifo_slist(
-    iree_task_queue_t* queue, iree_atomic_task_slist_t* source_slist,
-    bool* out_empty) {
+    iree_task_queue_t* queue, iree_atomic_task_slist_t* source_slist) {
   // Perform the flush and swap outside of the lock; acquiring the list is
   // atomic and then we own it exclusively.
   iree_task_list_t suffix;
@@ -57,17 +56,14 @@
   iree_slim_mutex_lock(&queue->mutex);
   if (did_flush) iree_task_list_append(&queue->list, &suffix);
   iree_task_t* next_task = iree_task_list_pop_front(&queue->list);
-  *out_empty = iree_task_list_is_empty(&queue->list);
   iree_slim_mutex_unlock(&queue->mutex);
 
   return next_task;
 }
 
-iree_task_t* iree_task_queue_pop_front(iree_task_queue_t* queue,
-                                       bool* out_empty) {
+iree_task_t* iree_task_queue_pop_front(iree_task_queue_t* queue) {
   iree_slim_mutex_lock(&queue->mutex);
   iree_task_t* next_task = iree_task_list_pop_front(&queue->list);
-  *out_empty = iree_task_list_is_empty(&queue->list);
   iree_slim_mutex_unlock(&queue->mutex);
   return next_task;
 }
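
The reverted flush keeps the expensive work (draining the LIFO mailbox and
reversing it into FIFO order) outside the mutex and holds the lock only for
the cheap append and pop, keeping the critical section short for concurrent
thieves. A self-contained sketch of that shape under simplified assumptions:
an intrusive singly-linked task, a pthread mutex, and a bare atomic pointer
standing in for iree_atomic_task_slist_t.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct task_t { struct task_t* next; } task_t;

    typedef struct {
      pthread_mutex_t mutex;
      task_t* head;              // FIFO front (pop end)
      task_t* tail;              // FIFO back (append end)
      _Atomic(task_t*) mailbox;  // LIFO slist head; lock-free push target
    } task_queue_t;

    // Lock-free LIFO push other threads use to mail a task to this queue.
    static void queue_mailbox_push(task_queue_t* q, task_t* task) {
      task_t* head = atomic_load_explicit(&q->mailbox, memory_order_relaxed);
      do {
        task->next = head;
      } while (!atomic_compare_exchange_weak_explicit(
          &q->mailbox, &head, task,
          memory_order_release, memory_order_relaxed));
    }

    // Drains the mailbox and reverses it into FIFO order outside the lock;
    // only the append and front-pop run under the mutex, as in the reverted
    // iree_task_queue_flush_from_lifo_slist.
    static task_t* queue_flush_from_lifo(task_queue_t* q) {
      task_t* lifo =
          atomic_exchange_explicit(&q->mailbox, NULL, memory_order_acquire);
      task_t* fifo_head = NULL;
      task_t* fifo_tail = NULL;
      while (lifo) {  // reverse: newest-first -> oldest-first
        task_t* next = lifo->next;
        lifo->next = fifo_head;
        if (!fifo_head) fifo_tail = lifo;
        fifo_head = lifo;
        lifo = next;
      }
      pthread_mutex_lock(&q->mutex);
      if (fifo_head) {  // append the flushed suffix
        if (q->tail) q->tail->next = fifo_head;
        else q->head = fifo_head;
        q->tail = fifo_tail;
      }
      task_t* next_task = q->head;  // pop the front task, if any
      if (next_task) {
        q->head = next_task->next;
        if (!q->head) q->tail = NULL;
        next_task->next = NULL;
      }
      pthread_mutex_unlock(&q->mutex);
      return next_task;
    }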
diff --git a/iree/task/queue.h b/iree/task/queue.h
index 74ca4ad..917b872 100644
--- a/iree/task/queue.h
+++ b/iree/task/queue.h
@@ -138,19 +138,15 @@
 // Flushes the |source_slist| LIFO mailbox into the task queue in FIFO order.
 // Returns the first task in the queue upon success; the task may be
 // pre-existing or from the newly flushed tasks.
-// |out_empty| is set to true if the queue is empty after the pop.
 //
 // Must only be called from the owning worker's thread.
 iree_task_t* iree_task_queue_flush_from_lifo_slist(
-    iree_task_queue_t* queue, iree_atomic_task_slist_t* source_slist,
-    bool* out_empty);
+    iree_task_queue_t* queue, iree_atomic_task_slist_t* source_slist);
 
 // Pops a task from the front of the queue if any are available.
-// |out_empty| is set to true if the queue is empty after the pop.
 //
 // Must only be called from the owning worker's thread.
-iree_task_t* iree_task_queue_pop_front(iree_task_queue_t* queue,
-                                       bool* out_empty);
+iree_task_t* iree_task_queue_pop_front(iree_task_queue_t* queue);
 
 // Tries to steal up to |max_tasks| from the back of the queue.
 // Returns NULL if no tasks are available and otherwise up to |max_tasks| tasks
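
With the revert, callers observe emptiness through the NULL return alone
rather than an |out_empty| out-parameter. A minimal usage sketch of the
restored signatures:

    // Drain a worker's local queue; a NULL return is the emptiness signal.
    iree_task_t* task = NULL;
    while ((task = iree_task_queue_pop_front(&queue)) != NULL) {
      // ... execute |task| ...
    }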
diff --git a/iree/task/queue_test.cc b/iree/task/queue_test.cc
index ea04725..53342fd 100644
--- a/iree/task/queue_test.cc
+++ b/iree/task/queue_test.cc
@@ -17,25 +17,19 @@
 }
 
 TEST(QueueTest, Empty) {
-  bool empty = false;
-
   iree_task_queue_t queue;
   iree_task_queue_initialize(&queue);
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
-  EXPECT_FALSE(iree_task_queue_pop_front(&queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_FALSE(iree_task_queue_pop_front(&queue));
   iree_task_queue_deinitialize(&queue);
 }
 
 TEST(QueueTest, PushPop) {
-  bool empty = false;
-
   iree_task_queue_t queue;
   iree_task_queue_initialize(&queue);
 
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
-  EXPECT_FALSE(iree_task_queue_pop_front(&queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_FALSE(iree_task_queue_pop_front(&queue));
 
   iree_task_t task_a = {0};
   iree_task_queue_push_front(&queue, &task_a);
@@ -46,15 +40,13 @@
   iree_task_queue_push_front(&queue, &task_b);
 
   EXPECT_FALSE(iree_task_queue_is_empty(&queue));
-  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue, &empty));
-  EXPECT_FALSE(empty);
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue));
 
   EXPECT_FALSE(iree_task_queue_is_empty(&queue));
-  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue));
 
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
-  EXPECT_FALSE(iree_task_queue_pop_front(&queue, &empty));
+  EXPECT_FALSE(iree_task_queue_pop_front(&queue));
 
   iree_task_queue_deinitialize(&queue);
 }
@@ -86,9 +78,7 @@
   EXPECT_FALSE(iree_task_queue_is_empty(&queue));
   EXPECT_TRUE(iree_task_list_is_empty(&list));
 
-  bool empty = false;
-  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue));
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
 
   iree_task_queue_deinitialize(&queue);
@@ -112,11 +102,8 @@
   EXPECT_TRUE(iree_task_list_is_empty(&list));
 
   // Pop list and ensure order: a->b.
-  bool empty = false;
-  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue, &empty));
-  EXPECT_FALSE(empty);
-  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&queue));
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue));
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
 
   iree_task_queue_deinitialize(&queue);
@@ -129,10 +116,8 @@
   iree_atomic_task_slist_t slist;
   iree_atomic_task_slist_initialize(&slist);
 
-  bool empty = false;
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
-  EXPECT_FALSE(iree_task_queue_flush_from_lifo_slist(&queue, &slist, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_FALSE(iree_task_queue_flush_from_lifo_slist(&queue, &slist));
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
 
   iree_atomic_task_slist_deinitialize(&slist);
@@ -149,11 +134,8 @@
   iree_task_t task_a = {0};
   iree_atomic_task_slist_push(&slist, &task_a);
 
-  bool empty = false;
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
-  EXPECT_EQ(&task_a,
-            iree_task_queue_flush_from_lifo_slist(&queue, &slist, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_a, iree_task_queue_flush_from_lifo_slist(&queue, &slist));
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
 
   iree_atomic_task_slist_deinitialize(&slist);
@@ -177,18 +159,13 @@
 
   // Flush the list to the queue; it should swap LIFO->FIFO and return the
   // first task in the queue.
-  bool empty = false;
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
-  EXPECT_EQ(&task_a,
-            iree_task_queue_flush_from_lifo_slist(&queue, &slist, &empty));
-  EXPECT_FALSE(empty);
+  EXPECT_EQ(&task_a, iree_task_queue_flush_from_lifo_slist(&queue, &slist));
   EXPECT_FALSE(iree_task_queue_is_empty(&queue));
 
   // Pop list and ensure order: [a->]b->c.
-  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue, &empty));
-  EXPECT_FALSE(empty);
-  EXPECT_EQ(&task_c, iree_task_queue_pop_front(&queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&queue));
+  EXPECT_EQ(&task_c, iree_task_queue_pop_front(&queue));
   EXPECT_TRUE(iree_task_queue_is_empty(&queue));
 
   iree_atomic_task_slist_deinitialize(&slist);
@@ -251,11 +228,8 @@
             iree_task_queue_try_steal(&source_queue, &target_queue, 1));
   EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
 
-  bool empty = false;
-  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue, &empty));
-  EXPECT_FALSE(empty);
-  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue));
   EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
 
   iree_task_queue_deinitialize(&source_queue);
@@ -279,13 +253,10 @@
   EXPECT_EQ(&task_existing,
             iree_task_queue_try_steal(&source_queue, &target_queue, 1));
 
-  bool empty = false;
-  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
   EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
 
-  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&target_queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&target_queue));
   EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
 
   iree_task_queue_deinitialize(&source_queue);
@@ -307,17 +278,13 @@
   iree_task_queue_push_front(&source_queue, &task_b);
   iree_task_queue_push_front(&source_queue, &task_a);
 
-  bool empty = false;
   EXPECT_EQ(&task_c,
             iree_task_queue_try_steal(&source_queue, &target_queue, 2));
-  EXPECT_EQ(&task_d, iree_task_queue_pop_front(&target_queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_d, iree_task_queue_pop_front(&target_queue));
   EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
 
-  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue, &empty));
-  EXPECT_FALSE(empty);
-  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue));
   EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
 
   iree_task_queue_deinitialize(&source_queue);
@@ -339,17 +306,13 @@
   iree_task_queue_push_front(&source_queue, &task_b);
   iree_task_queue_push_front(&source_queue, &task_a);
 
-  bool empty = false;
   EXPECT_EQ(&task_c,
             iree_task_queue_try_steal(&source_queue, &target_queue, 1000));
-  EXPECT_EQ(&task_d, iree_task_queue_pop_front(&target_queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_d, iree_task_queue_pop_front(&target_queue));
   EXPECT_TRUE(iree_task_queue_is_empty(&target_queue));
 
-  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue, &empty));
-  EXPECT_FALSE(empty);
-  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue, &empty));
-  EXPECT_TRUE(empty);
+  EXPECT_EQ(&task_a, iree_task_queue_pop_front(&source_queue));
+  EXPECT_EQ(&task_b, iree_task_queue_pop_front(&source_queue));
   EXPECT_TRUE(iree_task_queue_is_empty(&source_queue));
 
   iree_task_queue_deinitialize(&source_queue);
diff --git a/iree/task/worker.c b/iree/task/worker.c
index e203fd8..f383e0e 100644
--- a/iree/task/worker.c
+++ b/iree/task/worker.c
@@ -30,7 +30,7 @@
   out_worker->worker_bit = iree_task_affinity_for_worker(worker_index);
   out_worker->ideal_thread_affinity = topology_group->ideal_thread_affinity;
   out_worker->constructive_sharing_mask =
-      topology_group->constructive_sharing_mask ^ out_worker->worker_bit;
+      topology_group->constructive_sharing_mask;
   out_worker->max_theft_attempts =
       executor->worker_count / IREE_TASK_EXECUTOR_MAX_THEFT_ATTEMPTS_DIVISOR;
   iree_prng_minilcg128_initialize(iree_prng_splitmix64_next(seed_prng),
@@ -216,9 +216,7 @@
   // Check the local work queue for any work we know we should start
   // processing immediately. Other workers may try to steal some of this work
   // if we take too long.
-  bool queue_empty = false;
-  iree_task_t* task =
-      iree_task_queue_pop_front(&worker->local_task_queue, &queue_empty);
+  iree_task_t* task = iree_task_queue_pop_front(&worker->local_task_queue);
 
   // Check the mailbox to see if we have incoming work that has been posted.
   // We try to greedily move it to our local work list so that we can work
@@ -230,17 +228,8 @@
     // first place (large uneven workloads for various workers, bad distribution
     // in the face of heterogeneous multi-core architectures where some workers
     // complete tasks faster than others, etc).
-    task = iree_task_queue_flush_from_lifo_slist(
-        &worker->local_task_queue, &worker->mailbox_slist, &queue_empty);
-  }
-  if (queue_empty) {
-    // Queue depth is 0; we may have gotten a task but we know that there's
-    // nothing else waiting in the next loop so we can tell other workers not
-    // to try to steal tasks from us. New submissions that come in after this
-    // (while we are processing the task we may have gotten) will reset the bit.
-    iree_atomic_task_affinity_set_fetch_and(
-        &worker->executor->worker_pending_mask, ~worker->worker_bit,
-        iree_memory_order_seq_cst);
+    task = iree_task_queue_flush_from_lifo_slist(&worker->local_task_queue,
+                                                 &worker->mailbox_slist);
   }
 
   // If we ran out of work assigned to this specific worker try to steal some
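
Taken together, the reverted worker poll is a three-stage pipeline: pop the
local FIFO, flush the LIFO mailbox if that came up empty, and only then fall
back to theft. A condensed sketch of that control flow; the `if (!task)`
gating and the worker_try_steal_from_peers helper are assumptions standing in
for surrounding code elided from the hunks above:

    static iree_task_t* worker_try_get_task(iree_task_worker_t* worker) {
      // 1. Cheapest: anything already waiting in our local FIFO.
      iree_task_t* task =
          iree_task_queue_pop_front(&worker->local_task_queue);
      if (!task) {
        // 2. Drain incoming posts; returns the new front task if any arrived.
        task = iree_task_queue_flush_from_lifo_slist(
            &worker->local_task_queue, &worker->mailbox_slist);
      }
      if (!task) {
        // 3. Most expensive: try stealing from busy peers (hypothetical
        //    helper; the real code routes through the executor with the
        //    victim mask shown earlier).
        task = worker_try_steal_from_peers(worker);
      }
      return task;
    }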