Remove the tail issue task behavior from iree_hal_task_queue.
This never worked quite right, and the upcoming submission list work will
probably be a better way of doing it; if not, the implementation will at
least be different from what's here today.
diff --git a/runtime/src/iree/hal/drivers/local_task/task_queue.c b/runtime/src/iree/hal/drivers/local_task/task_queue.c
index 4339e98..6521842 100644
--- a/runtime/src/iree/hal/drivers/local_task/task_queue.c
+++ b/runtime/src/iree/hal/drivers/local_task/task_queue.c
@@ -219,8 +219,9 @@
   return status;
 }
 
-// Cleanup for iree_hal_task_queue_issue_cmd_t that resets the queue state
-// tracking the last in-flight issue.
+// Cleanup for iree_hal_task_queue_issue_cmd_t to drop all resources.
+// Any that need to remain live during execution are retained by the tasks
+// performing that execution.
 static void iree_hal_task_queue_issue_cmd_cleanup(
     iree_task_t* task, iree_status_code_t status_code) {
   iree_hal_task_queue_issue_cmd_t* cmd = (iree_hal_task_queue_issue_cmd_t*)task;
@@ -230,13 +231,6 @@
   for (iree_host_size_t i = 0; i < cmd->command_buffer_count; ++i) {
     iree_hal_command_buffer_release(cmd->command_buffers[i]);
   }
-
-  // Reset queue tail issue task if it was us.
-  iree_slim_mutex_lock(&cmd->queue->mutex);
-  if (cmd->queue->tail_issue_task == task) {
-    cmd->queue->tail_issue_task = NULL;
-  }
-  iree_slim_mutex_unlock(&cmd->queue->mutex);
 }
 
 // Allocates and initializes a iree_hal_task_queue_issue_cmd_t task.
@@ -400,9 +394,7 @@
 
   iree_task_scope_initialize(identifier, &out_queue->scope);
 
-  iree_slim_mutex_initialize(&out_queue->mutex);
   iree_hal_task_queue_state_initialize(&out_queue->state);
-  out_queue->tail_issue_task = NULL;
 
   IREE_TRACE_ZONE_END(z0);
 }
@@ -413,12 +405,7 @@
   iree_status_ignore(
       iree_task_scope_wait_idle(&queue->scope, IREE_TIME_INFINITE_FUTURE));
 
-  iree_slim_mutex_lock(&queue->mutex);
-  IREE_ASSERT(!queue->tail_issue_task);
-  iree_slim_mutex_unlock(&queue->mutex);
-
   iree_hal_task_queue_state_deinitialize(&queue->state);
-  iree_slim_mutex_deinitialize(&queue->mutex);
   iree_task_scope_deinitialize(&queue->scope);
   iree_task_executor_release(queue->executor);
 
@@ -485,20 +472,6 @@
     iree_task_submission_enqueue(&submission, &issue_cmd->task.header);
   }
 
-  iree_slim_mutex_lock(&queue->mutex);
-
-  // If there is an in-flight issue pending then we need to chain onto that
-  // so that we ensure FIFO submission order is preserved. Note that we are only
-  // waiting for the issue to complete and *not* all of the commands that are
-  // issued.
-  if (queue->tail_issue_task != NULL) {
-    iree_task_set_completion_task(queue->tail_issue_task,
-                                  &issue_cmd->task.header);
-  }
-  queue->tail_issue_task = &issue_cmd->task.header;
-
-  iree_slim_mutex_unlock(&queue->mutex);
-
   // Submit the tasks immediately. The executor may queue them up until we
   // force the flush after all batches have been processed.
   iree_task_executor_submit(queue->executor, &submission);
diff --git a/runtime/src/iree/hal/drivers/local_task/task_queue.h b/runtime/src/iree/hal/drivers/local_task/task_queue.h
index 7084795..ee255d7 100644
--- a/runtime/src/iree/hal/drivers/local_task/task_queue.h
+++ b/runtime/src/iree/hal/drivers/local_task/task_queue.h
@@ -33,23 +33,10 @@
   // This allows for easy waits on all outstanding queue tasks as well as
   // differentiation of tasks within the executor.
   iree_task_scope_t scope;
-
-  // Guards queue state. Submissions and waits may come from any user thread and
-  // we do a bit of bookkeeping during command buffer issue that will come from
-  // an executor thread.
-  iree_slim_mutex_t mutex;
-
   // State tracking used during command buffer issue.
   // The intra-queue synchronization (barriers/events) carries across command
   // buffers and this is used to rendezvous the tasks in each set.
   iree_hal_task_queue_state_t state;
-
-  // The last active iree_hal_task_queue_issue_cmd_t submitted to the queue.
-  // If this is NULL then there are no issues pending - though there may still
-  // be active work that was previously issued. This is used to chain together
-  // issues in FIFO order such that all submissions *issue* in order but not
-  // *execute* in order.
-  iree_task_t* tail_issue_task;
 } iree_hal_task_queue_t;
 
 void iree_hal_task_queue_initialize(iree_string_view_t identifier,
diff --git a/runtime/src/iree/hal/drivers/local_task/task_semaphore.c b/runtime/src/iree/hal/drivers/local_task/task_semaphore.c
index 8ae35ca..d2c03d2 100644
--- a/runtime/src/iree/hal/drivers/local_task/task_semaphore.c
+++ b/runtime/src/iree/hal/drivers/local_task/task_semaphore.c
@@ -208,6 +208,7 @@
     iree_timeout_t timeout, iree_hal_task_timepoint_t* out_timepoint) {
   IREE_RETURN_IF_ERROR(
       iree_event_pool_acquire(semaphore->event_pool, 1, &out_timepoint->event));
+  out_timepoint->semaphore = &semaphore->base;
   iree_hal_semaphore_acquire_timepoint(
       &semaphore->base, minimum_value, timeout,
       (iree_hal_semaphore_callback_t){
@@ -370,7 +371,6 @@
         status = iree_hal_task_semaphore_acquire_timepoint(
             semaphore, semaphore_list->payload_values[i], timeout, timepoint);
         if (iree_status_is_ok(status)) {
-          timepoint->semaphore = &semaphore->base;
           status = iree_wait_set_insert(wait_set, timepoint->event);
         }
       }
diff --git a/runtime/src/iree/hal/semaphore.c b/runtime/src/iree/hal/semaphore.c
index 79089ae..a7c54ed 100644
--- a/runtime/src/iree/hal/semaphore.c
+++ b/runtime/src/iree/hal/semaphore.c
@@ -25,6 +25,7 @@
   IREE_ASSERT_ARGUMENT(out_semaphore);
   *out_semaphore = NULL;
   IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, initial_value);
   iree_status_t status =
       IREE_HAL_VTABLE_DISPATCH(device, iree_hal_device, create_semaphore)(
           device, initial_value, out_semaphore);
@@ -40,6 +41,7 @@
   IREE_TRACE_ZONE_BEGIN(z0);
   iree_status_t status =
       _VTABLE_DISPATCH(semaphore, query)(semaphore, out_value);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, *out_value);
   IREE_TRACE_ZONE_END(z0);
   return status;
 }
@@ -48,6 +50,7 @@
 iree_hal_semaphore_signal(iree_hal_semaphore_t* semaphore, uint64_t new_value) {
   IREE_ASSERT_ARGUMENT(semaphore);
   IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, new_value);
   iree_status_t status =
       _VTABLE_DISPATCH(semaphore, signal)(semaphore, new_value);
   IREE_TRACE_ZONE_END(z0);
@@ -58,6 +61,7 @@
                                              iree_status_t status) {
   IREE_ASSERT_ARGUMENT(semaphore);
   IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, iree_status_code(status));
   _VTABLE_DISPATCH(semaphore, fail)(semaphore, status);
   IREE_TRACE_ZONE_END(z0);
 }
@@ -66,6 +70,7 @@
     iree_hal_semaphore_t* semaphore, uint64_t value, iree_timeout_t timeout) {
   IREE_ASSERT_ARGUMENT(semaphore);
   IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE(z0, value);
   iree_status_t status =
       _VTABLE_DISPATCH(semaphore, wait)(semaphore, value, timeout);
   IREE_TRACE_ZONE_END(z0);