[hip] Fix race in the cleanup of queue read operations. (#19645)

Fix a race when looping over the data chunks to read. If we dispatch to
the secondary thread and the cleanup executes before we reach the next
loop iteration (the next line of code), we will read deallocated memory.

This also retains the source file for the duration of the read
operation. The missing retain was not causing any issues, but holding
the reference seems prudent either way.

Signed-off-by: Andrew Woloszyn <andrew.woloszyn@gmail.com>
diff --git a/runtime/src/iree/hal/drivers/hip/hip_device.c b/runtime/src/iree/hal/drivers/hip/hip_device.c
index a2388d8..4943a81 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_device.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_device.c
@@ -1632,6 +1632,7 @@
   if (!data) {
     return;
   }
+  iree_hal_file_release(data->source_file);
   iree_hal_resource_release(data->target_buffer);
   iree_hal_hip_semaphore_callback_data_deinitialize(&data->base);
   iree_allocator_free(data->base.host_allocator, data);
@@ -1890,6 +1891,9 @@
             device, device->cleanup_thread, data->base.signal_semaphore_list,
             device_ordinal, &iree_hal_hip_device_complete_queue_read_operation,
             data);
+        // Break here because data could immediately be cleaned up before the
+        // next loop iteration.
+        break;
       } else {
         status = iree_hal_hip_device_stream_add_cleanup(
             device, device->cleanup_thread, device_ordinal,
@@ -1962,6 +1966,7 @@
   callback_data->source_offset = source_offset;
   callback_data->target_buffer = target_buffer;
   iree_hal_resource_retain(target_buffer);
+  iree_hal_file_retain(source_file);
   callback_data->target_offset = target_offset;
   callback_data->length = length;
   callback_data->flags = flags;