Fixing task system off-by-one that was preventing proper distribution. (#6482)
The off-by-one caused a single shard to execute more tiles than it should in
a single iteration, leading to unbalanced distribution when the tile count
was low.
Fixes #5568.
diff --git a/iree/task/task.c b/iree/task/task.c
index 160b6c1..51fc703 100644
--- a/iree/task/task.c
+++ b/iree/task/task.c
@@ -824,10 +824,6 @@
tiles_per_reservation,
iree_memory_order_relaxed);
while (tile_base < tile_count) {
- const uint32_t next_tile_base = iree_atomic_fetch_add_int32(
- &shared_state->tile_index, tiles_per_reservation,
- iree_memory_order_relaxed);
-
const uint32_t tile_range =
iree_min(tile_base + tiles_per_reservation, tile_count);
for (uint32_t tile_index = tile_base; tile_index < tile_range;
@@ -865,7 +861,9 @@
}
}
- tile_base = next_tile_base;
+ tile_base = iree_atomic_fetch_add_int32(&shared_state->tile_index,
+ tiles_per_reservation,
+ iree_memory_order_relaxed);
}
// Push aggregate statistics up to the dispatch.