[Async] Gracefully degrade io_uring slab registration on RLIMIT_MEMLOCK (#23654)
When IORING_REGISTER_BUFFERS[_UPDATE] fails with ENOMEM (the kernel cannot
pin pages because RLIMIT_MEMLOCK is too low), fall back to copy-based I/O
instead of returning a hard error. The region is still created with full
pool management support, but the send path uses regular sends instead of
SEND_ZC with fixed buffer indices. To signal this, base_buffer_index is now
an int16_t, with -1 meaning the region is not registered in the kernel's
fixed buffer table.
This fixes a flake on arm64 CI runners where RLIMIT_MEMLOCK is too low
for the 1MB buffer registration in ZeroCopySendRegisteredLargeTransfer.
ci-extra: all
Co-authored-by: Claude <noreply@anthropic.com>
diff --git a/runtime/src/iree/async/cts/buffer/registration_test.cc b/runtime/src/iree/async/cts/buffer/registration_test.cc
index 6b259e9..b76c89b 100644
--- a/runtime/src/iree/async/cts/buffer/registration_test.cc
+++ b/runtime/src/iree/async/cts/buffer/registration_test.cc
@@ -430,11 +430,13 @@
dmabuf_entry->region->type == IREE_ASYNC_REGION_TYPE_IOURING) {
// Slab occupies [base, base+count). Dmabuf occupies a single slot.
// They must not overlap.
- uint16_t slab_base = slab_region->handles.iouring.base_buffer_index;
- uint16_t slab_end =
- slab_base + static_cast<uint16_t>(slab_region->buffer_count);
- uint16_t dmabuf_slot =
+ int16_t slab_base = slab_region->handles.iouring.base_buffer_index;
+ ASSERT_GE(slab_base, 0) << "slab should have kernel-registered buffers";
+ int16_t slab_end =
+ slab_base + static_cast<int16_t>(slab_region->buffer_count);
+ int16_t dmabuf_slot =
dmabuf_entry->region->handles.iouring.base_buffer_index;
+ ASSERT_GE(dmabuf_slot, 0) << "dmabuf should have a kernel-registered slot";
EXPECT_TRUE(dmabuf_slot < slab_base || dmabuf_slot >= slab_end)
<< "dmabuf slot " << dmabuf_slot << " overlaps slab range ["
<< slab_base << ", " << slab_end << ")";
diff --git a/runtime/src/iree/async/platform/io_uring/proactor_registration.c b/runtime/src/iree/async/platform/io_uring/proactor_registration.c
index bbefffe..75f93a4 100644
--- a/runtime/src/iree/async/platform/io_uring/proactor_registration.c
+++ b/runtime/src/iree/async/platform/io_uring/proactor_registration.c
@@ -164,7 +164,7 @@
region->buffer_size = 0;
region->buffer_count = 0; // Not indexed (use address).
region->handles.iouring.buffer_group_id = -1; // Not a provided buffer ring.
- region->handles.iouring.base_buffer_index = 0;
+ region->handles.iouring.base_buffer_index = -1; // Not kernel-registered.
// Initialize the entry.
iree_async_buffer_registration_entry_t* entry = &registration->entry;
@@ -299,7 +299,7 @@
region->buffer_count = 1;
region->handles.iouring.buffer_group_id = -1;
region->handles.iouring.base_buffer_index =
- (uint16_t)registration->buffer_table_slot;
+ (int16_t)registration->buffer_table_slot;
} else {
// No kernel registration — mmap-only fallback.
region->type = IREE_ASYNC_REGION_TYPE_DMABUF;
@@ -535,15 +535,27 @@
iree_io_uring_sparse_table_release(proactor->buffer_table,
(uint16_t)base_slot,
(uint16_t)buffer_count);
- status = iree_make_status(iree_status_code_from_errno(saved_errno),
- "IORING_REGISTER_BUFFERS_UPDATE failed (%d)",
- saved_errno);
+ if (saved_errno == ENOMEM) {
+ // Kernel couldn't pin pages — RLIMIT_MEMLOCK is likely too low.
+ // Fall back to copy-based I/O instead of failing hard. The region
+ // will have base_buffer_index = -1 so the send path uses regular
+ // sends instead of SEND_ZC with fixed buffers.
+ IREE_TRACE_MESSAGE(
+ WARNING,
+ "io_uring: RLIMIT_MEMLOCK too low to pin pages for zero-copy "
+ "send; falling back to copy-based I/O (raise with "
+ "'ulimit -l unlimited')");
+ } else {
+ status = iree_make_status(
+ iree_status_code_from_errno(saved_errno),
+ "IORING_REGISTER_BUFFERS_UPDATE failed (%d)", saved_errno);
+ }
+ } else {
+ slab_region->fixed_buffer_base = (uint16_t)base_slot;
+ slab_region->fixed_buffer_count = (uint16_t)buffer_count;
+ slab_region->registered_fixed_buffers = true;
}
}
- if (iree_status_is_ok(status)) {
- slab_region->fixed_buffer_base = (uint16_t)base_slot;
- slab_region->fixed_buffer_count = (uint16_t)buffer_count;
- }
iree_io_uring_sparse_table_unlock(proactor->buffer_table);
} else {
long ret = 0;
@@ -553,13 +565,22 @@
} while (ret < 0 && errno == EINTR);
if (ret < 0) {
int saved_errno = errno;
- status =
- iree_make_status(iree_status_code_from_errno(saved_errno),
- "IORING_REGISTER_BUFFERS failed (%d)", saved_errno);
+ if (saved_errno == ENOMEM) {
+ IREE_TRACE_MESSAGE(
+ WARNING,
+ "io_uring: RLIMIT_MEMLOCK too low to pin pages for zero-copy "
+ "send; falling back to copy-based I/O (raise with "
+ "'ulimit -l unlimited')");
+ } else {
+ status = iree_make_status(iree_status_code_from_errno(saved_errno),
+ "IORING_REGISTER_BUFFERS failed (%d)",
+ saved_errno);
+ }
} else {
proactor->legacy_registered_buffer_count = (uint16_t)buffer_count;
slab_region->fixed_buffer_base = 0;
slab_region->fixed_buffer_count = (uint16_t)buffer_count;
+ slab_region->registered_fixed_buffers = true;
}
}
@@ -567,10 +588,6 @@
iree_allocator_free(slab_region->allocator, iovecs);
}
- if (iree_status_is_ok(status)) {
- slab_region->registered_fixed_buffers = true;
- }
-
return status;
}
@@ -763,7 +780,12 @@
region->buffer_size = buffer_size;
region->buffer_count = (uint32_t)buffer_count;
region->handles.iouring.buffer_group_id = buffer_group_id;
- region->handles.iouring.base_buffer_index = slab_region->fixed_buffer_base;
+ // -1 when fixed buffer registration was skipped (ENOMEM/RLIMIT_MEMLOCK):
+ // send path sees this and falls back to copy-based I/O.
+ region->handles.iouring.base_buffer_index =
+ slab_region->registered_fixed_buffers
+ ? (int16_t)slab_region->fixed_buffer_base
+ : (int16_t)-1;
*out_region = region;
IREE_TRACE_ZONE_END(z0);
diff --git a/runtime/src/iree/async/platform/io_uring/proactor_submit.c b/runtime/src/iree/async/platform/io_uring/proactor_submit.c
index 4fb8fe8..732fbf1 100644
--- a/runtime/src/iree/async/platform/io_uring/proactor_submit.c
+++ b/runtime/src/iree/async/platform/io_uring/proactor_submit.c
@@ -386,8 +386,14 @@
if (offset_in_buffer + span.length > buffer_size) return iree_ok_status();
// Calculate final buffer index with overflow check.
- uint16_t base_buffer_index = region->handles.iouring.base_buffer_index;
- uint64_t final_index = (uint64_t)base_buffer_index + buffer_index_offset;
+ int16_t base_buffer_index = region->handles.iouring.base_buffer_index;
+
+ // -1 = region has indexed buffers for pool management but is not
+ // kernel-registered for zero-copy (e.g., RLIMIT_MEMLOCK too low).
+ if (base_buffer_index < 0) return iree_ok_status();
+
+ uint64_t final_index =
+ (uint64_t)(uint16_t)base_buffer_index + buffer_index_offset;
// Index overflow: registration allowed too many buffers or base_buffer_index
// is corrupt.
@@ -395,7 +401,7 @@
return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
"fixed buffer index %" PRIu64
" exceeds uint16 maximum; "
- "base_buffer_index=%" PRIu16 " + offset=%" PRIu64,
+ "base_buffer_index=%" PRId16 " + offset=%" PRIu64,
final_index, base_buffer_index,
buffer_index_offset);
}
diff --git a/runtime/src/iree/async/region.h b/runtime/src/iree/async/region.h
index 253c3f6..834b454 100644
--- a/runtime/src/iree/async/region.h
+++ b/runtime/src/iree/async/region.h
@@ -191,7 +191,9 @@
// For fixed buffer table (send - application selects buffer).
// buf_index = span.offset / buffer_size + base_buffer_index
// Starting index in kernel's fixed buffer table.
- uint16_t base_buffer_index;
+ // -1 if not registered in the kernel's fixed buffer table (e.g.,
+ // RLIMIT_MEMLOCK too low to pin pages); send falls back to copy I/O.
+ int16_t base_buffer_index;
} iouring;
struct {
int fd;