// Copyright 2020 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// clang-format off: must be included before all other headers.
#include "iree/base/internal/threading_impl.h"
// clang-format on
#if defined(IREE_PLATFORM_WINDOWS)
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "iree/base/internal/atomics.h"
#include "iree/base/internal/threading.h"
// Great documentation:
// https://www.microsoftpressstore.com/articles/article.aspx?p=2233328
// State for a single Win32 thread created by iree_thread_create.
// Reference counted via |ref_count|; torn down by iree_thread_delete.
struct iree_thread_t {
// Reference count; iree_thread_release deletes the thread when the last
// reference is dropped.
iree_atomic_ref_count_t ref_count;
// Allocator used to allocate this struct and used again to free it.
iree_allocator_t allocator;
// NUL-terminated thread name copied (and truncated to fit) from the creation
// params.
char name[32];
// Handle returned by CreateThread; closed in iree_thread_delete.
HANDLE handle;
// Thread ID written by CreateThread; used to detect self-deletion.
DWORD id;
// User entry point; cleared by the start routine once it has been consumed.
iree_thread_entry_t entry;
// Opaque argument passed to |entry|; cleared together with |entry|.
void* entry_arg;
// 1 if the thread was created suspended and has not been resumed yet, else 0.
iree_atomic_int32_t is_suspended;
// Thread-safe (has its own synchronization).
iree_thread_override_list_t qos_override_list;
};
// Forward declaration: iree_thread_create references this function before its
// definition later in this file.
static void iree_thread_set_priority_class(
iree_thread_t* thread, iree_thread_priority_class_t priority_class);
// Sets the name of the thread referenced by |handle| to the NUL-terminated
// UTF-8 string |name|.
//
// This is best-effort: failures are ignored and the thread is simply left
// unnamed. Two mechanisms are tried in order:
//   1. SetThreadDescription, resolved dynamically from Kernel32.dll.
//   2. The legacy debugger exception protocol, only if a debugger is attached.
//
// See:
// https://docs.microsoft.com/en-us/visualstudio/debugger/how-to-set-a-thread-name-in-native-code
static void iree_thread_set_name(HANDLE handle, const char* name) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // Try first to use the modern SetThreadDescription API.
  // This will work even if a debugger is not attached meaning that tools that
  // don't use the debugger API can still query thread names. It's only
  // available on Win10+.
  typedef HRESULT(WINAPI * SetThreadDescriptionFn)(HANDLE hThread,
                                                   PCWSTR lpThreadDescription);
  SetThreadDescriptionFn pSetThreadDescription =
      (SetThreadDescriptionFn)GetProcAddress(GetModuleHandleW(L"Kernel32.dll"),
                                             "SetThreadDescription");
  if (pSetThreadDescription) {
    // Sized to match the 32-byte iree_thread_t::name storage: a UTF-8 string
    // never converts to more UTF-16 code units than it has bytes, so any name
    // that fits there (31 bytes + NUL) also fits here.
    wchar_t name_wide[32] = {0};
    // With a source length of -1 the terminating NUL is converted as well and
    // counted in the returned length; 0 indicates failure (invalid UTF-8 or a
    // name too long for the buffer), in which case the buffer contents are
    // not usable and the name is not applied.
    const int name_wide_length =
        MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, name, -1, name_wide,
                            (int)IREE_ARRAYSIZE(name_wide));
    if (name_wide_length > 0) {
      pSetThreadDescription(handle, name_wide);
    }
    IREE_TRACE_ZONE_END(z0);
    return;
  }

  if (!IsDebuggerPresent()) {
    // The name is only captured if a debugger is attached so we can avoid
    // doing any of the work if none is present. This means that a debugger
    // attached to the process after thread creation won't see thread names but
    // that's a rare case anyway.
    IREE_TRACE_ZONE_END(z0);
    return;
  }

  // Legacy path: raise the well-known 0x406D1388 exception carrying the name
  // and swallow it in the handler below.
#pragma pack(push, 8)
  struct THREADNAME_INFO {
    DWORD dwType;      // Must be 0x1000.
    LPCSTR szName;     // Pointer to name (in user addr space).
    DWORD dwThreadID;  // Thread ID (-1=caller thread).
    DWORD dwFlags;     // Reserved for future use, must be zero.
  };
#pragma pack(pop)

#pragma warning(push)
#pragma warning(disable : 6320 6322)
  struct THREADNAME_INFO info;
  info.dwType = 0x1000;
  info.szName = name;
  info.dwThreadID = GetThreadId(handle);
  info.dwFlags = 0;
  __try {
    RaiseException(0x406D1388u, 0, sizeof(info) / sizeof(ULONG_PTR),
                   (ULONG_PTR*)(&info));
  } __except (EXCEPTION_EXECUTE_HANDLER) {
  }
#pragma warning(pop)

  IREE_TRACE_ZONE_END(z0);
}
// Native entry point handed to CreateThread. |param| is the iree_thread_t
// allocated by iree_thread_create.
//
// The struct is expected to outlive this routine: when iree_thread_delete is
// called from another thread it joins this thread before freeing the struct.
static DWORD WINAPI iree_thread_start_routine(LPVOID param) {
  iree_thread_t* self = (iree_thread_t*)param;

  // Tracy requires the name to be set from the thread being named.
  IREE_TRACE_SET_THREAD_NAME(self->name);

  // Take the user entry point and its argument out of the struct and clear
  // the fields: their lifetime is not owned by the thread object so they must
  // not be observed again once the entry point is running.
  const iree_thread_entry_t user_entry = self->entry;
  void* const user_arg = self->entry_arg;
  self->entry_arg = NULL;
  self->entry = NULL;

  // Hand control to the user entry point. Returning its result directly lets
  // the compiler emit a tail call, which keeps debugger call stacks clean.
  return (DWORD)user_entry(user_arg);
}
// Creates a new thread that will call |entry| with |entry_arg|.
//
// |params| supplies the thread name (truncated to fit the internal 32-byte
// buffer), stack size, priority class, initial affinity, and whether the
// thread starts suspended. The thread object is allocated from |allocator|
// and returned in |out_thread| with a single reference owned by the caller.
//
// Returns the allocation status if the thread object cannot be allocated and
// IREE_STATUS_INTERNAL (with the Win32 error code) if CreateThread fails; in
// the latter case |out_thread| is set to NULL and nothing is leaked.
iree_status_t iree_thread_create(iree_thread_entry_t entry, void* entry_arg,
                                 iree_thread_create_params_t params,
                                 iree_allocator_t allocator,
                                 iree_thread_t** out_thread) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // Allocate our thread struct; we'll use it to shuttle params into the thread
  // (including the user-specified entry_arg).
  iree_thread_t* thread = NULL;
  iree_status_t status =
      iree_allocator_malloc(allocator, sizeof(*thread), (void**)&thread);
  if (!iree_status_is_ok(status)) {
    IREE_TRACE_ZONE_END(z0);
    return status;
  }
  iree_atomic_ref_count_init(&thread->ref_count);
  thread->allocator = allocator;
  // Give the native fields defined values until CreateThread fills them in.
  thread->handle = NULL;
  thread->id = 0;
  thread->entry = entry;
  thread->entry_arg = entry_arg;
  strncpy_s(thread->name, IREE_ARRAYSIZE(thread->name), params.name.data,
            min(params.name.size, IREE_ARRAYSIZE(thread->name) - 1));
  iree_atomic_store_int32(&thread->is_suspended,
                          params.create_suspended ? 1 : 0,
                          iree_memory_order_relaxed);
  iree_thread_override_list_initialize(iree_thread_set_priority_class,
                                       params.priority_class, thread->allocator,
                                       &thread->qos_override_list);

  // Publish the thread before it is created; it may begin running before
  // CreateThread returns.
  *out_thread = thread;

  // Create the thread either suspended or running as the user requested.
  {
    IREE_TRACE_ZONE_BEGIN_NAMED(z1, "CreateThread");
    thread->handle = CreateThread(
        NULL, params.stack_size, iree_thread_start_routine, thread,
        params.create_suspended ? CREATE_SUSPENDED : 0, &thread->id);
    IREE_TRACE_ZONE_END(z1);
  }
  // CreateThread reports failure with NULL (not INVALID_HANDLE_VALUE).
  if (thread->handle == NULL) {
    // Capture the error before any cleanup call has a chance to overwrite it.
    const DWORD error_code = GetLastError();
    *out_thread = NULL;
    // No native thread exists, so tear down only what was initialized above
    // instead of going through iree_thread_release, which would resume, wait
    // on, and close an invalid handle.
    iree_thread_override_list_deinitialize(&thread->qos_override_list);
    iree_allocator_free(allocator, thread);
    IREE_TRACE_ZONE_END(z0);
    return iree_make_status(IREE_STATUS_INTERNAL,
                            "thread creation failed with %lu", error_code);
  }

  // Immediately set thread properties before resuming (so that we don't
  // start on the wrong core/at the wrong priority).
  if (!iree_string_view_is_empty(params.name)) {
    iree_thread_set_name(thread->handle, thread->name);
  }
  if (params.priority_class != IREE_THREAD_PRIORITY_CLASS_NORMAL) {
    iree_thread_set_priority_class(thread, params.priority_class);
  }
  if (params.initial_affinity.specified) {
    iree_thread_request_affinity(thread, params.initial_affinity);
  }

  IREE_TRACE_ZONE_END(z0);
  return iree_ok_status();
}
// Destroys |thread| once its last reference has been released: makes sure the
// native thread has run to completion, closes its handle, and frees the
// struct with the allocator it was created with.
static void iree_thread_delete(iree_thread_t* thread) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // A thread that is still suspended would never exit, so resume it first.
  iree_thread_resume(thread);

  const HANDLE native_handle = thread->handle;
  const DWORD caller_id = GetCurrentThreadId();
  if (caller_id != thread->id) {
    // Join with the thread. A thread may drop the last reference to itself,
    // in which case waiting here would deadlock, so the join is skipped.
    WaitForSingleObject(native_handle, INFINITE);
  }
  CloseHandle(native_handle);

  iree_thread_override_list_deinitialize(&thread->qos_override_list);

  // Copy the allocator out first as it lives in the memory being freed.
  iree_allocator_t owning_allocator = thread->allocator;
  iree_allocator_free(owning_allocator, thread);

  IREE_TRACE_ZONE_END(z0);
}
// Adds a reference to |thread|. Passing NULL is a no-op.
void iree_thread_retain(iree_thread_t* thread) {
  if (!thread) return;
  iree_atomic_ref_count_inc(&thread->ref_count);
}
// Drops a reference to |thread| and deletes it when the decrement reports
// that this was the last one. Passing NULL is a no-op.
void iree_thread_release(iree_thread_t* thread) {
  if (!thread) return;
  // A result of 1 from the decrement is treated as "last reference dropped".
  if (iree_atomic_ref_count_dec(&thread->ref_count) != 1) return;
  iree_thread_delete(thread);
}
// Returns the Win32 thread ID recorded for |thread|, widened to uintptr_t.
uintptr_t iree_thread_id(iree_thread_t* thread) {
  const DWORD native_id = thread->id;
  return (uintptr_t)native_id;
}
// Sets the thread priority to the given |priority_class| immediately.
//
// Maps each IREE priority class onto the corresponding Win32 THREAD_PRIORITY_*
// level. A value not covered by the switch leaves the priority at
// THREAD_PRIORITY_NORMAL. The result of SetThreadPriority is ignored
// (best-effort).
static void iree_thread_set_priority_class(
    iree_thread_t* thread, iree_thread_priority_class_t priority_class) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // SetThreadPriority takes a signed int and several THREAD_PRIORITY_*
  // constants are negative, so the value must not be stored as an unsigned
  // DWORD.
  int priority = THREAD_PRIORITY_NORMAL;
  switch (priority_class) {
    case IREE_THREAD_PRIORITY_CLASS_LOWEST:
      priority = THREAD_PRIORITY_LOWEST;
      break;
    case IREE_THREAD_PRIORITY_CLASS_LOW:
      priority = THREAD_PRIORITY_BELOW_NORMAL;
      break;
    case IREE_THREAD_PRIORITY_CLASS_NORMAL:
      priority = THREAD_PRIORITY_NORMAL;
      break;
    case IREE_THREAD_PRIORITY_CLASS_HIGH:
      priority = THREAD_PRIORITY_ABOVE_NORMAL;
      break;
    case IREE_THREAD_PRIORITY_CLASS_HIGHEST:
      priority = THREAD_PRIORITY_HIGHEST;
      break;
  }
  SetThreadPriority(thread->handle, priority);

  IREE_TRACE_ZONE_END(z0);
}
// Begins a priority class override on |thread| by adding an entry for
// |priority_class| to the thread's override list. Returns whatever
// iree_thread_override_list_add returns; pass it to iree_thread_override_end
// to end the override.
iree_thread_override_t* iree_thread_priority_class_override_begin(
    iree_thread_t* thread, iree_thread_priority_class_t priority_class) {
  IREE_TRACE_ZONE_BEGIN(z0);
  iree_thread_override_list_t* override_list = &thread->qos_override_list;
  iree_thread_override_t* added_override =
      iree_thread_override_list_add(override_list, thread, priority_class);
  IREE_TRACE_ZONE_END(z0);
  return added_override;
}
// Ends an override previously returned by
// iree_thread_priority_class_override_begin. Passing NULL is a no-op (and is
// not traced).
void iree_thread_override_end(iree_thread_override_t* override) {
  if (override) {
    IREE_TRACE_ZONE_BEGIN(z0);
    iree_thread_override_remove_self(override);
    IREE_TRACE_ZONE_END(z0);
  }
}
// Requests that |thread| be scheduled according to |affinity|.
//
// Does nothing if |affinity| is not specified. Otherwise restricts the thread
// to processor |affinity.id| of processor group |affinity.group| (plus the
// next processor bit when |affinity.smt| is set) and marks that processor as
// the ideal one. This is best-effort: Win32 failures are ignored, and a
// processor id that cannot be represented in a KAFFINITY mask is ignored.
void iree_thread_request_affinity(iree_thread_t* thread,
                                  iree_thread_affinity_t affinity) {
  if (!affinity.specified) return;
  IREE_TRACE_ZONE_BEGIN(z0);

#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
  char affinity_desc[32];
  int affinity_desc_length = snprintf(
      affinity_desc, IREE_ARRAYSIZE(affinity_desc), "group=%d, id=%d, smt=%d",
      affinity.group, affinity.id, affinity.smt);
  IREE_TRACE_ZONE_APPEND_TEXT(z0, affinity_desc, affinity_desc_length);
#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION

  // Shifting by the width of the mask type or more is undefined behavior, so
  // reject processor ids that have no corresponding bit in a KAFFINITY.
  const uint32_t processor_id = (uint32_t)affinity.id;
  const uint32_t mask_bit_count = (uint32_t)(sizeof(KAFFINITY) * 8);
  if (processor_id >= mask_bit_count) {
    IREE_TRACE_ZONE_END(z0);
    return;
  }

  GROUP_AFFINITY group_affinity;
  memset(&group_affinity, 0, sizeof(group_affinity));
  group_affinity.Group = affinity.group;
  // Build the mask in the KAFFINITY type itself so the shift is performed at
  // the mask's own width on both 32-bit and 64-bit targets.
  KAFFINITY affinity_mask = (KAFFINITY)1 << processor_id;
  if (affinity.smt && processor_id + 1 < mask_bit_count) {
    // Also allow the next processor bit (only if it exists in the mask).
    affinity_mask |= (KAFFINITY)1 << (processor_id + 1);
  }
  group_affinity.Mask = affinity_mask;
  SetThreadGroupAffinity(thread->handle, &group_affinity, NULL);

  // TODO(benvanik): figure out if this is a bad thing; sometimes it can result
  // in the scheduler alternating cores within the affinity mask; in theory it's
  // just an SMT ID change and doesn't have any impact on caches but it'd be
  // good to check.
  PROCESSOR_NUMBER ideal_processor;
  memset(&ideal_processor, 0, sizeof(ideal_processor));
  ideal_processor.Group = affinity.group;
  ideal_processor.Number = affinity.id;
  SetThreadIdealProcessorEx(thread->handle, &ideal_processor, NULL);

  IREE_TRACE_ZONE_END(z0);
}
// Resumes |thread| if it is still flagged as suspended. The flag is cleared
// with an atomic compare-exchange so that ResumeThread is issued at most once
// per 1 -> 0 transition even when called concurrently.
void iree_thread_resume(iree_thread_t* thread) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // NOTE: suspend/resume depth is not tracked because suspend is not exposed
  // as an operation (yet). If it were, suspends and resumes would have to be
  // balanced so as not to interfere with debuggers/profilers that suspend
  // threads for their own purposes.
  int32_t suspended_value = 1;
  if (!iree_atomic_compare_exchange_strong_int32(
          &thread->is_suspended, &suspended_value, 0,
          iree_memory_order_acq_rel,
          iree_memory_order_relaxed /* expected is unused */)) {
    // Not suspended (or another caller already resumed it): nothing to do.
    IREE_TRACE_ZONE_END(z0);
    return;
  }
  ResumeThread(thread->handle);

  IREE_TRACE_ZONE_END(z0);
}
// Blocks the caller, with no timeout, until the native thread behind |thread|
// becomes signaled.
void iree_thread_join(iree_thread_t* thread) {
  IREE_TRACE_ZONE_BEGIN(z0);
  const HANDLE native_handle = thread->handle;
  WaitForSingleObject(native_handle, INFINITE);
  IREE_TRACE_ZONE_END(z0);
}
// Yields by invoking the Win32 YieldProcessor macro.
void iree_thread_yield(void) {
  YieldProcessor();
}
#endif // IREE_PLATFORM_WINDOWS