blob: fb34bccb9207a24f213e843db6ba624d5f71c249 [file]
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "iree/hal/vulkan/emulated_timeline_semaphore.h"
#include "absl/container/inlined_vector.h"
#include "absl/synchronization/mutex.h"
#include "absl/utility/utility.h"
#include "iree/base/time.h"
#include "iree/base/tracing.h"
#include "iree/hal/vulkan/dynamic_symbols.h"
#include "iree/hal/vulkan/status_util.h"
namespace iree {
namespace hal {
namespace vulkan {
// Creates an EmulatedTimelineSemaphore whose timeline starts at
// |initial_value|.
//
// Every argument is moved straight through to the constructor; the object is
// constructed with make_ref and returned as a ref_ptr<Semaphore> wrapped in a
// StatusOr. No failure path is visible in this function itself.
// static
StatusOr<ref_ptr<Semaphore>> EmulatedTimelineSemaphore::Create(
ref_ptr<VkDeviceHandle> logical_device,
std::function<Status(Semaphore*)> on_semaphore_signal,
std::function<void(Semaphore*)> on_semaphore_failure,
std::function<void(absl::Span<VkFence>)> on_fence_signal,
ref_ptr<TimePointSemaphorePool> semaphore_pool, uint64_t initial_value) {
IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Create");
return make_ref<EmulatedTimelineSemaphore>(
std::move(logical_device), std::move(on_semaphore_signal),
std::move(on_semaphore_failure), std::move(on_fence_signal),
std::move(semaphore_pool), initial_value);
}
// Constructs the semaphore, taking ownership of all by-value arguments.
//
// |logical_device| is the device used for vkWaitForFences in Wait().
// |on_semaphore_signal| is invoked from Signal() after a host-side signal.
// |on_semaphore_failure| is invoked from TryToAdvanceTimeline() when the
// timeline has entered the failure state.
// |on_fence_signal| receives the fences that TryToAdvanceTimeline() found to be
// resolved.
// |semaphore_pool| supplies and recycles the time point semaphores.
// |initial_value| seeds the signaled timeline value.
EmulatedTimelineSemaphore::EmulatedTimelineSemaphore(
ref_ptr<VkDeviceHandle> logical_device,
std::function<Status(Semaphore*)> on_semaphore_signal,
std::function<void(Semaphore*)> on_semaphore_failure,
std::function<void(absl::Span<VkFence>)> on_fence_signal,
ref_ptr<TimePointSemaphorePool> semaphore_pool, uint64_t initial_value)
: signaled_value_(initial_value),
logical_device_(std::move(logical_device)),
on_semaphore_signal_(std::move(on_semaphore_signal)),
on_semaphore_failure_(std::move(on_semaphore_failure)),
on_fence_signal_(std::move(on_fence_signal)),
semaphore_pool_(std::move(semaphore_pool)) {}
// Destroys the semaphore after verifying that nothing is still in flight.
//
// First resolves as many outstanding time points as possible, then checks
// (under mutex_) that none remain. Both checks use the IREE_CHECK* macros,
// which are presumably fatal on failure - confirm against their definition.
EmulatedTimelineSemaphore::~EmulatedTimelineSemaphore() {
IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::dtor");
// UINT64_MAX as the upper bound means "resolve everything that can be
// resolved".
IREE_CHECK_OK(TryToAdvanceTimeline(UINT64_MAX).status());
// The lock is only taken after the call above, which acquires mutex_ itself.
absl::MutexLock lock(&mutex_);
IREE_CHECK(outstanding_semaphores_.empty())
<< "Destroying an emulated timeline semaphore without first waiting on "
"outstanding signals";
}
// Returns the current timeline value.
//
// The timeline is first advanced as far as the outstanding time points allow.
// A signaled value of UINT64_MAX marks the failure state; in that case the
// recorded failure status is returned instead of a value.
StatusOr<uint64_t> EmulatedTimelineSemaphore::Query() {
  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Query");
  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::Query";

  // Only the error (if any) matters here; whether UINT64_MAX was "reached" is
  // irrelevant for a query.
  IREE_ASSIGN_OR_RETURN(bool advanced, TryToAdvanceTimeline(UINT64_MAX));
  static_cast<void>(advanced);

  const uint64_t current_value = signaled_value_.load();
  IREE_DVLOG(2) << "Current timeline value: " << current_value;

  // Common case: the timeline is healthy.
  if (current_value != UINT64_MAX) return current_value;

  // Failure state: status_ is guarded by mutex_.
  absl::MutexLock lock(&mutex_);
  return status_;
}
// Signals the timeline to |value| from the host.
//
// The new value is published atomically and must be strictly greater than the
// value it replaces. Afterwards the owner is notified through
// on_semaphore_signal_ so that it can make progress; any error from that
// callback is returned.
Status EmulatedTimelineSemaphore::Signal(uint64_t value) {
  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Signal");
  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::Signal";

  // Swap in the new value and keep the one it replaced for validation.
  const uint64_t signaled_value = signaled_value_.exchange(value);
  IREE_DVLOG(2) << "Previous value: " << signaled_value
                << "; new value: " << value;

  // Timeline values must only ever increase.
  IREE_CHECK(signaled_value < value)
      << "Attempting to signal a timeline value out of order; trying " << value
      << " but " << signaled_value << " already signaled";

  // A new value is visible now; let the device side react to it.
  IREE_RETURN_IF_ERROR(on_semaphore_signal_(this));
  return OkStatus();
}
// Blocks until the timeline reaches |value| or |deadline_ns| passes.
//
// Phase 1 polls: advance the timeline without blocking and, if that is not
// enough, look for the first outstanding time point whose value is >= |value|.
// Phase 2 blocks on that time point's signal fence with vkWaitForFences and
// then advances the timeline once more.
//
// Returns a deadline-exceeded error when no suitable time point shows up
// before the deadline, and an internal error when the chosen time point has no
// signal fence.
Status EmulatedTimelineSemaphore::Wait(uint64_t value, Time deadline_ns) {
  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Wait");
  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::Wait";

  VkFence fence = VK_NULL_HANDLE;
  do {
    IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Wait#loop");

    // Cheap check first: maybe the desired value has been reached already.
    IREE_ASSIGN_OR_RETURN(bool already_reached, TryToAdvanceTimeline(value));
    if (already_reached) return OkStatus();

    // Not there yet. Pick the earliest time point that, once signaled,
    // guarantees the timeline is at or beyond the desired value.
    absl::MutexLock lock(&mutex_);
    TimePointSemaphore* target = nullptr;
    for (TimePointSemaphore* point : outstanding_semaphores_) {
      if (point->value >= value) {
        target = point;
        break;
      }
    }

    // Nothing submitted yet that could satisfy the wait; re-check the deadline
    // and poll again (`continue` in a do-while evaluates the loop condition).
    // TODO(antiagainst): figure out a better way instead of the busy loop here.
    if (target == nullptr) continue;

    if (!target->signal_fence) {
      return InternalErrorBuilder(IREE_LOC)
             << "Timeline should have a signal fence for the first time "
                "point beyond the signaled value";
    }
    IREE_DVLOG(2) << "Found timepoint semaphore " << target
                  << " (value: " << target->value
                  << ") to wait for desired timeline value: " << value;
    fence = target->signal_fence->value();
    // A fence to block on has been found; leave the polling phase.
    break;
  } while (Now() < deadline_ns);

  // The polling phase ran out of time without finding anything to wait on.
  if (fence == VK_NULL_HANDLE) {
    return DeadlineExceededErrorBuilder(IREE_LOC)
           << "Deadline reached when waiting timeline semaphore";
  }

  uint64_t timeout_ns =
      static_cast<uint64_t>(DeadlineToRelativeTimeoutNanos(deadline_ns));
  VK_RETURN_IF_ERROR(logical_device_->syms()->vkWaitForFences(
      *logical_device_, /*fenceCount=*/1, &fence, /*waitAll=*/true,
      timeout_ns));

  // The fence wait finished; fold the result into the timeline state.
  return TryToAdvanceTimeline(value).status();
}
// Puts the timeline into the failure state.
//
// Stores |status| and then sets the signaled value to UINT64_MAX, both while
// holding mutex_. Query() returns the stored status once it observes
// UINT64_MAX as the signaled value.
void EmulatedTimelineSemaphore::Fail(Status status) {
IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::Fail");
absl::MutexLock lock(&mutex_);
// The status is recorded before the sentinel value is published.
status_ = std::move(status);
signaled_value_.store(UINT64_MAX);
}
// Returns a binary VkSemaphore that a queue submission can wait on in order to
// observe the timeline reaching |value|, or VK_NULL_HANDLE if no suitable time
// point is outstanding.
//
// A time point is suitable when it signals a value >= |value| (the same
// criterion Wait() uses) and does not have a waiter yet: a binary semaphore
// can only be waited on once, and a set wait_fence is what marks a time point
// as already taken (see CancelWaitSemaphore()). The chosen time point records
// |wait_fence| so that the waiter's completion can be tracked later.
//
// The previous condition required wait_fence to be already set. Newly acquired
// time points never have one, so the function could only ever return
// VK_NULL_HANDLE, and if it had matched it would have overwritten an existing
// waiter.
VkSemaphore EmulatedTimelineSemaphore::GetWaitSemaphore(
    uint64_t value, const ref_ptr<TimePointFence>& wait_fence) {
  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::GetWaitSemaphore");
  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::GetWaitSemaphore";

  absl::MutexLock lock(&mutex_);

  VkSemaphore semaphore = VK_NULL_HANDLE;
  for (TimePointSemaphore* point : outstanding_semaphores_) {
    // Skip time points that signal too early or that are already being waited
    // on by another submission.
    if (point->value >= value && !point->wait_fence) {
      point->wait_fence = add_ref(wait_fence);
      semaphore = point->semaphore;
      break;
    }
  }

  IREE_DVLOG(2) << "Binary VkSemaphore to wait on for timeline value (" << value
                << ") and wait fence (" << wait_fence.get()
                << "): " << semaphore;

  return semaphore;
}
// Undoes a previous GetWaitSemaphore() for the given binary |semaphore| by
// dropping the wait fence recorded on its time point.
//
// Returns an invalid-argument error when no outstanding time point owns
// |semaphore| or when that time point has no waiter to cancel.
Status EmulatedTimelineSemaphore::CancelWaitSemaphore(VkSemaphore semaphore) {
  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::CancelWaitSemaphore");
  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::CancelWaitSemaphore";

  absl::MutexLock lock(&mutex_);

  // Locate the first time point backed by this binary semaphore.
  TimePointSemaphore* owner = nullptr;
  for (TimePointSemaphore* candidate : outstanding_semaphores_) {
    if (candidate->semaphore == semaphore) {
      owner = candidate;
      break;
    }
  }

  if (owner == nullptr) {
    return InvalidArgumentErrorBuilder(IREE_LOC)
           << "No time point for the given semaphore";
  }
  if (!owner->wait_fence) {
    return InvalidArgumentErrorBuilder(IREE_LOC)
           << "Time point wasn't waited before";
  }

  owner->wait_fence = nullptr;
  IREE_DVLOG(2) << "Cancelled waiting on binary VkSemaphore: " << semaphore;
  return OkStatus();
}
// Registers a new time point that will signal the timeline to |value| and
// returns the binary VkSemaphore a queue submission should signal for it.
//
// |signal_fence| is the fence of that submission; it is what
// TryToAdvanceTimeline() and Wait() query to learn that the time point has
// been reached. The time point is inserted so that outstanding_semaphores_
// stays sorted by ascending value (after any existing entries with the same
// value).
//
// Returns a failed-precondition error if the timeline has already been
// signaled to |value| or beyond, the pool's error if no time point can be
// acquired, and an internal error if the acquired time point unexpectedly
// carries a waiter.
StatusOr<VkSemaphore> EmulatedTimelineSemaphore::GetSignalSemaphore(
    uint64_t value, const ref_ptr<TimePointFence>& signal_fence) {
  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::GetSignalSemaphore");
  IREE_DVLOG(2) << "EmulatedTimelineSemaphore::GetSignalSemaphore";

  if (signaled_value_.load() >= value) {
    return FailedPreconditionErrorBuilder(IREE_LOC)
           << "Timeline semaphore already signaled past " << value;
  }

  absl::MutexLock lock(&mutex_);

  // Find the first time point with a larger value. The iterator must be
  // advanced on every step: without the increment this loop never terminated
  // (while holding mutex_) as soon as the first outstanding time point had a
  // value <= |value|.
  auto insertion_point = outstanding_semaphores_.begin();
  for (; insertion_point != outstanding_semaphores_.end(); ++insertion_point) {
    if ((*insertion_point)->value > value) break;
  }

  IREE_ASSIGN_OR_RETURN(TimePointSemaphore * semaphore,
                        semaphore_pool_->Acquire());
  semaphore->value = value;
  semaphore->signal_fence = add_ref(signal_fence);
  if (semaphore->wait_fence) {
    // NOTE(review): the acquired time point is not handed back to the pool on
    // this path; confirm whether that matters for an invariant violation.
    return InternalErrorBuilder(IREE_LOC)
           << "Newly acquired time point semaphore should not have waiters";
  }
  outstanding_semaphores_.insert(insertion_point, semaphore);

  IREE_DVLOG(2) << "Timepoint semaphore to signal for timeline value (" << value
                << ") and signal fence (" << signal_fence.get()
                << "): " << semaphore
                << " (binary VkSemaphore: " << semaphore->semaphore << ")";

  return semaphore->semaphore;
}
// Advances the timeline as far as possible up to |to_upper_value| and then
// reports every fence that was found to be resolved through on_fence_signal_.
//
// Returns whatever the two-argument overload returns.
StatusOr<bool> EmulatedTimelineSemaphore::TryToAdvanceTimeline(
    uint64_t to_upper_value) {
  absl::InlinedVector<VkFence, 4> resolved_fences;
  StatusOr<bool> advance_result =
      TryToAdvanceTimeline(to_upper_value, &resolved_fences);

  // Nothing to report.
  if (resolved_fences.empty()) return advance_result;

  // The notification is issued from here rather than from the overload above
  // because that one runs with mutex_ held; calling out at this point avoids
  // a potential mutex deadlock.
  on_fence_signal_(absl::MakeSpan(resolved_fences));
  return advance_result;
}
// Resolves outstanding time points, in ascending value order, up to
// |to_upper_value| without blocking and advances the signaled timeline value
// accordingly.
//
// For every time point whose signal fence is found to be done (successfully or
// with an error) the fence handle is appended to |signaled_fences| (may be
// null). Time points that are signaled and have no pending waiter are returned
// to the pool.
//
// Returns true if the timeline value is >= |to_upper_value| on return and
// false otherwise. If a signal fence reports an error, the timeline enters the
// failure state (value UINT64_MAX), on_semaphore_failure_ is invoked, all
// remaining time points are released and the failure status is returned.
// Internal errors are returned when a time point violates the signal-fence
// invariants.
//
// Differences from the previous implementation:
//  * The local view of the timeline value is refreshed after every advance.
//    Before, a just-signaled time point that still had a waiter was examined
//    again with the stale value and reported as an internal error.
//  * The list is walked instead of re-reading front(). Before, a signaled time
//    point with an unfinished waiter stayed at the front and the loop spun on
//    it while holding mutex_.
//  * The result reflects the timeline value actually reached. Before, merely
//    seeing a time point beyond |to_upper_value| reported success even if the
//    timeline was still below it.
//  * Already-resolved time points are handed back to the pool on the
//    internal-error paths too.
StatusOr<bool> EmulatedTimelineSemaphore::TryToAdvanceTimeline(
    uint64_t to_upper_value, absl::InlinedVector<VkFence, 4>* signaled_fences) {
  IREE_TRACE_SCOPE0("EmulatedTimelineSemaphore::TryToAdvanceTimeline");
  IREE_DVLOG(3) << "EmulatedTimelineSemaphore::TryToAdvanceTimeline";

  uint64_t past_value = signaled_value_.load();
  IREE_DVLOG(3) << "Current timeline value: " << past_value
                << "; desired timeline value: " << to_upper_value;

  // Fast path for when already signaled past the desired value.
  if (past_value >= to_upper_value) return true;

  // We hold the lock during the entire resolve process so that we can resolve
  // to the furthest possible value.
  absl::MutexLock lock(&mutex_);

  IREE_DVLOG(3) << "# outstanding semaphores: "
                << outstanding_semaphores_.size();

  // The timeline has not signaled past the desired value and there is no
  // binary semaphore pending on GPU yet: certainly the timeline cannot
  // advance to the desired value.
  if (outstanding_semaphores_.empty()) return false;

  IntrusiveList<TimePointSemaphore> resolved_semaphores;

  // Drops a time point's signal fence, remembering the raw handle so that the
  // caller can report it once the lock is released.
  auto clear_signal_fence = [signaled_fences](ref_ptr<TimePointFence>& fence) {
    if (!fence) return;
    if (signaled_fences) signaled_fences->push_back(fence->value());
    fence = nullptr;
  };

  // Walk a snapshot of the list so that time points can be erased from
  // outstanding_semaphores_ while iterating and so that a time point that
  // cannot be recycled yet does not block the ones behind it.
  absl::InlinedVector<TimePointSemaphore*, 8> pending_semaphores;
  for (TimePointSemaphore* point : outstanding_semaphores_) {
    pending_semaphores.push_back(point);
  }

  for (TimePointSemaphore* semaphore : pending_semaphores) {
    IREE_DVLOG(3) << "Looking at timepoint semaphore " << semaphore << "..";
    IREE_DVLOG(3) << " value: " << semaphore->value;
    IREE_DVLOG(3) << " VkSemaphore: " << semaphore->semaphore;
    IREE_DVLOG(3) << " signal fence: " << semaphore->signal_fence.get();
    IREE_DVLOG(3) << " wait fence: " << semaphore->wait_fence.get();

    // If the current semaphore is for a value beyond our upper limit, then
    // early exit so that we don't spend time dealing with signals we don't yet
    // care about. This can prevent live lock where one thread is signaling
    // fences as fast/faster than another thread can consume them.
    if (semaphore->value > to_upper_value) break;

    // If the current semaphore is for a value not greater than the past
    // signaled value, then we know it was signaled previously. But there might
    // be a waiter on it on GPU.
    if (semaphore->value <= past_value) {
      if (semaphore->signal_fence) {
        semaphore_pool_->ReleaseResolved(&resolved_semaphores);
        return InternalErrorBuilder(IREE_LOC)
               << "Timeline should already signaled past this time point and "
                  "cleared the signal fence";
      }

      // If there are no waiters, we can recycle this semaphore now. If there
      // exists one waiter, then query its status and recycle on success. We
      // only handle success status here. Others will be handled when the fence
      // is checked for other semaphores' signaling status for the same queue
      // submission.
      if (!semaphore->wait_fence ||
          semaphore->wait_fence->GetStatus() == VK_SUCCESS) {
        semaphore->wait_fence = nullptr;
        outstanding_semaphores_.erase(semaphore);
        resolved_semaphores.push_back(semaphore);
        IREE_DVLOG(3) << "Resolved and recycling semaphore " << semaphore;
      }

      // Either recycled or still waited on by the GPU; in both cases move on
      // to the next time point.
      continue;
    }

    // This semaphore represents a value greater than the known previously
    // signaled value. We don't know its status so we need to really query now.
    if (!semaphore->signal_fence) {
      semaphore_pool_->ReleaseResolved(&resolved_semaphores);
      return InternalErrorBuilder(IREE_LOC)
             << "The status of this time point in the timeline should still be "
                "pending with a singal fence";
    }

    bool keep_resolving = true;
    const VkResult signal_status = semaphore->signal_fence->GetStatus();
    switch (signal_status) {
      case VK_SUCCESS:
        IREE_DVLOG(3) << "..semaphore signaled";
        signaled_value_.store(semaphore->value);
        // Keep the local view in sync so that later checks in this call see
        // the value that was just published.
        past_value = semaphore->value;
        clear_signal_fence(semaphore->signal_fence);
        // If no waiters, we can recycle this semaphore now. Otherwise it stays
        // outstanding until its waiter is found to be done.
        if (!semaphore->wait_fence) {
          outstanding_semaphores_.erase(semaphore);
          resolved_semaphores.push_back(semaphore);
          IREE_DVLOG(3) << "Resolved and recycling semaphore " << semaphore;
        }
        break;
      case VK_NOT_READY:
        // The fence has not been signaled yet so this is the furthest time
        // point we can go in this timeline.
        keep_resolving = false;
        IREE_DVLOG(3) << "..semaphore not yet signaled";
        break;
      default:
        // Fence indicates an error (device lost, out of memory, etc).
        // Propagate this back to our status (and thus any waiters).
        // Since we only take the first error we find we skip all remaining
        // fences.
        keep_resolving = false;
        clear_signal_fence(semaphore->signal_fence);
        status_ = VkResultToStatus(signal_status, IREE_LOC);
        signaled_value_.store(UINT64_MAX);
        break;
    }
    if (!keep_resolving) break;
  }

  IREE_DVLOG(3) << "Releasing " << resolved_semaphores.size()
                << " resolved semaphores; " << outstanding_semaphores_.size()
                << " still outstanding";
  semaphore_pool_->ReleaseResolved(&resolved_semaphores);
  if (!status_.ok()) {
    on_semaphore_failure_(this);
    semaphore_pool_->ReleaseUnresolved(&outstanding_semaphores_);
    return status_;
  }

  // Report what was actually reached rather than what was merely seen.
  return past_value >= to_upper_value;
}
} // namespace vulkan
} // namespace hal
} // namespace iree