/* Copyright (c) 2015-2025 The Khronos Group Inc.
 * Copyright (c) 2015-2025 Valve Corporation
 * Copyright (c) 2015-2025 LunarG, Inc.
 * Copyright (C) 2015-2025 Google Inc.
 * Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "state_tracker/queue_state.h"
#include "state_tracker/cmd_buffer_state.h"
#include "state_tracker/state_tracker.h"
#include "state_tracker/image_state.h"
#include "state_tracker/wsi_state.h"
#include "containers/small_vector.h"
#include "containers/small_container.h"

#include "profiling/profiling.h"

void vvl::QueueSubmission::BeginUse() {
    for (SemaphoreInfo &wait : wait_semaphores) {
        wait.semaphore->BeginUse();
    }
    for (CommandBufferSubmission &cb_submission : cb_submissions) {
        cb_submission.cb->BeginUse();
    }
    for (SemaphoreInfo &signal : signal_semaphores) {
        signal.semaphore->BeginUse();
    }
    if (fence) {
        fence->BeginUse();
    }
}

void vvl::QueueSubmission::EndUse() {
    for (SemaphoreInfo &wait : wait_semaphores) {
        wait.semaphore->EndUse();
    }
    for (CommandBufferSubmission &cb_submission : cb_submissions) {
        cb_submission.cb->EndUse();
    }
    for (SemaphoreInfo &signal : signal_semaphores) {
        signal.semaphore->EndUse();
    }
    if (fence) {
        fence->EndUse();
    }
}

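// A rough sketch of how these queue entry points are typically driven (illustrative only; names such
// as "queue_state" are placeholders, and the exact call sites live in the state tracker's queue-submit
// and host-synchronization handlers):
//   auto result = queue_state->PreSubmit(std::move(submissions));  // before the call reaches the driver
//   queue_state->PostSubmit();                                     // after the driver call returns
//   ...
//   queue_state->NotifyAndWait(loc, result.submission_seq);        // when the host waits on the work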
vvl::PreSubmitResult vvl::Queue::PreSubmit(std::vector<vvl::QueueSubmission> &&submissions) {
    if (!submissions.empty()) {
        submissions.back().is_last_submission = true;
    }
    for (auto &item : sub_states_) {
        item.second->PreSubmit(submissions);
    }
    PreSubmitResult result;
    for (QueueSubmission &submission : submissions) {
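        // Bump submit counts (including linked secondary command buffers) and run submit-time validation.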
        for (CommandBufferSubmission &cb_submission : submission.cb_submissions) {
            auto cb_guard = cb_submission.cb->WriteLock();
            for (CommandBuffer *secondary_cmd_buffer : cb_submission.cb->linked_command_buffers) {
                auto secondary_guard = secondary_cmd_buffer->WriteLock();
                secondary_cmd_buffer->submit_count++;
            }
            cb_submission.cb->submit_count++;
            cb_submission.cb->SubmitTimeValidate(*this, submission.perf_submit_pass, submission.loc.Get());
        }
        // seq_ is atomic so we don't need a lock until updating the deque below.
        // Note that this relies on the external synchronization requirements for the
        // VkQueue.
        submission.seq = ++seq_;
        result.submission_seq = submission.seq;
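        // Keep the semaphores, command buffers, and fence referenced until this submission is retired.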
        submission.BeginUse();
        for (SemaphoreInfo &wait : submission.wait_semaphores) {
            wait.semaphore->EnqueueWait(SubmissionReference(this, submission.seq), wait.payload);
            timeline_wait_count_ += (wait.semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE) ? 1 : 0;
        }

        for (SemaphoreInfo &signal : submission.signal_semaphores) {
            signal.semaphore->EnqueueSignal(SubmissionReference(this, submission.seq), signal.payload);
        }

        if (submission.fence) {
            if (submission.fence->EnqueueSignal(this, submission.seq)) {
                submission.has_external_fence = true;
            }
        }
        {
            auto guard = Lock();
            submissions_.emplace_back(std::move(submission));
            if (!thread_) {
                thread_ = std::make_unique<std::thread>(&Queue::ThreadFunc, this);
            }
        }
    }
    return result;
}

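// Notify() wakes the retire thread and asks it to process submissions up to until_seq, while Wait()
// blocks the caller until that submission has actually been retired. Passing kU64Max means
// "everything submitted so far".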
void vvl::Queue::Notify(uint64_t until_seq) {
    auto guard = Lock();
    if (until_seq == kU64Max) {
        until_seq = seq_.load();
    }
    if (request_seq_ < until_seq) {
        request_seq_ = until_seq;
    }
    cond_.notify_one();
}

void vvl::Queue::Wait(const Location &loc, uint64_t until_seq) {
    std::shared_future<void> waiter;
    {
        auto guard = Lock();
        if (until_seq == kU64Max) {
            until_seq = seq_.load();
        }
        if (submissions_.empty() || until_seq < submissions_.begin()->seq) {
            return;
        }
        uint64_t index = until_seq - submissions_.begin()->seq;
        assert(index < submissions_.size());
        waiter = submissions_[static_cast<size_t>(index)].waiter;
    }
    auto wait_status = waiter.wait_until(GetCondWaitTimeout());
    if (wait_status != std::future_status::ready) {
        dev_data_.LogError("INTERNAL-ERROR-VkQueue-state-timeout", Handle(), loc,
                           "The Validation Layers hit a timeout waiting for queue state to update."
                           " seq=%" PRIu64 " until=%" PRIu64,
                           seq_.load(), until_seq);
    }
}

void vvl::Queue::NotifyAndWait(const Location &loc, uint64_t until_seq) {
    Notify(until_seq);
    Wait(loc, until_seq);
}

std::optional<vvl::SemaphoreInfo> vvl::Queue::FindTimelineWaitWithoutResolvingSignal(uint64_t until_seq) const {
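    // Returns a timeline wait (from submissions up to until_seq) that does not yet have a resolving
    // signal, or an empty optional if every tracked timeline wait can be considered resolvable.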
    // A simple optimization for a long sequence of submits without host waits.
    // Stop iteration over submits if there are no timeline waits left. If only
    // binary semaphores are used this will return immediately.
    uint32_t processed_waits = 0;

    // Run the algorithm in two separate steps to avoid lock inversion with Semaphore::RetireWait:
    //  Semaphore::RetireWait()
    //      Semaphore::WriteLock()
    //      Semaphore::CanRetireTimelineWait
    //          TimePoint::Notify
    //              Queue::Lock() <-- semaphore lock is still held here
    //
    //  Current function:
    //      Queue::Lock()
    //      (the queue lock is released here, so lock inversion cannot occur)
    //      Semaphore::ReadLock()

    // Step 1. Get list of timeline waits (write-locks Queue)
    small_vector<SemaphoreInfo, 8> timeline_waits;
    {
        auto guard = Lock();
        for (auto it = submissions_.rbegin(); it != submissions_.rend() && processed_waits < timeline_wait_count_; ++it) {
            const vvl::QueueSubmission &submission = *it;
            if (submission.seq <= until_seq) {
                for (const auto &wait_info : submission.wait_semaphores) {
                    if (wait_info.semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE) {
                        timeline_waits.emplace_back(wait_info);
                        processed_waits++;
                    }
                }
            }
        }
    }
    // Step 2. Query each timeline wait (read-locks Semaphore)
    for (const SemaphoreInfo &wait_info : timeline_waits) {
        if (wait_info.semaphore->Scope() != vvl::Semaphore::kInternal) {
            // For an external semaphore we can't track the signal. To keep validation free of false
            // positives, we conservatively assume the signal is available, so skip this semaphore.
            continue;
        }
        if (!wait_info.semaphore->HasResolvingTimelineSignal(wait_info.payload)) {
            return wait_info;
        }
    }
    return {};
}

// Submissions on a present-only queue can be retired without explicit fence/semaphore sync.
// For example, an application's main loop uses AcquireNextImage and also waits on the frame fence
// to sync with the main app queue (a different queue than the present one). This ensures completion
// of previous presentations even though we do not submit any sync primitives on the present-only queue.
//
// VVL needs help to retire submissions in such scenarios because, by default, it expects a host
// sync command (such as WaitForFences) to guarantee that a submission has completed.
//
// This implementation assumes that if an error-free program has more active present requests than
// swapchain images, then at least the oldest present request has completed and the corresponding
// image was re-acquired (and pushed to the present queue again).
void vvl::Queue::UpdatePresentOnlyQueueProgress(const DeviceState &device_state) {
    uint64_t seq_to_advance_to = 0;
    {
        auto guard = Lock();
        assert(is_used_for_presentation && !is_used_for_regular_submits);
        small_unordered_map<VkSwapchainKHR, uint32_t, 4> active_presentations;
        for (const QueueSubmission &submission : submissions_) {
            assert(submission.swapchain != VK_NULL_HANDLE);
            active_presentations[submission.swapchain]++;
        }
        // Search for a swapchain with too many enqueued presentation requests
        VkSwapchainKHR swapchain = VK_NULL_HANDLE;
        for (const auto &[handle, count] : active_presentations) {
            if (auto swapchain_state = device_state.Get<Swapchain>(handle)) {
                if (count > swapchain_state->images.size()) {
                    swapchain = handle;
                    break;
                }
            }
        }
        // Get the seq needed to retire the oldest presentation submission.
        if (swapchain != VK_NULL_HANDLE) {
            for (const QueueSubmission &submission : submissions_) {
                if (submission.swapchain == swapchain) {
                    seq_to_advance_to = submission.seq;
                    break;
                }
            }
        }
    }
    if (seq_to_advance_to) {
        Notify(seq_to_advance_to);
    }
}

void vvl::Queue::Destroy() {
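    // Ask the retire thread to exit, then join it outside the lock: the thread itself acquires the
    // queue lock while draining submissions, so joining under the lock could deadlock.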
    std::unique_ptr<std::thread> dead_thread;
    {
        auto guard = Lock();
        exit_thread_ = true;
        cond_.notify_all();
        dead_thread = std::move(thread_);
    }
    if (dead_thread && dead_thread->joinable()) {
        dead_thread->join();
        dead_thread.reset();
    }
    for (auto &item : sub_states_) {
        item.second->Destroy();
    }
    StateObject::Destroy();
}

void vvl::Queue::PostSubmit() {
    auto guard = Lock();
    if (!submissions_.empty()) {
        PostSubmit(submissions_.back());
    }
}

void vvl::Queue::PostSubmit(QueueSubmission &submission) {
    for (auto &item : sub_states_) {
        item.second->PostSubmit(submissions_);
    }

    // When dealing with an external fence, the app might call vkWaitForFences, but it also might not,
    // in which case we may never know when the queue submission is done. If this "big lock" turns out
    // to be slow for real cases, we could have a background thread call vkGetFenceStatus to check for
    // us. (That would require a good test case to validate against.)
    if (submission.has_external_fence) {
        submission.fence->NotifyAndWait(submission.loc.Get());
    }
}

vvl::QueueSubmission *vvl::Queue::NextSubmission() {
    QueueSubmission *result = nullptr;
    // Determine whether the next submission is ready here so that the thread function doesn't need to
    // worry about locking.
    auto guard = Lock();
    while (!exit_thread_ && (submissions_.empty() || request_seq_ < submissions_.front().seq)) {
        // The queue thread must wait forever if nothing is happening, until we tell it to exit
        cond_.wait(guard);
    }
    if (!exit_thread_) {
        result = &submissions_.front();
        // NOTE: the submission must remain on the deque until we're done processing it so that
        // anyone waiting for it can find the correct waiter
    }
    return result;
}

void vvl::Queue::Retire(QueueSubmission &submission) {
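    // Runs on the retire thread once the submission is considered complete: release usage references,
    // then retire semaphore waits/signals and the fence.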
    submission.EndUse();
    if (dev_data_.is_device_lost) {
        return;  // the underlying objects might be destroyed/garbage
    }
    for (auto &wait : submission.wait_semaphores) {
        wait.semaphore->RetireWait(this, wait.payload, submission.loc.Get(), true);
        timeline_wait_count_ -= (wait.semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE) ? 1 : 0;
    }
    for (auto &item : sub_states_) {
        item.second->Retire(submission);
    }
    for (auto &signal : submission.signal_semaphores) {
        signal.semaphore->RetireSignal(signal.payload);
    }
    if (submission.fence) {
        submission.fence->Retire();
    }
}

void vvl::Queue::ThreadFunc() {
    VVL_TracySetThreadName(__FUNCTION__);

    QueueSubmission *submission = nullptr;

    // Roll this queue forward, one submission at a time.
    while (true) {
        submission = NextSubmission();
        if (submission == nullptr) {
            break;
        }
        Retire(*submission);
        // wake up anyone waiting for this submission to be retired
        {
            std::promise<void> completed;
            {
                auto guard = Lock();
                completed = std::move(submission->completed);
                submissions_.pop_front();
            }
            completed.set_value();
        }
    }
}