blob: 7d507570f97d245cddfdc619d2e087eff1a2505d [file] [log] [blame]
/* Copyright (c) 2015-2025 The Khronos Group Inc.
* Copyright (c) 2015-2025 Valve Corporation
* Copyright (c) 2015-2025 LunarG, Inc.
* Copyright (C) 2015-2024 Google Inc.
* Modifications Copyright (C) 2020 Advanced Micro Devices, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "state_tracker/state_object.h"
#include "state_tracker/fence_state.h"
#include "state_tracker/semaphore_state.h"
#include <condition_variable>
#include <deque>
#include <future>
#include <thread>
#include <vector>
#include <string>
#include "error_message/error_location.h"
#include "chassis/dispatch_object.h"
#include "vk_layer_config.h"
namespace vvl {
class CommandBuffer;
class DeviceState;
class Image;
class Queue;
class QueueSubState;
struct CommandBufferSubmission {
std::shared_ptr<vvl::CommandBuffer> cb;
// Specifically made for GPU-AV, for it has unique problems: Error reporting is done *after*
// command buffer submissions, not at Pre/PostCall time.
// Contrary to sync val, GPU-AV cannot just look at `GetQueueState()->cmdbuf_label_stack`
// to construct an initial label stack. sync-val can do that because validation and error reporting is done
// *before* a command buffer list is submitted: validation is performed one command buffer at a time,
// and `GetQueueState()->cmdbuf_label_stack` is updated between those validations.
// When GPU-AV starts doing error reporting, when command buffers have completed,
// the label stack info stored in Queue state is lost.
// => GPU-AV needs to track this initial label stack per command buffer submission.
std::vector<std::string> initial_label_stack;
CommandBufferSubmission(std::shared_ptr<vvl::CommandBuffer> cb, std::vector<std::string> initial_label_stack)
: cb(std::move(cb)), initial_label_stack(std::move(initial_label_stack)) {}
CommandBufferSubmission(CommandBufferSubmission &&other)
: cb(std::move(other.cb)), initial_label_stack(std::move(other.initial_label_stack)) {}
CommandBufferSubmission &operator=(const CommandBufferSubmission &other) = default;
CommandBufferSubmission(const CommandBufferSubmission &) = default;
};
struct QueueSubmission {
QueueSubmission(const Location &loc_) : loc(loc_), completed(), waiter(completed.get_future()) {}
bool is_last_submission{false};
std::vector<vvl::CommandBufferSubmission> cb_submissions{};
std::vector<SemaphoreInfo> wait_semaphores;
std::vector<SemaphoreInfo> signal_semaphores;
std::shared_ptr<Fence> fence;
bool has_external_fence = false;
// Swapchain handle if this submission represents QueuePresent request
VkSwapchainKHR swapchain = VK_NULL_HANDLE;
std::shared_ptr<const vvl::Image> swapchain_image;
LocationCapture loc;
uint64_t seq{0};
uint32_t perf_submit_pass{0};
std::promise<void> completed;
std::shared_future<void> waiter;
void AddCommandBuffer(std::shared_ptr<vvl::CommandBuffer> cb_state, std::vector<std::string> initial_label_stack) {
cb_submissions.emplace_back(std::move(cb_state), std::move(initial_label_stack));
}
void AddSignalSemaphore(std::shared_ptr<Semaphore> &&semaphore_state, uint64_t value) {
signal_semaphores.emplace_back(std::move(semaphore_state), value);
}
void AddWaitSemaphore(std::shared_ptr<Semaphore> &&semaphore_state, uint64_t value) {
wait_semaphores.emplace_back(std::move(semaphore_state), value);
}
void AddFence(std::shared_ptr<Fence> &&fence_state) { fence = std::move(fence_state); }
void EndUse();
void BeginUse();
};
// This timeout is for all queue threads to update their state after we know
// (via being in a PostRecord call) that a fence, semaphore or wait for idle has completed.
//
// NOTE 2025-07-07: we did not have bugs related to timeouts for quite some time.
// At the same time low timeout value (10 seconds) was the source of confusion during
// debugging when the program was waiting on breakpoint longer than timeout value.
//
// Set infinite value for debug builds. For release builds stay on the safe side and use
// a larger but still waitable value. In the future, after more testing, we might want use
// infinite timeout in all cases (or it's possible that non-threaded solution will happen first,
// as part of imporving submit time validation).
//
// Timeout can be overwritten with VK_QUEUE_THREAD_DEFAULT_TIMEOUT environment variable.
static inline std::chrono::time_point<std::chrono::steady_clock> GetCondWaitTimeout() {
const std::string envvar_timeout_str = GetEnvironment("VK_QUEUE_THREAD_DEFAULT_TIMEOUT");
const uint64_t envvar_timeout_value = !envvar_timeout_str.empty() ? std::atoi(envvar_timeout_str.c_str()) : 0;
uint64_t timeout_seconds = 0;
if (envvar_timeout_value) {
timeout_seconds = envvar_timeout_value;
} else {
#ifndef NDEBUG
// infinite value for debug builds
timeout_seconds = vvl::kU32Max;
#else
// large timeout for release builds but still waitable in case of issue
timeout_seconds = 120;
#endif
}
return std::chrono::steady_clock::now() + std::chrono::seconds(timeout_seconds);
}
struct PreSubmitResult {
uint64_t last_submission_seq = 0;
uint64_t submission_seq = 0;
};
class Queue : public StateObject, public SubStateManager<QueueSubState> {
public:
Queue(DeviceState &dev_data, VkQueue handle, uint32_t family_index, uint32_t queue_index, VkDeviceQueueCreateFlags flags,
const VkQueueFamilyProperties &queueFamilyProperties)
: StateObject(handle, kVulkanObjectTypeQueue),
queue_family_index(family_index),
queue_index(queue_index),
create_flags(flags),
queue_family_properties(queueFamilyProperties),
dev_data_(dev_data) {}
~Queue() { Destroy(); }
void Destroy() override;
VkQueue VkHandle() const { return handle_.Cast<VkQueue>(); }
// called from the various PreCallRecordQueueSubmit() methods
PreSubmitResult PreSubmit(std::vector<QueueSubmission> &&submissions);
// called from the various PostCallRecordQueueSubmit() methods
void PostSubmit();
// Tell the queue thread that submissions up to and including the submission with
// sequence number until_seq have finished. kU64Max means to finish all submissions.
void Notify(uint64_t until_seq = kU64Max);
// Wait for the queue thread to finish processing submissions with sequence numbers
// up to and including until_seq. kU64Max means to finish all submissions.
void Wait(const Location &loc, uint64_t until_seq = kU64Max);
// Helper that combines Notify and Wait
void NotifyAndWait(const Location &loc, uint64_t until_seq = kU64Max);
// Find a timeline wait that does not have a resolving signal submitted yet.
// Check submissions up to and including until_seq.
std::optional<SemaphoreInfo> FindTimelineWaitWithoutResolvingSignal(uint64_t until_seq) const;
// VVL needs helps to retire submsissions on present-only queue that does not use explicit host synchronization
void UpdatePresentOnlyQueueProgress(const DeviceState &device_state);
// Queue family index. As queueFamilyIndex parameter in vkGetDeviceQueue.
const uint32_t queue_family_index;
// Index of the queue within a queue family. As queueIndex parameter in vkGetDeviceQueue.
const uint32_t queue_index;
const VkDeviceQueueCreateFlags create_flags;
const VkQueueFamilyProperties queue_family_properties;
// Track command buffer label stack accross all command buffers submitted to this queue.
// Access to this variable relies on external queue synchronization.
std::vector<std::string> cmdbuf_label_stack;
// Track the last closed label. It is used in the error messages to help locate unbalanced vkCmdEndDebugUtilsLabelEXT command.
// Access to this variable relies on external queue synchronization.
std::string last_closed_cmdbuf_label;
// Stop per-queue label tracking after the first label mismatch error.
// Access to this variable relies on external queue synchronization.
bool found_unbalanced_cmdbuf_label = false;
// If at any point this queue was used for specific queue operations
bool is_used_for_presentation = false; // QueuePresent
bool is_used_for_regular_submits = false; // QueueSubmit and QueueBindSparse
using LockGuard = std::unique_lock<std::mutex>;
LockGuard Lock() const { return LockGuard(lock_); }
const std::deque<QueueSubmission> &Submissions() { return submissions_; }
protected:
// called from the various PostCallRecordQueueSubmit() methods
void PostSubmit(QueueSubmission &submission);
// called when the worker thread decides a submissions has finished executing
void Retire(QueueSubmission &submission);
private:
uint32_t timeline_wait_count_ = 0;
void ThreadFunc();
QueueSubmission *NextSubmission();
DeviceState &dev_data_;
// state related to submitting to the queue, all data members must
// be accessed with lock_ held
std::unique_ptr<std::thread> thread_;
std::deque<QueueSubmission> submissions_;
std::atomic<uint64_t> seq_{0};
uint64_t request_seq_{0};
bool exit_thread_{false};
mutable std::mutex lock_;
// condition to wake up the queue's thread
std::condition_variable cond_;
};
class QueueSubState {
public:
explicit QueueSubState(Queue &q) : base(q) {}
QueueSubState(const QueueSubState &) = delete;
QueueSubState &operator=(const QueueSubState &) = delete;
virtual ~QueueSubState() {}
virtual void Destroy() {}
virtual void PreSubmit(std::vector<QueueSubmission> &submissions) {}
virtual void PostSubmit(std::deque<QueueSubmission> &submissions_) {}
virtual void Retire(QueueSubmission &submission) {}
VulkanTypedHandle Handle() const { return base.Handle(); }
VkQueue VkHandle() const { return base.VkHandle(); }
Queue &base;
};
} // namespace vvl