blob: 9d67095783e4edc38cc06b6876b082ffd603c723 [file] [log] [blame]
/* Copyright (c) 2024-2026 The Khronos Group Inc.
* Copyright (c) 2024-2026 Valve Corporation
* Copyright (c) 2024-2026 LunarG, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sync_stats.h"
#if VVL_ENABLE_SYNCVAL_STATS != 0
#include "sync_commandbuffer.h"
#include "sync_validation.h"
#include "state_tracker/state_tracker.h"
#include <iostream>
namespace vvl {
// Until C++ 26 std::atomic<T>::fetch_max arrives
// https://en.cppreference.com/w/cpp/atomic/atomic/fetch_max
// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p0493r5.pdf
template <typename T>
inline T atomic_fetch_max(std::atomic<T> &current_max, const T &value) noexcept {
T t = current_max.load();
while (!current_max.compare_exchange_weak(t, std::max(t, value)))
;
return t;
}
} // namespace vvl
namespace syncval {
// NOTE: fetch_add/fetch_sub return value before increment/decrement.
// Our Add/Sub functions return new counter values, so they need to
// adjust result of the atomic function by adding/subtracting one.
void Value32::Update(uint32_t new_value) { u32.store(new_value); }
uint32_t Value32::Add(uint32_t n) { return u32.fetch_add(n) + 1; }
uint32_t Value32::Sub(uint32_t n) { return u32.fetch_sub(n) - 1; }
void Value64::Update(uint64_t new_value) { u64.store(new_value); }
uint64_t Value64::Add(uint64_t n) { return u64.fetch_add(n) + 1; }
uint64_t Value64::Sub(uint64_t n) { return u64.fetch_sub(n) - 1; }
void ValueMax32::Update(uint32_t new_value) {
value.Update(new_value);
vvl::atomic_fetch_max(max_value.u32, new_value);
}
void ValueMax32::Add(uint32_t n) {
uint32_t new_value = value.Add(n);
vvl::atomic_fetch_max(max_value.u32, new_value);
}
void ValueMax32::Sub(uint32_t n) { value.Sub(n); }
void ValueMax64::Update(uint64_t new_value) {
value.Update(new_value);
vvl::atomic_fetch_max(max_value.u64, new_value);
}
void ValueMax64::Add(uint64_t n) {
uint64_t new_value = value.Add(n);
vvl::atomic_fetch_max(max_value.u64, new_value);
}
void ValueMax64::Sub(uint64_t n) { value.Sub(n); }
Stats::~Stats() {
if (report_on_destruction) {
const std::string report = CreateReport();
std::cout << report;
}
}
void Stats::AddCommandBufferContext() { command_buffer_contexts.Add(1); }
void Stats::RemoveCommandBufferContext() { command_buffer_contexts.Sub(1); }
void Stats::AddQueueBatchContext() { queue_batch_contexts.Add(1); }
void Stats::RemoveQueueBatchContext() { queue_batch_contexts.Sub(1); }
void Stats::AddTimelineSignals(uint32_t count) { timeline_signals.Add(count); }
void Stats::RemoveTimelineSignals(uint32_t count) { timeline_signals.Sub(count); }
void Stats::AddUnresolvedBatch() { unresolved_batches.Add(1); }
void Stats::RemoveUnresolvedBatch() { unresolved_batches.Sub(1); }
void Stats::AddHandleRecord(uint32_t count) { handle_records.Add(count); }
void Stats::RemoveHandleRecord(uint32_t count) { handle_records.Sub(count); }
void AccessContextStats::UpdateMax(const AccessContextStats& cur_stats) {
#define UPDATE_MAX(field) field = std::max(field, cur_stats.field)
UPDATE_MAX(access_contexts);
UPDATE_MAX(access_states);
UPDATE_MAX(read_states);
UPDATE_MAX(write_states);
UPDATE_MAX(first_accesses);
UPDATE_MAX(access_states_with_multiple_reads);
UPDATE_MAX(access_states_with_multiple_firsts);
UPDATE_MAX(access_states_with_dynamic_allocations);
UPDATE_MAX(access_states_dynamic_allocation_size);
#undef UPDATE_MAX
}
void UpdateAccessMapStats(const AccessMap& access_map, AccessContextStats& stats) {
stats.access_contexts += 1;
stats.access_states += (uint32_t)access_map.Size();
for (const auto& entry : access_map) {
const AccessState& access_state = entry.second;
access_state.UpdateStats(stats);
}
}
void AccessStats::Update(SyncValidator& validator) {
std::unique_lock<std::mutex> lock(access_stats_mutex);
cb_access_stats = {};
queue_access_stats = {};
subpass_access_stats = {};
validator.device_state->ForEachShared<vvl::CommandBuffer>([this](std::shared_ptr<vvl::CommandBuffer> cb) {
const CommandBufferAccessContext* cb_access_context = GetAccessContext(*cb);
cb_access_context->UpdateStats(*this);
});
for (const auto& batch : validator.GetAllQueueBatchContexts()) {
const AccessContext& access_context = batch->GetAccessContext();
UpdateAccessMapStats(access_context.GetAccessMap(), queue_access_stats);
}
max_cb_access_stats.UpdateMax(cb_access_stats);
max_queue_access_stats.UpdateMax(queue_access_stats);
max_subpass_access_stats.UpdateMax(subpass_access_stats);
}
void Stats::UpdateAccessStats(SyncValidator& validator) { access_stats.Update(validator); }
void Stats::OnBarrierCommand(uint32_t memory_barrier_count, uint32_t buffer_barrier_count, uint32_t image_barrier_count,
uint32_t execution_dependencies_count) {
if (memory_barrier_count) {
barrier_stats.memory_barriers.Add(memory_barrier_count);
}
if (buffer_barrier_count) {
barrier_stats.buffer_barriers.Add(buffer_barrier_count);
}
if (image_barrier_count) {
barrier_stats.image_barriers.Add(image_barrier_count);
}
if (execution_dependencies_count) {
barrier_stats.execution_dependencies.Add(execution_dependencies_count);
}
const uint32_t command_total_barrier_count = memory_barrier_count + buffer_barrier_count + image_barrier_count;
if (command_total_barrier_count == 1) {
barrier_stats.single_barrier_commands.Add(1);
} else if (command_total_barrier_count > 1) {
barrier_stats.multi_barrier_commands.Add(1);
}
if (memory_barrier_count > 1 && buffer_barrier_count == 0 && image_barrier_count == 0) {
barrier_stats.multi_barrier_commands_only_memory.Add(1);
} else if (buffer_barrier_count > 1 && memory_barrier_count == 0 && image_barrier_count == 0) {
barrier_stats.multi_barrier_commands_only_buffer.Add(1);
} else if (image_barrier_count > 1 && memory_barrier_count == 0 && buffer_barrier_count == 0) {
barrier_stats.multi_barrier_commands_only_image.Add(1);
}
}
void Stats::UpdateMemoryStats() {
#if defined(USE_MIMALLOC_STATS)
mi_stats_merge();
{
std::unique_lock<std::mutex> lock(mi_stats_mutex);
mi_stats_get(sizeof(mi_stats), &mi_stats);
}
#endif
}
void Stats::ReportOnDestruction() { report_on_destruction = true; }
std::string Stats::CreateReport() {
std::ostringstream ss;
ss << std::left;
auto print_common_stats = [&ss](const char* field_name, const ValueMax32& stat) {
ss << std::setw(32) << field_name;
ss << std::setw(12) << stat.value.u32 << stat.max_value.u32;
ss << "\n";
};
auto print_common_stats64 = [&ss](const char* field_name, uint64_t v1, uint64_t v2) {
ss << std::setw(32) << field_name;
ss << std::setw(12) << v1 << v2;
ss << "\n";
};
auto print_access_state_stats = [&ss](const char* context_type, const AccessContextStats& stats) {
ss << std::setw(13) << std::string(context_type) + "(" + std::to_string(stats.access_contexts) + ")";
ss << std::setw(11) << stats.access_states;
const uint64_t access_state_objects_size = sizeof(AccessState) * stats.access_states;
const double size_mb = ((double)access_state_objects_size / 1024.0 / 1024.0);
ss << std::fixed << std::setprecision(2) << std::setw(11) << size_mb;
ss.unsetf(std::ios::floatfield);
ss << std::setw(2) << "| ";
ss << std::setw(10) << stats.read_states;
ss << std::setw(10) << stats.write_states;
ss << std::setw(9) << stats.first_accesses;
ss << std::setw(2) << "| ";
ss << std::setw(12) << stats.access_states_with_multiple_reads;
ss << std::setw(13) << stats.access_states_with_multiple_firsts;
ss << std::setw(13) << stats.access_states_with_dynamic_allocations;
ss << std::setw(15) << stats.access_states_dynamic_allocation_size;
ss << "\n";
};
ss << "-----------------------\n";
ss << "Common stats count max_count\n";
ss << "-----------------------\n";
print_common_stats("CommandBufferAccessContext", command_buffer_contexts);
print_common_stats("QueueBatchContext", queue_batch_contexts);
print_common_stats("Timeline signal", timeline_signals);
print_common_stats("Unresolved batch", unresolved_batches);
print_common_stats("HandleRecord", handle_records);
uint64_t handle_record_memory = handle_records.value.u32 * sizeof(HandleRecord);
uint64_t handle_record_max_memory = handle_records.max_value.u32 * sizeof(HandleRecord);
print_common_stats64("HandleRecord bytes", handle_record_memory, handle_record_max_memory);
const char* access_stats_header =
"context accesses size (MB) | reads writes firsts | many_reads many_firsts have_allocs allocated (B)\n";
ss << "\n";
ss << "-----------------------\n";
ss << "AccessState stats\n";
ss << "-----------------------\n";
ss << access_stats_header;
print_access_state_stats("CB", access_stats.cb_access_stats);
print_access_state_stats("Queue", access_stats.queue_access_stats);
print_access_state_stats("Subpass", access_stats.subpass_access_stats);
ss << "\n";
ss << "-----------------------\n";
ss << "MAX AccessState stats\n";
ss << "-----------------------\n";
ss << access_stats_header;
print_access_state_stats("CB", access_stats.max_cb_access_stats);
print_access_state_stats("Queue", access_stats.max_queue_access_stats);
print_access_state_stats("Subpass", access_stats.max_subpass_access_stats);
ss << "\n";
ss << "Memory barriers : " << barrier_stats.memory_barriers.u32 << "\n";
ss << "Buffer barriers : " << barrier_stats.buffer_barriers.u32 << "\n";
ss << "Image barriers : " << barrier_stats.image_barriers.u32 << "\n";
ss << "Execution dependencies : " << barrier_stats.execution_dependencies.u32 << "\n\n";
ss << "Single barrier commands : " << barrier_stats.single_barrier_commands.u32 << "\n";
ss << "Multi barrier commands : " << barrier_stats.multi_barrier_commands.u32 << "\n";
ss << "Multi barrier commands only memory : " << barrier_stats.multi_barrier_commands_only_memory.u32 << "\n";
ss << "Multi barrier commands only buffer : " << barrier_stats.multi_barrier_commands_only_buffer.u32 << "\n";
ss << "Multi barrier commands only image : " << barrier_stats.multi_barrier_commands_only_image.u32 << "\n";
ss << "\n";
ss << "Layout ordering barrier registry size: " << GetLayoutOrderingBarrierLookup().ObjectCount();
ss << "\n";
ss << "Max last reads array size";
ss << ": CB: " << access_stats.cb_access_stats.max_last_reads_count;
ss << ", Queue: " << access_stats.queue_access_stats.max_last_reads_count;
ss << ", Subpass: " << access_stats.subpass_access_stats.max_last_reads_count;
ss << "\n";
ss << "Max first accesses array size";
ss << ": CB: " << access_stats.cb_access_stats.max_first_accesses_size;
ss << ", Queue: " << access_stats.queue_access_stats.max_first_accesses_size;
ss << ", Subpass: " << access_stats.subpass_access_stats.max_first_accesses_size;
ss << "\n";
#if defined(USE_MIMALLOC_STATS)
// Print allocation counts (these are not reported by mi_stats_print_out)
ss << "\n";
ss << "malloc_normal_count: " << mi_stats.malloc_normal_count.total << "\n";
ss << "malloc_huge_count: " << mi_stats.malloc_huge_count.total << "\n";
ss << "\n";
// Print main mimalloc stats
mi_stats_print_out([](const char* msg, void* arg) { *static_cast<std::ostringstream*>(arg) << msg; }, &ss);
ss << "\n";
#endif
return ss.str();
}
} // namespace syncval
#endif // VVL_ENABLE_SYNCVAL_STATS != 0