| /* Copyright (c) 2024-2026 The Khronos Group Inc. |
| * Copyright (c) 2024-2026 Valve Corporation |
| * Copyright (c) 2024-2026 LunarG, Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "sync_stats.h" |
| |
| #if VVL_ENABLE_SYNCVAL_STATS != 0 |
| #include "sync_commandbuffer.h" |
| #include "sync_validation.h" |
| #include "state_tracker/state_tracker.h" |
| |
| #include <iostream> |
| |
namespace vvl {
// Until C++ 26 std::atomic<T>::fetch_max arrives
// https://en.cppreference.com/w/cpp/atomic/atomic/fetch_max
// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p0493r5.pdf
//
// Atomically raises current_max to value if value is greater than the stored value.
// Returns the value that was observed in current_max before any update took place.
//
// Unlike an unconditional read-modify-write, nothing is stored when the observed value
// is already greater than or equal to value; in that case only a load is performed.
template <typename T>
inline T atomic_fetch_max(std::atomic<T> &current_max, const T &value) noexcept {
    T observed = current_max.load();
    // On failure compare_exchange_weak refreshes `observed` with the current content,
    // so the "is an update still needed" check is re-evaluated against fresh data.
    while (observed < value && !current_max.compare_exchange_weak(observed, value)) {
    }
    return observed;
}
}  // namespace vvl
| |
| namespace syncval { |
| |
| // NOTE: fetch_add/fetch_sub return value before increment/decrement. |
| // Our Add/Sub functions return new counter values, so they need to |
| // adjust result of the atomic function by adding/subtracting one. |
| |
| void Value32::Update(uint32_t new_value) { u32.store(new_value); } |
| uint32_t Value32::Add(uint32_t n) { return u32.fetch_add(n) + 1; } |
| uint32_t Value32::Sub(uint32_t n) { return u32.fetch_sub(n) - 1; } |
| |
| void Value64::Update(uint64_t new_value) { u64.store(new_value); } |
| uint64_t Value64::Add(uint64_t n) { return u64.fetch_add(n) + 1; } |
| uint64_t Value64::Sub(uint64_t n) { return u64.fetch_sub(n) - 1; } |
| |
| void ValueMax32::Update(uint32_t new_value) { |
| value.Update(new_value); |
| vvl::atomic_fetch_max(max_value.u32, new_value); |
| } |
| void ValueMax32::Add(uint32_t n) { |
| uint32_t new_value = value.Add(n); |
| vvl::atomic_fetch_max(max_value.u32, new_value); |
| } |
| void ValueMax32::Sub(uint32_t n) { value.Sub(n); } |
| |
| void ValueMax64::Update(uint64_t new_value) { |
| value.Update(new_value); |
| vvl::atomic_fetch_max(max_value.u64, new_value); |
| } |
| void ValueMax64::Add(uint64_t n) { |
| uint64_t new_value = value.Add(n); |
| vvl::atomic_fetch_max(max_value.u64, new_value); |
| } |
| void ValueMax64::Sub(uint64_t n) { value.Sub(n); } |
| |
| Stats::~Stats() { |
| if (report_on_destruction) { |
| const std::string report = CreateReport(); |
| std::cout << report; |
| } |
| } |
| |
| void Stats::AddCommandBufferContext() { command_buffer_contexts.Add(1); } |
| void Stats::RemoveCommandBufferContext() { command_buffer_contexts.Sub(1); } |
| |
| void Stats::AddQueueBatchContext() { queue_batch_contexts.Add(1); } |
| void Stats::RemoveQueueBatchContext() { queue_batch_contexts.Sub(1); } |
| |
| void Stats::AddTimelineSignals(uint32_t count) { timeline_signals.Add(count); } |
| void Stats::RemoveTimelineSignals(uint32_t count) { timeline_signals.Sub(count); } |
| |
| void Stats::AddUnresolvedBatch() { unresolved_batches.Add(1); } |
| void Stats::RemoveUnresolvedBatch() { unresolved_batches.Sub(1); } |
| |
| void Stats::AddHandleRecord(uint32_t count) { handle_records.Add(count); } |
| void Stats::RemoveHandleRecord(uint32_t count) { handle_records.Sub(count); } |
| |
| void AccessContextStats::UpdateMax(const AccessContextStats& cur_stats) { |
| #define UPDATE_MAX(field) field = std::max(field, cur_stats.field) |
| UPDATE_MAX(access_contexts); |
| UPDATE_MAX(access_states); |
| UPDATE_MAX(read_states); |
| UPDATE_MAX(write_states); |
| UPDATE_MAX(first_accesses); |
| UPDATE_MAX(access_states_with_multiple_reads); |
| UPDATE_MAX(access_states_with_multiple_firsts); |
| UPDATE_MAX(access_states_with_dynamic_allocations); |
| UPDATE_MAX(access_states_dynamic_allocation_size); |
| #undef UPDATE_MAX |
| } |
| |
| void UpdateAccessMapStats(const AccessMap& access_map, AccessContextStats& stats) { |
| stats.access_contexts += 1; |
| stats.access_states += (uint32_t)access_map.Size(); |
| for (const auto& entry : access_map) { |
| const AccessState& access_state = entry.second; |
| access_state.UpdateStats(stats); |
| } |
| } |
| |
| void AccessStats::Update(SyncValidator& validator) { |
| std::unique_lock<std::mutex> lock(access_stats_mutex); |
| cb_access_stats = {}; |
| queue_access_stats = {}; |
| subpass_access_stats = {}; |
| |
| validator.device_state->ForEachShared<vvl::CommandBuffer>([this](std::shared_ptr<vvl::CommandBuffer> cb) { |
| const CommandBufferAccessContext* cb_access_context = GetAccessContext(*cb); |
| cb_access_context->UpdateStats(*this); |
| }); |
| for (const auto& batch : validator.GetAllQueueBatchContexts()) { |
| const AccessContext& access_context = batch->GetAccessContext(); |
| UpdateAccessMapStats(access_context.GetAccessMap(), queue_access_stats); |
| } |
| |
| max_cb_access_stats.UpdateMax(cb_access_stats); |
| max_queue_access_stats.UpdateMax(queue_access_stats); |
| max_subpass_access_stats.UpdateMax(subpass_access_stats); |
| } |
| |
| void Stats::UpdateAccessStats(SyncValidator& validator) { access_stats.Update(validator); } |
| |
| void Stats::OnBarrierCommand(uint32_t memory_barrier_count, uint32_t buffer_barrier_count, uint32_t image_barrier_count, |
| uint32_t execution_dependencies_count) { |
| if (memory_barrier_count) { |
| barrier_stats.memory_barriers.Add(memory_barrier_count); |
| } |
| if (buffer_barrier_count) { |
| barrier_stats.buffer_barriers.Add(buffer_barrier_count); |
| } |
| if (image_barrier_count) { |
| barrier_stats.image_barriers.Add(image_barrier_count); |
| } |
| if (execution_dependencies_count) { |
| barrier_stats.execution_dependencies.Add(execution_dependencies_count); |
| } |
| |
| const uint32_t command_total_barrier_count = memory_barrier_count + buffer_barrier_count + image_barrier_count; |
| if (command_total_barrier_count == 1) { |
| barrier_stats.single_barrier_commands.Add(1); |
| } else if (command_total_barrier_count > 1) { |
| barrier_stats.multi_barrier_commands.Add(1); |
| } |
| |
| if (memory_barrier_count > 1 && buffer_barrier_count == 0 && image_barrier_count == 0) { |
| barrier_stats.multi_barrier_commands_only_memory.Add(1); |
| } else if (buffer_barrier_count > 1 && memory_barrier_count == 0 && image_barrier_count == 0) { |
| barrier_stats.multi_barrier_commands_only_buffer.Add(1); |
| } else if (image_barrier_count > 1 && memory_barrier_count == 0 && buffer_barrier_count == 0) { |
| barrier_stats.multi_barrier_commands_only_image.Add(1); |
| } |
| } |
| |
| void Stats::UpdateMemoryStats() { |
| #if defined(USE_MIMALLOC_STATS) |
| mi_stats_merge(); |
| { |
| std::unique_lock<std::mutex> lock(mi_stats_mutex); |
| mi_stats_get(sizeof(mi_stats), &mi_stats); |
| } |
| #endif |
| } |
| |
| void Stats::ReportOnDestruction() { report_on_destruction = true; } |
| |
| std::string Stats::CreateReport() { |
| std::ostringstream ss; |
| ss << std::left; |
| |
| auto print_common_stats = [&ss](const char* field_name, const ValueMax32& stat) { |
| ss << std::setw(32) << field_name; |
| ss << std::setw(12) << stat.value.u32 << stat.max_value.u32; |
| ss << "\n"; |
| }; |
| auto print_common_stats64 = [&ss](const char* field_name, uint64_t v1, uint64_t v2) { |
| ss << std::setw(32) << field_name; |
| ss << std::setw(12) << v1 << v2; |
| ss << "\n"; |
| }; |
| auto print_access_state_stats = [&ss](const char* context_type, const AccessContextStats& stats) { |
| ss << std::setw(13) << std::string(context_type) + "(" + std::to_string(stats.access_contexts) + ")"; |
| ss << std::setw(11) << stats.access_states; |
| |
| const uint64_t access_state_objects_size = sizeof(AccessState) * stats.access_states; |
| const double size_mb = ((double)access_state_objects_size / 1024.0 / 1024.0); |
| ss << std::fixed << std::setprecision(2) << std::setw(11) << size_mb; |
| ss.unsetf(std::ios::floatfield); |
| |
| ss << std::setw(2) << "| "; |
| ss << std::setw(10) << stats.read_states; |
| ss << std::setw(10) << stats.write_states; |
| ss << std::setw(9) << stats.first_accesses; |
| |
| ss << std::setw(2) << "| "; |
| ss << std::setw(12) << stats.access_states_with_multiple_reads; |
| ss << std::setw(13) << stats.access_states_with_multiple_firsts; |
| ss << std::setw(13) << stats.access_states_with_dynamic_allocations; |
| ss << std::setw(15) << stats.access_states_dynamic_allocation_size; |
| ss << "\n"; |
| }; |
| |
| ss << "-----------------------\n"; |
| ss << "Common stats count max_count\n"; |
| ss << "-----------------------\n"; |
| print_common_stats("CommandBufferAccessContext", command_buffer_contexts); |
| print_common_stats("QueueBatchContext", queue_batch_contexts); |
| print_common_stats("Timeline signal", timeline_signals); |
| print_common_stats("Unresolved batch", unresolved_batches); |
| print_common_stats("HandleRecord", handle_records); |
| |
| uint64_t handle_record_memory = handle_records.value.u32 * sizeof(HandleRecord); |
| uint64_t handle_record_max_memory = handle_records.max_value.u32 * sizeof(HandleRecord); |
| print_common_stats64("HandleRecord bytes", handle_record_memory, handle_record_max_memory); |
| |
| const char* access_stats_header = |
| "context accesses size (MB) | reads writes firsts | many_reads many_firsts have_allocs allocated (B)\n"; |
| |
| ss << "\n"; |
| ss << "-----------------------\n"; |
| ss << "AccessState stats\n"; |
| ss << "-----------------------\n"; |
| ss << access_stats_header; |
| print_access_state_stats("CB", access_stats.cb_access_stats); |
| print_access_state_stats("Queue", access_stats.queue_access_stats); |
| print_access_state_stats("Subpass", access_stats.subpass_access_stats); |
| |
| ss << "\n"; |
| ss << "-----------------------\n"; |
| ss << "MAX AccessState stats\n"; |
| ss << "-----------------------\n"; |
| ss << access_stats_header; |
| print_access_state_stats("CB", access_stats.max_cb_access_stats); |
| print_access_state_stats("Queue", access_stats.max_queue_access_stats); |
| print_access_state_stats("Subpass", access_stats.max_subpass_access_stats); |
| |
| ss << "\n"; |
| ss << "Memory barriers : " << barrier_stats.memory_barriers.u32 << "\n"; |
| ss << "Buffer barriers : " << barrier_stats.buffer_barriers.u32 << "\n"; |
| ss << "Image barriers : " << barrier_stats.image_barriers.u32 << "\n"; |
| ss << "Execution dependencies : " << barrier_stats.execution_dependencies.u32 << "\n\n"; |
| ss << "Single barrier commands : " << barrier_stats.single_barrier_commands.u32 << "\n"; |
| ss << "Multi barrier commands : " << barrier_stats.multi_barrier_commands.u32 << "\n"; |
| ss << "Multi barrier commands only memory : " << barrier_stats.multi_barrier_commands_only_memory.u32 << "\n"; |
| ss << "Multi barrier commands only buffer : " << barrier_stats.multi_barrier_commands_only_buffer.u32 << "\n"; |
| ss << "Multi barrier commands only image : " << barrier_stats.multi_barrier_commands_only_image.u32 << "\n"; |
| |
| ss << "\n"; |
| ss << "Layout ordering barrier registry size: " << GetLayoutOrderingBarrierLookup().ObjectCount(); |
| ss << "\n"; |
| ss << "Max last reads array size"; |
| ss << ": CB: " << access_stats.cb_access_stats.max_last_reads_count; |
| ss << ", Queue: " << access_stats.queue_access_stats.max_last_reads_count; |
| ss << ", Subpass: " << access_stats.subpass_access_stats.max_last_reads_count; |
| ss << "\n"; |
| ss << "Max first accesses array size"; |
| ss << ": CB: " << access_stats.cb_access_stats.max_first_accesses_size; |
| ss << ", Queue: " << access_stats.queue_access_stats.max_first_accesses_size; |
| ss << ", Subpass: " << access_stats.subpass_access_stats.max_first_accesses_size; |
| ss << "\n"; |
| |
| #if defined(USE_MIMALLOC_STATS) |
| // Print allocation counts (these are not reported by mi_stats_print_out) |
| ss << "\n"; |
| ss << "malloc_normal_count: " << mi_stats.malloc_normal_count.total << "\n"; |
| ss << "malloc_huge_count: " << mi_stats.malloc_huge_count.total << "\n"; |
| ss << "\n"; |
| // Print main mimalloc stats |
| mi_stats_print_out([](const char* msg, void* arg) { *static_cast<std::ostringstream*>(arg) << msg; }, &ss); |
| ss << "\n"; |
| #endif |
| return ss.str(); |
| } |
| |
| } // namespace syncval |
| #endif // VVL_ENABLE_SYNCVAL_STATS != 0 |