// layers/sync/sync_stats.cpp - external/github.com/KhronosGroup/Vulkan-ValidationLayers - Git at Google

 /* Copyright (c) 2024-2026 The Khronos Group Inc.
  * Copyright (c) 2024-2026 Valve Corporation
  * Copyright (c) 2024-2026 LunarG, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "sync_stats.h"

 #if VVL_ENABLE_SYNCVAL_STATS != 0
 #include "sync_commandbuffer.h"
 #include "sync_validation.h"
 #include "state_tracker/state_tracker.h"

 #include <iostream>

 namespace vvl {
 // Stand-in for std::atomic<T>::fetch_max until the C++26 version is available.
 // https://en.cppreference.com/w/cpp/atomic/atomic/fetch_max
 // https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p0493r5.pdf
 //
 // Atomically replaces current_max with std::max(current_max, value) by retrying a
 // compare-exchange until it succeeds. Returns the value that was held by
 // current_max right before the successful exchange.
 template <typename T>
 inline T atomic_fetch_max(std::atomic<T> &current_max, const T &value) noexcept {
     T observed = current_max.load();
     for (;;) {
         // A failed compare_exchange_weak refreshes `observed`, so the candidate is recomputed every round.
         const T candidate = std::max(observed, value);
         if (current_max.compare_exchange_weak(observed, candidate)) {
             return observed;
         }
     }
 }
 }  // namespace vvl

 namespace syncval {

 // NOTE: fetch_add/fetch_sub return the value held before the increment/decrement.
 // Our Add/Sub functions return the new counter value, so they adjust the result
 // of the atomic operation by adding/subtracting the same amount n that was applied.

 // Overwrites the 32-bit counter with new_value.
 void Value32::Update(uint32_t new_value) { u32.store(new_value); }
 // Atomically adds n to the counter and returns the value after the addition.
 uint32_t Value32::Add(uint32_t n) { return u32.fetch_add(n) + n; }
 // Atomically subtracts n from the counter and returns the value after the subtraction.
 uint32_t Value32::Sub(uint32_t n) { return u32.fetch_sub(n) - n; }

 // Overwrites the 64-bit counter with new_value.
 void Value64::Update(uint64_t new_value) { u64.store(new_value); }
 // Atomically adds n to the counter and returns the value after the addition.
 uint64_t Value64::Add(uint64_t n) { return u64.fetch_add(n) + n; }
 // Atomically subtracts n from the counter and returns the value after the subtraction.
 uint64_t Value64::Sub(uint64_t n) { return u64.fetch_sub(n) - n; }

 void ValueMax32::Update(uint32_t new_value) {
     value.Update(new_value);
     vvl::atomic_fetch_max(max_value.u32, new_value);
 }
 void ValueMax32::Add(uint32_t n) {
     uint32_t new_value = value.Add(n);
     vvl::atomic_fetch_max(max_value.u32, new_value);
 }
 void ValueMax32::Sub(uint32_t n) { value.Sub(n); }

 void ValueMax64::Update(uint64_t new_value) {
     value.Update(new_value);
     vvl::atomic_fetch_max(max_value.u64, new_value);
 }
 void ValueMax64::Add(uint64_t n) {
     uint64_t new_value = value.Add(n);
     vvl::atomic_fetch_max(max_value.u64, new_value);
 }
 void ValueMax64::Sub(uint64_t n) { value.Sub(n); }

 // Writes the statistics report to stdout when report_on_destruction is set.
 Stats::~Stats() {
     if (!report_on_destruction) {
         return;
     }
     const std::string final_report = CreateReport();
     std::cout << final_report;
 }

 void Stats::AddCommandBufferContext() { command_buffer_contexts.Add(1); }
 void Stats::RemoveCommandBufferContext() { command_buffer_contexts.Sub(1); }

 void Stats::AddQueueBatchContext() { queue_batch_contexts.Add(1); }
 void Stats::RemoveQueueBatchContext() { queue_batch_contexts.Sub(1); }

 void Stats::AddTimelineSignals(uint32_t count) { timeline_signals.Add(count); }
 void Stats::RemoveTimelineSignals(uint32_t count) { timeline_signals.Sub(count); }

 void Stats::AddUnresolvedBatch() { unresolved_batches.Add(1); }
 void Stats::RemoveUnresolvedBatch() { unresolved_batches.Sub(1); }

 void Stats::AddHandleRecord(uint32_t count) { handle_records.Add(count); }
 void Stats::RemoveHandleRecord(uint32_t count) { handle_records.Sub(count); }

 // Raises each field listed below to at least the corresponding value in cur_stats.
 // Fields not listed here are left unchanged by this function.
 void AccessContextStats::UpdateMax(const AccessContextStats& cur_stats) {
     const auto keep_larger = [](auto& stored, const auto& candidate) { stored = std::max(stored, candidate); };

     keep_larger(access_contexts, cur_stats.access_contexts);
     keep_larger(access_states, cur_stats.access_states);
     keep_larger(read_states, cur_stats.read_states);
     keep_larger(write_states, cur_stats.write_states);
     keep_larger(first_accesses, cur_stats.first_accesses);
     keep_larger(access_states_with_multiple_reads, cur_stats.access_states_with_multiple_reads);
     keep_larger(access_states_with_multiple_firsts, cur_stats.access_states_with_multiple_firsts);
     keep_larger(access_states_with_dynamic_allocations, cur_stats.access_states_with_dynamic_allocations);
     keep_larger(access_states_dynamic_allocation_size, cur_stats.access_states_dynamic_allocation_size);
 }

 void UpdateAccessMapStats(const AccessMap& access_map, AccessContextStats& stats) {
     stats.access_contexts += 1;
     stats.access_states += (uint32_t)access_map.Size();
     for (const auto& entry : access_map) {
         const AccessState& access_state = entry.second;
         access_state.UpdateStats(stats);
     }
 }

 // Recomputes the current access statistics from scratch and folds them into the
 // running maximums. The whole update runs under access_stats_mutex.
 void AccessStats::Update(SyncValidator& validator) {
     std::lock_guard<std::mutex> guard(access_stats_mutex);

     // Start from zeroed "current" stats; the max_* members are kept.
     cb_access_stats = {};
     queue_access_stats = {};
     subpass_access_stats = {};

     // Each command buffer's access context reports into this object.
     validator.device_state->ForEachShared<vvl::CommandBuffer>([this](std::shared_ptr<vvl::CommandBuffer> cb) {
         const CommandBufferAccessContext* context = GetAccessContext(*cb);
         context->UpdateStats(*this);
     });

     // Queue batch contexts contribute their access maps to the queue stats.
     for (const auto& queue_batch : validator.GetAllQueueBatchContexts()) {
         const AccessContext& batch_access_context = queue_batch->GetAccessContext();
         UpdateAccessMapStats(batch_access_context.GetAccessMap(), queue_access_stats);
     }

     max_cb_access_stats.UpdateMax(cb_access_stats);
     max_queue_access_stats.UpdateMax(queue_access_stats);
     max_subpass_access_stats.UpdateMax(subpass_access_stats);
 }

 // Forwards to AccessStats::Update for the given validator.
 void Stats::UpdateAccessStats(SyncValidator& validator) {
     access_stats.Update(validator);
 }

 // Records one barrier command in barrier_stats.
 //
 // Each of the four counts is added to its running total (zero counts are skipped).
 // The command is then classified by the sum of its memory, buffer and image
 // barriers: exactly one -> single_barrier_commands, more than one ->
 // multi_barrier_commands. Finally, a command that has more than one barrier of a
 // single kind and none of the other two kinds bumps the matching
 // multi_barrier_commands_only_* counter.
 void Stats::OnBarrierCommand(uint32_t memory_barrier_count, uint32_t buffer_barrier_count, uint32_t image_barrier_count,
                              uint32_t execution_dependencies_count) {
     const auto add_if_nonzero = [](auto& counter, uint32_t amount) {
         if (amount != 0) {
             counter.Add(amount);
         }
     };
     add_if_nonzero(barrier_stats.memory_barriers, memory_barrier_count);
     add_if_nonzero(barrier_stats.buffer_barriers, buffer_barrier_count);
     add_if_nonzero(barrier_stats.image_barriers, image_barrier_count);
     add_if_nonzero(barrier_stats.execution_dependencies, execution_dependencies_count);

     // execution_dependencies_count does not take part in the single/multi classification.
     const uint32_t barriers_in_command = memory_barrier_count + buffer_barrier_count + image_barrier_count;
     if (barriers_in_command > 1) {
         barrier_stats.multi_barrier_commands.Add(1);
     } else if (barriers_in_command == 1) {
         barrier_stats.single_barrier_commands.Add(1);
     }

     const bool no_memory = (memory_barrier_count == 0);
     const bool no_buffer = (buffer_barrier_count == 0);
     const bool no_image = (image_barrier_count == 0);
     if (no_buffer && no_image && memory_barrier_count > 1) {
         barrier_stats.multi_barrier_commands_only_memory.Add(1);
     } else if (no_memory && no_image && buffer_barrier_count > 1) {
         barrier_stats.multi_barrier_commands_only_buffer.Add(1);
     } else if (no_memory && no_buffer && image_barrier_count > 1) {
         barrier_stats.multi_barrier_commands_only_image.Add(1);
     }
 }

 // Refreshes the mi_stats member from mimalloc. Compiles to an empty function
 // unless USE_MIMALLOC_STATS is defined.
 void Stats::UpdateMemoryStats() {
 #if defined(USE_MIMALLOC_STATS)
     mi_stats_merge();

     // mi_stats is written under mi_stats_mutex.
     std::lock_guard<std::mutex> guard(mi_stats_mutex);
     mi_stats_get(sizeof(mi_stats), &mi_stats);
 #endif
 }

 // Sets report_on_destruction, the flag the destructor checks before printing the report.
 void Stats::ReportOnDestruction() {
     report_on_destruction = true;
 }

 std::string Stats::CreateReport() {
     std::ostringstream ss;
     ss << std::left;

     auto print_common_stats = [&ss](const char* field_name, const ValueMax32& stat) {
         ss << std::setw(32) << field_name;
         ss << std::setw(12) << stat.value.u32 << stat.max_value.u32;
         ss << "\n";
     };
     auto print_common_stats64 = [&ss](const char* field_name, uint64_t v1, uint64_t v2) {
         ss << std::setw(32) << field_name;
         ss << std::setw(12) << v1 << v2;
         ss << "\n";
     };
     auto print_access_state_stats = [&ss](const char* context_type, const AccessContextStats& stats) {
         ss << std::setw(13) << std::string(context_type) + "(" + std::to_string(stats.access_contexts) + ")";
         ss << std::setw(11) << stats.access_states;

         const uint64_t access_state_objects_size = sizeof(AccessState) * stats.access_states;
         const double size_mb = ((double)access_state_objects_size / 1024.0 / 1024.0);
         ss << std::fixed << std::setprecision(2) << std::setw(11) << size_mb;
         ss.unsetf(std::ios::floatfield);

         ss << std::setw(2) << "| ";
         ss << std::setw(10) << stats.read_states;
         ss << std::setw(10) << stats.write_states;
         ss << std::setw(9) << stats.first_accesses;

         ss << std::setw(2) << "| ";
         ss << std::setw(12) << stats.access_states_with_multiple_reads;
         ss << std::setw(13) << stats.access_states_with_multiple_firsts;
         ss << std::setw(13) << stats.access_states_with_dynamic_allocations;
         ss << std::setw(15) << stats.access_states_dynamic_allocation_size;
         ss << "\n";
     };

     ss << "-----------------------\n";
     ss << "Common stats                    count       max_count\n";
     ss << "-----------------------\n";
     print_common_stats("CommandBufferAccessContext", command_buffer_contexts);
     print_common_stats("QueueBatchContext", queue_batch_contexts);
     print_common_stats("Timeline signal", timeline_signals);
     print_common_stats("Unresolved batch", unresolved_batches);
     print_common_stats("HandleRecord", handle_records);

     uint64_t handle_record_memory = handle_records.value.u32 * sizeof(HandleRecord);
     uint64_t handle_record_max_memory = handle_records.max_value.u32 * sizeof(HandleRecord);
     print_common_stats64("HandleRecord bytes", handle_record_memory, handle_record_max_memory);

     const char* access_stats_header =
         "context      accesses   size (MB)  | reads     writes    firsts   | many_reads  many_firsts  have_allocs  allocated (B)\n";

     ss << "\n";
     ss << "-----------------------\n";
     ss << "AccessState stats\n";
     ss << "-----------------------\n";
     ss << access_stats_header;
     print_access_state_stats("CB", access_stats.cb_access_stats);
     print_access_state_stats("Queue", access_stats.queue_access_stats);
     print_access_state_stats("Subpass", access_stats.subpass_access_stats);

     ss << "\n";
     ss << "-----------------------\n";
     ss << "MAX AccessState stats\n";
     ss << "-----------------------\n";
     ss << access_stats_header;
     print_access_state_stats("CB", access_stats.max_cb_access_stats);
     print_access_state_stats("Queue", access_stats.max_queue_access_stats);
     print_access_state_stats("Subpass", access_stats.max_subpass_access_stats);

     ss << "\n";
     ss << "Memory barriers          : " << barrier_stats.memory_barriers.u32 << "\n";
     ss << "Buffer barriers          : " << barrier_stats.buffer_barriers.u32 << "\n";
     ss << "Image barriers           : " << barrier_stats.image_barriers.u32 << "\n";
     ss << "Execution dependencies   : " << barrier_stats.execution_dependencies.u32 << "\n\n";
     ss << "Single barrier commands              : " << barrier_stats.single_barrier_commands.u32 << "\n";
     ss << "Multi barrier commands               : " << barrier_stats.multi_barrier_commands.u32 << "\n";
     ss << "Multi barrier commands only memory   : " << barrier_stats.multi_barrier_commands_only_memory.u32 << "\n";
     ss << "Multi barrier commands only buffer   : " << barrier_stats.multi_barrier_commands_only_buffer.u32 << "\n";
     ss << "Multi barrier commands only image    : " << barrier_stats.multi_barrier_commands_only_image.u32 << "\n";

     ss << "\n";
     ss << "Layout ordering barrier registry size: " << GetLayoutOrderingBarrierLookup().ObjectCount();
     ss << "\n";
     ss << "Max last reads array size";
     ss << ": CB: " << access_stats.cb_access_stats.max_last_reads_count;
     ss << ", Queue: " << access_stats.queue_access_stats.max_last_reads_count;
     ss << ", Subpass: " << access_stats.subpass_access_stats.max_last_reads_count;
     ss << "\n";
     ss << "Max first accesses array size";
     ss << ": CB: " << access_stats.cb_access_stats.max_first_accesses_size;
     ss << ", Queue: " << access_stats.queue_access_stats.max_first_accesses_size;
     ss << ", Subpass: " << access_stats.subpass_access_stats.max_first_accesses_size;
     ss << "\n";

 #if defined(USE_MIMALLOC_STATS)
     // Print allocation counts (these are not reported by mi_stats_print_out)
     ss << "\n";
     ss << "malloc_normal_count: " << mi_stats.malloc_normal_count.total << "\n";
     ss << "malloc_huge_count: " << mi_stats.malloc_huge_count.total << "\n";
     ss << "\n";
     // Print main mimalloc stats
     mi_stats_print_out([](const char* msg, void* arg) { *static_cast<std::ostringstream*>(arg) << msg; }, &ss);
     ss << "\n";
 #endif
     return ss.str();
 }

 }  // namespace syncval
 #endif  // VVL_ENABLE_SYNCVAL_STATS != 0
	/* Copyright (c) 2024-2026 The Khronos Group Inc.
	* Copyright (c) 2024-2026 Valve Corporation
	* Copyright (c) 2024-2026 LunarG, Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "sync_stats.h"

	#if VVL_ENABLE_SYNCVAL_STATS != 0
	#include "sync_commandbuffer.h"
	#include "sync_validation.h"
	#include "state_tracker/state_tracker.h"

	#include <iostream>

	namespace vvl {
	// Stand-in for std::atomic<T>::fetch_max until the C++26 version is available.
	// https://en.cppreference.com/w/cpp/atomic/atomic/fetch_max
	// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p0493r5.pdf
	//
	// Atomically replaces current_max with std::max(current_max, value) by retrying a
	// compare-exchange until it succeeds. Returns the value that was held by
	// current_max right before the successful exchange.
	template <typename T>
	inline T atomic_fetch_max(std::atomic<T> &current_max, const T &value) noexcept {
	    T observed = current_max.load();
	    for (;;) {
	        // A failed compare_exchange_weak refreshes `observed`, so the candidate is recomputed every round.
	        const T candidate = std::max(observed, value);
	        if (current_max.compare_exchange_weak(observed, candidate)) {
	            return observed;
	        }
	    }
	}
	} // namespace vvl

	namespace syncval {

	// NOTE: fetch_add/fetch_sub return the value held before the increment/decrement.
	// Our Add/Sub functions return the new counter value, so they adjust the result
	// of the atomic operation by adding/subtracting the same amount n that was applied.

	// Overwrites the 32-bit counter with new_value.
	void Value32::Update(uint32_t new_value) { u32.store(new_value); }
	// Atomically adds n to the counter and returns the value after the addition.
	uint32_t Value32::Add(uint32_t n) { return u32.fetch_add(n) + n; }
	// Atomically subtracts n from the counter and returns the value after the subtraction.
	uint32_t Value32::Sub(uint32_t n) { return u32.fetch_sub(n) - n; }

	// Overwrites the 64-bit counter with new_value.
	void Value64::Update(uint64_t new_value) { u64.store(new_value); }
	// Atomically adds n to the counter and returns the value after the addition.
	uint64_t Value64::Add(uint64_t n) { return u64.fetch_add(n) + n; }
	// Atomically subtracts n from the counter and returns the value after the subtraction.
	uint64_t Value64::Sub(uint64_t n) { return u64.fetch_sub(n) - n; }

	void ValueMax32::Update(uint32_t new_value) {
	value.Update(new_value);
	vvl::atomic_fetch_max(max_value.u32, new_value);
	}
	void ValueMax32::Add(uint32_t n) {
	uint32_t new_value = value.Add(n);
	vvl::atomic_fetch_max(max_value.u32, new_value);
	}
	void ValueMax32::Sub(uint32_t n) { value.Sub(n); }

	void ValueMax64::Update(uint64_t new_value) {
	value.Update(new_value);
	vvl::atomic_fetch_max(max_value.u64, new_value);
	}
	void ValueMax64::Add(uint64_t n) {
	uint64_t new_value = value.Add(n);
	vvl::atomic_fetch_max(max_value.u64, new_value);
	}
	void ValueMax64::Sub(uint64_t n) { value.Sub(n); }

	// Writes the statistics report to stdout when report_on_destruction is set.
	Stats::~Stats() {
	    if (!report_on_destruction) {
	        return;
	    }
	    const std::string final_report = CreateReport();
	    std::cout << final_report;
	}

	void Stats::AddCommandBufferContext() { command_buffer_contexts.Add(1); }
	void Stats::RemoveCommandBufferContext() { command_buffer_contexts.Sub(1); }

	void Stats::AddQueueBatchContext() { queue_batch_contexts.Add(1); }
	void Stats::RemoveQueueBatchContext() { queue_batch_contexts.Sub(1); }

	void Stats::AddTimelineSignals(uint32_t count) { timeline_signals.Add(count); }
	void Stats::RemoveTimelineSignals(uint32_t count) { timeline_signals.Sub(count); }

	void Stats::AddUnresolvedBatch() { unresolved_batches.Add(1); }
	void Stats::RemoveUnresolvedBatch() { unresolved_batches.Sub(1); }

	void Stats::AddHandleRecord(uint32_t count) { handle_records.Add(count); }
	void Stats::RemoveHandleRecord(uint32_t count) { handle_records.Sub(count); }

	// Raises each field listed below to at least the corresponding value in cur_stats.
	// Fields not listed here are left unchanged by this function.
	void AccessContextStats::UpdateMax(const AccessContextStats& cur_stats) {
	    const auto keep_larger = [](auto& stored, const auto& candidate) { stored = std::max(stored, candidate); };

	    keep_larger(access_contexts, cur_stats.access_contexts);
	    keep_larger(access_states, cur_stats.access_states);
	    keep_larger(read_states, cur_stats.read_states);
	    keep_larger(write_states, cur_stats.write_states);
	    keep_larger(first_accesses, cur_stats.first_accesses);
	    keep_larger(access_states_with_multiple_reads, cur_stats.access_states_with_multiple_reads);
	    keep_larger(access_states_with_multiple_firsts, cur_stats.access_states_with_multiple_firsts);
	    keep_larger(access_states_with_dynamic_allocations, cur_stats.access_states_with_dynamic_allocations);
	    keep_larger(access_states_dynamic_allocation_size, cur_stats.access_states_dynamic_allocation_size);
	}

	void UpdateAccessMapStats(const AccessMap& access_map, AccessContextStats& stats) {
	stats.access_contexts += 1;
	stats.access_states += (uint32_t)access_map.Size();
	for (const auto& entry : access_map) {
	const AccessState& access_state = entry.second;
	access_state.UpdateStats(stats);
	}
	}

	// Recomputes the current access statistics from scratch and folds them into the
	// running maximums. The whole update runs under access_stats_mutex.
	void AccessStats::Update(SyncValidator& validator) {
	    std::lock_guard<std::mutex> guard(access_stats_mutex);

	    // Start from zeroed "current" stats; the max_* members are kept.
	    cb_access_stats = {};
	    queue_access_stats = {};
	    subpass_access_stats = {};

	    // Each command buffer's access context reports into this object.
	    validator.device_state->ForEachShared<vvl::CommandBuffer>([this](std::shared_ptr<vvl::CommandBuffer> cb) {
	        const CommandBufferAccessContext* context = GetAccessContext(*cb);
	        context->UpdateStats(*this);
	    });

	    // Queue batch contexts contribute their access maps to the queue stats.
	    for (const auto& queue_batch : validator.GetAllQueueBatchContexts()) {
	        const AccessContext& batch_access_context = queue_batch->GetAccessContext();
	        UpdateAccessMapStats(batch_access_context.GetAccessMap(), queue_access_stats);
	    }

	    max_cb_access_stats.UpdateMax(cb_access_stats);
	    max_queue_access_stats.UpdateMax(queue_access_stats);
	    max_subpass_access_stats.UpdateMax(subpass_access_stats);
	}

	// Forwards to AccessStats::Update for the given validator.
	void Stats::UpdateAccessStats(SyncValidator& validator) {
	    access_stats.Update(validator);
	}

	// Records one barrier command in barrier_stats.
	//
	// Each of the four counts is added to its running total (zero counts are skipped).
	// The command is then classified by the sum of its memory, buffer and image
	// barriers: exactly one -> single_barrier_commands, more than one ->
	// multi_barrier_commands. Finally, a command that has more than one barrier of a
	// single kind and none of the other two kinds bumps the matching
	// multi_barrier_commands_only_* counter.
	void Stats::OnBarrierCommand(uint32_t memory_barrier_count, uint32_t buffer_barrier_count, uint32_t image_barrier_count,
	                             uint32_t execution_dependencies_count) {
	    const auto add_if_nonzero = [](auto& counter, uint32_t amount) {
	        if (amount != 0) {
	            counter.Add(amount);
	        }
	    };
	    add_if_nonzero(barrier_stats.memory_barriers, memory_barrier_count);
	    add_if_nonzero(barrier_stats.buffer_barriers, buffer_barrier_count);
	    add_if_nonzero(barrier_stats.image_barriers, image_barrier_count);
	    add_if_nonzero(barrier_stats.execution_dependencies, execution_dependencies_count);

	    // execution_dependencies_count does not take part in the single/multi classification.
	    const uint32_t barriers_in_command = memory_barrier_count + buffer_barrier_count + image_barrier_count;
	    if (barriers_in_command > 1) {
	        barrier_stats.multi_barrier_commands.Add(1);
	    } else if (barriers_in_command == 1) {
	        barrier_stats.single_barrier_commands.Add(1);
	    }

	    const bool no_memory = (memory_barrier_count == 0);
	    const bool no_buffer = (buffer_barrier_count == 0);
	    const bool no_image = (image_barrier_count == 0);
	    if (no_buffer && no_image && memory_barrier_count > 1) {
	        barrier_stats.multi_barrier_commands_only_memory.Add(1);
	    } else if (no_memory && no_image && buffer_barrier_count > 1) {
	        barrier_stats.multi_barrier_commands_only_buffer.Add(1);
	    } else if (no_memory && no_buffer && image_barrier_count > 1) {
	        barrier_stats.multi_barrier_commands_only_image.Add(1);
	    }
	}

	// Refreshes the mi_stats member from mimalloc. Compiles to an empty function
	// unless USE_MIMALLOC_STATS is defined.
	void Stats::UpdateMemoryStats() {
	#if defined(USE_MIMALLOC_STATS)
	    mi_stats_merge();

	    // mi_stats is written under mi_stats_mutex.
	    std::lock_guard<std::mutex> guard(mi_stats_mutex);
	    mi_stats_get(sizeof(mi_stats), &mi_stats);
	#endif
	}

	// Sets report_on_destruction, the flag the destructor checks before printing the report.
	void Stats::ReportOnDestruction() {
	    report_on_destruction = true;
	}

	// Builds the human-readable statistics report and returns it as a string.
	//
	// Sections, in order: common counters (current and max), current AccessState
	// stats, max AccessState stats, barrier counters, layout ordering barrier registry
	// size, max array sizes, and (only with USE_MIMALLOC_STATS) mimalloc statistics.
	//
	// Table headers are padded so that they line up with the std::setw() column
	// widths used by the row printers below.
	//
	// NOTE(review): this function reads access_stats and mi_stats without taking
	// access_stats_mutex / mi_stats_mutex - confirm callers do not race with the updates.
	std::string Stats::CreateReport() {
	    std::ostringstream ss;
	    ss << std::left;

	    // One row of the common table: name (32 columns), current value (12 columns), max value.
	    auto print_common_stats = [&ss](const char* field_name, const ValueMax32& stat) {
	        ss << std::setw(32) << field_name;
	        ss << std::setw(12) << stat.value.u32 << stat.max_value.u32;
	        ss << "\n";
	    };
	    // Same layout as print_common_stats, for already-computed 64-bit values.
	    auto print_common_stats64 = [&ss](const char* field_name, uint64_t v1, uint64_t v2) {
	        ss << std::setw(32) << field_name;
	        ss << std::setw(12) << v1 << v2;
	        ss << "\n";
	    };
	    // One row of an AccessState table for the given context type.
	    auto print_access_state_stats = [&ss](const char* context_type, const AccessContextStats& stats) {
	        ss << std::setw(13) << std::string(context_type) + "(" + std::to_string(stats.access_contexts) + ")";
	        ss << std::setw(11) << stats.access_states;

	        // Size of the AccessState objects themselves, in megabytes with two decimals.
	        const uint64_t access_state_objects_size = sizeof(AccessState) * stats.access_states;
	        const double size_mb = static_cast<double>(access_state_objects_size) / 1024.0 / 1024.0;
	        ss << std::fixed << std::setprecision(2) << std::setw(11) << size_mb;
	        ss.unsetf(std::ios::floatfield);

	        ss << std::setw(2) << "| ";
	        ss << std::setw(10) << stats.read_states;
	        ss << std::setw(10) << stats.write_states;
	        ss << std::setw(9) << stats.first_accesses;

	        ss << std::setw(2) << "| ";
	        ss << std::setw(12) << stats.access_states_with_multiple_reads;
	        ss << std::setw(13) << stats.access_states_with_multiple_firsts;
	        ss << std::setw(13) << stats.access_states_with_dynamic_allocations;
	        ss << std::setw(15) << stats.access_states_dynamic_allocation_size;
	        ss << "\n";
	    };

	    ss << "-----------------------\n";
	    ss << "Common stats                    count       max_count\n";
	    ss << "-----------------------\n";
	    print_common_stats("CommandBufferAccessContext", command_buffer_contexts);
	    print_common_stats("QueueBatchContext", queue_batch_contexts);
	    print_common_stats("Timeline signal", timeline_signals);
	    print_common_stats("Unresolved batch", unresolved_batches);
	    print_common_stats("HandleRecord", handle_records);

	    uint64_t handle_record_memory = handle_records.value.u32 * sizeof(HandleRecord);
	    uint64_t handle_record_max_memory = handle_records.max_value.u32 * sizeof(HandleRecord);
	    print_common_stats64("HandleRecord bytes", handle_record_memory, handle_record_max_memory);

	    const char* access_stats_header =
	        "context      accesses   size (MB)  | reads     writes    firsts   | many_reads  many_firsts  have_allocs  allocated (B)\n";

	    ss << "\n";
	    ss << "-----------------------\n";
	    ss << "AccessState stats\n";
	    ss << "-----------------------\n";
	    ss << access_stats_header;
	    print_access_state_stats("CB", access_stats.cb_access_stats);
	    print_access_state_stats("Queue", access_stats.queue_access_stats);
	    print_access_state_stats("Subpass", access_stats.subpass_access_stats);

	    ss << "\n";
	    ss << "-----------------------\n";
	    ss << "MAX AccessState stats\n";
	    ss << "-----------------------\n";
	    ss << access_stats_header;
	    print_access_state_stats("CB", access_stats.max_cb_access_stats);
	    print_access_state_stats("Queue", access_stats.max_queue_access_stats);
	    print_access_state_stats("Subpass", access_stats.max_subpass_access_stats);

	    ss << "\n";
	    ss << "Memory barriers          : " << barrier_stats.memory_barriers.u32 << "\n";
	    ss << "Buffer barriers          : " << barrier_stats.buffer_barriers.u32 << "\n";
	    ss << "Image barriers           : " << barrier_stats.image_barriers.u32 << "\n";
	    ss << "Execution dependencies   : " << barrier_stats.execution_dependencies.u32 << "\n\n";
	    ss << "Single barrier commands              : " << barrier_stats.single_barrier_commands.u32 << "\n";
	    ss << "Multi barrier commands               : " << barrier_stats.multi_barrier_commands.u32 << "\n";
	    ss << "Multi barrier commands only memory   : " << barrier_stats.multi_barrier_commands_only_memory.u32 << "\n";
	    ss << "Multi barrier commands only buffer   : " << barrier_stats.multi_barrier_commands_only_buffer.u32 << "\n";
	    ss << "Multi barrier commands only image    : " << barrier_stats.multi_barrier_commands_only_image.u32 << "\n";

	    ss << "\n";
	    ss << "Layout ordering barrier registry size: " << GetLayoutOrderingBarrierLookup().ObjectCount();
	    ss << "\n";
	    // These two lines read the current (not max_*) stats objects.
	    ss << "Max last reads array size";
	    ss << ": CB: " << access_stats.cb_access_stats.max_last_reads_count;
	    ss << ", Queue: " << access_stats.queue_access_stats.max_last_reads_count;
	    ss << ", Subpass: " << access_stats.subpass_access_stats.max_last_reads_count;
	    ss << "\n";
	    ss << "Max first accesses array size";
	    ss << ": CB: " << access_stats.cb_access_stats.max_first_accesses_size;
	    ss << ", Queue: " << access_stats.queue_access_stats.max_first_accesses_size;
	    ss << ", Subpass: " << access_stats.subpass_access_stats.max_first_accesses_size;
	    ss << "\n";

	#if defined(USE_MIMALLOC_STATS)
	    // Print allocation counts (these are not reported by mi_stats_print_out)
	    ss << "\n";
	    ss << "malloc_normal_count: " << mi_stats.malloc_normal_count.total << "\n";
	    ss << "malloc_huge_count: " << mi_stats.malloc_huge_count.total << "\n";
	    ss << "\n";
	    // Print main mimalloc stats. arg is the address of ss, so it must be cast back
	    // to a stream pointer and dereferenced before appending the message.
	    mi_stats_print_out([](const char* msg, void* arg) { *static_cast<std::ostringstream*>(arg) << msg; }, &ss);
	    ss << "\n";
	#endif
	    return ss.str();
	}

	} // namespace syncval
	#endif // VVL_ENABLE_SYNCVAL_STATS != 0