| // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| #include "vm/profiler.h" |
| |
| #include <utility> |
| |
| #include "platform/address_sanitizer.h" |
| #include "platform/atomic.h" |
| #include "platform/memory_sanitizer.h" |
| #include "platform/thread_sanitizer.h" |
| #include "platform/utils.h" |
| #if defined(SUPPORT_PERFETTO) |
| #include "third_party/perfetto/protos/perfetto/trace/profiling/profile_packet.pbzero.h" |
| #endif |
| #include "vm/allocation.h" |
| #include "vm/code_patcher.h" |
| #if !defined(DART_PRECOMPILED_RUNTIME) |
| #include "vm/compiler/compiler_state.h" |
| #endif |
| #include "vm/debugger.h" |
| #include "vm/globals.h" |
| #include "vm/heap/safepoint.h" |
| #if defined(DART_PRECOMPILED_RUNTIME) |
| #include "vm/image_snapshot.h" |
| #endif |
| #include "vm/instructions.h" |
| #include "vm/isolate.h" |
| #include "vm/json_stream.h" |
| #include "vm/lockers.h" |
| #include "vm/message_handler.h" |
| #include "vm/native_symbol.h" |
| #include "vm/object.h" |
| #include "vm/object_store.h" |
| #include "vm/os.h" |
| #if defined(SUPPORT_PERFETTO) |
| #include "vm/perfetto_utils.h" |
| #endif |
| #include "vm/profiler_service.h" |
| #include "vm/reusable_handles.h" |
| #include "vm/signal_handler.h" |
| #include "vm/simulator.h" |
| #include "vm/stack_frame.h" |
| #include "vm/timeline.h" |
| #include "vm/version.h" |
| |
| namespace dart { |
| |
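| // With the default --max_profile_depth, a single stack walk can spill into |
| // at most this many chained Sample objects (see ReserveSampleAndLink). |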
| static constexpr intptr_t kMaxSamplesPerTick = 4; |
| |
| DEFINE_FLAG(bool, trace_profiled_isolates, false, "Trace profiled isolates."); |
| |
| DEFINE_FLAG(int, |
| profile_period, |
| 1000, |
| "Time between profiler samples in microseconds. Minimum 50."); |
| DEFINE_FLAG(int, |
| max_profile_depth, |
| Sample::kPCArraySizeInWords * kMaxSamplesPerTick, |
| "Maximum number stack frames walked. Minimum 2. Maximum 255."); |
| DEFINE_FLAG(bool, profile_vm, false, "Always collect native stack traces."); |
| DEFINE_FLAG(bool, |
| profile_vm_allocation, |
| false, |
| "Collect native stack traces when tracing Dart allocations."); |
| |
| DEFINE_FLAG( |
| int, |
| sample_buffer_duration, |
| 0, |
| "Defines the size of the profiler sample buffer to contain at least " |
| "N seconds of samples at a given sample rate. If not provided, the " |
| "default is ~4 seconds. Large values will greatly increase memory " |
| "consumption."); |
| DEFINE_FLAG( |
| bool, |
| profile_startup, |
| false, |
| "Make the profiler discard new samples once the profiler sample buffer is " |
| "full. When this flag is not set, the profiler sample buffer is used as a " |
| "ring buffer, meaning that once it is full, new samples start overwriting " |
| "the oldest ones. This flag itself does not enable the profiler; the " |
| "profiler must be enabled separately, e.g. with --profiler."); |
| |
| #if defined(DART_INCLUDE_STACK_DUMPER) |
| ProfilerCounters Profiler::counters_ = {}; |
| |
| static void DumpStackFrame(uword pc, uword fp, const char* name, uword offset) { |
| OS::PrintErr(" pc 0x%" Pp " fp 0x%" Pp " %s+0x%" Px "\n", pc, fp, name, |
| offset); |
| } |
| |
| void DumpStackFrame(intptr_t frame_index, uword pc, uword fp) { |
| uword start = 0; |
| // The pc for all frames except the top frame is a return address, which can |
| // belong to a different inlining interval than the call. Subtract one to get |
| // the symbolization for the call. |
| uword lookup_pc = frame_index == 0 ? pc : pc - 1; |
| if (auto const name = |
| NativeSymbolResolver::LookupSymbolName(lookup_pc, &start)) { |
| DumpStackFrame(pc, fp, name, pc - start); |
| NativeSymbolResolver::FreeSymbolName(name); |
| return; |
| } |
| |
| const char* dso_name; |
| uword dso_base; |
| if (NativeSymbolResolver::LookupSharedObject(pc, &dso_base, &dso_name)) { |
| DumpStackFrame(pc, fp, dso_name, pc - dso_base); |
| NativeSymbolResolver::FreeSymbolName(dso_name); |
| return; |
| } |
| |
| #if !defined(DART_PRECOMPILED_RUNTIME) |
| // This relies on heap iteration, which might fail if we're crashing because |
| // of heap corruption. A nested crash symbolizing a JIT frame will prevent |
| // seeing all caller frames, so only do this when we aren't able to use the |
| // safer StackFrameIterator. |
| Thread* thread = Thread::Current(); |
| bool symbolize_jit_code = |
| (thread != nullptr) && |
| (thread->execution_state() != Thread::kThreadInNative) && |
| (thread->execution_state() != Thread::kThreadInVM); |
| if (symbolize_jit_code) { |
| Code result; |
| result = Code::FindCodeUnsafe(lookup_pc); |
| if (!result.IsNull()) { |
| DumpStackFrame( |
| pc, fp, |
| result.QualifiedName(NameFormattingParams(Object::kInternalName)), |
| pc - result.PayloadStart()); |
| return; |
| } |
| } |
| #endif |
| |
| OS::PrintErr(" pc 0x%" Pp " fp 0x%" Pp " Unknown symbol\n", pc, fp); |
| } |
| |
| class ProfilerStackWalker : public ValueObject { |
| public: |
| ProfilerStackWalker(Sample* head_sample, Isolate* isolate) |
| : sample_(head_sample), |
| isolate_(isolate), |
| frame_index_(0), |
| total_frames_(0) { |
| if (sample_ == nullptr) { |
| ASSERT(isolate_ == nullptr); |
| } else { |
| ASSERT(isolate_ != nullptr); |
| ASSERT(sample_->head_sample()); |
| } |
| } |
| |
| bool Append(uword pc, uword fp) { |
| if (sample_ == nullptr) { |
| DumpStackFrame(frame_index_, pc, fp); |
| frame_index_++; |
| total_frames_++; |
| return true; |
| } |
| #if defined(DART_INCLUDE_PROFILER) |
| if (total_frames_ >= Profiler::CurrentConfig().max_depth) { |
| sample_->set_truncated_trace(true); |
| return false; |
| } |
| ASSERT(sample_ != nullptr); |
| if (frame_index_ == Sample::kPCArraySizeInWords) { |
| Sample* new_sample = SampleBlock::ReserveSampleAndLink(sample_, isolate_); |
| if (new_sample == nullptr) { |
| // Could not reserve a new sample; mark this one as truncated. |
| sample_->set_truncated_trace(true); |
| return false; |
| } |
| frame_index_ = 0; |
| sample_ = new_sample; |
| } |
| ASSERT(frame_index_ < Sample::kPCArraySizeInWords); |
| sample_->SetAt(frame_index_, pc); |
| frame_index_++; |
| total_frames_++; |
| return true; |
| #else |
| UNREACHABLE(); |
| return false; |
| #endif |
| } |
| |
| protected: |
| Sample* sample_; |
| Isolate* const isolate_; |
| intptr_t frame_index_; |
| intptr_t total_frames_; |
| }; |
| |
| // MSAN/ASAN are unaware of frames initialized by generated code. |
| // ProfilerNativeStackWalker may also read a random slot in the stack if a |
| // function on the stack doesn't use frame pointers and puts something that |
| // looks like a stack address into the FP register. |
| NO_SANITIZE_ADDRESS |
| NO_SANITIZE_MEMORY |
| static uword* LoadStackSlot(uword* ptr) { |
| return reinterpret_cast<uword*>(*ptr); |
| } |
| |
| // Clang on Windows inlines the load from LoadStackSlot and still applies the |
| // sanitizer instrumentation to the load in callers. |
| #if defined(DART_HOST_OS_WINDOWS) |
| #define WINDOWS_EXTRA_NO_SANITIZE_ADDRESS NO_SANITIZE_ADDRESS |
| #else |
| #define WINDOWS_EXTRA_NO_SANITIZE_ADDRESS |
| #endif |
| |
| // The layout of C stack frames. |
| #if defined(HOST_ARCH_IA32) || defined(HOST_ARCH_X64) || \ |
| defined(HOST_ARCH_ARM) || defined(HOST_ARCH_ARM64) |
| // +-------------+ |
| // | saved IP/LR | |
| // +-------------+ |
| // | saved FP | <- FP |
| // +-------------+ |
| static constexpr intptr_t kHostSavedCallerPcSlotFromFp = 1; |
| static constexpr intptr_t kHostSavedCallerFpSlotFromFp = 0; |
| #elif defined(HOST_ARCH_RISCV32) || defined(HOST_ARCH_RISCV64) |
| // +-------------+ |
| // | | <- FP |
| // +-------------+ |
| // | saved RA | |
| // +-------------+ |
| // | saved FP | |
| // +-------------+ |
| static constexpr intptr_t kHostSavedCallerPcSlotFromFp = -1; |
| static constexpr intptr_t kHostSavedCallerFpSlotFromFp = -2; |
| #else |
| #error What architecture? |
| #endif |
| |
| // If the VM is compiled without frame pointers (which is the default on |
| // recent GCC versions when optimizing is enabled), the stack walking code |
| // below may fail. |
| class ProfilerNativeStackWalker : public ProfilerStackWalker { |
| public: |
| ProfilerNativeStackWalker(ProfilerCounters* counters, |
| Sample* sample, |
| Isolate* isolate, |
| uword stack_lower, |
| uword stack_upper, |
| uword pc, |
| uword fp) |
| : ProfilerStackWalker(sample, isolate), |
| counters_(counters), |
| stack_upper_(stack_upper), |
| original_pc_(pc), |
| original_fp_(fp), |
| lower_bound_(stack_lower) {} |
| |
| WINDOWS_EXTRA_NO_SANITIZE_ADDRESS |
| void walk() { |
| Append(original_pc_, original_fp_); |
| |
| uword* pc = reinterpret_cast<uword*>(original_pc_); |
| uword* fp = reinterpret_cast<uword*>(original_fp_); |
| uword* previous_fp = fp; |
| |
| if (!ValidFramePointer(fp)) { |
| counters_->incomplete_sample_fp_bounds.fetch_add(1); |
| return; |
| } |
| |
| while (true) { |
| pc = CallerPC(fp); |
| previous_fp = fp; |
| fp = CallerFP(fp); |
| |
| if (fp == nullptr) { |
| return; |
| } |
| |
| if (fp <= previous_fp) { |
| // Frame pointer did not move to a higher address. |
| counters_->incomplete_sample_fp_step.fetch_add(1); |
| return; |
| } |
| |
| if (!ValidFramePointer(fp)) { |
| // Frame pointer is outside of isolate stack boundary. |
| counters_->incomplete_sample_fp_bounds.fetch_add(1); |
| return; |
| } |
| |
| const uword pc_value = reinterpret_cast<uword>(pc); |
| if ((pc_value + 1) < pc_value) { |
| // It is not uncommon to encounter an invalid pc as we |
| // traverse a stack frame. Most of these we can tolerate. If |
| // the pc is so large that adding one to it will cause an |
| // overflow it is invalid and it will cause headaches later |
| // while we are building the profile. Discard it. |
| counters_->incomplete_sample_bad_pc.fetch_add(1); |
| return; |
| } |
| |
| // Move the lower bound up. |
| lower_bound_ = reinterpret_cast<uword>(fp); |
| |
| if (!Append(pc_value, reinterpret_cast<uword>(fp))) { |
| return; |
| } |
| } |
| } |
| |
| private: |
| WINDOWS_EXTRA_NO_SANITIZE_ADDRESS |
| uword* CallerPC(uword* fp) const { |
| ASSERT(fp != nullptr); |
| return LoadStackSlot(fp + kHostSavedCallerPcSlotFromFp); |
| } |
| |
| WINDOWS_EXTRA_NO_SANITIZE_ADDRESS |
| uword* CallerFP(uword* fp) const { |
| ASSERT(fp != nullptr); |
| return LoadStackSlot(fp + kHostSavedCallerFpSlotFromFp); |
| } |
| |
| bool ValidFramePointer(uword* fp) const { |
| if (fp == nullptr) { |
| return false; |
| } |
| if (!Utils::IsAligned(fp, kWordSize)) { |
| return false; |
| } |
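| // Require the whole frame-pointer slot to lie inside the stack bounds. |
| // Note that lower_bound_ rises as the walk progresses past each frame. |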
| uword cursor = reinterpret_cast<uword>(fp); |
| cursor += sizeof(fp); |
| bool r = (cursor >= lower_bound_) && (cursor < stack_upper_); |
| return r; |
| } |
| |
| ProfilerCounters* const counters_; |
| const uword stack_upper_; |
| const uword original_pc_; |
| const uword original_fp_; |
| uword lower_bound_; |
| }; |
| |
| static bool GetAndValidateCurrentThreadStackBounds(uintptr_t fp, |
| uintptr_t sp, |
| uword* stack_lower, |
| uword* stack_upper) { |
| ASSERT(stack_lower != nullptr); |
| ASSERT(stack_upper != nullptr); |
| if (!OSThread::GetCurrentStackBounds(stack_lower, stack_upper)) { |
| return false; |
| } |
| if ((*stack_lower == 0) || (*stack_upper == 0) || |
| (*stack_lower >= *stack_upper)) { |
| return false; // Bad bounds. |
| } |
| if ((sp < *stack_lower) || (sp >= *stack_upper)) { |
| return false; // Bad SP. |
| } |
| if ((fp < *stack_lower) || (fp >= *stack_upper)) { |
| return false; // Bad FP. |
| } |
| return true; |
| } |
| |
| void Profiler::DumpStackTrace(void* context) { |
| if (context == nullptr) { |
| DumpStackTrace(/*for_crash=*/true); |
| return; |
| } |
| #if defined(DART_HOST_OS_LINUX) || defined(DART_HOST_OS_MACOS) || \ |
| defined(DART_HOST_OS_ANDROID) |
| ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context); |
| mcontext_t mcontext = ucontext->uc_mcontext; |
| uword pc = SignalHandler::GetProgramCounter(mcontext); |
| uword fp = SignalHandler::GetFramePointer(mcontext); |
| uword sp = SignalHandler::GetCStackPointer(mcontext); |
| DumpStackTrace(sp, fp, pc, /*for_crash=*/true); |
| #elif defined(DART_HOST_OS_WINDOWS) |
| CONTEXT* ctx = reinterpret_cast<CONTEXT*>(context); |
| #if defined(HOST_ARCH_IA32) |
| uword pc = static_cast<uword>(ctx->Eip); |
| uword fp = static_cast<uword>(ctx->Ebp); |
| uword sp = static_cast<uword>(ctx->Esp); |
| #elif defined(HOST_ARCH_X64) |
| uword pc = static_cast<uword>(ctx->Rip); |
| uword fp = static_cast<uword>(ctx->Rbp); |
| uword sp = static_cast<uword>(ctx->Rsp); |
| #elif defined(HOST_ARCH_ARM) |
| uword pc = static_cast<uword>(ctx->Pc); |
| uword fp = static_cast<uword>(ctx->R11); |
| uword sp = static_cast<uword>(ctx->Sp); |
| #elif defined(HOST_ARCH_ARM64) |
| uword pc = static_cast<uword>(ctx->Pc); |
| uword fp = static_cast<uword>(ctx->Fp); |
| uword sp = static_cast<uword>(ctx->Sp); |
| #else |
| #error Unsupported architecture. |
| #endif |
| DumpStackTrace(sp, fp, pc, /*for_crash=*/true); |
| #else |
| // TODO(fschneider): Add support for more platforms. |
| // Do nothing on unsupported platforms. |
| #endif |
| } |
| |
| // We need the call to DumpStackTrace to be a non-tail call, and this function |
| // must not get the shrink-wrapping optimization; otherwise the frame from |
| // which we start our stack walk may be clobbered before the walk begins. |
| #ifdef _MSC_VER |
| #pragma optimize("", off) |
| #elif __clang__ |
| __attribute__((optnone)) |
| #elif __GNUC__ |
| __attribute__((optimize(0))) |
| #endif |
| void Profiler::DumpStackTrace(bool for_crash) { |
| uintptr_t sp = OSThread::GetCurrentStackPointer(); |
| uintptr_t fp = GET_FP_REGISTER(); |
| uintptr_t pc = OS::GetProgramCounter(); |
| |
| DumpStackTrace(sp, fp, pc, for_crash); |
| } |
| #ifdef _MSC_VER |
| #pragma optimize("", on) |
| #endif |
| |
| static void DumpCompilerState(Thread* thread) { |
| #if !defined(DART_PRECOMPILED_RUNTIME) |
| if (thread != nullptr && thread->execution_state() == Thread::kThreadInVM && |
| thread->HasCompilerState()) { |
| thread->compiler_state().ReportCrash(); |
| } |
| #endif |
| } |
| |
| void Profiler::DumpStackTrace(uword sp, uword fp, uword pc, bool for_crash) { |
| if (for_crash) { |
| // Allow only one stack trace to prevent recursively printing stack traces |
| // if we hit an assert while printing the stack. |
| static RelaxedAtomic<uintptr_t> started_dump = 0; |
| if (started_dump.fetch_add(1u) != 0) { |
| OS::PrintErr("Aborting reentrant request for stack trace.\n"); |
| return; |
| } |
| } |
| |
| auto thread = Thread::Current(); // nullptr if no current isolate. |
| auto isolate = thread == nullptr ? nullptr : thread->isolate(); |
| auto isolate_group = thread == nullptr ? nullptr : thread->isolate_group(); |
| auto source = isolate_group == nullptr ? nullptr : isolate_group->source(); |
| auto vm_source = |
| Dart::vm_isolate() == nullptr ? nullptr : Dart::vm_isolate()->source(); |
| const char* isolate_group_name = |
| isolate_group == nullptr ? "(nil)" : isolate_group->source()->name; |
| const char* isolate_name = isolate == nullptr ? "(nil)" : isolate->name(); |
| #ifdef SUPPORT_TIMELINE |
| const intptr_t thread_id = |
| OSThread::ThreadIdToIntPtr(OSThread::GetCurrentThreadTraceId()); |
| #else |
| const intptr_t thread_id = -1; |
| #endif |
| OS::PrintErr("version=%s\n", Version::String()); |
| OS::PrintErr("pid=%" Pd ", thread=%" Pd |
| ", isolate_group=%s(%p), isolate=%s(%p)\n", |
| static_cast<intptr_t>(OS::ProcessId()), thread_id, |
| isolate_group_name, isolate_group, isolate_name, isolate); |
| #if defined(DART_COMPRESSED_POINTERS) |
| const char kCompressedPointers[] = "yes"; |
| #else |
| const char kCompressedPointers[] = "no"; |
| #endif |
| #if defined(DART_INCLUDE_SIMULATOR) |
| const char kUsingSimulator[] = "yes"; |
| #else |
| const char kUsingSimulator[] = "no"; |
| #endif |
| OS::PrintErr("os=%s, arch=%s, comp=%s, sim=%s\n", kHostOperatingSystemName, |
| kTargetArchitectureName, kCompressedPointers, kUsingSimulator); |
| OS::PrintErr("isolate_instructions=%" Px ", vm_instructions=%" Px "\n", |
| source == nullptr |
| ? 0 |
| : reinterpret_cast<uword>(source->snapshot_instructions), |
| vm_source == nullptr |
| ? 0 |
| : reinterpret_cast<uword>(vm_source->snapshot_instructions)); |
| OS::PrintErr("fp=%" Px ", sp=%" Px ", pc=%" Px "\n", fp, sp, pc); |
| |
| uword stack_lower = 0; |
| uword stack_upper = 0; |
| if (!GetAndValidateCurrentThreadStackBounds(fp, sp, &stack_lower, |
| &stack_upper)) { |
| OS::PrintErr( |
| "Stack dump aborted because " |
| "GetAndValidateCurrentThreadStackBounds failed.\n"); |
| if (pc != 0) { // At the very least dump the top frame. |
| DumpStackFrame(0, pc, fp); |
| } |
| DumpCompilerState(thread); |
| return; |
| } |
| |
| ProfilerNativeStackWalker native_stack_walker( |
| &counters_, nullptr, nullptr, stack_lower, stack_upper, pc, fp); |
| native_stack_walker.walk(); |
| OS::PrintErr("-- End of DumpStackTrace\n"); |
| |
| if (thread != nullptr) { |
| if (thread->execution_state() == Thread::kThreadInNative) { |
| TransitionNativeToVM transition(thread); |
| StackFrame::DumpCurrentTrace(); |
| } else if (thread->execution_state() == Thread::kThreadInVM) { |
| StackFrame::DumpCurrentTrace(); |
| } else if (thread->execution_state() == Thread::kThreadInGenerated) { |
| // No exit frame, walk from the crash's registers. |
| #if defined(DART_DYNAMIC_MODULES) |
| if (thread->vm_tag() == VMTag::kDartInterpretedTagId) { |
| Interpreter* interpreter = thread->interpreter(); |
| sp = interpreter->get_sp(); |
| fp = interpreter->get_fp(); |
| pc = interpreter->get_pc(); |
| StackFrame::DumpCurrentTrace(sp, fp, pc); |
| } |
| #endif // defined(DART_DYNAMIC_MODULES) |
| if (thread->vm_tag() == VMTag::kDartTagId) { |
| StackFrame::DumpCurrentTrace(sp, fp, pc); |
| } |
| } |
| } |
| |
| DumpCompilerState(thread); |
| } |
| #endif // defined(DART_INCLUDE_STACK_DUMPER) |
| |
| #if defined(DART_INCLUDE_PROFILER) |
| |
| Monitor* Profiler::monitor_ = nullptr; |
| Profiler::Config Profiler::config_ = {.enabled = false, |
| .period_us = 0, |
| .max_depth = 0}; |
| RelaxedAtomic<bool> Profiler::running_ = false; |
| SampleBlockBuffer* Profiler::sample_block_buffer_ = nullptr; |
| |
| #if defined(SUPPORT_TIMELINE) && defined(SUPPORT_PERFETTO) |
| bool SampleBlockProcessor::initialized_ = false; |
| bool SampleBlockProcessor::shutdown_ = false; |
| bool SampleBlockProcessor::drain_ = false; |
| bool SampleBlockProcessor::thread_running_ = false; |
| ThreadJoinId SampleBlockProcessor::processor_thread_id_ = |
| OSThread::kInvalidThreadJoinId; |
| Monitor* SampleBlockProcessor::monitor_ = nullptr; |
| #endif |
| |
| void Profiler::Init() { |
| monitor_ = new Monitor(); |
| ThreadInterrupter::Init(); |
| #if defined(SUPPORT_TIMELINE) && defined(SUPPORT_PERFETTO) |
| SampleBlockProcessor::Init(); |
| #endif |
| SetConfig({}); |
| } |
| |
| void Profiler::Cleanup() { |
| { |
| SafepointMonitorLocker lock(monitor_); |
| StopLocked(); |
| } |
| |
| #if defined(SUPPORT_TIMELINE) && defined(SUPPORT_PERFETTO) |
| SampleBlockProcessor::Cleanup(); |
| #endif |
| ThreadInterrupter::Cleanup(); |
| delete monitor_; |
| } |
| |
| namespace { |
| Profiler::Config NormalizeConfig(const Profiler::Config& config) { |
| const intptr_t kMinimumDepth = 2; |
| const intptr_t kMaximumDepth = 255; |
| const intptr_t kMinimumProfilePeriodUs = 50; |
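| // These bounds mirror the documented limits of --profile_period |
| // ("Minimum 50") and --max_profile_depth ("Minimum 2. Maximum 255."). |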
| return { |
| .enabled = config.enabled, |
| .period_us = Utils::Maximum(kMinimumProfilePeriodUs, config.period_us), |
| .max_depth = Utils::Minimum( |
| kMaximumDepth, |
| Utils::Maximum(kMinimumDepth, config.max_depth.load())), |
| #if defined(SUPPORT_TIMELINE) && defined(SUPPORT_PERFETTO) |
| .stream_to_timeline = config.stream_to_timeline, |
| #endif |
| }; |
| } |
| } // namespace |
| |
| void Profiler::SetConfig(const Profiler::Config& config) { |
| SafepointMonitorLocker lock(monitor_); |
| |
| const auto new_config = NormalizeConfig(config); |
| const auto old_config = config_; |
| config_ = new_config; |
| |
| if (new_config.enabled != old_config.enabled) { |
| // Update running state. |
| if (new_config.enabled) { |
| StartLocked(); |
| } else { |
| StopLocked(); |
| } |
| } else if (old_config.enabled) { |
| #if defined(SUPPORT_TIMELINE) && defined(SUPPORT_PERFETTO) |
| if (new_config.stream_to_timeline != old_config.stream_to_timeline) { |
| if (new_config.stream_to_timeline) { |
| SampleBlockProcessor::Startup(); |
| } else { |
| SampleBlockProcessor::Shutdown(); |
| } |
| } |
| #endif |
| |
| // Check if we need to reconfigure a running profiler. |
| // |
| // Note: this will not resize the sampling buffer; you need to stop and |
| // restart the profiler to resize it. |
| if (new_config.period_us != old_config.period_us) { |
| ThreadInterrupter::SetInterruptPeriod(new_config.period_us); |
| } |
| |
| // The profiling thread will automatically pick up a change in |
| // config_.max_depth, but to resize the underlying buffer you need to |
| // stop and restart the profiler. |
| } |
| } |
| |
| void Profiler::StartLocked() { |
| RELEASE_ASSERT(!running_); |
| |
| // The profiler may have been shut down previously, in which case the sample |
| // buffer will already have been initialized. However, it might be too small. |
| const intptr_t sample_buffer_capacity = CalculateSampleBufferCapacity(); |
| if (sample_block_buffer_ != nullptr && |
| sample_buffer_capacity > sample_block_buffer_->Capacity()) { |
| delete sample_block_buffer_; |
| sample_block_buffer_ = nullptr; |
| } |
| if (sample_block_buffer_ == nullptr) { |
| sample_block_buffer_ = new SampleBlockBuffer(sample_buffer_capacity); |
| } |
| ThreadInterrupter::SetInterruptPeriod(config_.period_us); |
| ThreadInterrupter::Startup(); |
| #if defined(SUPPORT_TIMELINE) && defined(SUPPORT_PERFETTO) |
| if (config_.stream_to_timeline) { |
| SampleBlockProcessor::Startup(); |
| } |
| #endif |
| running_ = true; |
| } |
| |
| class SampleBlockCleanupVisitor : public IsolateVisitor { |
| public: |
| SampleBlockCleanupVisitor() = default; |
| virtual ~SampleBlockCleanupVisitor() = default; |
| |
| void VisitIsolate(Isolate* isolate) { |
| isolate->set_current_allocation_sample_block(nullptr); |
| isolate->set_current_sample_block(nullptr); |
| } |
| }; |
| |
| void Profiler::StopLocked() { |
| if (!running_) { |
| return; |
| } |
| |
| ThreadInterrupter::Shutdown(); |
| #if defined(SUPPORT_TIMELINE) && defined(SUPPORT_PERFETTO) |
| SampleBlockProcessor::Shutdown(); |
| #endif |
| |
| SampleBlockCleanupVisitor visitor; |
| Isolate::VisitIsolates(&visitor); |
| |
| running_ = false; |
| } |
| |
| intptr_t Profiler::CalculateSampleBufferCapacity() { |
| if (FLAG_sample_buffer_duration <= 0) { |
| return SampleBlockBuffer::kDefaultBlockCount; |
| } |
| // Deeper stacks require more than a single Sample object to be represented |
| // correctly. These samples are chained, so we need to determine the worst |
| // case sample chain length for a single stack. |
| // |
| // Below, we use the fact that `ceil((float)a / (float)b) == (a + b - 1) / b` |
| // when `a` and `b` are positive integers. |
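| // For example, ceil(10 / 4) == 3, and (10 + 4 - 1) / 4 == 13 / 4 == 3 in |
| // integer division. |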
| const intptr_t max_sample_chain_length = |
| (config_.max_depth + Sample::kPCArraySizeInWords - 1) / |
| Sample::kPCArraySizeInWords; |
| const intptr_t kMicrosPerSec = 1000000; |
| const intptr_t samples_per_second = kMicrosPerSec / config_.period_us; |
| const intptr_t sample_count = FLAG_sample_buffer_duration * |
| samples_per_second * max_sample_chain_length; |
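| // Illustrative numbers: at the default 1000us period samples_per_second is |
| // 1000, so a 4 second duration with a worst-case chain length of 2 asks |
| // for 8000 Sample slots before rounding up to whole blocks. |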
| return (sample_count / SampleBlock::kSamplesPerBlock) + 1; |
| } |
| |
| SampleBlockBuffer::SampleBlockBuffer(intptr_t blocks, |
| intptr_t samples_per_block) { |
| const intptr_t size = Utils::RoundUp( |
| blocks * samples_per_block * sizeof(Sample), VirtualMemory::PageSize()); |
| const bool executable = false; |
| const bool compressed = false; |
| memory_ = |
| VirtualMemory::Allocate(size, executable, compressed, "dart-profiler"); |
| if (memory_ == nullptr) { |
| OUT_OF_MEMORY(); |
| } |
| sample_buffer_ = reinterpret_cast<Sample*>(memory_->address()); |
| blocks_ = new SampleBlock[blocks]; |
| for (intptr_t i = 0; i < blocks; ++i) { |
| blocks_[i].Init(&sample_buffer_[i * samples_per_block], samples_per_block); |
| } |
| capacity_ = blocks; |
| cursor_ = 0; |
| } |
| |
| SampleBlockBuffer::~SampleBlockBuffer() { |
| delete[] blocks_; |
| blocks_ = nullptr; |
| delete memory_; |
| memory_ = nullptr; |
| capacity_ = 0; |
| cursor_ = 0; |
| } |
| |
| SampleBlock* SampleBlockBuffer::ReserveSampleBlock() { |
| intptr_t capacity = capacity_; |
| intptr_t start = cursor_.fetch_add(1) % capacity; |
| intptr_t i = start; |
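| // First pass: scan the ring once, looking for a block that is still free. |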
| do { |
| SampleBlock* block = &blocks_[i]; |
| if (block->TryAllocateFree()) { |
| return block; |
| } |
| i = (i + 1) % capacity; |
| } while (i != start); |
| |
| if (FLAG_profile_startup) { |
| // There are no free blocks and [FLAG_profile_startup] is set, so we stop |
| // recording samples. |
| return nullptr; |
| } else { |
| // There are no free blocks and [FLAG_profile_startup] is not set, so we |
| // reuse a completed block if one is available. |
| i = start; |
| do { |
| SampleBlock* block = &blocks_[i]; |
| if (block->TryAllocateCompleted()) { |
| return block; |
| } |
| i = (i + 1) % capacity; |
| } while (i != start); |
| |
| return nullptr; |
| } |
| } |
| |
| void SampleBlockBuffer::FreeCompletedBlocks() { |
| for (intptr_t i = 0; i < capacity_; i++) { |
| blocks_[i].FreeCompleted(); |
| } |
| } |
| |
| static void FlushSampleBlocks(Isolate* isolate) { |
| ASSERT(isolate != nullptr); |
| bool flushed = false; |
| |
| SampleBlock* block = isolate->exchange_current_sample_block(nullptr); |
| if (block != nullptr) { |
| block->MarkCompleted(); |
| flushed = true; |
| } |
| |
| block = isolate->exchange_current_allocation_sample_block(nullptr); |
| if (block != nullptr) { |
| // Allocation samples are collected synchronously. |
| block->MarkCompleted(); |
| flushed = true; |
| } |
| |
| if (flushed) { |
| isolate->TrySetHasCompletedBlocks(); |
| } |
| } |
| |
| ProcessedSampleBuffer* SampleBlockBuffer::BuildProcessedSampleBuffer( |
| Isolate* isolate, |
| SampleFilter* filter, |
| ProcessedSampleBuffer* buffer) { |
| ASSERT(isolate != nullptr); |
| |
| Thread* thread = Thread::Current(); |
| Zone* zone = thread->zone(); |
| |
| if (buffer == nullptr) { |
| buffer = new (zone) ProcessedSampleBuffer(); |
| } |
| |
| FlushSampleBlocks(isolate); |
| |
| for (intptr_t i = 0; i < capacity_; ++i) { |
| SampleBlock* block = &blocks_[i]; |
| if (block->TryAcquireStreaming(isolate)) { |
| block->BuildProcessedSampleBuffer(filter, buffer); |
| if (filter->take_samples()) { |
| block->StreamingToFree(); |
| } else { |
| block->StreamingToCompleted(); |
| } |
| } |
| } |
| |
| return buffer; |
| } |
| |
| Sample* SampleBlock::ReserveSample() { |
| intptr_t slot = cursor_.fetch_add(1u); |
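| // cursor_ may race past capacity_ under contention; such reservations fail |
| // and the caller falls back to claiming a fresh block. |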
| if (slot < capacity_) { |
| return At(slot); |
| } |
| return nullptr; |
| } |
| |
| Sample* SampleBlock::ReserveSampleAndLink(Sample* previous, Isolate* isolate) { |
| ASSERT(previous != nullptr); |
| SampleBlockBuffer* buffer = Profiler::sample_block_buffer(); |
| ASSERT(isolate != nullptr); |
| Sample* next = previous->is_allocation_sample() |
| ? buffer->ReserveAllocationSample(isolate) |
| : buffer->ReserveCPUSample(isolate); |
| if (next == nullptr) { |
| return nullptr; // No blocks left, so drop sample. |
| } |
| next->Init(previous->port(), previous->timestamp(), previous->tid()); |
| next->set_head_sample(false); |
| // Mark that previous continues at next. |
| previous->SetContinuation(next); |
| return next; |
| } |
| |
| Sample* SampleBlockBuffer::ReserveCPUSample(Isolate* isolate) { |
| return ReserveSampleImpl(isolate, false); |
| } |
| |
| Sample* SampleBlockBuffer::ReserveAllocationSample(Isolate* isolate) { |
| return ReserveSampleImpl(isolate, true); |
| } |
| |
| Sample* SampleBlockBuffer::ReserveSampleImpl(Isolate* isolate, |
| bool allocation_sample) { |
| SampleBlock* block = allocation_sample |
| ? isolate->current_allocation_sample_block() |
| : isolate->current_sample_block(); |
| Sample* sample = nullptr; |
| if (block != nullptr) { |
| sample = block->ReserveSample(); |
| } |
| if (sample != nullptr) { |
| return sample; |
| } |
| |
| SampleBlock* next = ReserveSampleBlock(); |
| if (next == nullptr) { |
| // We're out of blocks to reserve. Drop the sample. |
| return nullptr; |
| } |
| |
| next->set_owner(isolate); |
| if (allocation_sample) { |
| isolate->set_current_allocation_sample_block(next); |
| } else { |
| isolate->set_current_sample_block(next); |
| } |
| if (block != nullptr) { |
| block->MarkCompleted(); |
| if (!Isolate::IsSystemIsolate(isolate)) { |
| isolate->TrySetHasCompletedBlocks(); |
| } |
| } |
| return next->ReserveSample(); |
| } |
| |
| // Attempts to find the true return address when a Dart frame is being set |
| // up or torn down. |
| // NOTE: Architecture-specific implementations below. |
| class ReturnAddressLocator : public ValueObject { |
| public: |
| ReturnAddressLocator(Sample* sample, const Code& code) |
| : stack_buffer_(sample->GetStackBuffer()), |
| pc_(sample->pc()), |
| code_(Code::ZoneHandle(code.ptr())) { |
| ASSERT(!code_.IsNull()); |
| ASSERT(code_.ContainsInstructionAt(pc())); |
| } |
| |
| ReturnAddressLocator(uword pc, |
| RelaxedAtomic<uword>* stack_buffer, |
| const Code& code) |
| : stack_buffer_(stack_buffer), |
| pc_(pc), |
| code_(Code::ZoneHandle(code.ptr())) { |
| ASSERT(!code_.IsNull()); |
| ASSERT(code_.ContainsInstructionAt(pc_)); |
| } |
| |
| uword pc() { return pc_; } |
| |
| // Returns false on failure. |
| bool LocateReturnAddress(uword* return_address); |
| |
| // Returns offset into code object. |
| intptr_t RelativePC() { |
| ASSERT(pc() >= code_.PayloadStart()); |
| return static_cast<intptr_t>(pc() - code_.PayloadStart()); |
| } |
| |
| uint8_t* CodePointer(intptr_t offset) { |
| const intptr_t size = code_.Size(); |
| ASSERT(offset < size); |
| uint8_t* code_pointer = reinterpret_cast<uint8_t*>(code_.PayloadStart()); |
| code_pointer += offset; |
| return code_pointer; |
| } |
| |
| uword StackAt(intptr_t i) { |
| ASSERT(i >= 0); |
| ASSERT(i < Sample::kStackBufferSizeInWords); |
| return stack_buffer_[i]; |
| } |
| |
| private: |
| RelaxedAtomic<uword>* stack_buffer_; |
| uword pc_; |
| const Code& code_; |
| }; |
| |
| #if defined(TARGET_ARCH_IA32) || defined(TARGET_ARCH_X64) |
| bool ReturnAddressLocator::LocateReturnAddress(uword* return_address) { |
| ASSERT(return_address != nullptr); |
| const intptr_t offset = RelativePC(); |
| ASSERT(offset >= 0); |
| const intptr_t size = code_.Size(); |
| ASSERT(offset < size); |
| const intptr_t prologue_offset = code_.GetPrologueOffset(); |
| if (offset < prologue_offset) { |
| // Before the prologue, return address is at the top of the stack. |
| // TODO(johnmccutchan): Some intrinsics and stubs do not conform to the |
| // expected stack layout. Use a more robust solution for those code objects. |
| *return_address = StackAt(0); |
| return true; |
| } |
| // Detect if we are: |
| // push ebp <--- here |
| // mov ebp, esp |
| // on X64 the register names are different but the sequence is the same. |
| ProloguePattern pp(pc()); |
| if (pp.IsValid()) { |
| // Stack layout: |
| // 0 RETURN ADDRESS. |
| *return_address = StackAt(0); |
| return true; |
| } |
| // Detect if we are: |
| // push ebp |
| // mov ebp, esp <--- here |
| // on X64 the register names are different but the sequence is the same. |
| SetFramePointerPattern sfpp(pc()); |
| if (sfpp.IsValid()) { |
| // Stack layout: |
| // 0 CALLER FRAME POINTER |
| // 1 RETURN ADDRESS |
| *return_address = StackAt(1); |
| return true; |
| } |
| // Detect if we are: |
| // ret <--- here |
| ReturnPattern rp(pc()); |
| if (rp.IsValid()) { |
| // Stack layout: |
| // 0 RETURN ADDRESS. |
| *return_address = StackAt(0); |
| return true; |
| } |
| return false; |
| } |
| #elif defined(TARGET_ARCH_ARM) || defined(TARGET_ARCH_ARM64) || \ |
| defined(TARGET_ARCH_RISCV32) || defined(TARGET_ARCH_RISCV64) |
| bool ReturnAddressLocator::LocateReturnAddress(uword* return_address) { |
| ASSERT(return_address != nullptr); |
| return false; |
| } |
| #else |
| #error ReturnAddressLocator implementation missing for this architecture. |
| #endif |
| |
| bool SampleFilter::TimeFilterSample(Sample* sample) { |
| if ((time_origin_micros_ == -1) || (time_extent_micros_ == -1)) { |
| // No time filter passed in, always pass. |
| return true; |
| } |
| const int64_t timestamp = sample->timestamp(); |
| int64_t delta = timestamp - time_origin_micros_; |
| return (delta >= 0) && (delta <= time_extent_micros_); |
| } |
| |
| bool SampleFilter::TaskFilterSample(Sample* sample) { |
| const intptr_t task = static_cast<intptr_t>(sample->thread_task()); |
| if (thread_task_mask_ == kNoTaskFilter) { |
| return true; |
| } |
| return (task & thread_task_mask_) != 0; |
| } |
| |
| ClearProfileVisitor::ClearProfileVisitor(Isolate* isolate) |
| : SampleVisitor(isolate->main_port()) {} |
| |
| void ClearProfileVisitor::VisitSample(Sample* sample) { |
| sample->Clear(); |
| } |
| |
| // Executing Dart code, walk the stack. |
| class ProfilerDartStackWalker : public ProfilerStackWalker { |
| public: |
| ProfilerDartStackWalker(Thread* thread, |
| Sample* sample, |
| Isolate* isolate, |
| uword pc, |
| uword fp) |
| : ProfilerStackWalker(sample, isolate), |
| thread_(thread), |
| pc_(reinterpret_cast<uword*>(pc)), |
| fp_(reinterpret_cast<uword*>(fp)) {} |
| |
| void WalkFromExitFrame() { |
| // Can't be in the middle of a prologue. Won't need an SP or LR fixup. |
| sample_->set_exit_frame_sample(true); |
| |
| // Skip exit frame. |
| pc_ = CallerPC(); |
| fp_ = CallerFP(); |
| walk(); |
| } |
| |
| void WalkFromTopFrame(uword sp, uword lr) { |
| const bool is_interpreted_frame = IsInterpretedFrame(); |
| const bool is_entry_frame = |
| #if defined(TARGET_ARCH_IA32) || defined(TARGET_ARCH_X64) |
| StubCode::InInvocationStub(thread_, Stack(sp, 0), |
| is_interpreted_frame) || |
| StubCode::InInvocationStub(thread_, Stack(sp, 1), is_interpreted_frame); |
| #else |
| StubCode::InInvocationStub(thread_, lr, is_interpreted_frame); |
| #endif |
| if (is_entry_frame) { |
| // During the prologue of a function, CallerPC will return the caller's |
| // caller. For most frames, the missing PC will be added during profile |
| // processing. However, during this stack walk, it can cause us to fail |
| // to identify the entry frame and lead the stack walk into the weeds. |
| // Do not continue the stack walk since this might be a false positive |
| // from a Smi or unboxed value. |
| sample_->set_ignore_sample(true); |
| return; |
| } |
| walk(); |
| } |
| |
| private: |
| void walk() { |
| for (;;) { |
| // Skip entry frame. |
| if (StubCode::InInvocationStub(thread_, reinterpret_cast<uword>(pc_), |
| IsInterpretedFrame())) { |
| pc_ = nullptr; |
| fp_ = ExitLink(); |
| if (fp_ == nullptr) { |
| break; // End of Dart stack. |
| } |
| |
| // Skip exit frame. |
| pc_ = CallerPC(); |
| fp_ = CallerFP(); |
| |
| // At least one frame between exit and next entry frame. |
| RELEASE_ASSERT(!StubCode::InInvocationStub( |
| thread_, reinterpret_cast<uword>(pc_), IsInterpretedFrame())); |
| } |
| |
| if (!Append(reinterpret_cast<uword>(pc_), reinterpret_cast<uword>(fp_))) { |
| break; // Sample is full. |
| } |
| |
| pc_ = CallerPC(); |
| fp_ = CallerFP(); |
| } |
| } |
| |
| bool IsInterpretedFrame() const { |
| #if defined(DART_DYNAMIC_MODULES) |
| Interpreter* interpreter = thread_->interpreter(); |
| return (interpreter != nullptr) && |
| interpreter->HasFrame(reinterpret_cast<uword>(fp_)); |
| #else |
| return false; |
| #endif |
| } |
| |
| uword* CallerPC() const { |
| ASSERT(fp_ != nullptr); |
| uword* caller_pc_ptr = |
| fp_ + (IsInterpretedFrame() ? kKBCSavedCallerPcSlotFromFp |
| : kSavedCallerPcSlotFromFp); |
| return LoadStackSlot(caller_pc_ptr); |
| } |
| |
| uword* CallerFP() const { |
| ASSERT(fp_ != nullptr); |
| uword* caller_fp_ptr = |
| fp_ + (IsInterpretedFrame() ? kKBCSavedCallerFpSlotFromFp |
| : kSavedCallerFpSlotFromFp); |
| return LoadStackSlot(caller_fp_ptr); |
| } |
| |
| uword* ExitLink() const { |
| ASSERT(fp_ != nullptr); |
| uword* exit_link_ptr = |
| fp_ + (IsInterpretedFrame() ? kKBCExitLinkSlotFromEntryFp |
| : kExitLinkSlotFromEntryFp); |
| return LoadStackSlot(exit_link_ptr); |
| } |
| |
| uword Stack(uword sp, intptr_t index) const { |
| ASSERT(sp != 0); |
| return reinterpret_cast<uword>( |
| LoadStackSlot(reinterpret_cast<uword*>(sp) + index)); |
| } |
| |
| Thread* const thread_; |
| uword* pc_; |
| uword* fp_; |
| }; |
| |
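| // Capture the top few stack slots so that ReturnAddressLocator can later |
| // repair samples taken during a frame prologue or teardown (x86 only). |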
| static void CopyStackBuffer(Sample* sample, uword sp_addr) { |
| #if defined(TARGET_ARCH_IA32) || defined(TARGET_ARCH_X64) |
| ASSERT(sample != nullptr); |
| uword* sp = reinterpret_cast<uword*>(sp_addr); |
| RelaxedAtomic<uword>* buffer = sample->GetStackBuffer(); |
| if (sp != nullptr) { |
| for (intptr_t i = 0; i < Sample::kStackBufferSizeInWords; i++) { |
| buffer[i] = reinterpret_cast<uword>(LoadStackSlot(sp)); |
| sp++; |
| } |
| } |
| #endif |
| } |
| |
| static Sample* SetupSample(Thread* thread, |
| bool allocation_sample, |
| ThreadId tid) { |
| ASSERT(thread != nullptr); |
| Isolate* isolate = thread->isolate(); |
| SampleBlockBuffer* buffer = Profiler::sample_block_buffer(); |
| Sample* sample = allocation_sample ? buffer->ReserveAllocationSample(isolate) |
| : buffer->ReserveCPUSample(isolate); |
| if (sample == nullptr) { |
| return nullptr; |
| } |
| sample->Init(isolate->main_port(), OS::GetCurrentMonotonicMicros(), tid); |
| uword vm_tag = thread->vm_tag(); |
| #if defined(DART_INCLUDE_SIMULATOR) |
| // When running in the simulator, the runtime entry function address |
| // (stored as the vm tag) is the address of a redirect function. |
| // Attempt to find the real runtime entry function address and use that. |
| if (FLAG_use_simulator) { |
| uword redirect_vm_tag = Simulator::FunctionForRedirect(vm_tag); |
| if (redirect_vm_tag != 0) { |
| vm_tag = redirect_vm_tag; |
| } |
| } |
| #endif |
| sample->set_vm_tag(vm_tag); |
| sample->set_user_tag(thread->user_tag()); |
| sample->set_thread_task(thread->task_kind()); |
| return sample; |
| } |
| |
| static bool CheckIsolate(Isolate* isolate) { |
| if ((isolate == nullptr) || (Dart::vm_isolate() == nullptr)) { |
| // No isolate. |
| return false; |
| } |
| return isolate != Dart::vm_isolate(); |
| } |
| |
| void Profiler::SampleAllocation(Thread* thread, |
| intptr_t cid, |
| uint32_t identity_hash) { |
| ASSERT(thread != nullptr); |
| OSThread* os_thread = thread->os_thread(); |
| ASSERT(os_thread != nullptr); |
| Isolate* isolate = thread->isolate(); |
| if (!CheckIsolate(isolate)) { |
| return; |
| } |
| |
| Sample* sample = |
| SetupSample(thread, /*allocation_sample=*/true, os_thread->trace_id()); |
| if (sample == nullptr) { |
| // We were unable to assign a sample for this allocation. |
| counters_.sample_allocation_failure++; |
| return; |
| } |
| sample->SetAllocationCid(cid); |
| sample->set_allocation_identity_hash(identity_hash); |
| |
| if (FLAG_profile_vm_allocation) { |
| uintptr_t fp = GET_FP_REGISTER(); |
| uintptr_t pc = OS::GetProgramCounter(); |
| uword stack_lower = os_thread->stack_limit(); |
| uword stack_upper = os_thread->stack_base(); |
| if ((stack_lower >= stack_upper) || (stack_lower == 0) || |
| (stack_upper == 0) || (fp < stack_lower) || (fp >= stack_upper)) { |
| counters_.single_frame_sample_get_and_validate_stack_bounds.fetch_add(1); |
| return; |
| } |
| ProfilerNativeStackWalker native_stack_walker( |
| &counters_, sample, isolate, stack_lower, stack_upper, pc, fp); |
| native_stack_walker.walk(); |
| } else if (thread->HasExitedDartCode()) { |
| uintptr_t fp = thread->top_exit_frame_info(); |
| uintptr_t pc = 0; |
| ProfilerDartStackWalker dart_exit_stack_walker(thread, sample, isolate, pc, |
| fp); |
| dart_exit_stack_walker.WalkFromExitFrame(); |
| } else { |
| // Fall back. |
| uintptr_t pc = OS::GetProgramCounter(); |
| sample->SetAt(0, pc); |
| } |
| } |
| |
| void Profiler::SampleThreadSingleFrame(Thread* thread, |
| Sample* sample, |
| uintptr_t pc) { |
| ASSERT(thread != nullptr); |
| OSThread* os_thread = thread->os_thread(); |
| ASSERT(os_thread != nullptr); |
| ASSERT(Profiler::sample_block_buffer() != nullptr); |
| |
| #if !defined(PRODUCT) |
| Isolate* isolate = thread->isolate(); |
| |
| // Increment counter for vm tag. |
| VMTagCounters* counters = isolate->vm_tag_counters(); |
| ASSERT(counters != nullptr); |
| if (thread->IsDartMutatorThread()) { |
| counters->Increment(sample->vm_tag()); |
| } |
| #endif |
| |
| // Write the single pc value. |
| sample->SetAt(0, pc); |
| } |
| |
| void ReleaseToCurrentBlock(Isolate* isolate) { |
| #if defined(DART_HOST_OS_MACOS) || defined(DART_HOST_OS_WINDOWS) || \ |
| defined(DART_HOST_OS_FUCHSIA) |
| // The sample is collected by a different thread, so from the profiled |
| // thread's point of view it appears all at once. Establish that the isolate |
| // flushing its own current block happens-after the most recent sample |
| // written into that block by forcing a dependency through the current block. |
| // TSAN doesn't otherwise know this is already true because it has no |
| // special treatment for thread_suspend/resume. |
| SampleBlock* block = isolate->current_sample_block(); |
| isolate->exchange_current_sample_block(block); |
| #elif defined(DART_HOST_OS_LINUX) || defined(DART_HOST_OS_ANDROID) |
| // The sample is collected by a signal handler on the same thread being |
| // sampled. |
| #else |
| #error What kind of sampler? |
| #endif |
| } |
| |
| void Profiler::SampleThread(Thread* thread, |
| const InterruptedThreadState& state) { |
| ASSERT(thread != nullptr); |
| OSThread* os_thread = thread->os_thread(); |
| ASSERT(os_thread != nullptr); |
| Isolate* isolate = thread->isolate(); |
| |
| // Double-check whether interrupts were disabled after the thread |
| // interrupter decided to send a signal. |
| if (!os_thread->ThreadInterruptsEnabled()) { |
| return; |
| } |
| |
| // Thread is not doing VM work. |
| if (thread->task_kind() == Thread::kUnknownTask) { |
| counters_.bail_out_unknown_task.fetch_add(1); |
| return; |
| } |
| |
| if (!CheckIsolate(isolate)) { |
| counters_.bail_out_check_isolate.fetch_add(1); |
| return; |
| } |
| |
| if (StubCode::InJumpToFrameStub(thread, state.pc)) { |
| // The JumpToFrame stub manually adjusts the stack pointer, frame |
| // pointer, and some isolate state. It is not safe to walk the |
| // stack when executing this stub. |
| counters_.bail_out_jump_to_exception_handler.fetch_add(1); |
| return; |
| } |
| |
| if (thread->IsDeoptimizing()) { |
| counters_.bail_out_deoptimizing.fetch_add(1); |
| return; |
| } |
| |
| // Setup sample. |
| Sample* sample = |
| SetupSample(thread, /*allocation_sample=*/false, os_thread->trace_id()); |
| if (sample == nullptr) { |
| // We were unable to assign a sample for this profiler tick. |
| counters_.sample_allocation_failure++; |
| return; |
| } |
| |
| #if !defined(PRODUCT) |
| // Increment counter for vm tag. |
| VMTagCounters* counters = isolate->vm_tag_counters(); |
| ASSERT(counters != nullptr); |
| if (thread->IsDartMutatorThread()) { |
| counters->Increment(sample->vm_tag()); |
| } |
| #endif |
| |
| if (FLAG_profile_vm) { |
| uintptr_t fp = state.fp; |
| uintptr_t pc = state.pc; |
| uword stack_lower = os_thread->stack_limit(); |
| uword stack_upper = os_thread->stack_base(); |
| if ((fp < stack_lower) || (fp >= stack_upper)) { |
| counters_.single_frame_sample_get_and_validate_stack_bounds.fetch_add(1); |
| SampleThreadSingleFrame(thread, sample, pc); |
| ReleaseToCurrentBlock(isolate); |
| return; |
| } |
| |
| counters_.stack_walker_native.fetch_add(1); |
| ProfilerNativeStackWalker native_stack_walker( |
| &counters_, sample, isolate, stack_lower, stack_upper, pc, fp); |
| native_stack_walker.walk(); |
| } else if (thread->HasExitedDartCode()) { |
| uintptr_t fp = thread->top_exit_frame_info(); |
| uintptr_t pc = 0; |
| counters_.stack_walker_dart_exit.fetch_add(1); |
| ProfilerDartStackWalker dart_stack_walker(thread, sample, isolate, pc, fp); |
| dart_stack_walker.WalkFromExitFrame(); |
| } else if (thread->IsExecutingDartCode()) { |
| uintptr_t sp = state.dsp; |
| uintptr_t fp = state.fp; |
| uintptr_t pc = state.pc; |
| uintptr_t lr = state.lr; |
| #if defined(DART_INCLUDE_SIMULATOR) |
| if (FLAG_use_simulator) { |
| Simulator* simulator = isolate->simulator(); |
| sp = simulator->get_register(SPREG); |
| fp = simulator->get_register(FPREG); |
| pc = simulator->get_pc(); |
| lr = simulator->get_lr(); |
| } |
| #endif |
| #if defined(DART_DYNAMIC_MODULES) |
| if (thread->vm_tag() == VMTag::kDartInterpretedTagId) { |
| sp = 0; |
| pc = thread->interpreter()->get_pc(); |
| fp = thread->interpreter()->get_fp(); |
| lr = 0; |
| RELEASE_ASSERT(thread->interpreter()->HasFrame(fp)); |
| } |
| #endif |
| |
| // We can only trust the stack pointer if we are executing Dart code. |
| // See http://dartbug.com/20421 for details. |
| CopyStackBuffer(sample, sp); |
| |
| counters_.stack_walker_dart_exit.fetch_add(1); |
| ProfilerDartStackWalker dart_stack_walker(thread, sample, isolate, pc, fp); |
| dart_stack_walker.WalkFromTopFrame(sp, lr); |
| } else { |
| counters_.stack_walker_none.fetch_add(1); |
| sample->SetAt(0, state.pc); |
| } |
| |
| ReleaseToCurrentBlock(isolate); |
| } |
| |
| CodeDescriptor::CodeDescriptor(const AbstractCode code) : code_(code) {} |
| |
| uword CodeDescriptor::Start() const { |
| return code_.PayloadStart(); |
| } |
| |
| uword CodeDescriptor::Size() const { |
| return code_.Size(); |
| } |
| |
| int64_t CodeDescriptor::CompileTimestamp() const { |
| return code_.compile_timestamp(); |
| } |
| |
| CodeLookupTable::CodeLookupTable(Thread* thread) { |
| Build(thread); |
| } |
| |
| class CodeLookupTableBuilder : public ObjectVisitor { |
| public: |
| explicit CodeLookupTableBuilder(CodeLookupTable* table) : table_(table) { |
| ASSERT(table_ != nullptr); |
| } |
| |
| ~CodeLookupTableBuilder() {} |
| |
| void VisitObject(ObjectPtr raw_obj) override { |
| if (raw_obj->IsCode() && !Code::IsUnknownDartCode(Code::RawCast(raw_obj))) { |
| table_->Add(Code::Handle(Code::RawCast(raw_obj))); |
| } else if (raw_obj->IsBytecode()) { |
| table_->Add(Bytecode::Handle(Bytecode::RawCast(raw_obj))); |
| } |
| } |
| |
| private: |
| CodeLookupTable* table_; |
| }; |
| |
| void CodeLookupTable::Build(Thread* thread) { |
| ASSERT(thread != nullptr); |
| Isolate* vm_isolate = Dart::vm_isolate(); |
| ASSERT(vm_isolate != nullptr); |
| |
| // Clear. |
| code_objects_.Clear(); |
| |
| thread->CheckForSafepoint(); |
| // Add all found Code objects. |
| if (FLAG_precompiled_mode) { |
| const GrowableObjectArray& tables = GrowableObjectArray::Handle( |
| IsolateGroup::Current()->object_store()->instructions_tables()); |
| InstructionsTable& table = InstructionsTable::Handle(); |
| Array& codes = Array::Handle(); |
| for (intptr_t i = 0; i < tables.Length(); i++) { |
| table ^= tables.At(i); |
| codes = table.code_objects(); |
| for (intptr_t j = 0; j < codes.Length(); j++) { |
| Code& code = Code::Handle(); // Separate handle for each. |
| code ^= codes.At(j); |
| if (!Code::IsUnknownDartCode(code.ptr())) { |
| Add(code); |
| } |
| } |
| } |
| } else { |
| TimelineBeginEndScope tl(Timeline::GetIsolateStream(), |
| "CodeLookupTable::Build HeapIterationScope"); |
| HeapIterationScope iteration(thread); |
| CodeLookupTableBuilder cltb(this); |
| iteration.IterateVMIsolateObjects(&cltb); |
| iteration.IterateOldObjects(&cltb); |
| } |
| thread->CheckForSafepoint(); |
| |
| // Sort by entry. |
| code_objects_.Sort(CodeDescriptor::Compare); |
| |
| #if defined(DEBUG) |
| if (length() <= 1) { |
| return; |
| } |
| ASSERT(FindCode(0) == nullptr); |
| ASSERT(FindCode(~0) == nullptr); |
| // Sanity check that we don't have duplicate entries and that the entries |
| // are sorted. |
| for (intptr_t i = 0; i < length() - 1; i++) { |
| const CodeDescriptor* a = At(i); |
| const CodeDescriptor* b = At(i + 1); |
| ASSERT(a->Start() < b->Start()); |
| ASSERT(FindCode(a->Start()) == a); |
| ASSERT(FindCode(b->Start()) == b); |
| ASSERT(FindCode(a->Start() + a->Size() - 1) == a); |
| ASSERT(FindCode(b->Start() + b->Size() - 1) == b); |
| } |
| #endif |
| } |
| |
| void CodeLookupTable::Add(const Object& code) { |
| ASSERT(!code.IsNull()); |
| ASSERT(code.IsCode() || code.IsBytecode()); |
| CodeDescriptor* cd = new CodeDescriptor(AbstractCode(code.ptr())); |
| code_objects_.Add(cd); |
| } |
| |
| const CodeDescriptor* CodeLookupTable::FindCode(uword pc) const { |
| intptr_t first = 0; |
| intptr_t count = length(); |
| while (count > 0) { |
| intptr_t current = first; |
| intptr_t step = count / 2; |
| current += step; |
| const CodeDescriptor* cd = At(current); |
| if (pc >= cd->Start()) { |
| first = ++current; |
| count -= step + 1; |
| } else { |
| count = step; |
| } |
| } |
| // First points to the first code object whose entry is greater than PC. |
| // That means the code object we need to check is first - 1. |
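| // For example (illustrative), with code starts [100, 200, 300] and pc 250, |
| // the loop leaves first == 2, so we check the descriptor starting at 200. |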
| if (first == 0) { |
| return nullptr; |
| } |
| first--; |
| ASSERT(first >= 0); |
| ASSERT(first < length()); |
| const CodeDescriptor* cd = At(first); |
| if (cd->Contains(pc)) { |
| return cd; |
| } |
| return nullptr; |
| } |
| |
| ProcessedSampleBuffer* SampleBuffer::BuildProcessedSampleBuffer( |
| SampleFilter* filter, |
| ProcessedSampleBuffer* buffer) { |
| Thread* thread = Thread::Current(); |
| Zone* zone = thread->zone(); |
| |
| if (buffer == nullptr) { |
| buffer = new (zone) ProcessedSampleBuffer(); |
| } |
| |
| const intptr_t length = capacity(); |
| for (intptr_t i = 0; i < length; i++) { |
| thread->CheckForSafepoint(); |
| Sample* sample = At(i); |
| if (sample->ignore_sample()) { |
| // Bad sample. |
| continue; |
| } |
| if (!sample->head_sample()) { |
| // An inner sample in a chain of samples. |
| continue; |
| } |
| if (sample->timestamp() == 0) { |
| // Empty. |
| continue; |
| } |
| if (sample->At(0) == 0) { |
| // No frames. |
| continue; |
| } |
| if (filter != nullptr) { |
| // If we're requesting all the native allocation samples, we don't care |
| // whether or not we're in the same isolate as the sample. |
| if (sample->port() != filter->port()) { |
| // Another isolate. |
| continue; |
| } |
| if (!filter->TimeFilterSample(sample)) { |
| // Did not pass time filter. |
| continue; |
| } |
| if (!filter->TaskFilterSample(sample)) { |
| // Did not pass task filter. |
| continue; |
| } |
| if (!filter->FilterSample(sample)) { |
| // Did not pass filter. |
| continue; |
| } |
| } |
| buffer->Add(BuildProcessedSample(sample, buffer->code_lookup_table())); |
| } |
| return buffer; |
| } |
| |
| #if defined(SUPPORT_PERFETTO) && defined(DART_PRECOMPILED_RUNTIME) |
| class PerfettoPerfSampleWriter : public ValueObject { |
| public: |
| PerfettoPerfSampleWriter( |
| int64_t from_micros, |
| int64_t to_micros, |
| perfetto_utils::InternedDataBuilder& interned_data_builder, |
| void* file, |
| Dart_FileWriteCallback write_bytes) |
| : from_micros_(from_micros), |
| to_micros_(to_micros), |
| file_(file), |
| write_bytes_(write_bytes), |
| interned_data_builder_(interned_data_builder) { |
| CollectMappings(); |
| } |
| |
| ~PerfettoPerfSampleWriter() { |
| for (auto m : mappings_) { |
| delete m; |
| } |
| } |
| |
| struct SnapshotMapping : public MallocAllocated { |
| uint32_t iid; |
| |
| uword start; |
| uword end; |
| const char* path; |
| Dart_Port isolate_group_id; |
| bool is_root_unit; |
| |
| bool Contains(uword pc) { return start < pc && pc <= end; } |
| }; |
| |
| void CollectMappings() { |
| IsolateGroup::ForEach([&](IsolateGroup* group) { |
| const auto group_source = group->source(); |
| const auto isolate_group_instructions = |
| reinterpret_cast<uword>(group_source->snapshot_instructions); |
| const Image isolate_group_image(isolate_group_instructions); |
| group->heap()->old_space()->ForEachImagePage([&](Page* page) { |
| if (page->is_executable()) { |
| mappings_.Add(new SnapshotMapping{ |
| .start = page->object_start(), |
| .end = page->object_end(), |
| .path = group->source()->script_uri, |
| .isolate_group_id = group->id(), |
| .is_root_unit = |
| (page->object_start() == |
| reinterpret_cast<uword>(isolate_group_image.object_start())), |
| }); |
| } |
| }); |
| }); |
| |
| mappings_.Sort([](auto a, auto b) -> int { |
| if ((*a)->start < (*b)->start) return -1; |
| if ((*a)->start > (*b)->start) return 1; |
| return 0; |
| }); |
| |
| // Remove duplicated mappings. |
| intptr_t j = 0; |
| for (intptr_t i = 0; i < mappings_.length(); i++) { |
| if (j > 0 && mappings_[j - 1]->start == mappings_[i]->start) { |
| delete mappings_[i]; |
| } else { |
| mappings_[j++] = mappings_[i]; |
| } |
| } |
| mappings_.SetLength(j); |
| } |
| |
| void WriteSamples(SampleBuffer* buffer) { |
| const intptr_t length = buffer->capacity(); |
| for (intptr_t i = 0; i < length; i++) { |
| Sample* sample = buffer->At(i); |
| |
| if (sample->ignore_sample()) { |
| // Bad sample. |
| continue; |
| } |
| |
| if (!sample->head_sample()) { |
| // An inner sample in a chain of samples. |
| continue; |
| } |
| |
| if (sample->timestamp() == 0) { |
| // Empty. |
| continue; |
| } |
| |
| if (sample->At(0) == 0) { |
| // No frames. |
| continue; |
| } |
| |
| if (sample->is_allocation_sample()) { |
| continue; |
| } |
| |
| auto timestamp = sample->timestamp(); |
| if (from_micros_ > timestamp || to_micros_ < timestamp) { |
| continue; |
| } |
| |
| WriteSample(sample); |
| } |
| } |
| |
| std::pair<uint32_t, uint64_t> FindMapping(uword pc) { |
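| // Binary search for the first mapping whose end is >= pc, then confirm pc |
| // really falls inside it; on a miss, return iid 0 and the raw pc. |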
| const auto lower_bound = |
| std::lower_bound(mappings_.begin(), mappings_.end(), pc, |
| [](auto m, auto pc) { return m->end < pc; }); |
| |
| if (lower_bound == mappings_.end() || !(*lower_bound)->Contains(pc)) { |
| return std::make_pair(0, pc); |
| } |
| |
| const auto m = *lower_bound; |
| |
| return std::make_pair(InternMapping(m), pc - m->start); |
| } |
| |
| uint32_t InternMapping(SnapshotMapping* m) { |
| if (m->iid == 0) { |
| // When Perfetto is matching ModuleSymbols to a corresponding mapping, |
| // it uses both path and build_id for matching (and both of them are |
| // used as opaque identifiers). We use this to support deferred units: |
| // all mappings corresponding to an isolate group share the same build-id |
| // (derived from the isolate group id), while the path is based on the |
| // script uri, with the address of the mapping appended for non-root |
| // units. This makes the path+build_id combination unique for each unit, |
| // including the root one. |
| // |
| // Additionally, we prepend "/" to the path if it does not already start |
| // with "/" to compensate for similar logic in Perfetto: |
| // Mapping.path_string_ids is an array of path components; to reconstruct |
| // a mapping's path from its components, Perfetto joins them with "/" and |
| // prepends "/" if there is no leading slash (see [1]). To normalize paths |
| // between Mapping and ModuleSymbols we simply ensure that the path here |
| // always starts with "/". |
| // |
| // [1]: https://github.com/google/perfetto/blob/a3e107ec803c876a870205f89c1e37742184b598/src/trace_processor/importers/proto/profile_packet_utils.cc#L24-L38 |
| |
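| // For example (illustrative), a non-root unit with script uri "app.dart" |
| // mapped at 0x1000 interns the path "/app.dart(0000000000001000)". |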
| const char* path = m->path; |
| if (!m->is_root_unit) { |
| Utils::SNPrint(&name_buf_[0], ARRAY_SIZE(name_buf_), |
| "%s%s(%016" Px64 ")", m->path[0] == '/' ? "" : "/", |
| m->path, static_cast<uint64_t>(m->start)); |
| path = name_buf_; |
| } else if (m->path[0] != '/') { |
| Utils::SNPrint(&name_buf_[0], ARRAY_SIZE(name_buf_), "/%s", m->path); |
| path = name_buf_; |
| } |
| |
| const auto path_id = interned_data_builder_.mapping_paths().Intern(path); |
| const auto build_id_iid = |
| interned_data_builder_.InternSyntheticBuildIdForIsolateGroup( |
| m->isolate_group_id); |
| |
| m->iid = interned_data_builder_.mappings().Intern({ |
| .start = m->start, |
| .end = m->end, |
| .path_string = path_id, |
| .build_id = build_id_iid, |
| }); |
| } |
| return m->iid; |
| } |
| |
| void WriteSample(Sample* sample) { |
| WriteClockSnapshotPacket(); |
| |
| // Walk the sampled PCs and intern the stack. |
| callstack_.Clear(); |
| |
| Sample* current = sample; |
| bool unknown_mappings = false; |
| intptr_t pc_adjustment = 0; |
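| // The leaf frame's PC is used as-is; every subsequent PC is a return |
| // address, so pc_adjustment becomes -1 to attribute it to the call. |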
| while (current != nullptr) { |
| for (intptr_t i = 0; i < Sample::kPCArraySizeInWords; i++) { |
| if (current->At(i) == 0) { |
| break; |
| } |
| |
| const uword pc = current->At(i) + pc_adjustment; |
| const auto [mapping_iid, rel_pc] = FindMapping(pc); |
| |
| const auto frame_iid = interned_data_builder_.frames().Intern({ |
| .rel_pc = rel_pc, |
| .mapping_iid = mapping_iid, |
| }); |
| |
| if (mapping_iid == 0) { |
| unknown_mappings = true; |
| |
| // Eagerly symbolize native frames. |
| const auto& frame = |
| interned_data_builder_.frames().GetByIid(frame_iid); |
| if (frame.function_name_iid == 0) { |
| const auto name_iid = |
| interned_data_builder_.function_names().Intern( |
| LookupNativeName(pc)); |
| const_cast<perfetto_utils::InternedDataBuilder::Frame&>(frame) |
| .function_name_iid = name_iid; |
| } |
| } |
| |
| callstack_.Add(frame_iid); |
| pc_adjustment = -1; |
| } |
| |
| current = current->Next(); |
| } |
| |
| if (unknown_mappings) { |
| interned_data_builder_.MarkNeedUnknownMapping(); |
| } |
| |
    // The Perfetto UI requires callstack frames to be in caller-first order,
    // while the profiler records samples in callee-first order.
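    // E.g. a sample captured as [leaf, caller, main] is emitted as
    // [main, caller, leaf].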
| callstack_.Reverse(); |
| |
| const auto callstack_iid = interned_data_builder_.callstacks().Intern( |
| {&callstack_[0], callstack_.length()}); |
| |
| perfetto_utils::SetTrustedPacketSequenceId(packet_.get()); |
| perfetto_utils::SetTimestampAndMonotonicClockId(packet_.get(), |
| sample->timestamp()); |
| |
| auto& perf_sample = *packet_->set_perf_sample(); |
| perf_sample.set_pid(pid_); |
| perf_sample.set_tid(OSThread::ThreadIdToIntPtr(sample->tid())); |
| perf_sample.set_callstack_iid(callstack_iid); |
| |
| interned_data_builder_.AttachInternedDataTo(packet_.get()); |
| |
| perfetto_utils::WritePacketBytes(&packet_, [this](auto bytes, auto size) { |
| write_bytes_(bytes, size, file_); |
| }); |
| packet_.Reset(); |
| } |
| |
| private: |
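  // Emits a single ClockSnapshot packet ahead of the first sample packet so
  // that sample timestamps can be interpreted against the monotonic clock.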
| void WriteClockSnapshotPacket() { |
| if (clock_snapshot_written_) { |
| return; |
| } |
| |
| perfetto_utils::PopulateClockSnapshotPacket(packet_.get()); |
| perfetto_utils::WritePacketBytes(&packet_, [this](auto bytes, auto size) { |
| write_bytes_(bytes, size, file_); |
| }); |
| packet_.Reset(); |
| clock_snapshot_written_ = true; |
| } |
| |
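  // Produces a human-readable name for a native |pc|: symbol+offset when the
  // symbol resolves, shared-object+offset when only the DSO is known, and
  // the raw address otherwise. The result points into |name_buf_| and is
  // only valid until the next call that reuses the buffer.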
| char* LookupNativeName(uword pc) { |
| uword start; |
| if (auto const name = NativeSymbolResolver::LookupSymbolName(pc, &start)) { |
| Utils::SNPrint(&name_buf_[0], ARRAY_SIZE(name_buf_), |
| "[Native] %s+0x%" Px "", name, pc - start); |
| NativeSymbolResolver::FreeSymbolName(name); |
| return &name_buf_[0]; |
| } |
| |
| uword dso_base; |
    char* dso_name;
| if (NativeSymbolResolver::LookupSharedObject(pc, &dso_base, &dso_name)) { |
| uword dso_offset = pc - dso_base; |
| Utils::SNPrint(&name_buf_[0], ARRAY_SIZE(name_buf_), |
| "[Native] %s+0x%" Px "", dso_name, dso_offset); |
| NativeSymbolResolver::FreeSymbolName(dso_name); |
| return &name_buf_[0]; |
| } else { |
| Utils::SNPrint(&name_buf_[0], ARRAY_SIZE(name_buf_), "[Native] %" Px "", |
| pc); |
| return &name_buf_[0]; |
| } |
| } |
| |
| int64_t from_micros_; |
| int64_t to_micros_; |
| |
| void* file_; |
| Dart_FileWriteCallback write_bytes_; |
| |
| const intptr_t pid_ = OS::ProcessId(); |
| |
| MallocGrowableArray<SnapshotMapping*> mappings_; |
| char name_buf_[1024]; |
| |
| perfetto_utils::InternedDataBuilder& interned_data_builder_; |
| |
| bool clock_snapshot_written_ = false; |
| protozero::HeapBuffered<perfetto::protos::pbzero::TracePacket> packet_; |
| MallocGrowableArray<uint64_t> callstack_{128}; |
| }; |
| |
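// Streams CPU samples with timestamps in [from_micros, to_micros] out of
// every sample block that can be acquired for streaming, encoding them as
// Perfetto PerfSample packets and appending the bytes to |file| via
// |write_bytes|.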
| void SampleBlockBuffer::WritePerfetto( |
| int64_t from_micros, |
| int64_t to_micros, |
| perfetto_utils::InternedDataBuilder& interned_data_builder, |
| void* file, |
| Dart_FileWriteCallback write_bytes) { |
| PerfettoPerfSampleWriter writer(from_micros, to_micros, interned_data_builder, |
| file, write_bytes); |
| |
| for (intptr_t i = 0; i < capacity_; ++i) { |
| SampleBlock* block = &blocks_[i]; |
| if (block->TryAcquireStreaming(/*isolate=*/nullptr)) { |
| writer.WriteSamples(block); |
| block->StreamingToFree(); // We consumed samples. |
| } |
| } |
| } |
#endif  // defined(SUPPORT_PERFETTO)
| |
| ProcessedSample* SampleBuffer::BuildProcessedSample( |
| Sample* sample, |
| const CodeLookupTable& clt) { |
| Thread* thread = Thread::Current(); |
| Zone* zone = thread->zone(); |
| |
| ProcessedSample* processed_sample = new (zone) ProcessedSample(); |
| |
| // Copy state bits from sample. |
| processed_sample->set_timestamp(sample->timestamp()); |
| processed_sample->set_tid(sample->tid()); |
| processed_sample->set_vm_tag(sample->vm_tag()); |
| processed_sample->set_user_tag(sample->user_tag()); |
| if (sample->is_allocation_sample()) { |
| processed_sample->set_allocation_cid(sample->allocation_cid()); |
| processed_sample->set_allocation_identity_hash( |
| sample->allocation_identity_hash()); |
| } |
| processed_sample->set_first_frame_executing(!sample->exit_frame_sample()); |
| |
| // Copy stack trace from sample(s). |
| bool truncated = false; |
| |
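  // A deep stack is spread across a chain of Sample objects; walk the chain
  // and concatenate the PCs from each link.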
| for (Sample* current = sample; current != nullptr; |
| current = current->Next()) { |
| for (intptr_t i = 0; i < Sample::kPCArraySizeInWords; i++) { |
| if (current->At(i) == 0) { |
| break; |
| } |
| processed_sample->Add(current->At(i)); |
| } |
| |
| truncated = truncated || current->truncated_trace(); |
| } |
| |
| if (!sample->exit_frame_sample()) { |
| processed_sample->FixupCaller(clt, /*pc_marker=*/0, |
| sample->GetStackBuffer()); |
| } |
| |
| processed_sample->set_truncated(truncated); |
| return processed_sample; |
| } |
| |
| ProcessedSample::ProcessedSample() |
| : pcs_(Sample::kPCArraySizeInWords), |
| timestamp_(0), |
| vm_tag_(0), |
| user_tag_(0), |
| allocation_cid_(-1), |
| allocation_identity_hash_(0), |
| truncated_(false) {} |
| |
| void ProcessedSample::FixupCaller(const CodeLookupTable& clt, |
| uword pc_marker, |
| RelaxedAtomic<uword>* stack_buffer) { |
| const CodeDescriptor* cd = clt.FindCode(At(0)); |
| if (cd == nullptr) { |
| // No Dart code. |
| return; |
| } |
| if (cd->CompileTimestamp() > timestamp()) { |
| // Code compiled after sample. Ignore. |
| return; |
| } |
| CheckForMissingDartFrame(clt, cd, pc_marker, stack_buffer); |
| } |
| |
| void ProcessedSample::CheckForMissingDartFrame( |
| const CodeLookupTable& clt, |
| const CodeDescriptor* cd, |
| uword pc_marker, |
| RelaxedAtomic<uword>* stack_buffer) { |
| ASSERT(cd != nullptr); |
| if (cd->code().IsBytecode()) { |
    // Building a bytecode frame is atomic from the profiler's perspective,
    // so there are no missing frames.
| return; |
| } |
| const Code& code = Code::Handle(Code::RawCast(cd->code().ptr())); |
| ASSERT(!code.IsNull()); |
  // Some stubs (and intrinsics) do not push a frame onto the stack, leaving
  // the frame pointer pointing into the caller's frame.
| // |
| // PC -> STUB |
| // FP -> DART3 <-+ |
| // DART2 <-| <- TOP FRAME RETURN ADDRESS. |
| // DART1 <-| |
| // ..... |
| // |
| // In this case, traversing the linked stack frames will not collect a PC |
| // inside DART3. The stack will incorrectly be: STUB, DART2, DART1. |
| // In Dart code, after pushing the FP onto the stack, an IP in the current |
| // function is pushed onto the stack as well. This stack slot is called |
| // the PC marker. We can use the PC marker to insert DART3 into the stack |
  // so that it will correctly be: STUB, DART3, DART2, DART1. Note that the
  // inserted PC may not accurately reflect the true return address into
  // DART3.
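  //
  // In the example above, the walk produces [STUB, DART2, DART1]; after
  // InsertAt(1, return_address) the stack becomes
  // [STUB, DART3, DART2, DART1].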
| |
| // The pc marker is our current best guess of a return address. |
| uword return_address = pc_marker; |
| |
| // Attempt to find a better return address. |
| ReturnAddressLocator ral(At(0), stack_buffer, code); |
| |
| if (!ral.LocateReturnAddress(&return_address)) { |
| ASSERT(return_address == pc_marker); |
| if (code.GetPrologueOffset() == 0) { |
      // Code has the prologue at offset 0. The frame is already set up and
      // can be trusted.
| return; |
| } |
| // Could not find a better return address than the pc_marker. |
| if (code.ContainsInstructionAt(return_address)) { |
| // PC marker is in the same code as pc, no missing frame. |
| return; |
| } |
| } |
| |
| if (clt.FindCode(return_address) == nullptr) { |
| // Return address is not from a Dart code object. Do not insert. |
| return; |
| } |
| |
| if (return_address != 0) { |
| InsertAt(1, return_address); |
| } |
| } |
| |
| ProcessedSampleBuffer::ProcessedSampleBuffer() |
| : code_lookup_table_(new CodeLookupTable(Thread::Current())) { |
| ASSERT(code_lookup_table_ != nullptr); |
| } |
| |
| #if defined(SUPPORT_TIMELINE) && defined(SUPPORT_PERFETTO) |
| void SampleBlockProcessor::Init() { |
| ASSERT(!initialized_); |
| monitor_ = new Monitor(); |
| initialized_ = true; |
  // Start in the shut-down state so that Cleanup() without a matching
  // Startup() is a no-op; Startup() clears this flag.
  shutdown_ = true;
| drain_ = false; |
| } |
| |
| void SampleBlockProcessor::Cleanup() { |
| Shutdown(); |
| initialized_ = false; |
| delete monitor_; |
| } |
| |
| void SampleBlockProcessor::Startup() { |
| ASSERT(initialized_); |
| ASSERT(processor_thread_id_ == OSThread::kInvalidThreadJoinId); |
| SafepointMonitorLocker startup_ml(monitor_); |
| shutdown_ = false; |
| drain_ = false; |
| OSThread::Start("Dart Profiler SampleBlockProcessor", ThreadMain, 0); |
| while (!thread_running_) { |
| startup_ml.Wait(); |
| } |
| ASSERT(processor_thread_id_ != OSThread::kInvalidThreadJoinId); |
| } |
| |
| void SampleBlockProcessor::Shutdown() { |
| { |
| SafepointMonitorLocker shutdown_ml(monitor_); |
| if (shutdown_) { |
| // Already shutdown. |
| return; |
| } |
| shutdown_ = true; |
| shutdown_ml.Notify(); |
| ASSERT(initialized_); |
| } |
| |
  // Join the thread. If we are on a VM thread, transition to a blocked state
  // while joining so that we do not hold up safepoints.
| ASSERT(processor_thread_id_ != OSThread::kInvalidThreadJoinId); |
| auto thread = Thread::Current(); |
| if (thread != nullptr) { |
| TransitionVMToBlocked transition(thread); |
| OSThread::Join(processor_thread_id_); |
| } else { |
| OSThread::Join(processor_thread_id_); |
| } |
| processor_thread_id_ = OSThread::kInvalidThreadJoinId; |
| ASSERT(!thread_running_); |
| } |
| |
| void Profiler::IsolateShutdown(Isolate* isolate) { |
| FlushSampleBlocks(isolate); |
| NOT_IN_PRECOMPILED(Timeline::DrainCompletedSampleBlocksIntoRecorder(isolate)); |
| } |
| |
| void Profiler::IsolateGroupShutdown(IsolateGroup* isolate_group) { |
| #if defined(SUPPORT_TIMELINE) |
| if (config_.enabled && config_.stream_to_timeline) { |
| Timeline::NotifyAboutIsolateGroupShutdown(isolate_group); |
| } |
| #endif // defined(SUPPORT_TIMELINE) |
| } |
| |
| void SampleBlockProcessor::ThreadMain(uword parameters) { |
| ASSERT(initialized_); |
| { |
| // Signal to main thread we are ready. |
| MonitorLocker startup_ml(monitor_); |
| OSThread* os_thread = OSThread::Current(); |
| ASSERT(os_thread != nullptr); |
| processor_thread_id_ = OSThread::GetCurrentThreadJoinId(os_thread); |
| thread_running_ = true; |
| startup_ml.Notify(); |
| } |
| |
| MonitorLocker wait_ml(monitor_); |
  // Wake up every 100ms (WaitMicros takes microseconds).
  const int64_t wakeup_interval = 100 * kMicrosecondsPerMillisecond;
| while (true) { |
| wait_ml.WaitMicros(wakeup_interval); |
| |
| #if defined(DART_PRECOMPILED_RUNTIME) |
    // If shutting down, flush all sample blocks from all isolates.
| if (shutdown_) { |
| IsolateGroup::ForEach([&](IsolateGroup* group) { |
| if (group == Dart::vm_isolate_group()) return; |
| |
| const bool kBypassSafepoint = false; |
| Thread::EnterIsolateGroupAsHelper(group, Thread::kSampleBlockTask, |
| kBypassSafepoint); |
| group->ForEachIsolate( |
| [&](Isolate* isolate) { FlushSampleBlocks(isolate); }); |
| Thread::ExitIsolateGroupAsHelper(kBypassSafepoint); |
| }); |
| } |
| Timeline::DrainCompletedSampleBlocksIntoRecorder(); |
| #else |
| IsolateGroup::ForEach([&](IsolateGroup* group) { |
| if (group == Dart::vm_isolate_group()) return; |
| |
| const bool kBypassSafepoint = false; |
| Thread::EnterIsolateGroupAsHelper(group, Thread::kSampleBlockTask, |
| kBypassSafepoint); |
| group->ForEachIsolate([&](Isolate* isolate) { |
| if (shutdown_) { |
| FlushSampleBlocks(isolate); |
| } |
| if (isolate->TakeHasCompletedBlocks()) { |
| Timeline::DrainCompletedSampleBlocksIntoRecorder(isolate); |
| } |
| }); |
| Thread::ExitIsolateGroupAsHelper(kBypassSafepoint); |
| }); |
#endif  // defined(DART_PRECOMPILED_RUNTIME)
| |
| if (shutdown_) { |
| break; |
| } |
| } |
| // Signal to main thread we are exiting. |
| thread_running_ = false; |
| } |
#endif  // defined(SUPPORT_TIMELINE) && defined(SUPPORT_PERFETTO)
| |
| #endif // defined(DART_INCLUDE_PROFILER) |
| |
| } // namespace dart |