diff -r 000000000000 -r 6474c204b198 xpcom/threads/HangMonitor.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xpcom/threads/HangMonitor.cpp Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,388 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/HangMonitor.h" +#include "mozilla/BackgroundHangMonitor.h" +#include "mozilla/Monitor.h" +#include "mozilla/Preferences.h" +#include "mozilla/Telemetry.h" +#include "mozilla/ProcessedStack.h" +#include "mozilla/Atomics.h" +#include "nsXULAppAPI.h" +#include "nsThreadUtils.h" +#include "nsStackWalk.h" + +#ifdef MOZ_CRASHREPORTER +#include "nsExceptionHandler.h" +#endif + +#ifdef XP_WIN +#include +#endif + +#if defined(MOZ_ENABLE_PROFILER_SPS) && defined(MOZ_PROFILING) && defined(XP_WIN) + #define REPORT_CHROME_HANGS +#endif + +namespace mozilla { namespace HangMonitor { + +/** + * A flag which may be set from within a debugger to disable the hang + * monitor. + */ +volatile bool gDebugDisableHangMonitor = false; + +const char kHangMonitorPrefName[] = "hangmonitor.timeout"; + +const char kTelemetryPrefName[] = "toolkit.telemetry.enabled"; + +// Monitor protects gShutdown and gTimeout, but not gTimestamp which rely on +// being atomically set by the processor; synchronization doesn't really matter +// in this use case. +Monitor* gMonitor; + +// The timeout preference, in seconds. +int32_t gTimeout; + +PRThread* gThread; + +// Set when shutdown begins to signal the thread to exit immediately. +bool gShutdown; + +// The timestamp of the last event notification, or PR_INTERVAL_NO_WAIT if +// we're currently not processing events. +Atomic gTimestamp(PR_INTERVAL_NO_WAIT); + +#ifdef REPORT_CHROME_HANGS +// Main thread ID used in reporting chrome hangs under Windows +static HANDLE winMainThreadHandle = nullptr; + +// Default timeout for reporting chrome hangs to Telemetry (5 seconds) +static const int32_t DEFAULT_CHROME_HANG_INTERVAL = 5; + +// Maximum number of PCs to gather from the stack +static const int32_t MAX_CALL_STACK_PCS = 400; +#endif + +// PrefChangedFunc +void +PrefChanged(const char*, void*) +{ + int32_t newval = Preferences::GetInt(kHangMonitorPrefName); +#ifdef REPORT_CHROME_HANGS + // Monitor chrome hangs on the profiling branch if Telemetry enabled + if (newval == 0) { + bool telemetryEnabled = Preferences::GetBool(kTelemetryPrefName); + if (telemetryEnabled) { + newval = DEFAULT_CHROME_HANG_INTERVAL; + } + } +#endif + MonitorAutoLock lock(*gMonitor); + if (newval != gTimeout) { + gTimeout = newval; + lock.Notify(); + } +} + +void +Crash() +{ + if (gDebugDisableHangMonitor) { + return; + } + +#ifdef XP_WIN + if (::IsDebuggerPresent()) { + return; + } +#endif + +#ifdef MOZ_CRASHREPORTER + CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("Hang"), + NS_LITERAL_CSTRING("1")); +#endif + + NS_RUNTIMEABORT("HangMonitor triggered"); +} + +#ifdef REPORT_CHROME_HANGS +static void +ChromeStackWalker(void *aPC, void *aSP, void *aClosure) +{ + MOZ_ASSERT(aClosure); + std::vector *stack = + static_cast*>(aClosure); + if (stack->size() == MAX_CALL_STACK_PCS) + return; + MOZ_ASSERT(stack->size() < MAX_CALL_STACK_PCS); + stack->push_back(reinterpret_cast(aPC)); +} + +static void +GetChromeHangReport(Telemetry::ProcessedStack &aStack, + int32_t &aSystemUptime, + int32_t &aFirefoxUptime) +{ + MOZ_ASSERT(winMainThreadHandle); + + // The thread we're about to suspend might have the alloc lock + // so allocate ahead of time + std::vector rawStack; + rawStack.reserve(MAX_CALL_STACK_PCS); + DWORD ret = ::SuspendThread(winMainThreadHandle); + if (ret == -1) + return; + NS_StackWalk(ChromeStackWalker, /* skipFrames */ 0, /* maxFrames */ 0, + reinterpret_cast(&rawStack), + reinterpret_cast(winMainThreadHandle), nullptr); + ret = ::ResumeThread(winMainThreadHandle); + if (ret == -1) + return; + aStack = Telemetry::GetStackAndModules(rawStack); + + // Record system uptime (in minutes) at the time of the hang + aSystemUptime = ((GetTickCount() / 1000) - (gTimeout * 2)) / 60; + + // Record Firefox uptime (in minutes) at the time of the hang + bool error; + TimeStamp processCreation = TimeStamp::ProcessCreation(error); + if (!error) { + TimeDuration td = TimeStamp::Now() - processCreation; + aFirefoxUptime = (static_cast(td.ToSeconds()) - (gTimeout * 2)) / 60; + } else { + aFirefoxUptime = -1; + } +} +#endif + +void +ThreadMain(void*) +{ + PR_SetCurrentThreadName("Hang Monitor"); + + MonitorAutoLock lock(*gMonitor); + + // In order to avoid issues with the hang monitor incorrectly triggering + // during a general system stop such as sleeping, the monitor thread must + // run twice to trigger hang protection. + PRIntervalTime lastTimestamp = 0; + int waitCount = 0; + +#ifdef REPORT_CHROME_HANGS + Telemetry::ProcessedStack stack; + int32_t systemUptime = -1; + int32_t firefoxUptime = -1; +#endif + + while (true) { + if (gShutdown) { + return; // Exit the thread + } + + // avoid rereading the volatile value in this loop + PRIntervalTime timestamp = gTimestamp; + + PRIntervalTime now = PR_IntervalNow(); + + if (timestamp != PR_INTERVAL_NO_WAIT && + now < timestamp) { + // 32-bit overflow, reset for another waiting period + timestamp = 1; // lowest legal PRInterval value + } + + if (timestamp != PR_INTERVAL_NO_WAIT && + timestamp == lastTimestamp && + gTimeout > 0) { + ++waitCount; +#ifdef REPORT_CHROME_HANGS + // Capture the chrome-hang stack + Firefox & system uptimes after + // the minimum hang duration has been reached (not when the hang ends) + if (waitCount == 2) { + GetChromeHangReport(stack, systemUptime, firefoxUptime); + } +#else + // This is the crash-on-hang feature. + // See bug 867313 for the quirk in the waitCount comparison + if (waitCount >= 2) { + int32_t delay = + int32_t(PR_IntervalToSeconds(now - timestamp)); + if (delay >= gTimeout) { + MonitorAutoUnlock unlock(*gMonitor); + Crash(); + } + } +#endif + } + else { +#ifdef REPORT_CHROME_HANGS + if (waitCount >= 2) { + uint32_t hangDuration = PR_IntervalToSeconds(now - lastTimestamp); + Telemetry::RecordChromeHang(hangDuration, stack, + systemUptime, firefoxUptime); + stack.Clear(); + } +#endif + lastTimestamp = timestamp; + waitCount = 0; + } + + PRIntervalTime timeout; + if (gTimeout <= 0) { + timeout = PR_INTERVAL_NO_TIMEOUT; + } + else { + timeout = PR_MillisecondsToInterval(gTimeout * 500); + } + lock.Wait(timeout); + } +} + +void +Startup() +{ + // The hang detector only runs in chrome processes. If you change this, + // you must also deal with the threadsafety of AnnotateCrashReport in + // non-chrome processes! + if (GeckoProcessType_Default != XRE_GetProcessType()) + return; + + MOZ_ASSERT(!gMonitor, "Hang monitor already initialized"); + gMonitor = new Monitor("HangMonitor"); + + Preferences::RegisterCallback(PrefChanged, kHangMonitorPrefName, nullptr); + PrefChanged(nullptr, nullptr); + +#ifdef REPORT_CHROME_HANGS + Preferences::RegisterCallback(PrefChanged, kTelemetryPrefName, nullptr); + winMainThreadHandle = + OpenThread(THREAD_ALL_ACCESS, FALSE, GetCurrentThreadId()); + if (!winMainThreadHandle) + return; +#endif + + // Don't actually start measuring hangs until we hit the main event loop. + // This potentially misses a small class of really early startup hangs, + // but avoids dealing with some xpcshell tests and other situations which + // start XPCOM but don't ever start the event loop. + Suspend(); + + gThread = PR_CreateThread(PR_USER_THREAD, + ThreadMain, + nullptr, PR_PRIORITY_LOW, PR_GLOBAL_THREAD, + PR_JOINABLE_THREAD, 0); +} + +void +Shutdown() +{ + if (GeckoProcessType_Default != XRE_GetProcessType()) + return; + + MOZ_ASSERT(gMonitor, "Hang monitor not started"); + + { // Scope the lock we're going to delete later + MonitorAutoLock lock(*gMonitor); + gShutdown = true; + lock.Notify(); + } + + // thread creation could theoretically fail + if (gThread) { + PR_JoinThread(gThread); + gThread = nullptr; + } + + delete gMonitor; + gMonitor = nullptr; +} + +static bool +IsUIMessageWaiting() +{ +#ifndef XP_WIN + return false; +#else + #define NS_WM_IMEFIRST WM_IME_SETCONTEXT + #define NS_WM_IMELAST WM_IME_KEYUP + BOOL haveUIMessageWaiting = FALSE; + MSG msg; + haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, WM_KEYFIRST, + WM_IME_KEYLAST, PM_NOREMOVE); + haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, NS_WM_IMEFIRST, + NS_WM_IMELAST, PM_NOREMOVE); + haveUIMessageWaiting |= ::PeekMessageW(&msg, nullptr, WM_MOUSEFIRST, + WM_MOUSELAST, PM_NOREMOVE); + return haveUIMessageWaiting; +#endif +} + +void +NotifyActivity(ActivityType activityType) +{ + MOZ_ASSERT(NS_IsMainThread(), + "HangMonitor::Notify called from off the main thread."); + + // Determine the activity type more specifically + if (activityType == kGeneralActivity) { + activityType = IsUIMessageWaiting() ? kActivityUIAVail : + kActivityNoUIAVail; + } + + // Calculate the cumulative amount of lag time since the last UI message + static uint32_t cumulativeUILagMS = 0; + switch(activityType) { + case kActivityNoUIAVail: + cumulativeUILagMS = 0; + break; + case kActivityUIAVail: + case kUIActivity: + if (gTimestamp != PR_INTERVAL_NO_WAIT) { + cumulativeUILagMS += PR_IntervalToMilliseconds(PR_IntervalNow() - + gTimestamp); + } + break; + default: + break; + } + + // This is not a locked activity because PRTimeStamp is a 32-bit quantity + // which can be read/written atomically, and we don't want to pay locking + // penalties here. + gTimestamp = PR_IntervalNow(); + + // If we have UI activity we should reset the timer and report it if it is + // significant enough. + if (activityType == kUIActivity) { + // The minimum amount of lag time that we should report for telemetry data. + // Mozilla's UI responsiveness goal is 50ms + static const uint32_t kUIResponsivenessThresholdMS = 50; + if (cumulativeUILagMS > kUIResponsivenessThresholdMS) { + mozilla::Telemetry::Accumulate(mozilla::Telemetry::EVENTLOOP_UI_LAG_EXP_MS, + cumulativeUILagMS); + } + cumulativeUILagMS = 0; + } + + if (gThread && !gShutdown) { + mozilla::BackgroundHangMonitor().NotifyActivity(); + } +} + +void +Suspend() +{ + MOZ_ASSERT(NS_IsMainThread(), + "HangMonitor::Suspend called from off the main thread."); + + // Because gTimestamp changes this resets the wait count. + gTimestamp = PR_INTERVAL_NO_WAIT; + + if (gThread && !gShutdown) { + mozilla::BackgroundHangMonitor().NotifyWait(); + } +} + +} } // namespace mozilla::HangMonitor