michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "mozilla/ArrayUtils.h" michael@0: #include "mozilla/BackgroundHangMonitor.h" michael@0: #include "mozilla/LinkedList.h" michael@0: #include "mozilla/Monitor.h" michael@0: #include "mozilla/Move.h" michael@0: #include "mozilla/StaticPtr.h" michael@0: #include "mozilla/Telemetry.h" michael@0: #include "mozilla/ThreadHangStats.h" michael@0: #include "mozilla/ThreadLocal.h" michael@0: #ifdef MOZ_NUWA_PROCESS michael@0: #include "ipc/Nuwa.h" michael@0: #endif michael@0: michael@0: #include "prinrval.h" michael@0: #include "prthread.h" michael@0: #include "ThreadStackHelper.h" michael@0: michael@0: #include michael@0: michael@0: namespace mozilla { michael@0: michael@0: /** michael@0: * BackgroundHangManager is the global object that michael@0: * manages all instances of BackgroundHangThread. michael@0: */ michael@0: class BackgroundHangManager michael@0: { michael@0: private: michael@0: // Background hang monitor thread function michael@0: static void MonitorThread(void* aData) michael@0: { michael@0: PR_SetCurrentThreadName("BgHangManager"); michael@0: michael@0: #ifdef MOZ_NUWA_PROCESS michael@0: if (IsNuwaProcess()) { michael@0: NS_ASSERTION(NuwaMarkCurrentThread != nullptr, michael@0: "NuwaMarkCurrentThread is undefined!"); michael@0: NuwaMarkCurrentThread(nullptr, nullptr); michael@0: } michael@0: #endif michael@0: michael@0: /* We do not hold a reference to BackgroundHangManager here michael@0: because the monitor thread only exists as long as the michael@0: BackgroundHangManager instance exists. We stop the monitor michael@0: thread in the BackgroundHangManager destructor, and we can michael@0: only get to the destructor if we don't hold a reference here. */ michael@0: static_cast(aData)->RunMonitorThread(); michael@0: } michael@0: michael@0: // Hang monitor thread michael@0: PRThread* mHangMonitorThread; michael@0: // Stop hang monitoring michael@0: bool mShutdown; michael@0: michael@0: BackgroundHangManager(const BackgroundHangManager&); michael@0: BackgroundHangManager& operator=(const BackgroundHangManager&); michael@0: void RunMonitorThread(); michael@0: michael@0: public: michael@0: NS_INLINE_DECL_THREADSAFE_REFCOUNTING(BackgroundHangManager) michael@0: static StaticRefPtr sInstance; michael@0: michael@0: // Lock for access to members of this class michael@0: Monitor mLock; michael@0: // Current time as seen by hang monitors michael@0: PRIntervalTime mIntervalNow; michael@0: // List of BackgroundHangThread instances associated with each thread michael@0: LinkedList mHangThreads; michael@0: michael@0: void Shutdown() michael@0: { michael@0: MonitorAutoLock autoLock(mLock); michael@0: mShutdown = true; michael@0: autoLock.Notify(); michael@0: } michael@0: michael@0: void Wakeup() michael@0: { michael@0: // PR_CreateThread could have failed earlier michael@0: if (mHangMonitorThread) { michael@0: // Use PR_Interrupt to avoid potentially taking a lock michael@0: PR_Interrupt(mHangMonitorThread); michael@0: } michael@0: } michael@0: michael@0: BackgroundHangManager(); michael@0: ~BackgroundHangManager(); michael@0: }; michael@0: michael@0: /** michael@0: * BackgroundHangThread is a per-thread object that is used michael@0: * by all instances of BackgroundHangMonitor to monitor hangs. michael@0: */ michael@0: class BackgroundHangThread : public LinkedListElement michael@0: { michael@0: private: michael@0: static ThreadLocal sTlsKey; michael@0: michael@0: BackgroundHangThread(const BackgroundHangThread&); michael@0: BackgroundHangThread& operator=(const BackgroundHangThread&); michael@0: ~BackgroundHangThread(); michael@0: michael@0: /* Keep a reference to the manager, so we can keep going even michael@0: after BackgroundHangManager::Shutdown is called. */ michael@0: const RefPtr mManager; michael@0: // Unique thread ID for identification michael@0: const PRThread* mThreadID; michael@0: michael@0: public: michael@0: NS_INLINE_DECL_REFCOUNTING(BackgroundHangThread) michael@0: static BackgroundHangThread* FindThread(); michael@0: michael@0: static void Startup() michael@0: { michael@0: /* We can tolerate init() failing. michael@0: The if block turns off warn_unused_result. */ michael@0: if (!sTlsKey.init()) {} michael@0: } michael@0: michael@0: // Hang timeout in ticks michael@0: const PRIntervalTime mTimeout; michael@0: // PermaHang timeout in ticks michael@0: const PRIntervalTime mMaxTimeout; michael@0: // Time at last activity michael@0: PRIntervalTime mInterval; michael@0: // Time when a hang started michael@0: PRIntervalTime mHangStart; michael@0: // Is the thread in a hang michael@0: bool mHanging; michael@0: // Is the thread in a waiting state michael@0: bool mWaiting; michael@0: // Platform-specific helper to get hang stacks michael@0: ThreadStackHelper mStackHelper; michael@0: // Stack of current hang michael@0: Telemetry::HangHistogram::Stack mHangStack; michael@0: // Statistics for telemetry michael@0: Telemetry::ThreadHangStats mStats; michael@0: michael@0: BackgroundHangThread(const char* aName, michael@0: uint32_t aTimeoutMs, michael@0: uint32_t aMaxTimeoutMs); michael@0: michael@0: // Report a hang; aManager->mLock IS locked michael@0: void ReportHang(PRIntervalTime aHangTime); michael@0: // Report a permanent hang; aManager->mLock IS locked michael@0: void ReportPermaHang(); michael@0: // Called by BackgroundHangMonitor::NotifyActivity michael@0: void NotifyActivity(); michael@0: // Called by BackgroundHangMonitor::NotifyWait michael@0: void NotifyWait() michael@0: { michael@0: NotifyActivity(); michael@0: mWaiting = true; michael@0: } michael@0: }; michael@0: michael@0: michael@0: StaticRefPtr BackgroundHangManager::sInstance; michael@0: michael@0: ThreadLocal BackgroundHangThread::sTlsKey; michael@0: michael@0: michael@0: BackgroundHangManager::BackgroundHangManager() michael@0: : mShutdown(false) michael@0: , mLock("BackgroundHangManager") michael@0: , mIntervalNow(0) michael@0: { michael@0: // Lock so we don't race against the new monitor thread michael@0: MonitorAutoLock autoLock(mLock); michael@0: mHangMonitorThread = PR_CreateThread( michael@0: PR_USER_THREAD, MonitorThread, this, michael@0: PR_PRIORITY_LOW, PR_GLOBAL_THREAD, PR_JOINABLE_THREAD, 0); michael@0: michael@0: MOZ_ASSERT(mHangMonitorThread, michael@0: "Failed to create monitor thread"); michael@0: } michael@0: michael@0: BackgroundHangManager::~BackgroundHangManager() michael@0: { michael@0: MOZ_ASSERT(mShutdown, michael@0: "Destruction without Shutdown call"); michael@0: MOZ_ASSERT(mHangThreads.isEmpty(), michael@0: "Destruction with outstanding monitors"); michael@0: MOZ_ASSERT(mHangMonitorThread, michael@0: "No monitor thread"); michael@0: michael@0: // PR_CreateThread could have failed above due to resource limitation michael@0: if (mHangMonitorThread) { michael@0: // The monitor thread can only live as long as the instance lives michael@0: PR_JoinThread(mHangMonitorThread); michael@0: } michael@0: } michael@0: michael@0: void michael@0: BackgroundHangManager::RunMonitorThread() michael@0: { michael@0: // Keep us locked except when waiting michael@0: MonitorAutoLock autoLock(mLock); michael@0: michael@0: /* mIntervalNow is updated at various intervals determined by waitTime. michael@0: However, if an update latency is too long (due to CPU scheduling, system michael@0: sleep, etc.), we don't update mIntervalNow at all. This is done so that michael@0: long latencies in our timing are not detected as hangs. systemTime is michael@0: used to track PR_IntervalNow() and determine our latency. */ michael@0: michael@0: PRIntervalTime systemTime = PR_IntervalNow(); michael@0: // Default values for the first iteration of thread loop michael@0: PRIntervalTime waitTime = PR_INTERVAL_NO_WAIT; michael@0: PRIntervalTime recheckTimeout = PR_INTERVAL_NO_WAIT; michael@0: michael@0: while (!mShutdown) { michael@0: michael@0: PR_ClearInterrupt(); michael@0: nsresult rv = autoLock.Wait(waitTime); michael@0: michael@0: PRIntervalTime newTime = PR_IntervalNow(); michael@0: PRIntervalTime systemInterval = newTime - systemTime; michael@0: systemTime = newTime; michael@0: michael@0: /* waitTime is a quarter of the shortest timeout value; If our timing michael@0: latency is low enough (less than half the shortest timeout value), michael@0: we can update mIntervalNow. */ michael@0: if (MOZ_LIKELY(waitTime != PR_INTERVAL_NO_TIMEOUT && michael@0: systemInterval < 2 * waitTime)) { michael@0: mIntervalNow += systemInterval; michael@0: } michael@0: michael@0: /* If it's before the next recheck timeout, and our wait did not michael@0: get interrupted (either through Notify or PR_Interrupt), we can michael@0: keep the current waitTime and skip iterating through hang monitors. */ michael@0: if (MOZ_LIKELY(systemInterval < recheckTimeout && michael@0: systemInterval >= waitTime && michael@0: rv == NS_OK)) { michael@0: recheckTimeout -= systemInterval; michael@0: continue; michael@0: } michael@0: michael@0: /* We are in one of the following scenarios, michael@0: - Hang or permahang recheck timeout michael@0: - Thread added/removed michael@0: - Thread wait or hang ended michael@0: In all cases, we want to go through our list of hang michael@0: monitors and update waitTime and recheckTimeout. */ michael@0: waitTime = PR_INTERVAL_NO_TIMEOUT; michael@0: recheckTimeout = PR_INTERVAL_NO_TIMEOUT; michael@0: michael@0: // Locally hold mIntervalNow michael@0: PRIntervalTime intervalNow = mIntervalNow; michael@0: michael@0: // iterate through hang monitors michael@0: for (BackgroundHangThread* currentThread = mHangThreads.getFirst(); michael@0: currentThread; currentThread = currentThread->getNext()) { michael@0: michael@0: if (currentThread->mWaiting) { michael@0: // Thread is waiting, not hanging michael@0: continue; michael@0: } michael@0: PRIntervalTime interval = currentThread->mInterval; michael@0: PRIntervalTime hangTime = intervalNow - interval; michael@0: if (MOZ_UNLIKELY(hangTime >= currentThread->mMaxTimeout)) { michael@0: // A permahang started michael@0: // Skip subsequent iterations and tolerate a race on mWaiting here michael@0: currentThread->mWaiting = true; michael@0: currentThread->mHanging = false; michael@0: currentThread->ReportPermaHang(); michael@0: continue; michael@0: } michael@0: michael@0: if (MOZ_LIKELY(!currentThread->mHanging)) { michael@0: if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) { michael@0: // A hang started michael@0: currentThread->mStackHelper.GetStack(currentThread->mHangStack); michael@0: currentThread->mHangStart = interval; michael@0: currentThread->mHanging = true; michael@0: } michael@0: } else { michael@0: if (MOZ_LIKELY(interval != currentThread->mHangStart)) { michael@0: // A hang ended michael@0: currentThread->ReportHang(intervalNow - currentThread->mHangStart); michael@0: currentThread->mHanging = false; michael@0: } michael@0: } michael@0: michael@0: /* If we are hanging, the next time we check for hang status is when michael@0: the hang turns into a permahang. If we're not hanging, the next michael@0: recheck timeout is when we may be entering a hang. */ michael@0: PRIntervalTime nextRecheck; michael@0: if (currentThread->mHanging) { michael@0: nextRecheck = currentThread->mMaxTimeout; michael@0: } else { michael@0: nextRecheck = currentThread->mTimeout; michael@0: } michael@0: recheckTimeout = std::min(recheckTimeout, nextRecheck - hangTime); michael@0: michael@0: /* We wait for a quarter of the shortest timeout michael@0: value to give mIntervalNow enough granularity. */ michael@0: waitTime = std::min(waitTime, currentThread->mTimeout / 4); michael@0: } michael@0: } michael@0: michael@0: /* We are shutting down now. michael@0: Wait for all outstanding monitors to unregister. */ michael@0: while (!mHangThreads.isEmpty()) { michael@0: autoLock.Wait(PR_INTERVAL_NO_TIMEOUT); michael@0: } michael@0: } michael@0: michael@0: michael@0: BackgroundHangThread::BackgroundHangThread(const char* aName, michael@0: uint32_t aTimeoutMs, michael@0: uint32_t aMaxTimeoutMs) michael@0: : mManager(BackgroundHangManager::sInstance) michael@0: , mThreadID(PR_GetCurrentThread()) michael@0: , mTimeout(aTimeoutMs == BackgroundHangMonitor::kNoTimeout michael@0: ? PR_INTERVAL_NO_TIMEOUT michael@0: : PR_MillisecondsToInterval(aTimeoutMs)) michael@0: , mMaxTimeout(aMaxTimeoutMs == BackgroundHangMonitor::kNoTimeout michael@0: ? PR_INTERVAL_NO_TIMEOUT michael@0: : PR_MillisecondsToInterval(aMaxTimeoutMs)) michael@0: , mInterval(mManager->mIntervalNow) michael@0: , mHangStart(mInterval) michael@0: , mHanging(false) michael@0: , mWaiting(true) michael@0: , mStats(aName) michael@0: { michael@0: if (sTlsKey.initialized()) { michael@0: sTlsKey.set(this); michael@0: } michael@0: // Lock here because LinkedList is not thread-safe michael@0: MonitorAutoLock autoLock(mManager->mLock); michael@0: // Add to thread list michael@0: mManager->mHangThreads.insertBack(this); michael@0: // Wake up monitor thread to process new thread michael@0: autoLock.Notify(); michael@0: } michael@0: michael@0: BackgroundHangThread::~BackgroundHangThread() michael@0: { michael@0: // Lock here because LinkedList is not thread-safe michael@0: MonitorAutoLock autoLock(mManager->mLock); michael@0: // Remove from thread list michael@0: remove(); michael@0: // Wake up monitor thread to process removed thread michael@0: autoLock.Notify(); michael@0: michael@0: // We no longer have a thread michael@0: if (sTlsKey.initialized()) { michael@0: sTlsKey.set(nullptr); michael@0: } michael@0: michael@0: // Move our copy of ThreadHangStats to Telemetry storage michael@0: Telemetry::RecordThreadHangStats(mStats); michael@0: } michael@0: michael@0: void michael@0: BackgroundHangThread::ReportHang(PRIntervalTime aHangTime) michael@0: { michael@0: // Recovered from a hang; called on the monitor thread michael@0: // mManager->mLock IS locked michael@0: michael@0: Telemetry::HangHistogram newHistogram(Move(mHangStack)); michael@0: for (Telemetry::HangHistogram* oldHistogram = mStats.mHangs.begin(); michael@0: oldHistogram != mStats.mHangs.end(); oldHistogram++) { michael@0: if (newHistogram == *oldHistogram) { michael@0: // New histogram matches old one michael@0: oldHistogram->Add(aHangTime); michael@0: return; michael@0: } michael@0: } michael@0: // Add new histogram michael@0: newHistogram.Add(aHangTime); michael@0: mStats.mHangs.append(Move(newHistogram)); michael@0: } michael@0: michael@0: void michael@0: BackgroundHangThread::ReportPermaHang() michael@0: { michael@0: // Permanently hanged; called on the monitor thread michael@0: // mManager->mLock IS locked michael@0: michael@0: // TODO: Add more detailed analysis for perma-hangs michael@0: ReportHang(mMaxTimeout); michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE void michael@0: BackgroundHangThread::NotifyActivity() michael@0: { michael@0: PRIntervalTime intervalNow = mManager->mIntervalNow; michael@0: if (mWaiting) { michael@0: mInterval = intervalNow; michael@0: mWaiting = false; michael@0: /* We have to wake up the manager thread because when all threads michael@0: are waiting, the manager thread waits indefinitely as well. */ michael@0: mManager->Wakeup(); michael@0: } else { michael@0: PRIntervalTime duration = intervalNow - mInterval; michael@0: mStats.mActivity.Add(duration); michael@0: if (MOZ_UNLIKELY(duration >= mTimeout)) { michael@0: /* Wake up the manager thread to tell it that a hang ended */ michael@0: mManager->Wakeup(); michael@0: } michael@0: mInterval = intervalNow; michael@0: } michael@0: } michael@0: michael@0: BackgroundHangThread* michael@0: BackgroundHangThread::FindThread() michael@0: { michael@0: #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR michael@0: if (sTlsKey.initialized()) { michael@0: // Use TLS if available michael@0: return sTlsKey.get(); michael@0: } michael@0: // If TLS is unavailable, we can search through the thread list michael@0: RefPtr manager(BackgroundHangManager::sInstance); michael@0: MOZ_ASSERT(manager, "Creating BackgroundHangMonitor after shutdown"); michael@0: michael@0: PRThread* threadID = PR_GetCurrentThread(); michael@0: // Lock thread list for traversal michael@0: MonitorAutoLock autoLock(manager->mLock); michael@0: for (BackgroundHangThread* thread = manager->mHangThreads.getFirst(); michael@0: thread; thread = thread->getNext()) { michael@0: if (thread->mThreadID == threadID) { michael@0: return thread; michael@0: } michael@0: } michael@0: #endif michael@0: // Current thread is not initialized michael@0: return nullptr; michael@0: } michael@0: michael@0: michael@0: void michael@0: BackgroundHangMonitor::Startup() michael@0: { michael@0: #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR michael@0: MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized"); michael@0: ThreadStackHelper::Startup(); michael@0: BackgroundHangThread::Startup(); michael@0: BackgroundHangManager::sInstance = new BackgroundHangManager(); michael@0: #endif michael@0: } michael@0: michael@0: void michael@0: BackgroundHangMonitor::Shutdown() michael@0: { michael@0: #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR michael@0: MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized"); michael@0: /* Scope our lock inside Shutdown() because the sInstance object can michael@0: be destroyed as soon as we set sInstance to nullptr below, and michael@0: we don't want to hold the lock when it's being destroyed. */ michael@0: BackgroundHangManager::sInstance->Shutdown(); michael@0: BackgroundHangManager::sInstance = nullptr; michael@0: ThreadStackHelper::Shutdown(); michael@0: #endif michael@0: } michael@0: michael@0: BackgroundHangMonitor::BackgroundHangMonitor(const char* aName, michael@0: uint32_t aTimeoutMs, michael@0: uint32_t aMaxTimeoutMs) michael@0: : mThread(BackgroundHangThread::FindThread()) michael@0: { michael@0: #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR michael@0: if (!mThread) { michael@0: mThread = new BackgroundHangThread(aName, aTimeoutMs, aMaxTimeoutMs); michael@0: } michael@0: #endif michael@0: } michael@0: michael@0: BackgroundHangMonitor::BackgroundHangMonitor() michael@0: : mThread(BackgroundHangThread::FindThread()) michael@0: { michael@0: #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR michael@0: MOZ_ASSERT(mThread, "Thread not initialized for hang monitoring"); michael@0: #endif michael@0: } michael@0: michael@0: BackgroundHangMonitor::~BackgroundHangMonitor() michael@0: { michael@0: } michael@0: michael@0: void michael@0: BackgroundHangMonitor::NotifyActivity() michael@0: { michael@0: #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR michael@0: mThread->NotifyActivity(); michael@0: #endif michael@0: } michael@0: michael@0: void michael@0: BackgroundHangMonitor::NotifyWait() michael@0: { michael@0: #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR michael@0: mThread->NotifyWait(); michael@0: #endif michael@0: } michael@0: michael@0: michael@0: /* Because we are iterating through the BackgroundHangThread linked list, michael@0: we need to take a lock. Using MonitorAutoLock as a base class makes michael@0: sure all of that is taken care of for us. */ michael@0: BackgroundHangMonitor::ThreadHangStatsIterator::ThreadHangStatsIterator() michael@0: : MonitorAutoLock(BackgroundHangManager::sInstance->mLock) michael@0: , mThread(BackgroundHangManager::sInstance->mHangThreads.getFirst()) michael@0: { michael@0: } michael@0: michael@0: Telemetry::ThreadHangStats* michael@0: BackgroundHangMonitor::ThreadHangStatsIterator::GetNext() michael@0: { michael@0: if (!mThread) { michael@0: return nullptr; michael@0: } michael@0: Telemetry::ThreadHangStats* stats = &mThread->mStats; michael@0: mThread = mThread->getNext(); michael@0: return stats; michael@0: } michael@0: michael@0: } // namespace mozilla