xpcom/threads/BackgroundHangMonitor.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #include "mozilla/ArrayUtils.h"
     7 #include "mozilla/BackgroundHangMonitor.h"
     8 #include "mozilla/LinkedList.h"
     9 #include "mozilla/Monitor.h"
    10 #include "mozilla/Move.h"
    11 #include "mozilla/StaticPtr.h"
    12 #include "mozilla/Telemetry.h"
    13 #include "mozilla/ThreadHangStats.h"
    14 #include "mozilla/ThreadLocal.h"
    15 #ifdef MOZ_NUWA_PROCESS
    16 #include "ipc/Nuwa.h"
    17 #endif
    19 #include "prinrval.h"
    20 #include "prthread.h"
    21 #include "ThreadStackHelper.h"
    23 #include <algorithm>
    25 namespace mozilla {
    27 /**
    28  * BackgroundHangManager is the global object that
    29  * manages all instances of BackgroundHangThread.
    30  */
    31 class BackgroundHangManager
    32 {
    33 private:
    34   // Background hang monitor thread function
    35   static void MonitorThread(void* aData)
    36   {
    37     PR_SetCurrentThreadName("BgHangManager");
    39 #ifdef MOZ_NUWA_PROCESS
    40     if (IsNuwaProcess()) {
    41       NS_ASSERTION(NuwaMarkCurrentThread != nullptr,
    42                    "NuwaMarkCurrentThread is undefined!");
    43       NuwaMarkCurrentThread(nullptr, nullptr);
    44     }
    45 #endif
    47     /* We do not hold a reference to BackgroundHangManager here
    48        because the monitor thread only exists as long as the
    49        BackgroundHangManager instance exists. We stop the monitor
    50        thread in the BackgroundHangManager destructor, and we can
    51        only get to the destructor if we don't hold a reference here. */
    52     static_cast<BackgroundHangManager*>(aData)->RunMonitorThread();
    53   }
    55   // Hang monitor thread
    56   PRThread* mHangMonitorThread;
    57   // Stop hang monitoring
    58   bool mShutdown;
    60   BackgroundHangManager(const BackgroundHangManager&);
    61   BackgroundHangManager& operator=(const BackgroundHangManager&);
    62   void RunMonitorThread();
    64 public:
    65   NS_INLINE_DECL_THREADSAFE_REFCOUNTING(BackgroundHangManager)
    66   static StaticRefPtr<BackgroundHangManager> sInstance;
    68   // Lock for access to members of this class
    69   Monitor mLock;
    70   // Current time as seen by hang monitors
    71   PRIntervalTime mIntervalNow;
    72   // List of BackgroundHangThread instances associated with each thread
    73   LinkedList<BackgroundHangThread> mHangThreads;
    75   void Shutdown()
    76   {
    77     MonitorAutoLock autoLock(mLock);
    78     mShutdown = true;
    79     autoLock.Notify();
    80   }
    82   void Wakeup()
    83   {
    84     // PR_CreateThread could have failed earlier
    85     if (mHangMonitorThread) {
    86       // Use PR_Interrupt to avoid potentially taking a lock
    87       PR_Interrupt(mHangMonitorThread);
    88     }
    89   }
    91   BackgroundHangManager();
    92   ~BackgroundHangManager();
    93 };
    95 /**
    96  * BackgroundHangThread is a per-thread object that is used
    97  * by all instances of BackgroundHangMonitor to monitor hangs.
    98  */
    99 class BackgroundHangThread : public LinkedListElement<BackgroundHangThread>
   100 {
   101 private:
   102   static ThreadLocal<BackgroundHangThread*> sTlsKey;
   104   BackgroundHangThread(const BackgroundHangThread&);
   105   BackgroundHangThread& operator=(const BackgroundHangThread&);
   106   ~BackgroundHangThread();
   108   /* Keep a reference to the manager, so we can keep going even
   109      after BackgroundHangManager::Shutdown is called. */
   110   const RefPtr<BackgroundHangManager> mManager;
   111   // Unique thread ID for identification
   112   const PRThread* mThreadID;
   114 public:
   115   NS_INLINE_DECL_REFCOUNTING(BackgroundHangThread)
   116   static BackgroundHangThread* FindThread();
   118   static void Startup()
   119   {
   120     /* We can tolerate init() failing.
   121        The if block turns off warn_unused_result. */
   122     if (!sTlsKey.init()) {}
   123   }
   125   // Hang timeout in ticks
   126   const PRIntervalTime mTimeout;
   127   // PermaHang timeout in ticks
   128   const PRIntervalTime mMaxTimeout;
   129   // Time at last activity
   130   PRIntervalTime mInterval;
   131   // Time when a hang started
   132   PRIntervalTime mHangStart;
   133   // Is the thread in a hang
   134   bool mHanging;
   135   // Is the thread in a waiting state
   136   bool mWaiting;
   137   // Platform-specific helper to get hang stacks
   138   ThreadStackHelper mStackHelper;
   139   // Stack of current hang
   140   Telemetry::HangHistogram::Stack mHangStack;
   141   // Statistics for telemetry
   142   Telemetry::ThreadHangStats mStats;
   144   BackgroundHangThread(const char* aName,
   145                        uint32_t aTimeoutMs,
   146                        uint32_t aMaxTimeoutMs);
   148   // Report a hang; aManager->mLock IS locked
   149   void ReportHang(PRIntervalTime aHangTime);
   150   // Report a permanent hang; aManager->mLock IS locked
   151   void ReportPermaHang();
   152   // Called by BackgroundHangMonitor::NotifyActivity
   153   void NotifyActivity();
   154   // Called by BackgroundHangMonitor::NotifyWait
   155   void NotifyWait()
   156   {
   157     NotifyActivity();
   158     mWaiting = true;
   159   }
   160 };
   // Out-of-class definitions of the static members declared in
   // BackgroundHangManager and BackgroundHangThread.
   163 StaticRefPtr<BackgroundHangManager> BackgroundHangManager::sInstance;
   165 ThreadLocal<BackgroundHangThread*> BackgroundHangThread::sTlsKey;
   168 BackgroundHangManager::BackgroundHangManager()
   169   : mShutdown(false)
   170   , mLock("BackgroundHangManager")
   171   , mIntervalNow(0)
   172 {
   173   // Lock so we don't race against the new monitor thread
   174   MonitorAutoLock autoLock(mLock);
   175   mHangMonitorThread = PR_CreateThread(
   176     PR_USER_THREAD, MonitorThread, this,
   177     PR_PRIORITY_LOW, PR_GLOBAL_THREAD, PR_JOINABLE_THREAD, 0);
   179   MOZ_ASSERT(mHangMonitorThread,
   180     "Failed to create monitor thread");
   181 }
   183 BackgroundHangManager::~BackgroundHangManager()
   184 {
   185   MOZ_ASSERT(mShutdown,
   186     "Destruction without Shutdown call");
   187   MOZ_ASSERT(mHangThreads.isEmpty(),
   188     "Destruction with outstanding monitors");
   189   MOZ_ASSERT(mHangMonitorThread,
   190     "No monitor thread");
   192   // PR_CreateThread could have failed above due to resource limitation
   193   if (mHangMonitorThread) {
   194     // The monitor thread can only live as long as the instance lives
   195     PR_JoinThread(mHangMonitorThread);
   196   }
   197 }
   /* Main loop of the hang monitor thread (entered from MonitorThread).
      mLock is held for the whole function except while waiting on it.
      Until mShutdown is set, each iteration waits for up to waitTime,
      advances mIntervalNow when the wait latency was acceptable and, when a
      recheck is due or the wait was cut short, walks mHangThreads to detect
      started hangs, ended hangs and permahangs and to recompute waitTime
      and recheckTimeout. Once mShutdown is set, the function only returns
      after mHangThreads has become empty. */
   199 void
   200 BackgroundHangManager::RunMonitorThread()
   201 {
   202   // Keep us locked except when waiting
   203   MonitorAutoLock autoLock(mLock);
   205   /* mIntervalNow is updated at various intervals determined by waitTime.
   206      However, if an update latency is too long (due to CPU scheduling, system
   207      sleep, etc.), we don't update mIntervalNow at all. This is done so that
   208      long latencies in our timing are not detected as hangs. systemTime is
   209      used to track PR_IntervalNow() and determine our latency. */
   211   PRIntervalTime systemTime = PR_IntervalNow();
   212   // Default values for the first iteration of thread loop
   213   PRIntervalTime waitTime = PR_INTERVAL_NO_WAIT;
   214   PRIntervalTime recheckTimeout = PR_INTERVAL_NO_WAIT;
   216   while (!mShutdown) {
           // Wakeup() signals this thread through PR_Interrupt; reset that
           // state before waiting again.
   218     PR_ClearInterrupt();
   219     nsresult rv = autoLock.Wait(waitTime);
           // Measure how long the wait actually took, in interval ticks.
   221     PRIntervalTime newTime = PR_IntervalNow();
   222     PRIntervalTime systemInterval = newTime - systemTime;
   223     systemTime = newTime;
   225     /* waitTime is a quarter of the shortest timeout value; If our timing
   226        latency is low enough (less than half the shortest timeout value),
   227        we can update mIntervalNow. */
   228     if (MOZ_LIKELY(waitTime != PR_INTERVAL_NO_TIMEOUT &&
   229                    systemInterval < 2 * waitTime)) {
   230       mIntervalNow += systemInterval;
   231     }
   233     /* If it's before the next recheck timeout, and our wait did not
   234        get interrupted (either through Notify or PR_Interrupt), we can
   235        keep the current waitTime and skip iterating through hang monitors. */
   236     if (MOZ_LIKELY(systemInterval < recheckTimeout &&
   237                    systemInterval >= waitTime &&
   238                    rv == NS_OK)) {
   239       recheckTimeout -= systemInterval;
   240       continue;
   241     }
   243     /* We are in one of the following scenarios,
   244      - Hang or permahang recheck timeout
   245      - Thread added/removed
   246      - Thread wait or hang ended
   247        In all cases, we want to go through our list of hang
   248        monitors and update waitTime and recheckTimeout. */
   249     waitTime = PR_INTERVAL_NO_TIMEOUT;
   250     recheckTimeout = PR_INTERVAL_NO_TIMEOUT;
   252     // Locally hold mIntervalNow
   253     PRIntervalTime intervalNow = mIntervalNow;
   255     // iterate through hang monitors
   256     for (BackgroundHangThread* currentThread = mHangThreads.getFirst();
   257          currentThread; currentThread = currentThread->getNext()) {
   259       if (currentThread->mWaiting) {
   260         // Thread is waiting, not hanging
   261         continue;
   262       }
             // hangTime is the manager time elapsed since the thread's last
             // recorded activity (mInterval).
   263       PRIntervalTime interval = currentThread->mInterval;
   264       PRIntervalTime hangTime = intervalNow - interval;
   265       if (MOZ_UNLIKELY(hangTime >= currentThread->mMaxTimeout)) {
   266         // A permahang started
   267         // Skip subsequent iterations and tolerate a race on mWaiting here
   268         currentThread->mWaiting = true;
   269         currentThread->mHanging = false;
   270         currentThread->ReportPermaHang();
   271         continue;
   272       }
   274       if (MOZ_LIKELY(!currentThread->mHanging)) {
   275         if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) {
   276           // A hang started
   277           currentThread->mStackHelper.GetStack(currentThread->mHangStack);
   278           currentThread->mHangStart = interval;
   279           currentThread->mHanging = true;
   280         }
   281       } else {
               // mInterval moved away from the value saved in mHangStart, so
               // the thread reported activity after the hang was detected.
   282         if (MOZ_LIKELY(interval != currentThread->mHangStart)) {
   283           // A hang ended
   284           currentThread->ReportHang(intervalNow - currentThread->mHangStart);
   285           currentThread->mHanging = false;
   286         }
   287       }
   289       /* If we are hanging, the next time we check for hang status is when
   290          the hang turns into a permahang. If we're not hanging, the next
   291          recheck timeout is when we may be entering a hang. */
   292       PRIntervalTime nextRecheck;
   293       if (currentThread->mHanging) {
   294         nextRecheck = currentThread->mMaxTimeout;
   295       } else {
   296         nextRecheck = currentThread->mTimeout;
   297       }
             // NOTE(review): when a hang was just reported as ended above and
             // hangTime is still >= mTimeout, nextRecheck - hangTime looks like
             // it wraps around (unsigned subtraction) -- confirm this is benign.
   298       recheckTimeout = std::min(recheckTimeout, nextRecheck - hangTime);
   300       /* We wait for a quarter of the shortest timeout
   301          value to give mIntervalNow enough granularity. */
   302       waitTime = std::min(waitTime, currentThread->mTimeout / 4);
   303     }
   304   }
   306   /* We are shutting down now.
   307      Wait for all outstanding monitors to unregister. */
   308   while (!mHangThreads.isEmpty()) {
   309     autoLock.Wait(PR_INTERVAL_NO_TIMEOUT);
   310   }
   311 }
   314 BackgroundHangThread::BackgroundHangThread(const char* aName,
   315                                            uint32_t aTimeoutMs,
   316                                            uint32_t aMaxTimeoutMs)
   317   : mManager(BackgroundHangManager::sInstance)
   318   , mThreadID(PR_GetCurrentThread())
   319   , mTimeout(aTimeoutMs == BackgroundHangMonitor::kNoTimeout
   320              ? PR_INTERVAL_NO_TIMEOUT
   321              : PR_MillisecondsToInterval(aTimeoutMs))
   322   , mMaxTimeout(aMaxTimeoutMs == BackgroundHangMonitor::kNoTimeout
   323                 ? PR_INTERVAL_NO_TIMEOUT
   324                 : PR_MillisecondsToInterval(aMaxTimeoutMs))
   325   , mInterval(mManager->mIntervalNow)
   326   , mHangStart(mInterval)
   327   , mHanging(false)
   328   , mWaiting(true)
   329   , mStats(aName)
   330 {
   331   if (sTlsKey.initialized()) {
   332     sTlsKey.set(this);
   333   }
   334   // Lock here because LinkedList is not thread-safe
   335   MonitorAutoLock autoLock(mManager->mLock);
   336   // Add to thread list
   337   mManager->mHangThreads.insertBack(this);
   338   // Wake up monitor thread to process new thread
   339   autoLock.Notify();
   340 }
   342 BackgroundHangThread::~BackgroundHangThread()
   343 {
   344   // Lock here because LinkedList is not thread-safe
   345   MonitorAutoLock autoLock(mManager->mLock);
   346   // Remove from thread list
   347   remove();
   348   // Wake up monitor thread to process removed thread
   349   autoLock.Notify();
   351   // We no longer have a thread
   352   if (sTlsKey.initialized()) {
   353     sTlsKey.set(nullptr);
   354   }
   356   // Move our copy of ThreadHangStats to Telemetry storage
   357   Telemetry::RecordThreadHangStats(mStats);
   358 }
   360 void
   361 BackgroundHangThread::ReportHang(PRIntervalTime aHangTime)
   362 {
   363   // Recovered from a hang; called on the monitor thread
   364   // mManager->mLock IS locked
   366   Telemetry::HangHistogram newHistogram(Move(mHangStack));
   367   for (Telemetry::HangHistogram* oldHistogram = mStats.mHangs.begin();
   368        oldHistogram != mStats.mHangs.end(); oldHistogram++) {
   369     if (newHistogram == *oldHistogram) {
   370       // New histogram matches old one
   371       oldHistogram->Add(aHangTime);
   372       return;
   373     }
   374   }
   375   // Add new histogram
   376   newHistogram.Add(aHangTime);
   377   mStats.mHangs.append(Move(newHistogram));
   378 }
   380 void
   381 BackgroundHangThread::ReportPermaHang()
   382 {
   383   // Permanently hanged; called on the monitor thread
   384   // mManager->mLock IS locked
   386   // TODO: Add more detailed analysis for perma-hangs
   387   ReportHang(mMaxTimeout);
   388 }
   390 MOZ_ALWAYS_INLINE void
   391 BackgroundHangThread::NotifyActivity()
   392 {
   393   PRIntervalTime intervalNow = mManager->mIntervalNow;
   394   if (mWaiting) {
   395     mInterval = intervalNow;
   396     mWaiting = false;
   397     /* We have to wake up the manager thread because when all threads
   398        are waiting, the manager thread waits indefinitely as well. */
   399     mManager->Wakeup();
   400   } else {
   401     PRIntervalTime duration = intervalNow - mInterval;
   402     mStats.mActivity.Add(duration);
   403     if (MOZ_UNLIKELY(duration >= mTimeout)) {
   404       /* Wake up the manager thread to tell it that a hang ended */
   405       mManager->Wakeup();
   406     }
   407     mInterval = intervalNow;
   408   }
   409 }
   411 BackgroundHangThread*
   412 BackgroundHangThread::FindThread()
   413 {
   414 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
   415   if (sTlsKey.initialized()) {
   416     // Use TLS if available
   417     return sTlsKey.get();
   418   }
   419   // If TLS is unavailable, we can search through the thread list
   420   RefPtr<BackgroundHangManager> manager(BackgroundHangManager::sInstance);
   421   MOZ_ASSERT(manager, "Creating BackgroundHangMonitor after shutdown");
   423   PRThread* threadID = PR_GetCurrentThread();
   424   // Lock thread list for traversal
   425   MonitorAutoLock autoLock(manager->mLock);
   426   for (BackgroundHangThread* thread = manager->mHangThreads.getFirst();
   427        thread; thread = thread->getNext()) {
   428     if (thread->mThreadID == threadID) {
   429       return thread;
   430     }
   431   }
   432 #endif
   433   // Current thread is not initialized
   434   return nullptr;
   435 }
   438 void
   439 BackgroundHangMonitor::Startup()
   440 {
   441 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
   442   MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized");
   443   ThreadStackHelper::Startup();
   444   BackgroundHangThread::Startup();
   445   BackgroundHangManager::sInstance = new BackgroundHangManager();
   446 #endif
   447 }
   449 void
   450 BackgroundHangMonitor::Shutdown()
   451 {
   452 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
   453   MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized");
   454   /* Scope our lock inside Shutdown() because the sInstance object can
   455      be destroyed as soon as we set sInstance to nullptr below, and
   456      we don't want to hold the lock when it's being destroyed. */
   457   BackgroundHangManager::sInstance->Shutdown();
   458   BackgroundHangManager::sInstance = nullptr;
   459   ThreadStackHelper::Shutdown();
   460 #endif
   461 }
   463 BackgroundHangMonitor::BackgroundHangMonitor(const char* aName,
   464                                              uint32_t aTimeoutMs,
   465                                              uint32_t aMaxTimeoutMs)
   466   : mThread(BackgroundHangThread::FindThread())
   467 {
   468 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
   469   if (!mThread) {
   470     mThread = new BackgroundHangThread(aName, aTimeoutMs, aMaxTimeoutMs);
   471   }
   472 #endif
   473 }
   475 BackgroundHangMonitor::BackgroundHangMonitor()
   476   : mThread(BackgroundHangThread::FindThread())
   477 {
   478 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
   479   MOZ_ASSERT(mThread, "Thread not initialized for hang monitoring");
   480 #endif
   481 }
   483 BackgroundHangMonitor::~BackgroundHangMonitor()
   484 {
   485 }
   487 void
   488 BackgroundHangMonitor::NotifyActivity()
   489 {
   490 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
   491   mThread->NotifyActivity();
   492 #endif
   493 }
   495 void
   496 BackgroundHangMonitor::NotifyWait()
   497 {
   498 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
   499   mThread->NotifyWait();
   500 #endif
   501 }
   504 /* Because we are iterating through the BackgroundHangThread linked list,
   505    we need to take a lock. Using MonitorAutoLock as a base class makes
   506    sure all of that is taken care of for us. */
   507 BackgroundHangMonitor::ThreadHangStatsIterator::ThreadHangStatsIterator()
   508   : MonitorAutoLock(BackgroundHangManager::sInstance->mLock)
   509   , mThread(BackgroundHangManager::sInstance->mHangThreads.getFirst())
   510 {
   511 }
   513 Telemetry::ThreadHangStats*
   514 BackgroundHangMonitor::ThreadHangStatsIterator::GetNext()
   515 {
   516   if (!mThread) {
   517     return nullptr;
   518   }
   519   Telemetry::ThreadHangStats* stats = &mThread->mStats;
   520   mThread = mThread->getNext();
   521   return stats;
   522 }
   524 } // namespace mozilla

mercurial