xpcom/threads/BackgroundHangMonitor.cpp

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:69d00ce1db28
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 #include "mozilla/ArrayUtils.h"
7 #include "mozilla/BackgroundHangMonitor.h"
8 #include "mozilla/LinkedList.h"
9 #include "mozilla/Monitor.h"
10 #include "mozilla/Move.h"
11 #include "mozilla/StaticPtr.h"
12 #include "mozilla/Telemetry.h"
13 #include "mozilla/ThreadHangStats.h"
14 #include "mozilla/ThreadLocal.h"
15 #ifdef MOZ_NUWA_PROCESS
16 #include "ipc/Nuwa.h"
17 #endif
18
19 #include "prinrval.h"
20 #include "prthread.h"
21 #include "ThreadStackHelper.h"
22
23 #include <algorithm>
24
25 namespace mozilla {
26
27 /**
28 * BackgroundHangManager is the global object that
29 * manages all instances of BackgroundHangThread.
30 */
31 class BackgroundHangManager
32 {
33 private:
34 // Background hang monitor thread function
35 static void MonitorThread(void* aData)
36 {
37 PR_SetCurrentThreadName("BgHangManager");
38
39 #ifdef MOZ_NUWA_PROCESS
40 if (IsNuwaProcess()) {
41 NS_ASSERTION(NuwaMarkCurrentThread != nullptr,
42 "NuwaMarkCurrentThread is undefined!");
43 NuwaMarkCurrentThread(nullptr, nullptr);
44 }
45 #endif
46
47 /* We do not hold a reference to BackgroundHangManager here
48 because the monitor thread only exists as long as the
49 BackgroundHangManager instance exists. We stop the monitor
50 thread in the BackgroundHangManager destructor, and we can
51 only get to the destructor if we don't hold a reference here. */
52 static_cast<BackgroundHangManager*>(aData)->RunMonitorThread();
53 }
54
55 // Hang monitor thread
56 PRThread* mHangMonitorThread;
57 // Stop hang monitoring
58 bool mShutdown;
59
60 BackgroundHangManager(const BackgroundHangManager&);
61 BackgroundHangManager& operator=(const BackgroundHangManager&);
62 void RunMonitorThread();
63
64 public:
65 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(BackgroundHangManager)
66 static StaticRefPtr<BackgroundHangManager> sInstance;
67
68 // Lock for access to members of this class
69 Monitor mLock;
70 // Current time as seen by hang monitors
71 PRIntervalTime mIntervalNow;
72 // List of BackgroundHangThread instances associated with each thread
73 LinkedList<BackgroundHangThread> mHangThreads;
74
75 void Shutdown()
76 {
77 MonitorAutoLock autoLock(mLock);
78 mShutdown = true;
79 autoLock.Notify();
80 }
81
82 void Wakeup()
83 {
84 // PR_CreateThread could have failed earlier
85 if (mHangMonitorThread) {
86 // Use PR_Interrupt to avoid potentially taking a lock
87 PR_Interrupt(mHangMonitorThread);
88 }
89 }
90
91 BackgroundHangManager();
92 ~BackgroundHangManager();
93 };
94
95 /**
96 * BackgroundHangThread is a per-thread object that is used
97 * by all instances of BackgroundHangMonitor to monitor hangs.
98 */
99 class BackgroundHangThread : public LinkedListElement<BackgroundHangThread>
100 {
101 private:
102 static ThreadLocal<BackgroundHangThread*> sTlsKey;
103
104 BackgroundHangThread(const BackgroundHangThread&);
105 BackgroundHangThread& operator=(const BackgroundHangThread&);
106 ~BackgroundHangThread();
107
108 /* Keep a reference to the manager, so we can keep going even
109 after BackgroundHangManager::Shutdown is called. */
110 const RefPtr<BackgroundHangManager> mManager;
111 // Unique thread ID for identification
112 const PRThread* mThreadID;
113
114 public:
115 NS_INLINE_DECL_REFCOUNTING(BackgroundHangThread)
116 static BackgroundHangThread* FindThread();
117
118 static void Startup()
119 {
120 /* We can tolerate init() failing.
121 The if block turns off warn_unused_result. */
122 if (!sTlsKey.init()) {}
123 }
124
125 // Hang timeout in ticks
126 const PRIntervalTime mTimeout;
127 // PermaHang timeout in ticks
128 const PRIntervalTime mMaxTimeout;
129 // Time at last activity
130 PRIntervalTime mInterval;
131 // Time when a hang started
132 PRIntervalTime mHangStart;
133 // Is the thread in a hang
134 bool mHanging;
135 // Is the thread in a waiting state
136 bool mWaiting;
137 // Platform-specific helper to get hang stacks
138 ThreadStackHelper mStackHelper;
139 // Stack of current hang
140 Telemetry::HangHistogram::Stack mHangStack;
141 // Statistics for telemetry
142 Telemetry::ThreadHangStats mStats;
143
144 BackgroundHangThread(const char* aName,
145 uint32_t aTimeoutMs,
146 uint32_t aMaxTimeoutMs);
147
148 // Report a hang; aManager->mLock IS locked
149 void ReportHang(PRIntervalTime aHangTime);
150 // Report a permanent hang; aManager->mLock IS locked
151 void ReportPermaHang();
152 // Called by BackgroundHangMonitor::NotifyActivity
153 void NotifyActivity();
154 // Called by BackgroundHangMonitor::NotifyWait
155 void NotifyWait()
156 {
157 NotifyActivity();
158 mWaiting = true;
159 }
160 };
161
162
163 StaticRefPtr<BackgroundHangManager> BackgroundHangManager::sInstance;
164
165 ThreadLocal<BackgroundHangThread*> BackgroundHangThread::sTlsKey;
166
167
168 BackgroundHangManager::BackgroundHangManager()
169 : mShutdown(false)
170 , mLock("BackgroundHangManager")
171 , mIntervalNow(0)
172 {
173 // Lock so we don't race against the new monitor thread
174 MonitorAutoLock autoLock(mLock);
175 mHangMonitorThread = PR_CreateThread(
176 PR_USER_THREAD, MonitorThread, this,
177 PR_PRIORITY_LOW, PR_GLOBAL_THREAD, PR_JOINABLE_THREAD, 0);
178
179 MOZ_ASSERT(mHangMonitorThread,
180 "Failed to create monitor thread");
181 }
182
183 BackgroundHangManager::~BackgroundHangManager()
184 {
185 MOZ_ASSERT(mShutdown,
186 "Destruction without Shutdown call");
187 MOZ_ASSERT(mHangThreads.isEmpty(),
188 "Destruction with outstanding monitors");
189 MOZ_ASSERT(mHangMonitorThread,
190 "No monitor thread");
191
192 // PR_CreateThread could have failed above due to resource limitation
193 if (mHangMonitorThread) {
194 // The monitor thread can only live as long as the instance lives
195 PR_JoinThread(mHangMonitorThread);
196 }
197 }
198
199 void
200 BackgroundHangManager::RunMonitorThread()
201 {
202 // Keep us locked except when waiting
203 MonitorAutoLock autoLock(mLock);
204
205 /* mIntervalNow is updated at various intervals determined by waitTime.
206 However, if an update latency is too long (due to CPU scheduling, system
207 sleep, etc.), we don't update mIntervalNow at all. This is done so that
208 long latencies in our timing are not detected as hangs. systemTime is
209 used to track PR_IntervalNow() and determine our latency. */
210
211 PRIntervalTime systemTime = PR_IntervalNow();
212 // Default values for the first iteration of thread loop
213 PRIntervalTime waitTime = PR_INTERVAL_NO_WAIT;
214 PRIntervalTime recheckTimeout = PR_INTERVAL_NO_WAIT;
215
216 while (!mShutdown) {
217
218 PR_ClearInterrupt();
219 nsresult rv = autoLock.Wait(waitTime);
220
221 PRIntervalTime newTime = PR_IntervalNow();
222 PRIntervalTime systemInterval = newTime - systemTime;
223 systemTime = newTime;
224
225 /* waitTime is a quarter of the shortest timeout value; If our timing
226 latency is low enough (less than half the shortest timeout value),
227 we can update mIntervalNow. */
228 if (MOZ_LIKELY(waitTime != PR_INTERVAL_NO_TIMEOUT &&
229 systemInterval < 2 * waitTime)) {
230 mIntervalNow += systemInterval;
231 }
232
233 /* If it's before the next recheck timeout, and our wait did not
234 get interrupted (either through Notify or PR_Interrupt), we can
235 keep the current waitTime and skip iterating through hang monitors. */
236 if (MOZ_LIKELY(systemInterval < recheckTimeout &&
237 systemInterval >= waitTime &&
238 rv == NS_OK)) {
239 recheckTimeout -= systemInterval;
240 continue;
241 }
242
243 /* We are in one of the following scenarios,
244 - Hang or permahang recheck timeout
245 - Thread added/removed
246 - Thread wait or hang ended
247 In all cases, we want to go through our list of hang
248 monitors and update waitTime and recheckTimeout. */
249 waitTime = PR_INTERVAL_NO_TIMEOUT;
250 recheckTimeout = PR_INTERVAL_NO_TIMEOUT;
251
252 // Locally hold mIntervalNow
253 PRIntervalTime intervalNow = mIntervalNow;
254
255 // iterate through hang monitors
256 for (BackgroundHangThread* currentThread = mHangThreads.getFirst();
257 currentThread; currentThread = currentThread->getNext()) {
258
259 if (currentThread->mWaiting) {
260 // Thread is waiting, not hanging
261 continue;
262 }
263 PRIntervalTime interval = currentThread->mInterval;
264 PRIntervalTime hangTime = intervalNow - interval;
265 if (MOZ_UNLIKELY(hangTime >= currentThread->mMaxTimeout)) {
266 // A permahang started
267 // Skip subsequent iterations and tolerate a race on mWaiting here
268 currentThread->mWaiting = true;
269 currentThread->mHanging = false;
270 currentThread->ReportPermaHang();
271 continue;
272 }
273
274 if (MOZ_LIKELY(!currentThread->mHanging)) {
275 if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) {
276 // A hang started
277 currentThread->mStackHelper.GetStack(currentThread->mHangStack);
278 currentThread->mHangStart = interval;
279 currentThread->mHanging = true;
280 }
281 } else {
282 if (MOZ_LIKELY(interval != currentThread->mHangStart)) {
283 // A hang ended
284 currentThread->ReportHang(intervalNow - currentThread->mHangStart);
285 currentThread->mHanging = false;
286 }
287 }
288
289 /* If we are hanging, the next time we check for hang status is when
290 the hang turns into a permahang. If we're not hanging, the next
291 recheck timeout is when we may be entering a hang. */
292 PRIntervalTime nextRecheck;
293 if (currentThread->mHanging) {
294 nextRecheck = currentThread->mMaxTimeout;
295 } else {
296 nextRecheck = currentThread->mTimeout;
297 }
298 recheckTimeout = std::min(recheckTimeout, nextRecheck - hangTime);
299
300 /* We wait for a quarter of the shortest timeout
301 value to give mIntervalNow enough granularity. */
302 waitTime = std::min(waitTime, currentThread->mTimeout / 4);
303 }
304 }
305
306 /* We are shutting down now.
307 Wait for all outstanding monitors to unregister. */
308 while (!mHangThreads.isEmpty()) {
309 autoLock.Wait(PR_INTERVAL_NO_TIMEOUT);
310 }
311 }
312
313
314 BackgroundHangThread::BackgroundHangThread(const char* aName,
315 uint32_t aTimeoutMs,
316 uint32_t aMaxTimeoutMs)
317 : mManager(BackgroundHangManager::sInstance)
318 , mThreadID(PR_GetCurrentThread())
319 , mTimeout(aTimeoutMs == BackgroundHangMonitor::kNoTimeout
320 ? PR_INTERVAL_NO_TIMEOUT
321 : PR_MillisecondsToInterval(aTimeoutMs))
322 , mMaxTimeout(aMaxTimeoutMs == BackgroundHangMonitor::kNoTimeout
323 ? PR_INTERVAL_NO_TIMEOUT
324 : PR_MillisecondsToInterval(aMaxTimeoutMs))
325 , mInterval(mManager->mIntervalNow)
326 , mHangStart(mInterval)
327 , mHanging(false)
328 , mWaiting(true)
329 , mStats(aName)
330 {
331 if (sTlsKey.initialized()) {
332 sTlsKey.set(this);
333 }
334 // Lock here because LinkedList is not thread-safe
335 MonitorAutoLock autoLock(mManager->mLock);
336 // Add to thread list
337 mManager->mHangThreads.insertBack(this);
338 // Wake up monitor thread to process new thread
339 autoLock.Notify();
340 }
341
342 BackgroundHangThread::~BackgroundHangThread()
343 {
344 // Lock here because LinkedList is not thread-safe
345 MonitorAutoLock autoLock(mManager->mLock);
346 // Remove from thread list
347 remove();
348 // Wake up monitor thread to process removed thread
349 autoLock.Notify();
350
351 // We no longer have a thread
352 if (sTlsKey.initialized()) {
353 sTlsKey.set(nullptr);
354 }
355
356 // Move our copy of ThreadHangStats to Telemetry storage
357 Telemetry::RecordThreadHangStats(mStats);
358 }
359
360 void
361 BackgroundHangThread::ReportHang(PRIntervalTime aHangTime)
362 {
363 // Recovered from a hang; called on the monitor thread
364 // mManager->mLock IS locked
365
366 Telemetry::HangHistogram newHistogram(Move(mHangStack));
367 for (Telemetry::HangHistogram* oldHistogram = mStats.mHangs.begin();
368 oldHistogram != mStats.mHangs.end(); oldHistogram++) {
369 if (newHistogram == *oldHistogram) {
370 // New histogram matches old one
371 oldHistogram->Add(aHangTime);
372 return;
373 }
374 }
375 // Add new histogram
376 newHistogram.Add(aHangTime);
377 mStats.mHangs.append(Move(newHistogram));
378 }
379
380 void
381 BackgroundHangThread::ReportPermaHang()
382 {
383 // Permanently hanged; called on the monitor thread
384 // mManager->mLock IS locked
385
386 // TODO: Add more detailed analysis for perma-hangs
387 ReportHang(mMaxTimeout);
388 }
389
390 MOZ_ALWAYS_INLINE void
391 BackgroundHangThread::NotifyActivity()
392 {
393 PRIntervalTime intervalNow = mManager->mIntervalNow;
394 if (mWaiting) {
395 mInterval = intervalNow;
396 mWaiting = false;
397 /* We have to wake up the manager thread because when all threads
398 are waiting, the manager thread waits indefinitely as well. */
399 mManager->Wakeup();
400 } else {
401 PRIntervalTime duration = intervalNow - mInterval;
402 mStats.mActivity.Add(duration);
403 if (MOZ_UNLIKELY(duration >= mTimeout)) {
404 /* Wake up the manager thread to tell it that a hang ended */
405 mManager->Wakeup();
406 }
407 mInterval = intervalNow;
408 }
409 }
410
411 BackgroundHangThread*
412 BackgroundHangThread::FindThread()
413 {
414 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
415 if (sTlsKey.initialized()) {
416 // Use TLS if available
417 return sTlsKey.get();
418 }
419 // If TLS is unavailable, we can search through the thread list
420 RefPtr<BackgroundHangManager> manager(BackgroundHangManager::sInstance);
421 MOZ_ASSERT(manager, "Creating BackgroundHangMonitor after shutdown");
422
423 PRThread* threadID = PR_GetCurrentThread();
424 // Lock thread list for traversal
425 MonitorAutoLock autoLock(manager->mLock);
426 for (BackgroundHangThread* thread = manager->mHangThreads.getFirst();
427 thread; thread = thread->getNext()) {
428 if (thread->mThreadID == threadID) {
429 return thread;
430 }
431 }
432 #endif
433 // Current thread is not initialized
434 return nullptr;
435 }
436
437
438 void
439 BackgroundHangMonitor::Startup()
440 {
441 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
442 MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized");
443 ThreadStackHelper::Startup();
444 BackgroundHangThread::Startup();
445 BackgroundHangManager::sInstance = new BackgroundHangManager();
446 #endif
447 }
448
449 void
450 BackgroundHangMonitor::Shutdown()
451 {
452 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
453 MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized");
454 /* Scope our lock inside Shutdown() because the sInstance object can
455 be destroyed as soon as we set sInstance to nullptr below, and
456 we don't want to hold the lock when it's being destroyed. */
457 BackgroundHangManager::sInstance->Shutdown();
458 BackgroundHangManager::sInstance = nullptr;
459 ThreadStackHelper::Shutdown();
460 #endif
461 }
462
463 BackgroundHangMonitor::BackgroundHangMonitor(const char* aName,
464 uint32_t aTimeoutMs,
465 uint32_t aMaxTimeoutMs)
466 : mThread(BackgroundHangThread::FindThread())
467 {
468 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
469 if (!mThread) {
470 mThread = new BackgroundHangThread(aName, aTimeoutMs, aMaxTimeoutMs);
471 }
472 #endif
473 }
474
475 BackgroundHangMonitor::BackgroundHangMonitor()
476 : mThread(BackgroundHangThread::FindThread())
477 {
478 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
479 MOZ_ASSERT(mThread, "Thread not initialized for hang monitoring");
480 #endif
481 }
482
483 BackgroundHangMonitor::~BackgroundHangMonitor()
484 {
485 }
486
487 void
488 BackgroundHangMonitor::NotifyActivity()
489 {
490 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
491 mThread->NotifyActivity();
492 #endif
493 }
494
495 void
496 BackgroundHangMonitor::NotifyWait()
497 {
498 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
499 mThread->NotifyWait();
500 #endif
501 }
502
503
504 /* Because we are iterating through the BackgroundHangThread linked list,
505 we need to take a lock. Using MonitorAutoLock as a base class makes
506 sure all of that is taken care of for us. */
507 BackgroundHangMonitor::ThreadHangStatsIterator::ThreadHangStatsIterator()
508 : MonitorAutoLock(BackgroundHangManager::sInstance->mLock)
509 , mThread(BackgroundHangManager::sInstance->mHangThreads.getFirst())
510 {
511 }
512
513 Telemetry::ThreadHangStats*
514 BackgroundHangMonitor::ThreadHangStatsIterator::GetNext()
515 {
516 if (!mThread) {
517 return nullptr;
518 }
519 Telemetry::ThreadHangStats* stats = &mThread->mStats;
520 mThread = mThread->getNext();
521 return stats;
522 }
523
524 } // namespace mozilla

mercurial