|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
5 |
|
6 #include "mozilla/ArrayUtils.h" |
|
7 #include "mozilla/BackgroundHangMonitor.h" |
|
8 #include "mozilla/LinkedList.h" |
|
9 #include "mozilla/Monitor.h" |
|
10 #include "mozilla/Move.h" |
|
11 #include "mozilla/StaticPtr.h" |
|
12 #include "mozilla/Telemetry.h" |
|
13 #include "mozilla/ThreadHangStats.h" |
|
14 #include "mozilla/ThreadLocal.h" |
|
15 #ifdef MOZ_NUWA_PROCESS |
|
16 #include "ipc/Nuwa.h" |
|
17 #endif |
|
18 |
|
19 #include "prinrval.h" |
|
20 #include "prthread.h" |
|
21 #include "ThreadStackHelper.h" |
|
22 |
|
23 #include <algorithm> |
|
24 |
|
25 namespace mozilla { |
|
26 |
|
27 /** |
|
28 * BackgroundHangManager is the global object that |
|
29 * manages all instances of BackgroundHangThread. |
|
30 */ |
|
31 class BackgroundHangManager |
|
32 { |
|
33 private: |
|
34 // Background hang monitor thread function |
|
35 static void MonitorThread(void* aData) |
|
36 { |
|
37 PR_SetCurrentThreadName("BgHangManager"); |
|
38 |
|
39 #ifdef MOZ_NUWA_PROCESS |
|
40 if (IsNuwaProcess()) { |
|
41 NS_ASSERTION(NuwaMarkCurrentThread != nullptr, |
|
42 "NuwaMarkCurrentThread is undefined!"); |
|
43 NuwaMarkCurrentThread(nullptr, nullptr); |
|
44 } |
|
45 #endif |
|
46 |
|
47 /* We do not hold a reference to BackgroundHangManager here |
|
48 because the monitor thread only exists as long as the |
|
49 BackgroundHangManager instance exists. We stop the monitor |
|
50 thread in the BackgroundHangManager destructor, and we can |
|
51 only get to the destructor if we don't hold a reference here. */ |
|
52 static_cast<BackgroundHangManager*>(aData)->RunMonitorThread(); |
|
53 } |
|
54 |
|
55 // Hang monitor thread |
|
56 PRThread* mHangMonitorThread; |
|
57 // Stop hang monitoring |
|
58 bool mShutdown; |
|
59 |
|
60 BackgroundHangManager(const BackgroundHangManager&); |
|
61 BackgroundHangManager& operator=(const BackgroundHangManager&); |
|
62 void RunMonitorThread(); |
|
63 |
|
64 public: |
|
65 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(BackgroundHangManager) |
|
66 static StaticRefPtr<BackgroundHangManager> sInstance; |
|
67 |
|
68 // Lock for access to members of this class |
|
69 Monitor mLock; |
|
70 // Current time as seen by hang monitors |
|
71 PRIntervalTime mIntervalNow; |
|
72 // List of BackgroundHangThread instances associated with each thread |
|
73 LinkedList<BackgroundHangThread> mHangThreads; |
|
74 |
|
75 void Shutdown() |
|
76 { |
|
77 MonitorAutoLock autoLock(mLock); |
|
78 mShutdown = true; |
|
79 autoLock.Notify(); |
|
80 } |
|
81 |
|
82 void Wakeup() |
|
83 { |
|
84 // PR_CreateThread could have failed earlier |
|
85 if (mHangMonitorThread) { |
|
86 // Use PR_Interrupt to avoid potentially taking a lock |
|
87 PR_Interrupt(mHangMonitorThread); |
|
88 } |
|
89 } |
|
90 |
|
91 BackgroundHangManager(); |
|
92 ~BackgroundHangManager(); |
|
93 }; |
|
94 |
|
95 /** |
|
96 * BackgroundHangThread is a per-thread object that is used |
|
97 * by all instances of BackgroundHangMonitor to monitor hangs. |
|
98 */ |
|
99 class BackgroundHangThread : public LinkedListElement<BackgroundHangThread> |
|
100 { |
|
101 private: |
|
102 static ThreadLocal<BackgroundHangThread*> sTlsKey; |
|
103 |
|
104 BackgroundHangThread(const BackgroundHangThread&); |
|
105 BackgroundHangThread& operator=(const BackgroundHangThread&); |
|
106 ~BackgroundHangThread(); |
|
107 |
|
108 /* Keep a reference to the manager, so we can keep going even |
|
109 after BackgroundHangManager::Shutdown is called. */ |
|
110 const RefPtr<BackgroundHangManager> mManager; |
|
111 // Unique thread ID for identification |
|
112 const PRThread* mThreadID; |
|
113 |
|
114 public: |
|
115 NS_INLINE_DECL_REFCOUNTING(BackgroundHangThread) |
|
116 static BackgroundHangThread* FindThread(); |
|
117 |
|
118 static void Startup() |
|
119 { |
|
120 /* We can tolerate init() failing. |
|
121 The if block turns off warn_unused_result. */ |
|
122 if (!sTlsKey.init()) {} |
|
123 } |
|
124 |
|
125 // Hang timeout in ticks |
|
126 const PRIntervalTime mTimeout; |
|
127 // PermaHang timeout in ticks |
|
128 const PRIntervalTime mMaxTimeout; |
|
129 // Time at last activity |
|
130 PRIntervalTime mInterval; |
|
131 // Time when a hang started |
|
132 PRIntervalTime mHangStart; |
|
133 // Is the thread in a hang |
|
134 bool mHanging; |
|
135 // Is the thread in a waiting state |
|
136 bool mWaiting; |
|
137 // Platform-specific helper to get hang stacks |
|
138 ThreadStackHelper mStackHelper; |
|
139 // Stack of current hang |
|
140 Telemetry::HangHistogram::Stack mHangStack; |
|
141 // Statistics for telemetry |
|
142 Telemetry::ThreadHangStats mStats; |
|
143 |
|
144 BackgroundHangThread(const char* aName, |
|
145 uint32_t aTimeoutMs, |
|
146 uint32_t aMaxTimeoutMs); |
|
147 |
|
148 // Report a hang; aManager->mLock IS locked |
|
149 void ReportHang(PRIntervalTime aHangTime); |
|
150 // Report a permanent hang; aManager->mLock IS locked |
|
151 void ReportPermaHang(); |
|
152 // Called by BackgroundHangMonitor::NotifyActivity |
|
153 void NotifyActivity(); |
|
154 // Called by BackgroundHangMonitor::NotifyWait |
|
155 void NotifyWait() |
|
156 { |
|
157 NotifyActivity(); |
|
158 mWaiting = true; |
|
159 } |
|
160 }; |
|
161 |
|
162 |
|
163 StaticRefPtr<BackgroundHangManager> BackgroundHangManager::sInstance; |
|
164 |
|
165 ThreadLocal<BackgroundHangThread*> BackgroundHangThread::sTlsKey; |
|
166 |
|
167 |
|
168 BackgroundHangManager::BackgroundHangManager() |
|
169 : mShutdown(false) |
|
170 , mLock("BackgroundHangManager") |
|
171 , mIntervalNow(0) |
|
172 { |
|
173 // Lock so we don't race against the new monitor thread |
|
174 MonitorAutoLock autoLock(mLock); |
|
175 mHangMonitorThread = PR_CreateThread( |
|
176 PR_USER_THREAD, MonitorThread, this, |
|
177 PR_PRIORITY_LOW, PR_GLOBAL_THREAD, PR_JOINABLE_THREAD, 0); |
|
178 |
|
179 MOZ_ASSERT(mHangMonitorThread, |
|
180 "Failed to create monitor thread"); |
|
181 } |
|
182 |
|
183 BackgroundHangManager::~BackgroundHangManager() |
|
184 { |
|
185 MOZ_ASSERT(mShutdown, |
|
186 "Destruction without Shutdown call"); |
|
187 MOZ_ASSERT(mHangThreads.isEmpty(), |
|
188 "Destruction with outstanding monitors"); |
|
189 MOZ_ASSERT(mHangMonitorThread, |
|
190 "No monitor thread"); |
|
191 |
|
192 // PR_CreateThread could have failed above due to resource limitation |
|
193 if (mHangMonitorThread) { |
|
194 // The monitor thread can only live as long as the instance lives |
|
195 PR_JoinThread(mHangMonitorThread); |
|
196 } |
|
197 } |
|
198 |
|
199 void |
|
200 BackgroundHangManager::RunMonitorThread() |
|
201 { |
|
202 // Keep us locked except when waiting |
|
203 MonitorAutoLock autoLock(mLock); |
|
204 |
|
205 /* mIntervalNow is updated at various intervals determined by waitTime. |
|
206 However, if an update latency is too long (due to CPU scheduling, system |
|
207 sleep, etc.), we don't update mIntervalNow at all. This is done so that |
|
208 long latencies in our timing are not detected as hangs. systemTime is |
|
209 used to track PR_IntervalNow() and determine our latency. */ |
|
210 |
|
211 PRIntervalTime systemTime = PR_IntervalNow(); |
|
212 // Default values for the first iteration of thread loop |
|
213 PRIntervalTime waitTime = PR_INTERVAL_NO_WAIT; |
|
214 PRIntervalTime recheckTimeout = PR_INTERVAL_NO_WAIT; |
|
215 |
|
216 while (!mShutdown) { |
|
217 |
|
218 PR_ClearInterrupt(); |
|
219 nsresult rv = autoLock.Wait(waitTime); |
|
220 |
|
221 PRIntervalTime newTime = PR_IntervalNow(); |
|
222 PRIntervalTime systemInterval = newTime - systemTime; |
|
223 systemTime = newTime; |
|
224 |
|
225 /* waitTime is a quarter of the shortest timeout value; If our timing |
|
226 latency is low enough (less than half the shortest timeout value), |
|
227 we can update mIntervalNow. */ |
|
228 if (MOZ_LIKELY(waitTime != PR_INTERVAL_NO_TIMEOUT && |
|
229 systemInterval < 2 * waitTime)) { |
|
230 mIntervalNow += systemInterval; |
|
231 } |
|
232 |
|
233 /* If it's before the next recheck timeout, and our wait did not |
|
234 get interrupted (either through Notify or PR_Interrupt), we can |
|
235 keep the current waitTime and skip iterating through hang monitors. */ |
|
236 if (MOZ_LIKELY(systemInterval < recheckTimeout && |
|
237 systemInterval >= waitTime && |
|
238 rv == NS_OK)) { |
|
239 recheckTimeout -= systemInterval; |
|
240 continue; |
|
241 } |
|
242 |
|
243 /* We are in one of the following scenarios, |
|
244 - Hang or permahang recheck timeout |
|
245 - Thread added/removed |
|
246 - Thread wait or hang ended |
|
247 In all cases, we want to go through our list of hang |
|
248 monitors and update waitTime and recheckTimeout. */ |
|
249 waitTime = PR_INTERVAL_NO_TIMEOUT; |
|
250 recheckTimeout = PR_INTERVAL_NO_TIMEOUT; |
|
251 |
|
252 // Locally hold mIntervalNow |
|
253 PRIntervalTime intervalNow = mIntervalNow; |
|
254 |
|
255 // iterate through hang monitors |
|
256 for (BackgroundHangThread* currentThread = mHangThreads.getFirst(); |
|
257 currentThread; currentThread = currentThread->getNext()) { |
|
258 |
|
259 if (currentThread->mWaiting) { |
|
260 // Thread is waiting, not hanging |
|
261 continue; |
|
262 } |
|
263 PRIntervalTime interval = currentThread->mInterval; |
|
264 PRIntervalTime hangTime = intervalNow - interval; |
|
265 if (MOZ_UNLIKELY(hangTime >= currentThread->mMaxTimeout)) { |
|
266 // A permahang started |
|
267 // Skip subsequent iterations and tolerate a race on mWaiting here |
|
268 currentThread->mWaiting = true; |
|
269 currentThread->mHanging = false; |
|
270 currentThread->ReportPermaHang(); |
|
271 continue; |
|
272 } |
|
273 |
|
274 if (MOZ_LIKELY(!currentThread->mHanging)) { |
|
275 if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) { |
|
276 // A hang started |
|
277 currentThread->mStackHelper.GetStack(currentThread->mHangStack); |
|
278 currentThread->mHangStart = interval; |
|
279 currentThread->mHanging = true; |
|
280 } |
|
281 } else { |
|
282 if (MOZ_LIKELY(interval != currentThread->mHangStart)) { |
|
283 // A hang ended |
|
284 currentThread->ReportHang(intervalNow - currentThread->mHangStart); |
|
285 currentThread->mHanging = false; |
|
286 } |
|
287 } |
|
288 |
|
289 /* If we are hanging, the next time we check for hang status is when |
|
290 the hang turns into a permahang. If we're not hanging, the next |
|
291 recheck timeout is when we may be entering a hang. */ |
|
292 PRIntervalTime nextRecheck; |
|
293 if (currentThread->mHanging) { |
|
294 nextRecheck = currentThread->mMaxTimeout; |
|
295 } else { |
|
296 nextRecheck = currentThread->mTimeout; |
|
297 } |
|
298 recheckTimeout = std::min(recheckTimeout, nextRecheck - hangTime); |
|
299 |
|
300 /* We wait for a quarter of the shortest timeout |
|
301 value to give mIntervalNow enough granularity. */ |
|
302 waitTime = std::min(waitTime, currentThread->mTimeout / 4); |
|
303 } |
|
304 } |
|
305 |
|
306 /* We are shutting down now. |
|
307 Wait for all outstanding monitors to unregister. */ |
|
308 while (!mHangThreads.isEmpty()) { |
|
309 autoLock.Wait(PR_INTERVAL_NO_TIMEOUT); |
|
310 } |
|
311 } |
|
312 |
|
313 |
|
314 BackgroundHangThread::BackgroundHangThread(const char* aName, |
|
315 uint32_t aTimeoutMs, |
|
316 uint32_t aMaxTimeoutMs) |
|
317 : mManager(BackgroundHangManager::sInstance) |
|
318 , mThreadID(PR_GetCurrentThread()) |
|
319 , mTimeout(aTimeoutMs == BackgroundHangMonitor::kNoTimeout |
|
320 ? PR_INTERVAL_NO_TIMEOUT |
|
321 : PR_MillisecondsToInterval(aTimeoutMs)) |
|
322 , mMaxTimeout(aMaxTimeoutMs == BackgroundHangMonitor::kNoTimeout |
|
323 ? PR_INTERVAL_NO_TIMEOUT |
|
324 : PR_MillisecondsToInterval(aMaxTimeoutMs)) |
|
325 , mInterval(mManager->mIntervalNow) |
|
326 , mHangStart(mInterval) |
|
327 , mHanging(false) |
|
328 , mWaiting(true) |
|
329 , mStats(aName) |
|
330 { |
|
331 if (sTlsKey.initialized()) { |
|
332 sTlsKey.set(this); |
|
333 } |
|
334 // Lock here because LinkedList is not thread-safe |
|
335 MonitorAutoLock autoLock(mManager->mLock); |
|
336 // Add to thread list |
|
337 mManager->mHangThreads.insertBack(this); |
|
338 // Wake up monitor thread to process new thread |
|
339 autoLock.Notify(); |
|
340 } |
|
341 |
|
342 BackgroundHangThread::~BackgroundHangThread() |
|
343 { |
|
344 // Lock here because LinkedList is not thread-safe |
|
345 MonitorAutoLock autoLock(mManager->mLock); |
|
346 // Remove from thread list |
|
347 remove(); |
|
348 // Wake up monitor thread to process removed thread |
|
349 autoLock.Notify(); |
|
350 |
|
351 // We no longer have a thread |
|
352 if (sTlsKey.initialized()) { |
|
353 sTlsKey.set(nullptr); |
|
354 } |
|
355 |
|
356 // Move our copy of ThreadHangStats to Telemetry storage |
|
357 Telemetry::RecordThreadHangStats(mStats); |
|
358 } |
|
359 |
|
360 void |
|
361 BackgroundHangThread::ReportHang(PRIntervalTime aHangTime) |
|
362 { |
|
363 // Recovered from a hang; called on the monitor thread |
|
364 // mManager->mLock IS locked |
|
365 |
|
366 Telemetry::HangHistogram newHistogram(Move(mHangStack)); |
|
367 for (Telemetry::HangHistogram* oldHistogram = mStats.mHangs.begin(); |
|
368 oldHistogram != mStats.mHangs.end(); oldHistogram++) { |
|
369 if (newHistogram == *oldHistogram) { |
|
370 // New histogram matches old one |
|
371 oldHistogram->Add(aHangTime); |
|
372 return; |
|
373 } |
|
374 } |
|
375 // Add new histogram |
|
376 newHistogram.Add(aHangTime); |
|
377 mStats.mHangs.append(Move(newHistogram)); |
|
378 } |
|
379 |
|
380 void |
|
381 BackgroundHangThread::ReportPermaHang() |
|
382 { |
|
383 // Permanently hanged; called on the monitor thread |
|
384 // mManager->mLock IS locked |
|
385 |
|
386 // TODO: Add more detailed analysis for perma-hangs |
|
387 ReportHang(mMaxTimeout); |
|
388 } |
|
389 |
|
390 MOZ_ALWAYS_INLINE void |
|
391 BackgroundHangThread::NotifyActivity() |
|
392 { |
|
393 PRIntervalTime intervalNow = mManager->mIntervalNow; |
|
394 if (mWaiting) { |
|
395 mInterval = intervalNow; |
|
396 mWaiting = false; |
|
397 /* We have to wake up the manager thread because when all threads |
|
398 are waiting, the manager thread waits indefinitely as well. */ |
|
399 mManager->Wakeup(); |
|
400 } else { |
|
401 PRIntervalTime duration = intervalNow - mInterval; |
|
402 mStats.mActivity.Add(duration); |
|
403 if (MOZ_UNLIKELY(duration >= mTimeout)) { |
|
404 /* Wake up the manager thread to tell it that a hang ended */ |
|
405 mManager->Wakeup(); |
|
406 } |
|
407 mInterval = intervalNow; |
|
408 } |
|
409 } |
|
410 |
|
411 BackgroundHangThread* |
|
412 BackgroundHangThread::FindThread() |
|
413 { |
|
414 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
|
415 if (sTlsKey.initialized()) { |
|
416 // Use TLS if available |
|
417 return sTlsKey.get(); |
|
418 } |
|
419 // If TLS is unavailable, we can search through the thread list |
|
420 RefPtr<BackgroundHangManager> manager(BackgroundHangManager::sInstance); |
|
421 MOZ_ASSERT(manager, "Creating BackgroundHangMonitor after shutdown"); |
|
422 |
|
423 PRThread* threadID = PR_GetCurrentThread(); |
|
424 // Lock thread list for traversal |
|
425 MonitorAutoLock autoLock(manager->mLock); |
|
426 for (BackgroundHangThread* thread = manager->mHangThreads.getFirst(); |
|
427 thread; thread = thread->getNext()) { |
|
428 if (thread->mThreadID == threadID) { |
|
429 return thread; |
|
430 } |
|
431 } |
|
432 #endif |
|
433 // Current thread is not initialized |
|
434 return nullptr; |
|
435 } |
|
436 |
|
437 |
|
438 void |
|
439 BackgroundHangMonitor::Startup() |
|
440 { |
|
441 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
|
442 MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized"); |
|
443 ThreadStackHelper::Startup(); |
|
444 BackgroundHangThread::Startup(); |
|
445 BackgroundHangManager::sInstance = new BackgroundHangManager(); |
|
446 #endif |
|
447 } |
|
448 |
|
449 void |
|
450 BackgroundHangMonitor::Shutdown() |
|
451 { |
|
452 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
|
453 MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized"); |
|
454 /* Scope our lock inside Shutdown() because the sInstance object can |
|
455 be destroyed as soon as we set sInstance to nullptr below, and |
|
456 we don't want to hold the lock when it's being destroyed. */ |
|
457 BackgroundHangManager::sInstance->Shutdown(); |
|
458 BackgroundHangManager::sInstance = nullptr; |
|
459 ThreadStackHelper::Shutdown(); |
|
460 #endif |
|
461 } |
|
462 |
|
463 BackgroundHangMonitor::BackgroundHangMonitor(const char* aName, |
|
464 uint32_t aTimeoutMs, |
|
465 uint32_t aMaxTimeoutMs) |
|
466 : mThread(BackgroundHangThread::FindThread()) |
|
467 { |
|
468 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
|
469 if (!mThread) { |
|
470 mThread = new BackgroundHangThread(aName, aTimeoutMs, aMaxTimeoutMs); |
|
471 } |
|
472 #endif |
|
473 } |
|
474 |
|
475 BackgroundHangMonitor::BackgroundHangMonitor() |
|
476 : mThread(BackgroundHangThread::FindThread()) |
|
477 { |
|
478 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
|
479 MOZ_ASSERT(mThread, "Thread not initialized for hang monitoring"); |
|
480 #endif |
|
481 } |
|
482 |
|
483 BackgroundHangMonitor::~BackgroundHangMonitor() |
|
484 { |
|
485 } |
|
486 |
|
487 void |
|
488 BackgroundHangMonitor::NotifyActivity() |
|
489 { |
|
490 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
|
491 mThread->NotifyActivity(); |
|
492 #endif |
|
493 } |
|
494 |
|
495 void |
|
496 BackgroundHangMonitor::NotifyWait() |
|
497 { |
|
498 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR |
|
499 mThread->NotifyWait(); |
|
500 #endif |
|
501 } |
|
502 |
|
503 |
|
504 /* Because we are iterating through the BackgroundHangThread linked list, |
|
505 we need to take a lock. Using MonitorAutoLock as a base class makes |
|
506 sure all of that is taken care of for us. */ |
|
507 BackgroundHangMonitor::ThreadHangStatsIterator::ThreadHangStatsIterator() |
|
508 : MonitorAutoLock(BackgroundHangManager::sInstance->mLock) |
|
509 , mThread(BackgroundHangManager::sInstance->mHangThreads.getFirst()) |
|
510 { |
|
511 } |
|
512 |
|
513 Telemetry::ThreadHangStats* |
|
514 BackgroundHangMonitor::ThreadHangStatsIterator::GetNext() |
|
515 { |
|
516 if (!mThread) { |
|
517 return nullptr; |
|
518 } |
|
519 Telemetry::ThreadHangStats* stats = &mThread->mStats; |
|
520 mThread = mThread->getNext(); |
|
521 return stats; |
|
522 } |
|
523 |
|
524 } // namespace mozilla |