Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include <map>
7 #include <memory>
9 #include <dlfcn.h>
10 #include <errno.h>
11 #include <fcntl.h>
12 #include <setjmp.h>
13 #include <signal.h>
14 #include <poll.h>
15 #include <pthread.h>
16 #include <alloca.h>
17 #include <sys/epoll.h>
18 #include <sys/mman.h>
19 #include <sys/prctl.h>
20 #include <sys/types.h>
21 #include <sys/socket.h>
22 #include <sys/stat.h>
23 #include <sys/syscall.h>
24 #include <vector>
26 #include "mozilla/LinkedList.h"
27 #include "Nuwa.h"
29 using namespace mozilla;
31 extern "C" MFBT_API int tgkill(pid_t tgid, pid_t tid, int signalno) {
32 return syscall(__NR_tgkill, tgid, tid, signalno);
33 }
35 /**
36 * Provides the wrappers to a selected set of pthread and system-level functions
37 * as the basis for implementing a Zygote-like preforking mechanism.
38 */
40 /**
41 * Real functions for the wrappers.
42 */
43 extern "C" {
44 int __real_pthread_create(pthread_t *thread,
45 const pthread_attr_t *attr,
46 void *(*start_routine) (void *),
47 void *arg);
48 int __real_pthread_key_create(pthread_key_t *key, void (*destructor)(void*));
49 int __real_pthread_key_delete(pthread_key_t key);
50 pthread_t __real_pthread_self();
51 int __real_pthread_join(pthread_t thread, void **retval);
52 int __real_epoll_wait(int epfd,
53 struct epoll_event *events,
54 int maxevents,
55 int timeout);
56 int __real_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mtx);
57 int __real_pthread_cond_timedwait(pthread_cond_t *cond,
58 pthread_mutex_t *mtx,
59 const struct timespec *abstime);
60 int __real___pthread_cond_timedwait(pthread_cond_t *cond,
61 pthread_mutex_t *mtx,
62 const struct timespec *abstime,
63 clockid_t clock);
64 int __real_pthread_mutex_lock(pthread_mutex_t *mtx);
65 int __real_poll(struct pollfd *fds, nfds_t nfds, int timeout);
66 int __real_epoll_create(int size);
67 int __real_socketpair(int domain, int type, int protocol, int sv[2]);
68 int __real_pipe2(int __pipedes[2], int flags);
69 int __real_pipe(int __pipedes[2]);
70 int __real_epoll_ctl(int aEpollFd, int aOp, int aFd, struct epoll_event *aEvent);
71 int __real_close(int aFd);
72 }
74 #define REAL(s) __real_##s
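// Illustrative note (an assumption, not part of the original file): the
// __wrap_/__real_ pairs follow the GNU ld --wrap naming convention, so a call
// through the REAL() macro resolves to the unwrapped implementation, e.g.:
//
//   REAL(pthread_mutex_lock)(&sThreadCountLock);
//   // expands to __real_pthread_mutex_lock(&sThreadCountLock)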
76 /**
77 * A Nuwa process is started by being prepared. After preparation, it waits
78 * for all of its threads to become frozen. Once every thread is frozen, the
79 * process is ready.
80 */
81 static bool sIsNuwaProcess = false; // This process is a Nuwa process.
82 static bool sIsFreezing = false; // Waiting for all threads getting frozen.
83 static bool sNuwaReady = false; // Nuwa process is ready.
84 static bool sNuwaPendingSpawn = false; // Are there any pending spawn requests?
85 static bool sNuwaForking = false;
87 // FDs of transports of top-level protocols.
88 static NuwaProtoFdInfo sProtoFdInfos[NUWA_TOPLEVEL_MAX];
89 static int sProtoFdInfosSize = 0;
91 template <typename T>
92 struct LibcAllocator: public std::allocator<T>
93 {
94 LibcAllocator()
95 {
96 void* libcHandle = dlopen("libc.so", RTLD_LAZY);
97 mMallocImpl = reinterpret_cast<void*(*)(size_t)>(dlsym(libcHandle, "malloc"));
98 mFreeImpl = reinterpret_cast<void(*)(void*)>(dlsym(libcHandle, "free"));
100 if (!(mMallocImpl && mFreeImpl)) {
101 // libc should be available, or we'll deadlock when using TLSInfoList.
102 abort();
103 }
104 }
106 inline typename std::allocator<T>::pointer
107 allocate(typename std::allocator<T>::size_type n,
108 const void * = 0)
109 {
110 return reinterpret_cast<T *>(mMallocImpl(sizeof(T) * n));
111 }
113 inline void
114 deallocate(typename std::allocator<T>::pointer p,
115 typename std::allocator<T>::size_type n)
116 {
117 mFreeImpl(p);
118 }
120 template<typename U>
121 struct rebind
122 {
123 typedef LibcAllocator<U> other;
124 };
125 private:
126 void* (*mMallocImpl)(size_t);
127 void (*mFreeImpl)(void*);
128 };
130 /**
131 * TLSInfoList should use malloc() and free() from libc to avoid a deadlock:
132 * jemalloc calls into __wrap_pthread_mutex_lock() and would deadlock if the
133 * same thread has already acquired sThreadCountLock.
134 */
135 typedef std::vector<std::pair<pthread_key_t, void *>,
136 LibcAllocator<std::pair<pthread_key_t, void *> > >
137 TLSInfoList;
139 /**
140 * Return the system's page size
141 */
142 static size_t getPageSize(void) {
143 #ifdef HAVE_GETPAGESIZE
144 return getpagesize();
145 #elif defined(_SC_PAGESIZE)
146 return sysconf(_SC_PAGESIZE);
147 #elif defined(PAGE_SIZE)
148 return PAGE_SIZE;
149 #else
150 #warning "Hard-coding page size to 4096 bytes"
151 return 4096;
152 #endif
153 }
155 /**
156 * Align the pointer to the next page boundary unless it's already aligned
157 */
158 static uintptr_t ceilToPage(uintptr_t aPtr) {
159 size_t pageSize = getPageSize();
161 return ((aPtr + pageSize - 1) / pageSize) * pageSize;
162 }
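// A minimal worked example (comment only): with a 4096-byte page,
//   ceilToPage(4096) == 4096   (already aligned, unchanged)
//   ceilToPage(4097) == 8192   (rounded up to the next page boundary)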
164 /**
165 * The stack size is chosen carefully so the frozen threads don't consume too
166 * much memory in the Nuwa process. The threads shouldn't run deeply recursive
167 * methods or make large allocations on the stack, to avoid stack overflow.
168 */
169 #ifndef NUWA_STACK_SIZE
170 #define NUWA_STACK_SIZE (1024 * 128)
171 #endif
173 #define NATIVE_THREAD_NAME_LENGTH 16
175 struct thread_info : public mozilla::LinkedListElement<thread_info> {
176 pthread_t origThreadID;
177 pthread_t recreatedThreadID;
178 pthread_attr_t threadAttr;
179 jmp_buf jmpEnv;
180 jmp_buf retEnv;
182 int flags;
184 void *(*startupFunc)(void *arg);
185 void *startupArg;
187 // The thread specific function to recreate the new thread. It's executed
188 // after the thread is recreated.
189 void (*recrFunc)(void *arg);
190 void *recrArg;
192 TLSInfoList tlsInfo;
194 pthread_mutex_t *reacquireMutex;
195 void *stk;
197 pid_t origNativeThreadID;
198 pid_t recreatedNativeThreadID;
199 char nativeThreadName[NATIVE_THREAD_NAME_LENGTH];
200 };
202 typedef struct thread_info thread_info_t;
204 static thread_info_t *sCurrentRecreatingThread = nullptr;
206 /**
207 * This function runs the custom recreation function registered when calling
208 * NuwaMarkCurrentThread() after thread stack is restored.
209 */
210 static void
211 RunCustomRecreation() {
212 thread_info_t *tinfo = sCurrentRecreatingThread;
213 if (tinfo->recrFunc != nullptr) {
214 tinfo->recrFunc(tinfo->recrArg);
215 }
216 }
218 /**
219 * Every thread should be marked as either TINFO_FLAG_NUWA_SUPPORT or
220 * TINFO_FLAG_NUWA_SKIP; an unmarked thread indicates a potential error. We force
221 * Gecko code to mark every single thread to make sure there are no accidents
222 * when recreating threads with Nuwa.
223 *
224 * Threads marked as TINFO_FLAG_NUWA_SUPPORT can be checkpointed explicitly, by
225 * calling NuwaCheckpointCurrentThread(), or implicitly when they call into wrapped
226 * functions like pthread_mutex_lock(), epoll_wait(), etc.
227 * TINFO_FLAG_NUWA_EXPLICIT_CHECKPOINT denotes the explicitly checkpointed thread.
228 */
229 #define TINFO_FLAG_NUWA_SUPPORT 0x1
230 #define TINFO_FLAG_NUWA_SKIP 0x2
231 #define TINFO_FLAG_NUWA_EXPLICIT_CHECKPOINT 0x4
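// A minimal sketch (comment only) of how a Gecko thread is expected to use the
// marking API declared in Nuwa.h; RecreateState and WorkerMain below are
// hypothetical names used purely for illustration:
//
//   static void RecreateState(void *aArg) { /* rebuild per-thread state */ }
//
//   static void *WorkerMain(void *aArg) {
//     NuwaMarkCurrentThread(RecreateState, aArg);   // TINFO_FLAG_NUWA_SUPPORT
//     /* ... set up ... */
//     NuwaCheckpointCurrentThread();                // explicit checkpoint; or
//                                                   // block in a wrapped call
//     /* ... main loop ... */
//   }
//
// A thread that must not be recreated calls NuwaSkipCurrentThread() instead,
// which sets TINFO_FLAG_NUWA_SKIP.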
233 typedef struct nuwa_construct {
234 void (*construct)(void *);
235 void *arg;
236 } nuwa_construct_t;
238 static std::vector<nuwa_construct_t> sConstructors;
239 static std::vector<nuwa_construct_t> sFinalConstructors;
241 typedef std::map<pthread_key_t, void (*)(void *)> TLSKeySet;
242 static TLSKeySet sTLSKeys;
244 /**
245 * This mutex is used to block the running threads and freeze their contexts.
246 * PrepareNuwaProcess() is the first one to acquire the lock. Further attempts
247 * to acquire this mutex (in the freeze point macros) will block and freeze the
248 * calling thread.
249 */
250 static pthread_mutex_t sThreadFreezeLock = PTHREAD_MUTEX_INITIALIZER;
252 static thread_info_t sMainThread;
253 static LinkedList<thread_info_t> sAllThreads;
254 static int sThreadCount = 0;
255 static int sThreadFreezeCount = 0;
256 /**
257 * This mutex protects the access to thread info:
258 * sAllThreads, sThreadCount, sThreadFreezeCount, sRecreateVIPCount.
259 */
260 static pthread_mutex_t sThreadCountLock = PTHREAD_MUTEX_INITIALIZER;
261 /**
262 * This condition variable lets MakeNuwaProcess() wait until all recreated
263 * threads are frozen.
264 */
265 static pthread_cond_t sThreadChangeCond = PTHREAD_COND_INITIALIZER;
267 /**
268 * This mutex and condition variable are used to serialize the fork requests
269 * from the parent process.
270 */
271 static pthread_mutex_t sForkLock = PTHREAD_MUTEX_INITIALIZER;
272 static pthread_cond_t sForkWaitCond = PTHREAD_COND_INITIALIZER;
274 /**
275 * sForkWaitCondChanged is reset to false on the IPC thread before it waits for
276 * the fork to complete, and is set to true on the main thread to indicate that
277 * the condition the IPC thread is waiting for has changed.
278 */
279 static bool sForkWaitCondChanged = false;
281 /**
282 * This mutex protects the access to sTLSKeys, which keeps track of existing
283 * TLS Keys.
284 */
285 static pthread_mutex_t sTLSKeyLock = PTHREAD_MUTEX_INITIALIZER;
286 static int sThreadSkipCount = 0;
288 static thread_info_t *
289 GetThreadInfoInner(pthread_t threadID) {
290 for (thread_info_t *tinfo = sAllThreads.getFirst();
291 tinfo;
292 tinfo = tinfo->getNext()) {
293 if (pthread_equal(tinfo->origThreadID, threadID)) {
294 return tinfo;
295 }
296 }
298 return nullptr;
299 }
301 /**
302 * Get thread info using the specified thread ID.
303 *
304 * @return thread_info_t which has threadID == specified threadID
305 */
306 static thread_info_t *
307 GetThreadInfo(pthread_t threadID) {
308 if (sIsNuwaProcess) {
309 REAL(pthread_mutex_lock)(&sThreadCountLock);
310 }
311 thread_info_t *tinfo = GetThreadInfoInner(threadID);
312 if (sIsNuwaProcess) {
313 pthread_mutex_unlock(&sThreadCountLock);
314 }
315 return tinfo;
316 }
318 /**
319 * Get thread info using the specified native thread ID.
320 *
321 * @return thread_info_t with nativeThreadID == specified threadID
322 */
323 static thread_info_t*
324 GetThreadInfo(pid_t threadID) {
325 if (sIsNuwaProcess) {
326 REAL(pthread_mutex_lock)(&sThreadCountLock);
327 }
328 thread_info_t *thrinfo = nullptr;
329 for (thread_info_t *tinfo = sAllThreads.getFirst();
330 tinfo;
331 tinfo = tinfo->getNext()) {
332 if (tinfo->origNativeThreadID == threadID) {
333 thrinfo = tinfo;
334 break;
335 }
336 }
337 if (sIsNuwaProcess) {
338 pthread_mutex_unlock(&sThreadCountLock);
339 }
341 return thrinfo;
342 }
344 #if !defined(HAVE_THREAD_TLS_KEYWORD)
345 /**
346 * Get thread info of the current thread.
347 *
348 * @return thread_info_t for the current thread.
349 */
350 static thread_info_t *
351 GetCurThreadInfo() {
352 pthread_t threadID = REAL(pthread_self)();
353 pthread_t thread_info_t::*threadIDptr =
354 (sIsNuwaProcess ?
355 &thread_info_t::origThreadID :
356 &thread_info_t::recreatedThreadID);
358 REAL(pthread_mutex_lock)(&sThreadCountLock);
359 thread_info_t *tinfo;
360 for (tinfo = sAllThreads.getFirst();
361 tinfo;
362 tinfo = tinfo->getNext()) {
363 if (pthread_equal(tinfo->*threadIDptr, threadID)) {
364 break;
365 }
366 }
367 pthread_mutex_unlock(&sThreadCountLock);
368 return tinfo;
369 }
370 #define CUR_THREAD_INFO GetCurThreadInfo()
371 #define SET_THREAD_INFO(x) /* Nothing to do. */
372 #else
373 // Is not nullptr only for threads created by pthread_create() in a Nuwa process.
374 // It is always nullptr for the main thread.
375 static __thread thread_info_t *sCurThreadInfo = nullptr;
376 #define CUR_THREAD_INFO sCurThreadInfo
377 #define SET_THREAD_INFO(x) do { sCurThreadInfo = (x); } while(0)
378 #endif // HAVE_THREAD_TLS_KEYWORD
380 /*
381 * Track all epoll fds and their registered events.
382 */
383 class EpollManager {
384 public:
385 class EpollInfo {
386 public:
387 typedef struct epoll_event Events;
388 typedef std::map<int, Events> EpollEventsMap;
389 typedef EpollEventsMap::iterator iterator;
390 typedef EpollEventsMap::const_iterator const_iterator;
392 EpollInfo(): mBackSize(0) {}
393 EpollInfo(int aBackSize): mBackSize(aBackSize) {}
394 EpollInfo(const EpollInfo &aOther): mEvents(aOther.mEvents)
395 , mBackSize(aOther.mBackSize) {
396 }
397 ~EpollInfo() {
398 mEvents.clear();
399 }
401 void AddEvents(int aFd, Events &aEvents) {
402 std::pair<iterator, bool> pair =
403 mEvents.insert(std::make_pair(aFd, aEvents));
404 if (!pair.second) {
405 abort();
406 }
407 }
409 void RemoveEvents(int aFd) {
410 if (!mEvents.erase(aFd)) {
411 abort();
412 }
413 }
415 void ModifyEvents(int aFd, Events &aEvents) {
416 iterator it = mEvents.find(aFd);
417 if (it == mEvents.end()) {
418 abort();
419 }
420 it->second = aEvents;
421 }
423 const Events &FindEvents(int aFd) const {
424 const_iterator it = mEvents.find(aFd);
425 if (it == mEvents.end()) {
426 abort();
427 }
428 return it->second;
429 }
431 int Size() const { return mEvents.size(); }
433 // Iterator with values of <fd, Events> pairs.
434 const_iterator begin() const { return mEvents.begin(); }
435 const_iterator end() const { return mEvents.end(); }
437 int BackSize() const { return mBackSize; }
439 private:
440 EpollEventsMap mEvents;
441 int mBackSize;
443 friend class EpollManager;
444 };
446 typedef std::map<int, EpollInfo> EpollInfoMap;
447 typedef EpollInfoMap::iterator iterator;
448 typedef EpollInfoMap::const_iterator const_iterator;
450 public:
451 void AddEpollInfo(int aEpollFd, int aBackSize) {
452 EpollInfo *oldinfo = FindEpollInfo(aEpollFd);
453 if (oldinfo != nullptr) {
454 abort();
455 }
456 mEpollFdsInfo[aEpollFd] = EpollInfo(aBackSize);
457 }
459 EpollInfo *FindEpollInfo(int aEpollFd) {
460 iterator it = mEpollFdsInfo.find(aEpollFd);
461 if (it == mEpollFdsInfo.end()) {
462 return nullptr;
463 }
464 return &it->second;
465 }
467 void RemoveEpollInfo(int aEpollFd) {
468 if (!mEpollFdsInfo.erase(aEpollFd)) {
469 abort();
470 }
471 }
473 int Size() const { return mEpollFdsInfo.size(); }
475 // Iterator of <epollfd, EpollInfo> pairs.
476 const_iterator begin() const { return mEpollFdsInfo.begin(); }
477 const_iterator end() const { return mEpollFdsInfo.end(); }
479 static EpollManager *Singleton() {
480 if (!sInstance) {
481 sInstance = new EpollManager();
482 }
483 return sInstance;
484 }
486 static void Shutdown() {
487 if (!sInstance) {
488 abort();
489 }
491 delete sInstance;
492 sInstance = nullptr;
493 }
495 private:
496 static EpollManager *sInstance;
497 ~EpollManager() {
498 mEpollFdsInfo.clear();
499 }
501 EpollInfoMap mEpollFdsInfo;
503 EpollManager() {}
504 };
506 EpollManager* EpollManager::sInstance;
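// Illustrative summary (comment only) of how the wrappers further below keep
// this bookkeeping in sync with the kernel state while in the Nuwa process:
//
//   int efd = epoll_create(16);              // __wrap_epoll_create -> AddEpollInfo(efd, 16)
//   epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev);  // __wrap_epoll_ctl    -> AddEvents(fd, ev)
//   epoll_ctl(efd, EPOLL_CTL_DEL, fd, &ev);  // __wrap_epoll_ctl    -> RemoveEvents(fd)
//   close(efd);                              // __wrap_close        -> RemoveEpollInfo(efd)
//
// RecreateEpollFds() then replays the recorded events in the forked process.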
508 static thread_info_t *
509 thread_info_new(void) {
510 /* link tinfo to sAllThreads */
511 thread_info_t *tinfo = new thread_info_t();
512 tinfo->flags = 0;
513 tinfo->recrFunc = nullptr;
514 tinfo->recrArg = nullptr;
515 tinfo->recreatedThreadID = 0;
516 tinfo->recreatedNativeThreadID = 0;
517 tinfo->reacquireMutex = nullptr;
518 tinfo->stk = malloc(NUWA_STACK_SIZE + getPageSize());
520 // We use a smaller stack size. Add protection against stack overflow: mprotect()
521 // the stack top (the page at the lowest address) so we crash instead of corrupting
522 // other content that is malloc()'d.
523 uintptr_t pageGuard = ceilToPage((uintptr_t)tinfo->stk);
524 mprotect((void*)pageGuard, getPageSize(), PROT_READ);
526 pthread_attr_init(&tinfo->threadAttr);
528 REAL(pthread_mutex_lock)(&sThreadCountLock);
529 // Insert to the tail.
530 sAllThreads.insertBack(tinfo);
532 sThreadCount++;
533 pthread_cond_signal(&sThreadChangeCond);
534 pthread_mutex_unlock(&sThreadCountLock);
536 return tinfo;
537 }
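// Illustrative layout (comment only) of the stack allocated above, with
// addresses increasing to the right; the read-only guard page sits at the low
// end of the stack so an overflow faults instead of corrupting heap data:
//
//   stk       pageGuard                pageGuard + pageSize
//    | slack  | PROT_READ guard page   |  usable stack, growing downward ... |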
539 static void
540 thread_info_cleanup(void *arg) {
541 if (sNuwaForking) {
542 // We shouldn't have any thread exiting when we are forking a new process.
543 abort();
544 }
546 thread_info_t *tinfo = (thread_info_t *)arg;
547 pthread_attr_destroy(&tinfo->threadAttr);
549 REAL(pthread_mutex_lock)(&sThreadCountLock);
550 /* unlink tinfo from sAllThreads */
551 tinfo->remove();
553 sThreadCount--;
554 pthread_cond_signal(&sThreadChangeCond);
555 pthread_mutex_unlock(&sThreadCountLock);
557 free(tinfo->stk);
558 delete tinfo;
559 }
561 static void *
562 _thread_create_startup(void *arg) {
563 thread_info_t *tinfo = (thread_info_t *)arg;
564 void *r;
566 // Save thread info; especially, stackaddr & stacksize.
567 // Reuse the stack in the new thread.
568 pthread_getattr_np(REAL(pthread_self)(), &tinfo->threadAttr);
570 SET_THREAD_INFO(tinfo);
571 tinfo->origThreadID = REAL(pthread_self)();
572 tinfo->origNativeThreadID = gettid();
574 pthread_cleanup_push(thread_info_cleanup, tinfo);
576 r = tinfo->startupFunc(tinfo->startupArg);
578 if (!sIsNuwaProcess) {
579 return r;
580 }
582 pthread_cleanup_pop(1);
584 return r;
585 }
587 // reserve STACK_RESERVED_SZ * 4 bytes for thread_recreate_startup().
588 #define STACK_RESERVED_SZ 64
589 #define STACK_SENTINEL(v) ((v)[0])
590 #define STACK_SENTINEL_VALUE(v) ((uint32_t)(v) ^ 0xdeadbeef)
592 static void *
593 thread_create_startup(void *arg) {
594 /*
595 * Dark Art!! Never try to do the same unless you are ABSOLUTELY sure of
596 * what you are doing!
597 *
598 * This function is here for reserving stack space before calling
599 * _thread_create_startup(); see also thread_recreate_startup().
600 */
601 void *r;
602 volatile uint32_t reserved[STACK_RESERVED_SZ];
604 // Reserve stack space.
605 STACK_SENTINEL(reserved) = STACK_SENTINEL_VALUE(reserved);
607 r = _thread_create_startup(arg);
609 // Check if the reservation is enough.
610 if (STACK_SENTINEL(reserved) != STACK_SENTINEL_VALUE(reserved)) {
611 abort(); // Did not reserve enough stack space.
612 }
614 thread_info_t *tinfo = CUR_THREAD_INFO;
615 if (!sIsNuwaProcess) {
616 longjmp(tinfo->retEnv, 1);
618 // Never go here!
619 abort();
620 }
622 return r;
623 }
625 extern "C" MFBT_API int
626 __wrap_pthread_create(pthread_t *thread,
627 const pthread_attr_t *attr,
628 void *(*start_routine) (void *),
629 void *arg) {
630 if (!sIsNuwaProcess) {
631 return REAL(pthread_create)(thread, attr, start_routine, arg);
632 }
634 thread_info_t *tinfo = thread_info_new();
635 tinfo->startupFunc = start_routine;
636 tinfo->startupArg = arg;
637 pthread_attr_setstack(&tinfo->threadAttr, tinfo->stk, NUWA_STACK_SIZE);
639 int rv = REAL(pthread_create)(thread,
640 &tinfo->threadAttr,
641 thread_create_startup,
642 tinfo);
643 if (rv) {
644 thread_info_cleanup(tinfo);
645 } else {
646 tinfo->origThreadID = *thread;
647 }
649 return rv;
650 }
652 // TLS related
654 /**
655 * Iterates over the existing TLS keys and store the TLS data for the current
656 * thread in tinfo.
657 */
658 static void
659 SaveTLSInfo(thread_info_t *tinfo) {
660 REAL(pthread_mutex_lock)(&sTLSKeyLock);
661 tinfo->tlsInfo.clear();
662 for (TLSKeySet::const_iterator it = sTLSKeys.begin();
663 it != sTLSKeys.end();
664 it++) {
665 void *value = pthread_getspecific(it->first);
666 if (value == nullptr) {
667 continue;
668 }
670 pthread_key_t key = it->first;
671 tinfo->tlsInfo.push_back(TLSInfoList::value_type(key, value));
672 }
673 pthread_mutex_unlock(&sTLSKeyLock);
674 }
676 /**
677 * Restores the TLS data for the current thread from tinfo.
678 */
679 static void
680 RestoreTLSInfo(thread_info_t *tinfo) {
681 for (TLSInfoList::const_iterator it = tinfo->tlsInfo.begin();
682 it != tinfo->tlsInfo.end();
683 it++) {
684 pthread_key_t key = it->first;
685 const void *value = it->second;
686 if (pthread_setspecific(key, value)) {
687 abort();
688 }
689 }
691 SET_THREAD_INFO(tinfo);
692 tinfo->recreatedThreadID = REAL(pthread_self)();
693 tinfo->recreatedNativeThreadID = gettid();
694 }
696 extern "C" MFBT_API int
697 __wrap_pthread_key_create(pthread_key_t *key, void (*destructor)(void*)) {
698 int rv = REAL(pthread_key_create)(key, destructor);
699 if (rv != 0) {
700 return rv;
701 }
702 REAL(pthread_mutex_lock)(&sTLSKeyLock);
703 sTLSKeys.insert(TLSKeySet::value_type(*key, destructor));
704 pthread_mutex_unlock(&sTLSKeyLock);
705 return 0;
706 }
708 extern "C" MFBT_API int
709 __wrap_pthread_key_delete(pthread_key_t key) {
710 if (!sIsNuwaProcess) {
711 return REAL(pthread_key_delete)(key);
712 }
713 int rv = REAL(pthread_key_delete)(key);
714 if (rv != 0) {
715 return rv;
716 }
717 REAL(pthread_mutex_lock)(&sTLSKeyLock);
718 sTLSKeys.erase(key);
719 pthread_mutex_unlock(&sTLSKeyLock);
720 return 0;
721 }
723 extern "C" MFBT_API pthread_t
724 __wrap_pthread_self() {
725 thread_info_t *tinfo = CUR_THREAD_INFO;
726 if (tinfo) {
727 // For a recreated thread, masquerade as the original thread in the Nuwa
728 // process.
729 return tinfo->origThreadID;
730 }
731 return REAL(pthread_self)();
732 }
734 extern "C" MFBT_API int
735 __wrap_pthread_join(pthread_t thread, void **retval) {
736 thread_info_t *tinfo = GetThreadInfo(thread);
737 if (tinfo == nullptr) {
738 return REAL(pthread_join)(thread, retval);
739 }
740 // pthread_join() needs to use the real thread ID in the spawned process.
741 return REAL(pthread_join)(tinfo->recreatedThreadID, retval);
742 }
744 /**
745 * The following are used to synchronize between the main thread and the
746 * thread being recreated. The main thread will wait until the thread is woken
747 * up from the freeze points or the blocking intercepted functions and then
748 * proceed to recreate the next frozen thread.
749 *
750 * In thread recreation, the main thread recreates the frozen threads one by
751 * one. The recreated threads will be "gated" until the main thread "opens the
752 * gate" to let them run freely as if they were created from scratch. The VIP
753 * threads gets the chance to run first after their thread stacks are recreated
754 * (using longjmp()) so they can adjust their contexts to a valid, consistent
755 * state. The threads frozen waiting for pthread condition variables are VIP
756 * threads. After woken up they need to run first to make the associated mutex
757 * in a valid state to maintain the semantics of the intercepted function calls
758 * (like pthread_cond_wait()).
759 */
761 // Used to synchronize the main thread and the thread being recreated so that
762 // only one thread is allowed to be recreated at a time.
763 static pthread_mutex_t sRecreateWaitLock = PTHREAD_MUTEX_INITIALIZER;
764 // Used to block recreated threads until the main thread "opens the gate".
765 static pthread_mutex_t sRecreateGateLock = PTHREAD_MUTEX_INITIALIZER;
766 // Used to block the main thread from "opening the gate" until all VIP threads
767 // have been recreated.
768 static pthread_mutex_t sRecreateVIPGateLock = PTHREAD_MUTEX_INITIALIZER;
769 static pthread_cond_t sRecreateVIPCond = PTHREAD_COND_INITIALIZER;
770 static int sRecreateVIPCount = 0;
771 static int sRecreateGatePassed = 0;
773 /**
774 * Thread recreation macros.
775 *
776 * The following macros are used in the forked process to synchronize and
777 * control the progress of thread recreation.
778 *
779 * 1. RECREATE_START() is first called in the beginning of thread
780 * recreation to set sRecreateWaitLock and sRecreateGateLock in locked
781 * state.
782 * 2. For each frozen thread:
783 * 2.1. RECREATE_BEFORE() to set the thread being recreated.
784 * 2.2. thread_recreate() to recreate the frozen thread.
785 * 2.3. Main thread calls RECREATE_WAIT() to wait on sRecreateWaitLock until
786 * the thread is recreated from the freeze point and calls
787 * RECREATE_CONTINUE() to release sRecreateWaitLock.
788 * 2.4. Non-VIP threads are blocked on RECREATE_GATE(). VIP threads call
789 * RECREATE_PASS_VIP() to mark that a VIP thread is successfully
790 * recreated and then is blocked by calling RECREATE_GATE_VIP().
791 * 3. RECREATE_WAIT_ALL_VIP() to wait until all VIP threads passed, that is,
792 * VIP threads already have their contexts (mainly pthread mutexes) in a valid
793 * state.
794 * 4. RECREATE_OPEN_GATE() to unblock threads blocked by sRecreateGateLock.
795 * 5. RECREATE_FINISH() to complete thread recreation.
796 */
797 #define RECREATE_START() \
798 do { \
799 REAL(pthread_mutex_lock)(&sRecreateWaitLock); \
800 REAL(pthread_mutex_lock)(&sRecreateGateLock); \
801 } while(0)
802 #define RECREATE_BEFORE(info) do { sCurrentRecreatingThread = info; } while(0)
803 #define RECREATE_WAIT() REAL(pthread_mutex_lock)(&sRecreateWaitLock)
804 #define RECREATE_CONTINUE() do { \
805 RunCustomRecreation(); \
806 pthread_mutex_unlock(&sRecreateWaitLock); \
807 } while(0)
808 #define RECREATE_FINISH() pthread_mutex_unlock(&sRecreateWaitLock)
809 #define RECREATE_GATE() \
810 do { \
811 REAL(pthread_mutex_lock)(&sRecreateGateLock); \
812 sRecreateGatePassed++; \
813 pthread_mutex_unlock(&sRecreateGateLock); \
814 } while(0)
815 #define RECREATE_OPEN_GATE() pthread_mutex_unlock(&sRecreateGateLock)
816 #define RECREATE_GATE_VIP() \
817 do { \
818 REAL(pthread_mutex_lock)(&sRecreateGateLock); \
819 pthread_mutex_unlock(&sRecreateGateLock); \
820 } while(0)
821 #define RECREATE_PASS_VIP() \
822 do { \
823 REAL(pthread_mutex_lock)(&sRecreateVIPGateLock); \
824 sRecreateGatePassed++; \
825 pthread_cond_signal(&sRecreateVIPCond); \
826 pthread_mutex_unlock(&sRecreateVIPGateLock); \
827 } while(0)
828 #define RECREATE_WAIT_ALL_VIP() \
829 do { \
830 REAL(pthread_mutex_lock)(&sRecreateVIPGateLock); \
831 while(sRecreateGatePassed < sRecreateVIPCount) { \
832 REAL(pthread_cond_wait)(&sRecreateVIPCond, \
833 &sRecreateVIPGateLock); \
834 } \
835 pthread_mutex_unlock(&sRecreateVIPGateLock); \
836 } while(0)
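// A condensed sketch (comment only) of the driver sequence described above; the
// real implementation is RecreateThreads() further down in this file:
//
//   RECREATE_START();
//   for (each frozen thread tinfo marked TINFO_FLAG_NUWA_SUPPORT) {
//     RECREATE_BEFORE(tinfo);
//     thread_recreate(tinfo);
//     RECREATE_WAIT();             // recreated thread calls RECREATE_CONTINUE()
//   }
//   RECREATE_WAIT_ALL_VIP();       // VIP threads have fixed up their mutexes
//   RECREATE_OPEN_GATE();          // let gated threads run freely
//   RECREATE_FINISH();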
838 /**
839 * Thread freeze points. Note that the freeze points are implemented as macros
840 * so as not to garble the content of the stack after setjmp().
841 *
842 * In the Nuwa process, when a thread supporting Nuwa calls a wrapper
843 * function, freeze point 1 setjmp()s to save the state. We only allow the
844 * thread to be frozen in the wrapper functions. If thread freezing is not
845 * enabled yet, the wrapper functions act like their wrapped counterparts,
846 * except for the extra actions in the freeze points. If thread freezing is
847 * enabled, the thread will be frozen by calling one of the wrapper functions.
848 * The threads can be frozen in any of the following points:
849 *
850 * 1) Freeze point 1: this is the point where we setjmp() in the Nuwa process
851 * and longjmp() in the spawned process. If freezing is enabled, then the
852 * current thread blocks by acquiring an already locked mutex,
853 * sThreadFreezeLock.
854 * 2) The wrapped function: the function that might block waiting for some
855 * resource or condition.
856 * 3) Freeze point 2: blocks the current thread by acquiring sThreadFreezeLock.
857 * If freezing is not enabled then revert the counter change in freeze
858 * point 1.
859 */
860 #define THREAD_FREEZE_POINT1() \
861 bool freezeCountChg = false; \
862 bool recreated = false; \
863 volatile bool freezePoint2 = false; \
864 thread_info_t *tinfo; \
865 if (sIsNuwaProcess && \
866 (tinfo = CUR_THREAD_INFO) && \
867 (tinfo->flags & TINFO_FLAG_NUWA_SUPPORT) && \
868 !(tinfo->flags & TINFO_FLAG_NUWA_EXPLICIT_CHECKPOINT)) { \
869 if (!setjmp(tinfo->jmpEnv)) { \
870 REAL(pthread_mutex_lock)(&sThreadCountLock); \
871 SaveTLSInfo(tinfo); \
872 sThreadFreezeCount++; \
873 freezeCountChg = true; \
874 pthread_cond_signal(&sThreadChangeCond); \
875 pthread_mutex_unlock(&sThreadCountLock); \
876 \
877 if (sIsFreezing) { \
878 REAL(pthread_mutex_lock)(&sThreadFreezeLock); \
879 /* Never return from the pthread_mutex_lock() call. */ \
880 abort(); \
881 } \
882 } else { \
883 RECREATE_CONTINUE(); \
884 RECREATE_GATE(); \
885 freezeCountChg = false; \
886 recreated = true; \
887 } \
888 }
890 #define THREAD_FREEZE_POINT1_VIP() \
891 bool freezeCountChg = false; \
892 bool recreated = false; \
893 volatile bool freezePoint1 = false; \
894 volatile bool freezePoint2 = false; \
895 thread_info_t *tinfo; \
896 if (sIsNuwaProcess && \
897 (tinfo = CUR_THREAD_INFO) && \
898 (tinfo->flags & TINFO_FLAG_NUWA_SUPPORT) && \
899 !(tinfo->flags & TINFO_FLAG_NUWA_EXPLICIT_CHECKPOINT)) { \
900 if (!setjmp(tinfo->jmpEnv)) { \
901 REAL(pthread_mutex_lock)(&sThreadCountLock); \
902 SaveTLSInfo(tinfo); \
903 sThreadFreezeCount++; \
904 sRecreateVIPCount++; \
905 freezeCountChg = true; \
906 pthread_cond_signal(&sThreadChangeCond); \
907 pthread_mutex_unlock(&sThreadCountLock); \
908 \
909 if (sIsFreezing) { \
910 freezePoint1 = true; \
911 REAL(pthread_mutex_lock)(&sThreadFreezeLock); \
912 /* Never return from the pthread_mutex_lock() call. */ \
913 abort(); \
914 } \
915 } else { \
916 freezeCountChg = false; \
917 recreated = true; \
918 } \
919 }
921 #define THREAD_FREEZE_POINT2() \
922 if (freezeCountChg) { \
923 REAL(pthread_mutex_lock)(&sThreadCountLock); \
924 if (sNuwaReady && sIsNuwaProcess) { \
925 pthread_mutex_unlock(&sThreadCountLock); \
926 freezePoint2 = true; \
927 REAL(pthread_mutex_lock)(&sThreadFreezeLock); \
928 /* Never return from the pthread_mutex_lock() call. */ \
929 abort(); \
930 } \
931 sThreadFreezeCount--; \
932 pthread_cond_signal(&sThreadChangeCond); \
933 pthread_mutex_unlock(&sThreadCountLock); \
934 }
936 #define THREAD_FREEZE_POINT2_VIP() \
937 if (freezeCountChg) { \
938 REAL(pthread_mutex_lock)(&sThreadCountLock); \
939 if (sNuwaReady && sIsNuwaProcess) { \
940 pthread_mutex_unlock(&sThreadCountLock); \
941 freezePoint2 = true; \
942 REAL(pthread_mutex_lock)(&sThreadFreezeLock); \
943 /* Never return from the pthread_mutex_lock() call. */ \
944 abort(); \
945 } \
946 sThreadFreezeCount--; \
947 sRecreateVIPCount--; \
948 pthread_cond_signal(&sThreadChangeCond); \
949 pthread_mutex_unlock(&sThreadCountLock); \
950 }
952 /**
953 * Wrapping the blocking functions: epoll_wait(), poll(), pthread_mutex_lock(),
954 * pthread_cond_wait() and pthread_cond_timedwait():
955 *
956 * These functions are wrapped by the above freeze point macros. Once a new
957 * process is forked, the recreated thread will be blocked in one of the wrapper
958 * functions. When recreating the thread, we longjmp() to
959 * THREAD_FREEZE_POINT1() to recover the thread stack. Care must be taken to
960 * maintain the semantics of the wrapped function:
961 *
962 * - epoll_wait() and poll(): just retry the function.
963 * - pthread_mutex_lock(): don't lock if frozen at freeze point 2 (lock is
964 * already acquired).
965 * - pthread_cond_wait() and pthread_cond_timedwait(): if the thread is frozen
966 * waiting on the condition variable, the mutex is already released; we need to
967 * reacquire the mutex before calling the wrapped function again so the mutex
968 * will be in a valid state.
969 */
971 extern "C" MFBT_API int
972 __wrap_epoll_wait(int epfd,
973 struct epoll_event *events,
974 int maxevents,
975 int timeout) {
976 int rv;
978 THREAD_FREEZE_POINT1();
979 rv = REAL(epoll_wait)(epfd, events, maxevents, timeout);
980 THREAD_FREEZE_POINT2();
982 return rv;
983 }
985 extern "C" MFBT_API int
986 __wrap_pthread_cond_wait(pthread_cond_t *cond,
987 pthread_mutex_t *mtx) {
988 int rv = 0;
990 THREAD_FREEZE_POINT1_VIP();
991 if (freezePoint2) {
992 RECREATE_CONTINUE();
993 RECREATE_PASS_VIP();
994 RECREATE_GATE_VIP();
995 return rv;
996 }
997 if (recreated && mtx) {
998 if (!freezePoint1 && pthread_mutex_trylock(mtx)) {
999 // The thread was frozen in pthread_cond_wait() after releasing mtx in the
1000 // Nuwa process. In recreating this thread, we failed to reacquire mtx
1001 // with the pthread_mutex_trylock() call, that is, mtx was acquired by
1002 // another thread. Because of this, we need the main thread's help to
1003 // reacquire mtx so that it will be in a valid state.
1004 tinfo->reacquireMutex = mtx;
1005 }
1006 RECREATE_CONTINUE();
1007 RECREATE_PASS_VIP();
1008 }
1009 rv = REAL(pthread_cond_wait)(cond, mtx);
1010 if (recreated && mtx) {
1011 // We still need to be gated so as not to acquire another mutex associated with
1012 // another VIP thread and interfere with it.
1013 RECREATE_GATE_VIP();
1014 }
1015 THREAD_FREEZE_POINT2_VIP();
1017 return rv;
1018 }
1020 extern "C" MFBT_API int
1021 __wrap_pthread_cond_timedwait(pthread_cond_t *cond,
1022 pthread_mutex_t *mtx,
1023 const struct timespec *abstime) {
1024 int rv = 0;
1026 THREAD_FREEZE_POINT1_VIP();
1027 if (freezePoint2) {
1028 RECREATE_CONTINUE();
1029 RECREATE_PASS_VIP();
1030 RECREATE_GATE_VIP();
1031 return rv;
1032 }
1033 if (recreated && mtx) {
1034 if (!freezePoint1 && pthread_mutex_trylock(mtx)) {
1035 tinfo->reacquireMutex = mtx;
1036 }
1037 RECREATE_CONTINUE();
1038 RECREATE_PASS_VIP();
1039 }
1040 rv = REAL(pthread_cond_timedwait)(cond, mtx, abstime);
1041 if (recreated && mtx) {
1042 RECREATE_GATE_VIP();
1043 }
1044 THREAD_FREEZE_POINT2_VIP();
1046 return rv;
1047 }
1049 extern "C" int __pthread_cond_timedwait(pthread_cond_t *cond,
1050 pthread_mutex_t *mtx,
1051 const struct timespec *abstime,
1052 clockid_t clock);
1054 extern "C" MFBT_API int
1055 __wrap___pthread_cond_timedwait(pthread_cond_t *cond,
1056 pthread_mutex_t *mtx,
1057 const struct timespec *abstime,
1058 clockid_t clock) {
1059 int rv = 0;
1061 THREAD_FREEZE_POINT1_VIP();
1062 if (freezePoint2) {
1063 RECREATE_CONTINUE();
1064 RECREATE_PASS_VIP();
1065 RECREATE_GATE_VIP();
1066 return rv;
1067 }
1068 if (recreated && mtx) {
1069 if (!freezePoint1 && pthread_mutex_trylock(mtx)) {
1070 tinfo->reacquireMutex = mtx;
1071 }
1072 RECREATE_CONTINUE();
1073 RECREATE_PASS_VIP();
1074 }
1075 rv = REAL(__pthread_cond_timedwait)(cond, mtx, abstime, clock);
1076 if (recreated && mtx) {
1077 RECREATE_GATE_VIP();
1078 }
1079 THREAD_FREEZE_POINT2_VIP();
1081 return rv;
1082 }
1084 extern "C" MFBT_API int
1085 __wrap_pthread_mutex_lock(pthread_mutex_t *mtx) {
1086 int rv = 0;
1088 THREAD_FREEZE_POINT1();
1089 if (freezePoint2) {
1090 return rv;
1091 }
1092 rv = REAL(pthread_mutex_lock)(mtx);
1093 THREAD_FREEZE_POINT2();
1095 return rv;
1096 }
1098 extern "C" MFBT_API int
1099 __wrap_poll(struct pollfd *fds, nfds_t nfds, int timeout) {
1100 int rv;
1102 THREAD_FREEZE_POINT1();
1103 rv = REAL(poll)(fds, nfds, timeout);
1104 THREAD_FREEZE_POINT2();
1106 return rv;
1107 }
1109 extern "C" MFBT_API int
1110 __wrap_epoll_create(int size) {
1111 int epollfd = REAL(epoll_create)(size);
1113 if (!sIsNuwaProcess) {
1114 return epollfd;
1115 }
1117 if (epollfd >= 0) {
1118 EpollManager::Singleton()->AddEpollInfo(epollfd, size);
1119 }
1121 return epollfd;
1122 }
1124 /**
1125 * Wrapping the functions to create file descriptor pairs. In the child process
1126 * FD pairs are created for intra-process signaling. The generation of FD pairs
1127 * needs to be tracked in the Nuwa process so they can be recreated in the
1128 * spawned process.
1129 */
1130 struct FdPairInfo {
1131 enum {
1132 kPipe,
1133 kSocketpair
1134 } call;
1136 int FDs[2];
1137 int flags;
1138 int domain;
1139 int type;
1140 int protocol;
1141 };
1143 /**
1144 * Protects the access to sSignalFds.
1145 */
1146 static pthread_mutex_t sSignalFdLock = PTHREAD_MUTEX_INITIALIZER;
1147 static std::vector<FdPairInfo> sSignalFds;
1149 extern "C" MFBT_API int
1150 __wrap_socketpair(int domain, int type, int protocol, int sv[2])
1151 {
1152 int rv = REAL(socketpair)(domain, type, protocol, sv);
1154 if (!sIsNuwaProcess || rv < 0) {
1155 return rv;
1156 }
1158 REAL(pthread_mutex_lock)(&sSignalFdLock);
1159 FdPairInfo signalFd;
1160 signalFd.call = FdPairInfo::kSocketpair;
1161 signalFd.FDs[0] = sv[0];
1162 signalFd.FDs[1] = sv[1];
1163 signalFd.domain = domain;
1164 signalFd.type = type;
1165 signalFd.protocol = protocol;
1167 sSignalFds.push_back(signalFd);
1168 pthread_mutex_unlock(&sSignalFdLock);
1170 return rv;
1171 }
1173 extern "C" MFBT_API int
1174 __wrap_pipe2(int __pipedes[2], int flags)
1175 {
1176 int rv = REAL(pipe2)(__pipedes, flags);
1177 if (!sIsNuwaProcess || rv < 0) {
1178 return rv;
1179 }
1181 REAL(pthread_mutex_lock)(&sSignalFdLock);
1182 FdPairInfo signalFd;
1183 signalFd.call = FdPairInfo::kPipe;
1184 signalFd.FDs[0] = __pipedes[0];
1185 signalFd.FDs[1] = __pipedes[1];
1186 signalFd.flags = flags;
1187 sSignalFds.push_back(signalFd);
1188 pthread_mutex_unlock(&sSignalFdLock);
1189 return rv;
1190 }
1192 extern "C" MFBT_API int
1193 __wrap_pipe(int __pipedes[2])
1194 {
1195 return __wrap_pipe2(__pipedes, 0);
1196 }
1198 static void
1199 DupeSingleFd(int newFd, int origFd)
1200 {
1201 struct stat sb;
1202 if (fstat(origFd, &sb)) {
1203 // Maybe the original FD is closed.
1204 return;
1205 }
1206 int fd = fcntl(origFd, F_GETFD);
1207 int fl = fcntl(origFd, F_GETFL);
1208 dup2(newFd, origFd);
1209 fcntl(origFd, F_SETFD, fd);
1210 fcntl(origFd, F_SETFL, fl);
1211 REAL(close)(newFd);
1212 }
1214 extern "C" MFBT_API void
1215 ReplaceSignalFds()
1216 {
1217 for (std::vector<FdPairInfo>::iterator it = sSignalFds.begin();
1218 it < sSignalFds.end(); ++it) {
1219 int fds[2];
1220 int rc = 0;
1221 switch (it->call) {
1222 case FdPairInfo::kPipe:
1223 rc = REAL(pipe2)(fds, it->flags);
1224 break;
1225 case FdPairInfo::kSocketpair:
1226 rc = REAL(socketpair)(it->domain, it->type, it->protocol, fds);
1227 break;
1228 default:
1229 continue;
1230 }
1232 if (rc == 0) {
1233 DupeSingleFd(fds[0], it->FDs[0]);
1234 DupeSingleFd(fds[1], it->FDs[1]);
1235 }
1236 }
1237 }
1239 extern "C" MFBT_API int
1240 __wrap_epoll_ctl(int aEpollFd, int aOp, int aFd, struct epoll_event *aEvent) {
1241 int rv = REAL(epoll_ctl)(aEpollFd, aOp, aFd, aEvent);
1243 if (!sIsNuwaProcess || rv == -1) {
1244 return rv;
1245 }
1247 EpollManager::EpollInfo *info =
1248 EpollManager::Singleton()->FindEpollInfo(aEpollFd);
1249 if (info == nullptr) {
1250 abort();
1251 }
1253 switch(aOp) {
1254 case EPOLL_CTL_ADD:
1255 info->AddEvents(aFd, *aEvent);
1256 break;
1258 case EPOLL_CTL_MOD:
1259 info->ModifyEvents(aFd, *aEvent);
1260 break;
1262 case EPOLL_CTL_DEL:
1263 info->RemoveEvents(aFd);
1264 break;
1266 default:
1267 abort();
1268 }
1270 return rv;
1271 }
1273 // XXX: thinker: Maybe, we should also track dup, dup2, and other functions.
1274 extern "C" MFBT_API int
1275 __wrap_close(int aFd) {
1276 int rv = REAL(close)(aFd);
1277 if (!sIsNuwaProcess || rv == -1) {
1278 return rv;
1279 }
1281 EpollManager::EpollInfo *info =
1282 EpollManager::Singleton()->FindEpollInfo(aFd);
1283 if (info) {
1284 EpollManager::Singleton()->RemoveEpollInfo(aFd);
1285 }
1287 return rv;
1288 }
1290 extern "C" MFBT_API int
1291 __wrap_tgkill(pid_t tgid, pid_t tid, int signalno)
1292 {
1293 if (sIsNuwaProcess) {
1294 return tgkill(tgid, tid, signalno);
1295 }
1297 if (tid == sMainThread.origNativeThreadID) {
1298 return tgkill(tgid, sMainThread.recreatedNativeThreadID, signalno);
1299 }
1301 thread_info_t *tinfo = (tid == sMainThread.origNativeThreadID ?
1302 &sMainThread :
1303 GetThreadInfo(tid));
1304 if (!tinfo) {
1305 return tgkill(tgid, tid, signalno);
1306 }
1308 return tgkill(tgid, tinfo->recreatedNativeThreadID, signalno);
1309 }
1311 static void *
1312 thread_recreate_startup(void *arg) {
1313 /*
1314 * Dark Art!! Never do the same unless you are ABSOLUTELY sure what you are
1315 * doing!
1316 *
1317 * The stack space collapsed by this frame had been reserved by
1318 * thread_create_startup(), and thread_create_startup() will
1319 * return immediately after returning from the real start routine, so
1320 * the collapsed values do not affect the result.
1321 *
1322 * All outer frames of thread_create_startup() and
1323 * thread_recreate_startup() are equivalent, so
1324 * thread_create_startup() will return successfully.
1325 */
1326 thread_info_t *tinfo = (thread_info_t *)arg;
1328 prctl(PR_SET_NAME, (unsigned long)&tinfo->nativeThreadName, 0, 0, 0);
1329 RestoreTLSInfo(tinfo);
1331 if (setjmp(tinfo->retEnv) != 0) {
1332 return nullptr;
1333 }
1335 // longjmp() to recreate the stack on the new thread.
1336 longjmp(tinfo->jmpEnv, 1);
1338 // Never go here!
1339 abort();
1341 return nullptr;
1342 }
1344 /**
1345 * Recreate the context given by tinfo on a new thread.
1346 */
1347 static void
1348 thread_recreate(thread_info_t *tinfo) {
1349 pthread_t thread;
1351 // Note that the thread_recreate_startup() runs on the stack specified by
1352 // tinfo.
1353 pthread_create(&thread, &tinfo->threadAttr, thread_recreate_startup, tinfo);
1354 }
1356 /**
1357 * Recreate all threads in a process forked from a Nuwa process.
1358 */
1359 static void
1360 RecreateThreads() {
1361 sIsNuwaProcess = false;
1362 sIsFreezing = false;
1364 sMainThread.recreatedThreadID = pthread_self();
1365 sMainThread.recreatedNativeThreadID = gettid();
1367 // Run registered constructors.
1368 for (std::vector<nuwa_construct_t>::iterator ctr = sConstructors.begin();
1369 ctr != sConstructors.end();
1370 ctr++) {
1371 (*ctr).construct((*ctr).arg);
1372 }
1373 sConstructors.clear();
1375 REAL(pthread_mutex_lock)(&sThreadCountLock);
1376 thread_info_t *tinfo = sAllThreads.getFirst();
1377 pthread_mutex_unlock(&sThreadCountLock);
1379 RECREATE_START();
1380 while (tinfo != nullptr) {
1381 if (tinfo->flags & TINFO_FLAG_NUWA_SUPPORT) {
1382 RECREATE_BEFORE(tinfo);
1383 thread_recreate(tinfo);
1384 RECREATE_WAIT();
1385 if (tinfo->reacquireMutex) {
1386 REAL(pthread_mutex_lock)(tinfo->reacquireMutex);
1387 }
1388 } else if(!(tinfo->flags & TINFO_FLAG_NUWA_SKIP)) {
1389 // An unmarked thread is found other than the main thread.
1391 // All threads should be marked as one of SUPPORT or SKIP, or
1392 // abort the process to make sure all threads in the Nuwa
1393 // process are Nuwa-aware.
1394 abort();
1395 }
1397 tinfo = tinfo->getNext();
1398 }
1399 RECREATE_WAIT_ALL_VIP();
1400 RECREATE_OPEN_GATE();
1402 RECREATE_FINISH();
1404 // Run registered final constructors.
1405 for (std::vector<nuwa_construct_t>::iterator ctr = sFinalConstructors.begin();
1406 ctr != sFinalConstructors.end();
1407 ctr++) {
1408 (*ctr).construct((*ctr).arg);
1409 }
1410 sFinalConstructors.clear();
1411 }
1413 extern "C" {
1415 /**
1416 * Recreate all epoll fds and restore their status, including all registered events.
1417 */
1418 static void
1419 RecreateEpollFds() {
1420 EpollManager *man = EpollManager::Singleton();
1422 for (EpollManager::const_iterator info_it = man->begin();
1423 info_it != man->end();
1424 info_it++) {
1425 int epollfd = info_it->first;
1426 const EpollManager::EpollInfo *info = &info_it->second;
1428 int fdflags = fcntl(epollfd, F_GETFD);
1429 if (fdflags == -1) {
1430 abort();
1431 }
1432 int fl = fcntl(epollfd, F_GETFL);
1433 if (fl == -1) {
1434 abort();
1435 }
1437 int newepollfd = REAL(epoll_create)(info->BackSize());
1438 if (newepollfd == -1) {
1439 abort();
1440 }
1441 int rv = REAL(close)(epollfd);
1442 if (rv == -1) {
1443 abort();
1444 }
1445 rv = dup2(newepollfd, epollfd);
1446 if (rv == -1) {
1447 abort();
1448 }
1449 rv = REAL(close)(newepollfd);
1450 if (rv == -1) {
1451 abort();
1452 }
1454 rv = fcntl(epollfd, F_SETFD, fdflags);
1455 if (rv == -1) {
1456 abort();
1457 }
1458 rv = fcntl(epollfd, F_SETFL, fl);
1459 if (rv == -1) {
1460 abort();
1461 }
1463 for (EpollManager::EpollInfo::const_iterator events_it = info->begin();
1464 events_it != info->end();
1465 events_it++) {
1466 int fd = events_it->first;
1467 epoll_event events;
1468 events = events_it->second;
1469 rv = REAL(epoll_ctl)(epollfd, EPOLL_CTL_ADD, fd, &events);
1470 if (rv == -1) {
1471 abort();
1472 }
1473 }
1474 }
1476 // Shutdown EpollManager. It won't be needed in the spawned process.
1477 EpollManager::Shutdown();
1478 }
1480 /**
1481 * Fix up IPC to make it ready for use.
1482 *
1483 * In particular, fix ContentChild.
1484 */
1485 static void
1486 ReplaceIPC(NuwaProtoFdInfo *aInfoList, int aInfoSize) {
1487 int i;
1488 int rv;
1490 for (i = 0; i < aInfoSize; i++) {
1491 int fd = fcntl(aInfoList[i].originFd, F_GETFD);
1492 if (fd == -1) {
1493 abort();
1494 }
1496 int fl = fcntl(aInfoList[i].originFd, F_GETFL);
1497 if (fl == -1) {
1498 abort();
1499 }
1501 rv = dup2(aInfoList[i].newFds[NUWA_NEWFD_CHILD], aInfoList[i].originFd);
1502 if (rv == -1) {
1503 abort();
1504 }
1506 rv = fcntl(aInfoList[i].originFd, F_SETFD, fd);
1507 if (rv == -1) {
1508 abort();
1509 }
1511 rv = fcntl(aInfoList[i].originFd, F_SETFL, fl);
1512 if (rv == -1) {
1513 abort();
1514 }
1515 }
1516 }
1518 /**
1519 * Add a new content process at the chrome process.
1520 */
1521 static void
1522 AddNewProcess(pid_t pid, NuwaProtoFdInfo *aInfoList, int aInfoSize) {
1523 static bool (*AddNewIPCProcess)(pid_t, NuwaProtoFdInfo *, int) = nullptr;
1525 if (AddNewIPCProcess == nullptr) {
1526 AddNewIPCProcess = (bool (*)(pid_t, NuwaProtoFdInfo *, int))
1527 dlsym(RTLD_DEFAULT, "AddNewIPCProcess");
1528 }
1529 AddNewIPCProcess(pid, aInfoList, aInfoSize);
1530 }
1532 static void
1533 PrepareProtoSockets(NuwaProtoFdInfo *aInfoList, int aInfoSize) {
1534 int i;
1535 int rv;
1537 for (i = 0; i < aInfoSize; i++) {
1538 rv = REAL(socketpair)(PF_UNIX, SOCK_STREAM, 0, aInfoList[i].newFds);
1539 if (rv == -1) {
1540 abort();
1541 }
1542 }
1543 }
1545 static void
1546 CloseAllProtoSockets(NuwaProtoFdInfo *aInfoList, int aInfoSize) {
1547 int i;
1549 for (i = 0; i < aInfoSize; i++) {
1550 REAL(close)(aInfoList[i].newFds[0]);
1551 REAL(close)(aInfoList[i].newFds[1]);
1552 }
1553 }
1555 static void
1556 AfterForkHook()
1557 {
1558 void (*AfterNuwaFork)();
1560 // This is defined in dom/ipc/ContentChild.cpp
1561 AfterNuwaFork = (void (*)())
1562 dlsym(RTLD_DEFAULT, "AfterNuwaFork");
1563 AfterNuwaFork();
1564 }
1566 /**
1567 * Fork a new process that is ready for running IPC.
1568 *
1569 * @return the PID of the new process.
1570 */
1571 static int
1572 ForkIPCProcess() {
1573 int pid;
1575 REAL(pthread_mutex_lock)(&sForkLock);
1577 PrepareProtoSockets(sProtoFdInfos, sProtoFdInfosSize);
1579 sNuwaForking = true;
1580 pid = fork();
1581 sNuwaForking = false;
1582 if (pid == -1) {
1583 abort();
1584 }
1586 if (pid > 0) {
1587 // in the parent
1588 AddNewProcess(pid, sProtoFdInfos, sProtoFdInfosSize);
1589 CloseAllProtoSockets(sProtoFdInfos, sProtoFdInfosSize);
1590 } else {
1591 // in the child
1592 if (getenv("MOZ_DEBUG_CHILD_PROCESS")) {
1593 printf("\n\nNUWA CHILDCHILDCHILDCHILD\n debug me @ %d\n\n", getpid());
1594 sleep(30);
1595 }
1596 AfterForkHook();
1597 ReplaceSignalFds();
1598 ReplaceIPC(sProtoFdInfos, sProtoFdInfosSize);
1599 RecreateEpollFds();
1600 RecreateThreads();
1601 CloseAllProtoSockets(sProtoFdInfos, sProtoFdInfosSize);
1602 }
1604 sForkWaitCondChanged = true;
1605 pthread_cond_signal(&sForkWaitCond);
1606 pthread_mutex_unlock(&sForkLock);
1608 return pid;
1609 }
1611 /**
1612 * Prepare for spawning a new process. Called on the IPC thread.
1613 */
1614 MFBT_API void
1615 NuwaSpawnPrepare() {
1616 REAL(pthread_mutex_lock)(&sForkLock);
1618 sForkWaitCondChanged = false; // Will be modified on the main thread.
1619 }
1621 /**
1622 * Let the IPC thread wait until the fork action on the main thread has completed.
1623 */
1624 MFBT_API void
1625 NuwaSpawnWait() {
1626 while (!sForkWaitCondChanged) {
1627 REAL(pthread_cond_wait)(&sForkWaitCond, &sForkLock);
1628 }
1629 pthread_mutex_unlock(&sForkLock);
1630 }
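// Illustrative ordering (comment only, an approximation): the IPC thread and
// the main thread hand off a spawn request roughly as follows:
//
//   // IPC thread                          // Main thread
//   NuwaSpawnPrepare();                    //
//   /* ask the main thread to spawn */     NuwaSpawn();  // forks, then sets
//   NuwaSpawnWait();                       //   sForkWaitCondChanged = true and
//                                          //   signals sForkWaitCond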
1632 /**
1633 * Spawn a new process. If not ready for spawn (still waiting for some threads
1634 * to freeze), postpone the spawn request until ready.
1635 *
1636 * @return the pid of the new process, or 0 if not ready.
1637 */
1638 MFBT_API pid_t
1639 NuwaSpawn() {
1640 if (gettid() != getpid()) {
1641 // Not the main thread.
1642 abort();
1643 }
1645 pid_t pid = 0;
1647 if (sNuwaReady) {
1648 pid = ForkIPCProcess();
1649 } else {
1650 sNuwaPendingSpawn = true;
1651 }
1653 return pid;
1654 }
1656 /**
1657 * Prepare to freeze the Nuwa-supporting threads.
1658 */
1659 MFBT_API void
1660 PrepareNuwaProcess() {
1661 sIsNuwaProcess = true;
1662 // Explicitly ignore SIGCHLD so we don't have to call waitpid() to reap
1663 // dead child processes.
1664 signal(SIGCHLD, SIG_IGN);
1666 // Make marked threads block in one freeze point.
1667 REAL(pthread_mutex_lock)(&sThreadFreezeLock);
1669 // Populate sMainThread for mapping of tgkill.
1670 sMainThread.origThreadID = pthread_self();
1671 sMainThread.origNativeThreadID = gettid();
1672 }
1674 // Make the current process a Nuwa process.
1675 MFBT_API void
1676 MakeNuwaProcess() {
1677 void (*GetProtoFdInfos)(NuwaProtoFdInfo *, int, int *) = nullptr;
1678 void (*OnNuwaProcessReady)() = nullptr;
1679 sIsFreezing = true;
1681 REAL(pthread_mutex_lock)(&sThreadCountLock);
1683 // Wait until all threads are frozen.
1684 while ((sThreadFreezeCount + sThreadSkipCount) != sThreadCount) {
1685 REAL(pthread_cond_wait)(&sThreadChangeCond, &sThreadCountLock);
1686 }
1688 GetProtoFdInfos = (void (*)(NuwaProtoFdInfo *, int, int *))
1689 dlsym(RTLD_DEFAULT, "GetProtoFdInfos");
1690 GetProtoFdInfos(sProtoFdInfos, NUWA_TOPLEVEL_MAX, &sProtoFdInfosSize);
1692 sNuwaReady = true;
1694 pthread_mutex_unlock(&sThreadCountLock);
1696 OnNuwaProcessReady = (void (*)())dlsym(RTLD_DEFAULT, "OnNuwaProcessReady");
1697 OnNuwaProcessReady();
1699 if (sNuwaPendingSpawn) {
1700 sNuwaPendingSpawn = false;
1701 NuwaSpawn();
1702 }
1703 }
1705 /**
1706 * Mark the current thread as supporting Nuwa. The thread will be recreated in
1707 * the spawned process.
1708 */
1709 MFBT_API void
1710 NuwaMarkCurrentThread(void (*recreate)(void *), void *arg) {
1711 if (!sIsNuwaProcess) {
1712 return;
1713 }
1715 thread_info_t *tinfo = CUR_THREAD_INFO;
1716 if (tinfo == nullptr) {
1717 abort();
1718 }
1720 tinfo->flags |= TINFO_FLAG_NUWA_SUPPORT;
1721 tinfo->recrFunc = recreate;
1722 tinfo->recrArg = arg;
1724 // XXX Thread name might be set later than this call. If this is the case, we
1725 // might need to delay getting the thread name.
1726 prctl(PR_GET_NAME, (unsigned long)&tinfo->nativeThreadName, 0, 0, 0);
1727 }
1729 /**
1730 * Mark the current thread as not supporting Nuwa. Don't recreate this thread in
1731 * the spawned process.
1732 */
1733 MFBT_API void
1734 NuwaSkipCurrentThread() {
1735 if (!sIsNuwaProcess) return;
1737 thread_info_t *tinfo = CUR_THREAD_INFO;
1738 if (tinfo == nullptr) {
1739 abort();
1740 }
1742 if (!(tinfo->flags & TINFO_FLAG_NUWA_SKIP)) {
1743 sThreadSkipCount++;
1744 }
1745 tinfo->flags |= TINFO_FLAG_NUWA_SKIP;
1746 }
1748 /**
1749 * Force the current thread to freeze.
1750 *
1751 * This method does not return in the Nuwa process. It returns only for the
1752 * recreated thread.
1753 */
1754 MFBT_API void
1755 NuwaFreezeCurrentThread() {
1756 thread_info_t *tinfo = CUR_THREAD_INFO;
1757 if (sIsNuwaProcess &&
1758 (tinfo = CUR_THREAD_INFO) &&
1759 (tinfo->flags & TINFO_FLAG_NUWA_SUPPORT)) {
1760 if (!setjmp(tinfo->jmpEnv)) {
1761 REAL(pthread_mutex_lock)(&sThreadCountLock);
1762 SaveTLSInfo(tinfo);
1763 sThreadFreezeCount++;
1764 pthread_cond_signal(&sThreadChangeCond);
1765 pthread_mutex_unlock(&sThreadCountLock);
1767 REAL(pthread_mutex_lock)(&sThreadFreezeLock);
1768 } else {
1769 RECREATE_CONTINUE();
1770 RECREATE_GATE();
1771 }
1772 }
1773 }
1775 /**
1776 * NuwaCheckpointCurrentThread() is called at the line the caller wishes to
1777 * return to after the thread is recreated.
1778 *
1779 * The checkpointed thread will restart at the calling line of
1780 * NuwaCheckpointCurrentThread(). This macro returns true in the Nuwa process
1781 * and false on the recreated thread in the forked process.
1782 *
1783 * NuwaCheckpointCurrentThread() is implemented as a macro so we can place the
1784 * setjmp() call in the calling method without changing its stack pointer. This
1785 * is essential for not corrupting the stack when the calling thread continues
1786 * to ask the main thread to fork a new process. The caller of
1787 * NuwaCheckpointCurrentThread() should not return before the process forking
1788 * finishes.
1789 *
1790 * @return true for Nuwa process, and false in the forked process.
1791 */
1792 MFBT_API jmp_buf*
1793 NuwaCheckpointCurrentThread1() {
1794 thread_info_t *tinfo = CUR_THREAD_INFO;
1795 if (sIsNuwaProcess &&
1796 (tinfo = CUR_THREAD_INFO) &&
1797 (tinfo->flags & TINFO_FLAG_NUWA_SUPPORT)) {
1798 return &tinfo->jmpEnv;
1799 }
1800 abort();
1801 return nullptr;
1802 }
1804 MFBT_API bool
1805 NuwaCheckpointCurrentThread2(int setjmpCond) {
1806 thread_info_t *tinfo = CUR_THREAD_INFO;
1807 if (setjmpCond == 0) {
1808 REAL(pthread_mutex_lock)(&sThreadCountLock);
1809 if (!(tinfo->flags & TINFO_FLAG_NUWA_EXPLICIT_CHECKPOINT)) {
1810 tinfo->flags |= TINFO_FLAG_NUWA_EXPLICIT_CHECKPOINT;
1811 SaveTLSInfo(tinfo);
1812 sThreadFreezeCount++;
1813 }
1814 pthread_cond_signal(&sThreadChangeCond);
1815 pthread_mutex_unlock(&sThreadCountLock);
1816 return true;
1817 }
1818 RECREATE_CONTINUE();
1819 RECREATE_GATE();
1820 return false; // Recreated thread.
1821 }
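// A minimal sketch (an assumption: the real macro lives in Nuwa.h and may
// differ in detail) of how the two helpers above are glued together so that
// setjmp() executes in the caller's own stack frame:
//
//   #define NuwaCheckpointCurrentThread() \
//     NuwaCheckpointCurrentThread2(setjmp(*NuwaCheckpointCurrentThread1()))
//
//   // if (NuwaCheckpointCurrentThread()) { /* still in the Nuwa process */ }
//   // else { /* recreated thread in the forked process */ }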
1823 /**
1824 * Register methods to be invoked before recreating threads in the spawned
1825 * process.
1826 */
1827 MFBT_API void
1828 NuwaAddConstructor(void (*construct)(void *), void *arg) {
1829 nuwa_construct_t ctr;
1830 ctr.construct = construct;
1831 ctr.arg = arg;
1832 sConstructors.push_back(ctr);
1833 }
1835 /**
1836 * Register methods to be invoked after recreating threads in the spawned
1837 * process.
1838 */
1839 MFBT_API void
1840 NuwaAddFinalConstructor(void (*construct)(void *), void *arg) {
1841 nuwa_construct_t ctr;
1842 ctr.construct = construct;
1843 ctr.arg = arg;
1844 sFinalConstructors.push_back(ctr);
1845 }
1847 /**
1848 * @return whether the current process is the Nuwa process.
1849 */
1850 MFBT_API bool
1851 IsNuwaProcess() {
1852 return sIsNuwaProcess;
1853 }
1855 /**
1856 * @return whether the Nuwa process is ready for spawning new processes.
1857 */
1858 MFBT_API bool
1859 IsNuwaReady() {
1860 return sNuwaReady;
1861 }
1863 } // extern "C"