michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set ts=8 sts=2 et sw=2 tw=80: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this file, michael@0: * You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "mozilla/Sandbox.h" michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "mozilla/Atomics.h" michael@0: #include "mozilla/NullPtr.h" michael@0: #include "mozilla/unused.h" michael@0: #include "mozilla/dom/Exceptions.h" michael@0: #include "nsString.h" michael@0: #include "nsThreadUtils.h" michael@0: michael@0: #ifdef MOZ_CRASHREPORTER michael@0: #include "nsExceptionHandler.h" michael@0: #endif michael@0: michael@0: #if defined(ANDROID) michael@0: #include "android_ucontext.h" michael@0: #include michael@0: #endif michael@0: michael@0: #if defined(MOZ_CONTENT_SANDBOX) michael@0: #include "linux_seccomp.h" michael@0: #include "SandboxFilter.h" michael@0: #endif michael@0: michael@0: #ifdef MOZ_LOGGING michael@0: #define FORCE_PR_LOG 1 michael@0: #endif michael@0: #include "prlog.h" michael@0: #include "prenv.h" michael@0: michael@0: namespace mozilla { michael@0: #if defined(ANDROID) michael@0: #define LOG_ERROR(args...) __android_log_print(ANDROID_LOG_ERROR, "Sandbox", ## args) michael@0: #elif defined(PR_LOGGING) michael@0: static PRLogModuleInfo* gSeccompSandboxLog; michael@0: #define LOG_ERROR(args...) PR_LOG(gSeccompSandboxLog, PR_LOG_ERROR, (args)) michael@0: #else michael@0: #define LOG_ERROR(args...) michael@0: #endif michael@0: michael@0: /** michael@0: * Log JS stack info in the same place as the sandbox violation michael@0: * message. Useful in case the responsible code is JS and all we have michael@0: * are logs and a minidump with the C++ stacks (e.g., on TBPL). michael@0: */ michael@0: static void michael@0: SandboxLogJSStack(void) michael@0: { michael@0: if (!NS_IsMainThread()) { michael@0: // This might be a worker thread... or it might be a non-JS michael@0: // thread, or a non-NSPR thread. There's isn't a good API for michael@0: // dealing with this, yet. michael@0: return; michael@0: } michael@0: nsCOMPtr frame = dom::GetCurrentJSStack(); michael@0: for (int i = 0; frame != nullptr; ++i) { michael@0: nsAutoString fileName, funName; michael@0: int32_t lineNumber; michael@0: michael@0: // Don't stop unwinding if an attribute can't be read. michael@0: fileName.SetIsVoid(true); michael@0: unused << frame->GetFilename(fileName); michael@0: lineNumber = 0; michael@0: unused << frame->GetLineNumber(&lineNumber); michael@0: funName.SetIsVoid(true); michael@0: unused << frame->GetName(funName); michael@0: michael@0: if (!funName.IsVoid() || !fileName.IsVoid()) { michael@0: LOG_ERROR("JS frame %d: %s %s line %d", i, michael@0: funName.IsVoid() ? michael@0: "(anonymous)" : NS_ConvertUTF16toUTF8(funName).get(), michael@0: fileName.IsVoid() ? michael@0: "(no file)" : NS_ConvertUTF16toUTF8(fileName).get(), michael@0: lineNumber); michael@0: } michael@0: michael@0: nsCOMPtr nextFrame; michael@0: nsresult rv = frame->GetCaller(getter_AddRefs(nextFrame)); michael@0: NS_ENSURE_SUCCESS_VOID(rv); michael@0: frame = nextFrame; michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * This is the SIGSYS handler function. It is used to report to the user michael@0: * which system call has been denied by Seccomp. michael@0: * This function also makes the process exit as denying the system call michael@0: * will otherwise generally lead to unexpected behavior from the process, michael@0: * since we don't know if all functions will handle such denials gracefully. michael@0: * michael@0: * @see InstallSyscallReporter() function. michael@0: */ michael@0: #ifdef MOZ_CONTENT_SANDBOX_REPORTER michael@0: static void michael@0: Reporter(int nr, siginfo_t *info, void *void_context) michael@0: { michael@0: ucontext_t *ctx = static_cast(void_context); michael@0: unsigned long syscall_nr, args[6]; michael@0: pid_t pid = getpid(), tid = syscall(__NR_gettid); michael@0: michael@0: if (nr != SIGSYS) { michael@0: return; michael@0: } michael@0: if (info->si_code != SYS_SECCOMP) { michael@0: return; michael@0: } michael@0: if (!ctx) { michael@0: return; michael@0: } michael@0: michael@0: syscall_nr = SECCOMP_SYSCALL(ctx); michael@0: args[0] = SECCOMP_PARM1(ctx); michael@0: args[1] = SECCOMP_PARM2(ctx); michael@0: args[2] = SECCOMP_PARM3(ctx); michael@0: args[3] = SECCOMP_PARM4(ctx); michael@0: args[4] = SECCOMP_PARM5(ctx); michael@0: args[5] = SECCOMP_PARM6(ctx); michael@0: michael@0: LOG_ERROR("seccomp sandbox violation: pid %d, syscall %lu, args %lu %lu %lu" michael@0: " %lu %lu %lu. Killing process.", pid, syscall_nr, michael@0: args[0], args[1], args[2], args[3], args[4], args[5]); michael@0: michael@0: #ifdef MOZ_CRASHREPORTER michael@0: bool dumped = CrashReporter::WriteMinidumpForSigInfo(nr, info, void_context); michael@0: if (!dumped) { michael@0: LOG_ERROR("Failed to write minidump"); michael@0: } michael@0: #endif michael@0: michael@0: // Do this last, in case it crashes or deadlocks. michael@0: SandboxLogJSStack(); michael@0: michael@0: // Try to reraise, so the parent sees that this process crashed. michael@0: // (If tgkill is forbidden, then seccomp will raise SIGSYS, which michael@0: // also accomplishes that goal.) michael@0: signal(SIGSYS, SIG_DFL); michael@0: syscall(__NR_tgkill, pid, tid, nr); michael@0: _exit(127); michael@0: } michael@0: michael@0: /** michael@0: * The reporter is called when the process receives a SIGSYS signal. michael@0: * The signal is sent by the kernel when Seccomp encounter a system call michael@0: * that has not been allowed. michael@0: * We register an action for that signal (calling the Reporter function). michael@0: * michael@0: * This function should not be used in production and thus generally be michael@0: * called from debug code. In production, the process is directly killed. michael@0: * For this reason, the function is ifdef'd, as there is no reason to michael@0: * compile it while unused. michael@0: * michael@0: * @return 0 on success, -1 on failure. michael@0: * @see Reporter() function. michael@0: */ michael@0: static int michael@0: InstallSyscallReporter(void) michael@0: { michael@0: struct sigaction act; michael@0: sigset_t mask; michael@0: memset(&act, 0, sizeof(act)); michael@0: sigemptyset(&mask); michael@0: sigaddset(&mask, SIGSYS); michael@0: michael@0: act.sa_sigaction = &Reporter; michael@0: act.sa_flags = SA_SIGINFO | SA_NODEFER; michael@0: if (sigaction(SIGSYS, &act, nullptr) < 0) { michael@0: return -1; michael@0: } michael@0: if (sigemptyset(&mask) || michael@0: sigaddset(&mask, SIGSYS) || michael@0: sigprocmask(SIG_UNBLOCK, &mask, nullptr)) { michael@0: return -1; michael@0: } michael@0: return 0; michael@0: } michael@0: #endif michael@0: michael@0: /** michael@0: * This function installs the syscall filter, a.k.a. seccomp. michael@0: * PR_SET_NO_NEW_PRIVS ensures that it is impossible to grant more michael@0: * syscalls to the process beyond this point (even after fork()). michael@0: * SECCOMP_MODE_FILTER is the "bpf" mode of seccomp which allows michael@0: * to pass a bpf program (in our case, it contains a syscall michael@0: * whitelist). michael@0: * michael@0: * @return 0 on success, 1 on failure. michael@0: * @see sock_fprog (the seccomp_prog). michael@0: */ michael@0: static int michael@0: InstallSyscallFilter(void) michael@0: { michael@0: #ifdef MOZ_DMD michael@0: char* e = PR_GetEnv("DMD"); michael@0: if (e && strcmp(e, "") != 0 && strcmp(e, "0") != 0) { michael@0: LOG_ERROR("SANDBOX DISABLED FOR DMD! See bug 956961."); michael@0: // Must treat this as "failure" in order to prevent infinite loop; michael@0: // cf. the PR_GET_SECCOMP check below. michael@0: return 1; michael@0: } michael@0: #endif michael@0: if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { michael@0: return 1; michael@0: } michael@0: michael@0: const sock_fprog *filter = GetSandboxFilter(); michael@0: michael@0: if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, (unsigned long)filter, 0, 0)) { michael@0: return 1; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: // Use signals for permissions that need to be set per-thread. michael@0: // The communication channel from the signal handler back to the main thread. michael@0: static mozilla::Atomic sSetSandboxDone; michael@0: // about:memory has the first 3 RT signals. (We should allocate michael@0: // signals centrally instead of hard-coding them like this.) michael@0: static const int sSetSandboxSignum = SIGRTMIN + 3; michael@0: michael@0: static bool michael@0: SetThreadSandbox() michael@0: { michael@0: bool didAnything = false; michael@0: michael@0: if (PR_GetEnv("MOZ_DISABLE_CONTENT_SANDBOX") == nullptr && michael@0: prctl(PR_GET_SECCOMP, 0, 0, 0, 0) == 0) { michael@0: if (InstallSyscallFilter() == 0) { michael@0: didAnything = true; michael@0: } michael@0: /* michael@0: * Bug 880797: when all B2G devices are required to support michael@0: * seccomp-bpf, this should exit/crash if InstallSyscallFilter michael@0: * returns nonzero (ifdef MOZ_WIDGET_GONK). michael@0: */ michael@0: } michael@0: return didAnything; michael@0: } michael@0: michael@0: static void michael@0: SetThreadSandboxHandler(int signum) michael@0: { michael@0: // The non-zero number sent back to the main thread indicates michael@0: // whether action was taken. michael@0: if (SetThreadSandbox()) { michael@0: sSetSandboxDone = 2; michael@0: } else { michael@0: sSetSandboxDone = 1; michael@0: } michael@0: // Wake up the main thread. See the FUTEX_WAIT call, below, for an michael@0: // explanation. michael@0: syscall(__NR_futex, reinterpret_cast(&sSetSandboxDone), michael@0: FUTEX_WAKE, 1); michael@0: } michael@0: michael@0: static void michael@0: BroadcastSetThreadSandbox() michael@0: { michael@0: pid_t pid, tid; michael@0: DIR *taskdp; michael@0: struct dirent *de; michael@0: michael@0: static_assert(sizeof(mozilla::Atomic) == sizeof(int), michael@0: "mozilla::Atomic isn't represented by an int"); michael@0: MOZ_ASSERT(NS_IsMainThread()); michael@0: pid = getpid(); michael@0: taskdp = opendir("/proc/self/task"); michael@0: if (taskdp == nullptr) { michael@0: LOG_ERROR("opendir /proc/self/task: %s\n", strerror(errno)); michael@0: MOZ_CRASH(); michael@0: } michael@0: if (signal(sSetSandboxSignum, SetThreadSandboxHandler) != SIG_DFL) { michael@0: LOG_ERROR("signal %d in use!\n", sSetSandboxSignum); michael@0: MOZ_CRASH(); michael@0: } michael@0: michael@0: // In case this races with a not-yet-deprivileged thread cloning michael@0: // itself, repeat iterating over all threads until we find none michael@0: // that are still privileged. michael@0: bool sandboxProgress; michael@0: do { michael@0: sandboxProgress = false; michael@0: // For each thread... michael@0: while ((de = readdir(taskdp))) { michael@0: char *endptr; michael@0: tid = strtol(de->d_name, &endptr, 10); michael@0: if (*endptr != '\0' || tid <= 0) { michael@0: // Not a task ID. michael@0: continue; michael@0: } michael@0: if (tid == pid) { michael@0: // Drop the main thread's privileges last, below, so michael@0: // we can continue to signal other threads. michael@0: continue; michael@0: } michael@0: // Reset the futex cell and signal. michael@0: sSetSandboxDone = 0; michael@0: if (syscall(__NR_tgkill, pid, tid, sSetSandboxSignum) != 0) { michael@0: if (errno == ESRCH) { michael@0: LOG_ERROR("Thread %d unexpectedly exited.", tid); michael@0: // Rescan threads, in case it forked before exiting. michael@0: sandboxProgress = true; michael@0: continue; michael@0: } michael@0: LOG_ERROR("tgkill(%d,%d): %s\n", pid, tid, strerror(errno)); michael@0: MOZ_CRASH(); michael@0: } michael@0: // It's unlikely, but if the thread somehow manages to exit michael@0: // after receiving the signal but before entering the signal michael@0: // handler, we need to avoid blocking forever. michael@0: // michael@0: // Using futex directly lets the signal handler send the wakeup michael@0: // from an async signal handler (pthread mutex/condvar calls michael@0: // aren't allowed), and to use a relative timeout that isn't michael@0: // affected by changes to the system clock (not possible with michael@0: // POSIX semaphores). michael@0: // michael@0: // If a thread doesn't respond within a reasonable amount of michael@0: // time, but still exists, we crash -- the alternative is either michael@0: // blocking forever or silently losing security, and it michael@0: // shouldn't actually happen. michael@0: static const int crashDelay = 10; // seconds michael@0: struct timespec timeLimit; michael@0: clock_gettime(CLOCK_MONOTONIC, &timeLimit); michael@0: timeLimit.tv_sec += crashDelay; michael@0: while (true) { michael@0: static const struct timespec futexTimeout = { 0, 10*1000*1000 }; // 10ms michael@0: // Atomically: if sSetSandboxDone == 0, then sleep. michael@0: if (syscall(__NR_futex, reinterpret_cast(&sSetSandboxDone), michael@0: FUTEX_WAIT, 0, &futexTimeout) != 0) { michael@0: if (errno != EWOULDBLOCK && errno != ETIMEDOUT && errno != EINTR) { michael@0: LOG_ERROR("FUTEX_WAIT: %s\n", strerror(errno)); michael@0: MOZ_CRASH(); michael@0: } michael@0: } michael@0: // Did the handler finish? michael@0: if (sSetSandboxDone > 0) { michael@0: if (sSetSandboxDone == 2) { michael@0: sandboxProgress = true; michael@0: } michael@0: break; michael@0: } michael@0: // Has the thread ceased to exist? michael@0: if (syscall(__NR_tgkill, pid, tid, 0) != 0) { michael@0: if (errno == ESRCH) { michael@0: LOG_ERROR("Thread %d unexpectedly exited.", tid); michael@0: } michael@0: // Rescan threads, in case it forked before exiting. michael@0: // Also, if it somehow failed in a way that wasn't ESRCH, michael@0: // and still exists, that will be handled on the next pass. michael@0: sandboxProgress = true; michael@0: break; michael@0: } michael@0: struct timespec now; michael@0: clock_gettime(CLOCK_MONOTONIC, &now); michael@0: if (now.tv_sec > timeLimit.tv_nsec || michael@0: (now.tv_sec == timeLimit.tv_nsec && michael@0: now.tv_nsec > timeLimit.tv_nsec)) { michael@0: LOG_ERROR("Thread %d unresponsive for %d seconds. Killing process.", michael@0: tid, crashDelay); michael@0: MOZ_CRASH(); michael@0: } michael@0: } michael@0: } michael@0: rewinddir(taskdp); michael@0: } while (sandboxProgress); michael@0: unused << signal(sSetSandboxSignum, SIG_DFL); michael@0: unused << closedir(taskdp); michael@0: // And now, deprivilege the main thread: michael@0: SetThreadSandbox(); michael@0: } michael@0: michael@0: // This function can overapproximate (i.e., return true even if michael@0: // sandboxing isn't supported, but not the reverse). See bug 993145. michael@0: static bool michael@0: IsSandboxingSupported(void) michael@0: { michael@0: return prctl(PR_GET_SECCOMP) != -1; michael@0: } michael@0: michael@0: /** michael@0: * Starts the seccomp sandbox for this process and sets user/group-based privileges. michael@0: * Should be called only once, and before any potentially harmful content is loaded. michael@0: * michael@0: * Should normally make the process exit on failure. michael@0: */ michael@0: void michael@0: SetCurrentProcessSandbox() michael@0: { michael@0: #if !defined(ANDROID) && defined(PR_LOGGING) michael@0: if (!gSeccompSandboxLog) { michael@0: gSeccompSandboxLog = PR_NewLogModule("SeccompSandbox"); michael@0: } michael@0: PR_ASSERT(gSeccompSandboxLog); michael@0: #endif michael@0: michael@0: #if defined(MOZ_CONTENT_SANDBOX_REPORTER) michael@0: if (InstallSyscallReporter()) { michael@0: LOG_ERROR("install_syscall_reporter() failed\n"); michael@0: } michael@0: #endif michael@0: michael@0: if (IsSandboxingSupported()) { michael@0: BroadcastSetThreadSandbox(); michael@0: } michael@0: } michael@0: michael@0: } // namespace mozilla michael@0: