security/sandbox/linux/Sandbox.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/sandbox/linux/Sandbox.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,427 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this file,
     1.8 + * You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#include "mozilla/Sandbox.h"
    1.11 +
    1.12 +#include <unistd.h>
    1.13 +#include <stdio.h>
    1.14 +#include <sys/ptrace.h>
    1.15 +#include <sys/prctl.h>
    1.16 +#include <sys/syscall.h>
    1.17 +#include <signal.h>
    1.18 +#include <string.h>
    1.19 +#include <linux/futex.h>
    1.20 +#include <sys/time.h>
    1.21 +#include <dirent.h>
    1.22 +#include <stdlib.h>
    1.23 +#include <pthread.h>
    1.24 +#include <errno.h>
    1.25 +
    1.26 +#include "mozilla/Atomics.h"
    1.27 +#include "mozilla/NullPtr.h"
    1.28 +#include "mozilla/unused.h"
    1.29 +#include "mozilla/dom/Exceptions.h"
    1.30 +#include "nsString.h"
    1.31 +#include "nsThreadUtils.h"
    1.32 +
    1.33 +#ifdef MOZ_CRASHREPORTER
    1.34 +#include "nsExceptionHandler.h"
    1.35 +#endif
    1.36 +
    1.37 +#if defined(ANDROID)
    1.38 +#include "android_ucontext.h"
    1.39 +#include <android/log.h>
    1.40 +#endif
    1.41 +
    1.42 +#if defined(MOZ_CONTENT_SANDBOX)
    1.43 +#include "linux_seccomp.h"
    1.44 +#include "SandboxFilter.h"
    1.45 +#endif
    1.46 +
    1.47 +#ifdef MOZ_LOGGING
    1.48 +#define FORCE_PR_LOG 1
    1.49 +#endif
    1.50 +#include "prlog.h"
    1.51 +#include "prenv.h"
    1.52 +
    1.53 +namespace mozilla {
    1.54 +#if defined(ANDROID) // LOG_ERROR(fmt, ...): printf-style error logging; backend picked at compile time.
    1.55 +#define LOG_ERROR(args...) __android_log_print(ANDROID_LOG_ERROR, "Sandbox", ## args)
    1.56 +#elif defined(PR_LOGGING)
    1.57 +static PRLogModuleInfo* gSeccompSandboxLog; // Created on first use in SetCurrentProcessSandbox().
    1.58 +#define LOG_ERROR(args...) PR_LOG(gSeccompSandboxLog, PR_LOG_ERROR, (args))
    1.59 +#else // No logging backend: LOG_ERROR expands to nothing.
    1.60 +#define LOG_ERROR(args...)
    1.61 +#endif // ANDROID / PR_LOGGING
    1.62 +
    1.63 +/**
    1.64 + * Log the current JS stack, one LOG_ERROR line per frame, in the same
    1.65 + * place as the sandbox violation message.  Useful when the responsible
    1.66 + * code is JS and all we have are logs and a C++-only minidump (e.g., TBPL).
    1.67 + */
    1.68 +static void
    1.69 +SandboxLogJSStack(void)
    1.70 +{
    1.71 +  if (!NS_IsMainThread()) {
    1.72 +    // This might be a worker thread... or it might be a non-JS
    1.73 +    // thread, or a non-NSPR thread.  There isn't a good API for
    1.74 +    // dealing with this yet, so nothing is logged off the main thread.
    1.75 +    return;
    1.76 +  }
    1.77 +  nsCOMPtr<nsIStackFrame> frame = dom::GetCurrentJSStack();
    1.78 +  for (int i = 0; frame != nullptr; ++i) { // i is the frame index printed in the log line
    1.79 +    nsAutoString fileName, funName;
    1.80 +    int32_t lineNumber;
    1.81 +
    1.82 +    // Don't stop unwinding if an attribute can't be read: getter results are ignored.
    1.83 +    fileName.SetIsVoid(true); // pre-marked void; tested with IsVoid() below
    1.84 +    unused << frame->GetFilename(fileName);
    1.85 +    lineNumber = 0; // default printed if the line number is not filled in
    1.86 +    unused << frame->GetLineNumber(&lineNumber);
    1.87 +    funName.SetIsVoid(true); // pre-marked void; tested with IsVoid() below
    1.88 +    unused << frame->GetName(funName);
    1.89 +
    1.90 +    if (!funName.IsVoid() || !fileName.IsVoid()) { // skip frames with neither a name nor a file
    1.91 +      LOG_ERROR("JS frame %d: %s %s line %d", i,
    1.92 +                funName.IsVoid() ?
    1.93 +                  "(anonymous)" : NS_ConvertUTF16toUTF8(funName).get(),
    1.94 +                fileName.IsVoid() ?
    1.95 +                  "(no file)" : NS_ConvertUTF16toUTF8(fileName).get(),
    1.96 +                lineNumber);
    1.97 +    }
    1.98 +
    1.99 +    nsCOMPtr<nsIStackFrame> nextFrame;
   1.100 +    nsresult rv = frame->GetCaller(getter_AddRefs(nextFrame));
   1.101 +    NS_ENSURE_SUCCESS_VOID(rv);
   1.102 +    frame = nextFrame; // a null caller ends the loop
   1.103 +  }
   1.104 +}
   1.105 +
   1.106 +/**
   1.107 + * SIGSYS handler: reports which system call was denied by seccomp and
   1.108 + * then terminates the process, since we don't know whether callers
   1.109 + * handle such denials gracefully.
   1.110 + * @param nr            signal number; anything other than SIGSYS is ignored.
   1.111 + * @param info          signal info; ignored unless si_code is SYS_SECCOMP.
   1.112 + * @param void_context  ucontext_t* from which the syscall number/args are read.
   1.113 + * @see InstallSyscallReporter() function.
   1.114 + */
   1.115 +#ifdef MOZ_CONTENT_SANDBOX_REPORTER
   1.116 +static void
   1.117 +Reporter(int nr, siginfo_t *info, void *void_context)
   1.118 +{
   1.119 +  ucontext_t *ctx = static_cast<ucontext_t*>(void_context);
   1.120 +  unsigned long syscall_nr, args[6];
   1.121 +  pid_t pid = getpid(), tid = syscall(__NR_gettid); // tid is needed for the tgkill re-raise below
   1.122 +
   1.123 +  if (nr != SIGSYS) {
   1.124 +    return;
   1.125 +  }
   1.126 +  if (info->si_code != SYS_SECCOMP) { // SIGSYS not raised by the seccomp filter
   1.127 +    return;
   1.128 +  }
   1.129 +  if (!ctx) {
   1.130 +    return;
   1.131 +  }
   1.132 +
   1.133 +  syscall_nr = SECCOMP_SYSCALL(ctx); // SECCOMP_* macros extract values from the saved context
   1.134 +  args[0] = SECCOMP_PARM1(ctx);
   1.135 +  args[1] = SECCOMP_PARM2(ctx);
   1.136 +  args[2] = SECCOMP_PARM3(ctx);
   1.137 +  args[3] = SECCOMP_PARM4(ctx);
   1.138 +  args[4] = SECCOMP_PARM5(ctx);
   1.139 +  args[5] = SECCOMP_PARM6(ctx);
   1.140 +
   1.141 +  LOG_ERROR("seccomp sandbox violation: pid %d, syscall %lu, args %lu %lu %lu"
   1.142 +            " %lu %lu %lu.  Killing process.", pid, syscall_nr,
   1.143 +            args[0], args[1], args[2], args[3], args[4], args[5]);
   1.144 +
   1.145 +#ifdef MOZ_CRASHREPORTER
   1.146 +  bool dumped = CrashReporter::WriteMinidumpForSigInfo(nr, info, void_context);
   1.147 +  if (!dumped) {
   1.148 +    LOG_ERROR("Failed to write minidump");
   1.149 +  }
   1.150 +#endif
   1.151 +
   1.152 +  // Do this last, in case it crashes or deadlocks.
   1.153 +  SandboxLogJSStack();
   1.154 +
   1.155 +  // Try to reraise, so the parent sees that this process crashed.
   1.156 +  // (If tgkill is forbidden, then seccomp will raise SIGSYS, which
   1.157 +  // also accomplishes that goal.)
   1.158 +  signal(SIGSYS, SIG_DFL); // restore the default action before re-raising
   1.159 +  syscall(__NR_tgkill, pid, tid, nr);
   1.160 +  _exit(127); // reached only if the re-raised signal did not end the process
   1.161 +}
   1.162 +
   1.163 +/**
   1.164 + * The reporter is called when the process receives a SIGSYS signal.
   1.165 + * The signal is sent by the kernel when seccomp encounters a system call
   1.166 + * that has not been allowed.
   1.167 + * We register an action for that signal (calling the Reporter function).
   1.168 + *
   1.169 + * This function should not be used in production and thus generally be
   1.170 + * called from debug code. In production, the process is directly killed.
   1.171 + * For this reason, the function is ifdef'd, as there is no reason to
   1.172 + * compile it while unused.
   1.173 + *
   1.174 + * @return 0 on success, -1 on failure.
   1.175 + * @see Reporter() function.
   1.176 + */
   1.177 +static int
   1.178 +InstallSyscallReporter(void)
   1.179 +{
   1.180 +  struct sigaction act;
   1.181 +  sigset_t mask;
   1.182 +  memset(&act, 0, sizeof(act));
   1.183 +  sigemptyset(&mask); // NOTE(review): mask is rebuilt below and never copied into act.sa_mask,
   1.184 +  sigaddset(&mask, SIGSYS); // so this first setup looks redundant -- confirm before removing.
   1.185 +
   1.186 +  act.sa_sigaction = &Reporter;
   1.187 +  act.sa_flags = SA_SIGINFO | SA_NODEFER; // three-argument handler; SIGSYS not blocked while it runs
   1.188 +  if (sigaction(SIGSYS, &act, nullptr) < 0) {
   1.189 +    return -1;
   1.190 +  }
   1.191 +  if (sigemptyset(&mask) || // make sure SIGSYS is unblocked so the handler can be delivered
   1.192 +    sigaddset(&mask, SIGSYS) ||
   1.193 +    sigprocmask(SIG_UNBLOCK, &mask, nullptr)) {
   1.194 +      return -1;
   1.195 +  }
   1.196 +  return 0;
   1.197 +}
   1.198 +#endif
   1.199 +
   1.200 +/**
   1.201 + * This function installs the syscall filter, a.k.a. seccomp.
   1.202 + * PR_SET_NO_NEW_PRIVS ensures that it is impossible to grant more
   1.203 + * syscalls to the process beyond this point (even after fork()).
   1.204 + * SECCOMP_MODE_FILTER is the "bpf" mode of seccomp, which takes a
   1.205 + * BPF program (in our case, the syscall whitelist returned by
   1.206 + * GetSandboxFilter()).
   1.207 + *
   1.208 + * @return 0 on success, 1 on failure.
   1.209 + * @see sock_fprog (the seccomp_prog).
   1.210 + */
   1.211 +static int
   1.212 +InstallSyscallFilter(void)
   1.213 +{
   1.214 +#ifdef MOZ_DMD
   1.215 +  char* e = PR_GetEnv("DMD");
   1.216 +  if (e && strcmp(e, "") != 0 && strcmp(e, "0") != 0) { // DMD set to anything but "" or "0"
   1.217 +    LOG_ERROR("SANDBOX DISABLED FOR DMD!  See bug 956961.");
   1.218 +    // Must report "failure": a "success" without seccomp actually enabled would
   1.219 +    // keep BroadcastSetThreadSandbox() rescanning (cf. PR_GET_SECCOMP in SetThreadSandbox()).
   1.220 +    return 1;
   1.221 +  }
   1.222 +#endif // MOZ_DMD
   1.223 +  if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { // nonzero return is treated as failure
   1.224 +    return 1;
   1.225 +  }
   1.226 +
   1.227 +  const sock_fprog *filter = GetSandboxFilter();
   1.228 +
   1.229 +  if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, (unsigned long)filter, 0, 0)) {
   1.230 +    return 1;
   1.231 +  }
   1.232 +  return 0;
   1.233 +}
   1.234 +
   1.235 +// Use signals for permissions that need to be set per-thread.
   1.236 +// Handler-to-main-thread channel (also the futex word): 0 = pending, 1 = done/no-op, 2 = done/filter installed.
   1.237 +static mozilla::Atomic<int> sSetSandboxDone;
   1.238 +// about:memory has the first 3 RT signals.  (We should allocate
   1.239 +// signals centrally instead of hard-coding them like this.)
   1.240 +static const int sSetSandboxSignum = SIGRTMIN + 3;
   1.241 +
   1.242 +static bool
   1.243 +SetThreadSandbox() // Sandboxes the calling thread; returns true only if the filter was installed by this call.
   1.244 +{
   1.245 +  bool didAnything = false;
   1.246 +
   1.247 +  if (PR_GetEnv("MOZ_DISABLE_CONTENT_SANDBOX") == nullptr && // not disabled via environment
   1.248 +      prctl(PR_GET_SECCOMP, 0, 0, 0, 0) == 0) { // 0: seccomp not yet enabled for this thread
   1.249 +    if (InstallSyscallFilter() == 0) {
   1.250 +      didAnything = true;
   1.251 +    }
   1.252 +    /*
   1.253 +     * Bug 880797: when all B2G devices are required to support
   1.254 +     * seccomp-bpf, this should exit/crash if InstallSyscallFilter
   1.255 +     * returns nonzero (ifdef MOZ_WIDGET_GONK).
   1.256 +     */
   1.257 +  }
   1.258 +  return didAnything;
   1.259 +}
   1.260 +
   1.261 +static void
   1.262 +SetThreadSandboxHandler(int signum) // Handler for sSetSandboxSignum; runs on the signalled thread. signum is unused.
   1.263 +{
   1.264 +  // The non-zero number sent back to the main thread indicates
   1.265 +  // whether action was taken: 2 = filter installed, 1 = nothing done.
   1.266 +  if (SetThreadSandbox()) {
   1.267 +    sSetSandboxDone = 2;
   1.268 +  } else {
   1.269 +    sSetSandboxDone = 1;
   1.270 +  }
   1.271 +  // Wake up the main thread.  See the FUTEX_WAIT call in
   1.272 +  // BroadcastSetThreadSandbox() for an explanation.
   1.273 +  syscall(__NR_futex, reinterpret_cast<int*>(&sSetSandboxDone),
   1.274 +          FUTEX_WAKE, 1);
   1.275 +}
   1.276 +
   1.277 +static void
   1.278 +BroadcastSetThreadSandbox() // Main thread only: applies SetThreadSandbox() to every other thread, then to itself.
   1.279 +{
   1.280 +  pid_t pid, tid;
   1.281 +  DIR *taskdp;
   1.282 +  struct dirent *de;
   1.283 +
   1.284 +  static_assert(sizeof(mozilla::Atomic<int>) == sizeof(int),
   1.285 +                "mozilla::Atomic<int> isn't represented by an int");
   1.286 +  MOZ_ASSERT(NS_IsMainThread());
   1.287 +  pid = getpid();
   1.288 +  taskdp = opendir("/proc/self/task"); // one directory entry per thread of this process
   1.289 +  if (taskdp == nullptr) {
   1.290 +    LOG_ERROR("opendir /proc/self/task: %s\n", strerror(errno));
   1.291 +    MOZ_CRASH();
   1.292 +  }
   1.293 +  if (signal(sSetSandboxSignum, SetThreadSandboxHandler) != SIG_DFL) { // a previous handler means the signal is taken
   1.294 +    LOG_ERROR("signal %d in use!\n", sSetSandboxSignum);
   1.295 +    MOZ_CRASH();
   1.296 +  }
   1.297 +
   1.298 +  // In case this races with a not-yet-deprivileged thread cloning
   1.299 +  // itself, repeat iterating over all threads until we find none
   1.300 +  // that are still privileged.
   1.301 +  bool sandboxProgress;
   1.302 +  do {
   1.303 +    sandboxProgress = false;
   1.304 +    // For each thread...
   1.305 +    while ((de = readdir(taskdp))) {
   1.306 +      char *endptr;
   1.307 +      tid = strtol(de->d_name, &endptr, 10);
   1.308 +      if (*endptr != '\0' || tid <= 0) {
   1.309 +        // Not a task ID.
   1.310 +        continue;
   1.311 +      }
   1.312 +      if (tid == pid) {
   1.313 +        // Drop the main thread's privileges last, below, so
   1.314 +        // we can continue to signal other threads.
   1.315 +        continue;
   1.316 +      }
   1.317 +      // Reset the futex cell and signal.
   1.318 +      sSetSandboxDone = 0;
   1.319 +      if (syscall(__NR_tgkill, pid, tid, sSetSandboxSignum) != 0) {
   1.320 +        if (errno == ESRCH) {
   1.321 +          LOG_ERROR("Thread %d unexpectedly exited.", tid);
   1.322 +          // Rescan threads, in case it forked before exiting.
   1.323 +          sandboxProgress = true;
   1.324 +          continue;
   1.325 +        }
   1.326 +        LOG_ERROR("tgkill(%d,%d): %s\n", pid, tid, strerror(errno));
   1.327 +        MOZ_CRASH();
   1.328 +      }
   1.329 +      // It's unlikely, but if the thread somehow manages to exit
   1.330 +      // after receiving the signal but before entering the signal
   1.331 +      // handler, we need to avoid blocking forever.
   1.332 +      //
   1.333 +      // Using futex directly lets the signal handler send the wakeup
   1.334 +      // from an async signal handler (pthread mutex/condvar calls
   1.335 +      // aren't allowed), and to use a relative timeout that isn't
   1.336 +      // affected by changes to the system clock (not possible with
   1.337 +      // POSIX semaphores).
   1.338 +      //
   1.339 +      // If a thread doesn't respond within a reasonable amount of
   1.340 +      // time, but still exists, we crash -- the alternative is either
   1.341 +      // blocking forever or silently losing security, and it
   1.342 +      // shouldn't actually happen.
   1.343 +      static const int crashDelay = 10; // seconds
   1.344 +      struct timespec timeLimit;
   1.345 +      clock_gettime(CLOCK_MONOTONIC, &timeLimit);
   1.346 +      timeLimit.tv_sec += crashDelay; // absolute monotonic deadline for this thread
   1.347 +      while (true) {
   1.348 +        static const struct timespec futexTimeout = { 0, 10*1000*1000 }; // 10ms
   1.349 +        // Atomically: if sSetSandboxDone == 0, then sleep.
   1.350 +        if (syscall(__NR_futex, reinterpret_cast<int*>(&sSetSandboxDone),
   1.351 +                  FUTEX_WAIT, 0, &futexTimeout) != 0) {
   1.352 +          if (errno != EWOULDBLOCK && errno != ETIMEDOUT && errno != EINTR) {
   1.353 +            LOG_ERROR("FUTEX_WAIT: %s\n", strerror(errno));
   1.354 +            MOZ_CRASH();
   1.355 +          }
   1.356 +        }
   1.357 +        // Did the handler finish?
   1.358 +        if (sSetSandboxDone > 0) {
   1.359 +          if (sSetSandboxDone == 2) {
   1.360 +            sandboxProgress = true;
   1.361 +          }
   1.362 +          break;
   1.363 +        }
   1.364 +        // Has the thread ceased to exist?
   1.365 +        if (syscall(__NR_tgkill, pid, tid, 0) != 0) {
   1.366 +          if (errno == ESRCH) {
   1.367 +            LOG_ERROR("Thread %d unexpectedly exited.", tid);
   1.368 +          }
   1.369 +          // Rescan threads, in case it forked before exiting.
   1.370 +          // Also, if it somehow failed in a way that wasn't ESRCH,
   1.371 +          // and still exists, that will be handled on the next pass.
   1.372 +          sandboxProgress = true;
   1.373 +          break;
   1.374 +        }
   1.375 +        struct timespec now;
   1.376 +        clock_gettime(CLOCK_MONOTONIC, &now);
   1.377 +        if (now.tv_sec > timeLimit.tv_sec || // deadline passed: compare seconds first,
   1.378 +            (now.tv_sec == timeLimit.tv_sec && // then nanoseconds only when seconds tie
   1.379 +             now.tv_nsec > timeLimit.tv_nsec)) {
   1.380 +          LOG_ERROR("Thread %d unresponsive for %d seconds.  Killing process.",
   1.381 +                    tid, crashDelay);
   1.382 +          MOZ_CRASH();
   1.383 +        }
   1.384 +      }
   1.385 +    }
   1.386 +    rewinddir(taskdp); // restart the directory scan for the next pass
   1.387 +  } while (sandboxProgress);
   1.388 +  unused << signal(sSetSandboxSignum, SIG_DFL);
   1.389 +  unused << closedir(taskdp);
   1.390 +  // And now, deprivilege the main thread:
   1.391 +  SetThreadSandbox();
   1.392 +}
   1.393 +
   1.394 +// Reports whether seccomp can be queried on this kernel.  May overapproximate
   1.395 +// (return true even if sandboxing isn't supported, but not the reverse).  See bug 993145.
   1.396 +static bool
   1.397 +IsSandboxingSupported(void)
   1.398 +{
   1.399 +  return prctl(PR_GET_SECCOMP) != -1; // -1 is prctl's error return
   1.400 +}
   1.401 +
   1.402 +/**
   1.403 + * Starts the seccomp sandbox for this process, applying it to every thread.
   1.404 + * Should be called only once, and before any potentially harmful content is loaded.
   1.405 + * NOTE(review): older wording mentioned user/group-based privileges; nothing here sets them -- confirm.
   1.406 + * Should normally make the process exit on failure.
   1.407 +*/
   1.408 +void
   1.409 +SetCurrentProcessSandbox()
   1.410 +{
   1.411 +#if !defined(ANDROID) && defined(PR_LOGGING)
   1.412 +  if (!gSeccompSandboxLog) { // create the log module on first call
   1.413 +    gSeccompSandboxLog = PR_NewLogModule("SeccompSandbox");
   1.414 +  }
   1.415 +  PR_ASSERT(gSeccompSandboxLog);
   1.416 +#endif
   1.417 +
   1.418 +#if defined(MOZ_CONTENT_SANDBOX_REPORTER)
   1.419 +  if (InstallSyscallReporter()) { // failure is only logged; sandboxing still proceeds
   1.420 +    LOG_ERROR("install_syscall_reporter() failed\n");
   1.421 +  }
   1.422 +#endif
   1.423 +
   1.424 +  if (IsSandboxingSupported()) { // otherwise the process silently stays unsandboxed
   1.425 +    BroadcastSetThreadSandbox();
   1.426 +  }
   1.427 +}
   1.428 +
   1.429 +} // namespace mozilla
   1.430 +

mercurial