security/sandbox/linux/SandboxFilter.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/sandbox/linux/SandboxFilter.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,285 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this file,
     1.8 + * You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#include "SandboxFilter.h"
    1.11 +
    1.12 +#include "linux_seccomp.h"
    1.13 +#include "linux_syscalls.h"
    1.14 +
    1.15 +#include "mozilla/ArrayUtils.h"
    1.16 +
    1.17 +#include <errno.h>
    1.18 +
    1.19 +namespace mozilla {
    1.20 +
    1.21 +#define SYSCALL_EXISTS(name) defined(__NR_##name) /* For use in #if: true when __NR_<name> is defined. NOTE(review): producing `defined` via macro expansion is undefined in ISO C (GCC/Clang accept it) -- confirm for other toolchains. */
    1.22 +
    1.23 +static struct sock_filter seccomp_filter[] = { // seccomp-bpf syscall whitelist; entry order is significant (see the comment below)
    1.24 +  VALIDATE_ARCHITECTURE,
    1.25 +  EXAMINE_SYSCALL,
    1.26 +
    1.27 +  // Some architectures went through a transition from 32-bit to
    1.28 +  // 64-bit off_t and had to version all the syscalls that referenced
    1.29 +  // it; others (newer and/or 64-bit ones) didn't.  Adjust the
    1.30 +  // conditional as needed.
    1.31 +#if SYSCALL_EXISTS(stat64)
    1.32 +#define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(versioned)
    1.33 +#else
    1.34 +#define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(plain)
    1.35 +#endif
    1.36 +
    1.37 +  /* Most used system calls should be at the top of the whitelist
    1.38 +   * for performance reasons. The whitelist BPF filter exits after
    1.39 +   * processing any ALLOW_SYSCALL macro.
    1.40 +   *
    1.41 +   * How are those syscalls found?
    1.42 +   * 1) via strace -p <child pid> and/or
    1.43 +   * 2) with MOZ_CONTENT_SANDBOX_REPORTER set, the child will report which system call
    1.44 +   *    has been denied by seccomp-bpf, just before exiting, via NSPR.
    1.45 +   * System call number to name mapping is found in:
    1.46 +   * bionic/libc/kernel/arch-arm/asm/unistd.h
    1.47 +   * or your libc's unistd.h/kernel headers.
    1.48 +   *
    1.49 +   * Current list order has been optimized through manual guess-work.
    1.50 +   * It could be further optimized by analyzing the output of:
    1.51 +   * 'strace -c -p <child pid>' for most used web apps.
    1.52 +   */
    1.53 +
    1.54 +  ALLOW_SYSCALL(futex),
    1.55 +  // FIXME, bug 920372: i386 multiplexes all the socket-related
    1.56 +  // interfaces into a single syscall.  We should check the selector.
    1.57 +#if SYSCALL_EXISTS(socketcall)
    1.58 +  ALLOW_SYSCALL(socketcall),
    1.59 +#else
    1.60 +  ALLOW_SYSCALL(recvmsg),
    1.61 +  ALLOW_SYSCALL(sendmsg),
    1.62 +#endif
    1.63 +
    1.64 +  // mmap is a little different from most off_t users, because its
    1.65 +  // offset is passed in a register (so it's a problem for even a "new"
    1.66 +  // 32-bit arch) -- and the workaround, mmap2, passes a page offset instead.
    1.67 +#if SYSCALL_EXISTS(mmap2)
    1.68 +  ALLOW_SYSCALL(mmap2),
    1.69 +#else
    1.70 +  ALLOW_SYSCALL(mmap),
    1.71 +#endif
    1.72 +
    1.73 +  /* B2G specific high-frequency syscalls */
    1.74 +#ifdef MOZ_WIDGET_GONK
    1.75 +  ALLOW_SYSCALL(clock_gettime),
    1.76 +  ALLOW_SYSCALL(epoll_wait),
    1.77 +  ALLOW_SYSCALL(gettimeofday),
    1.78 +#endif
    1.79 +  ALLOW_SYSCALL(read),
    1.80 +  ALLOW_SYSCALL(write),
    1.81 +  // 32-bit lseek is used, at least on Android, to implement ANSI fseek.
    1.82 +#if SYSCALL_EXISTS(_llseek)
    1.83 +  ALLOW_SYSCALL(_llseek),
    1.84 +#endif
    1.85 +  ALLOW_SYSCALL(lseek),
    1.86 +  // Android also uses 32-bit ftruncate.
    1.87 +  ALLOW_SYSCALL(ftruncate),
    1.88 +#if SYSCALL_EXISTS(ftruncate64)
    1.89 +  ALLOW_SYSCALL(ftruncate64),
    1.90 +#endif
    1.91 +
    1.92 +  /* ioctl() is for GL. Remove when GL proxy is implemented.
    1.93 +   * Additionally ioctl() might be a place where we want to have
    1.94 +   * argument filtering */
    1.95 +  ALLOW_SYSCALL(ioctl),
    1.96 +  ALLOW_SYSCALL(close),
    1.97 +  ALLOW_SYSCALL(munmap),
    1.98 +  ALLOW_SYSCALL(mprotect),
    1.99 +  ALLOW_SYSCALL(writev),
   1.100 +  ALLOW_SYSCALL(clone),
   1.101 +  ALLOW_SYSCALL(brk),
   1.102 +#if SYSCALL_EXISTS(set_thread_area)
   1.103 +  ALLOW_SYSCALL(set_thread_area),
   1.104 +#endif
   1.105 +
   1.106 +  ALLOW_SYSCALL(getpid),
   1.107 +  ALLOW_SYSCALL(gettid),
   1.108 +  ALLOW_SYSCALL(getrusage),
   1.109 +  ALLOW_SYSCALL(madvise),
   1.110 +  ALLOW_SYSCALL(dup),
   1.111 +  ALLOW_SYSCALL(nanosleep),
   1.112 +  ALLOW_SYSCALL(poll),
   1.113 +  // select()'s arguments used to be passed by pointer as a struct.
   1.114 +#if SYSCALL_EXISTS(_newselect)
   1.115 +  ALLOW_SYSCALL(_newselect),
   1.116 +#else
   1.117 +  ALLOW_SYSCALL(select),
   1.118 +#endif
   1.119 +  // Some archs used to have 16-bit uid/gid instead of 32-bit.
   1.120 +#if SYSCALL_EXISTS(getuid32)
   1.121 +  ALLOW_SYSCALL(getuid32),
   1.122 +  ALLOW_SYSCALL(geteuid32),
   1.123 +#else
   1.124 +  ALLOW_SYSCALL(getuid),
   1.125 +  ALLOW_SYSCALL(geteuid),
   1.126 +#endif
   1.127 +  // Some newer archs (e.g., x64 and x32) have only rt_sigreturn, but
   1.128 +  // ARM has and uses both syscalls -- rt_sigreturn for SA_SIGINFO
   1.129 +  // handlers and classic sigreturn otherwise.
   1.130 +#if SYSCALL_EXISTS(sigreturn)
   1.131 +  ALLOW_SYSCALL(sigreturn),
   1.132 +#endif
   1.133 +  ALLOW_SYSCALL(rt_sigreturn),
   1.134 +  ALLOW_SYSCALL_LARGEFILE(fcntl, fcntl64),
   1.135 +
   1.136 +  /* Must remove all of the following in the future, when no longer used */
   1.137 +  /* open() is for some legacy APIs such as font loading. */
   1.138 +  /* See bug 906996 for removing unlink(). */
   1.139 +  ALLOW_SYSCALL_LARGEFILE(fstat, fstat64),
   1.140 +  ALLOW_SYSCALL_LARGEFILE(stat, stat64),
   1.141 +  ALLOW_SYSCALL_LARGEFILE(lstat, lstat64),
   1.142 +  // FIXME, bug 920372: see above.
   1.143 +#if !SYSCALL_EXISTS(socketcall)
   1.144 +  ALLOW_SYSCALL(socketpair),
   1.145 +  DENY_SYSCALL(socket, EACCES), // NOTE(review): precedes the desktop-only ALLOW_SYSCALL(socket) further down; if the first matching entry wins, that later entry looks unreachable here -- confirm
   1.146 +#endif
   1.147 +  ALLOW_SYSCALL(open),
   1.148 +  ALLOW_SYSCALL(readlink), /* Workaround for bug 964455 */
   1.149 +  ALLOW_SYSCALL(prctl),
   1.150 +  ALLOW_SYSCALL(access),
   1.151 +  ALLOW_SYSCALL(unlink),
   1.152 +  ALLOW_SYSCALL(fsync),
   1.153 +  ALLOW_SYSCALL(msync),
   1.154 +
   1.155 +  /* Should remove all of the following in the future, if possible */
   1.156 +  ALLOW_SYSCALL(getpriority),
   1.157 +  ALLOW_SYSCALL(sched_get_priority_min),
   1.158 +  ALLOW_SYSCALL(sched_get_priority_max),
   1.159 +  ALLOW_SYSCALL(setpriority),
   1.160 +  // rt_sigprocmask is passed the sigset_t size.  On older archs,
   1.161 +  // sigprocmask is a compatibility shim that assumes the pre-RT size.
   1.162 +#if SYSCALL_EXISTS(sigprocmask)
   1.163 +  ALLOW_SYSCALL(sigprocmask),
   1.164 +#endif
   1.165 +  ALLOW_SYSCALL(rt_sigprocmask),
   1.166 +
   1.167 +  /* System calls used by the profiler */
   1.168 +#ifdef MOZ_PROFILING
   1.169 +  ALLOW_SYSCALL(tgkill),
   1.170 +#endif
   1.171 +
   1.172 +  /* B2G specific low-frequency syscalls */
   1.173 +#ifdef MOZ_WIDGET_GONK
   1.174 +#if !SYSCALL_EXISTS(socketcall)
   1.175 +  ALLOW_SYSCALL(sendto),
   1.176 +  ALLOW_SYSCALL(recvfrom),
   1.177 +#endif
   1.178 +  ALLOW_SYSCALL_LARGEFILE(getdents, getdents64),
   1.179 +  ALLOW_SYSCALL(epoll_ctl),
   1.180 +  ALLOW_SYSCALL(sched_yield),
   1.181 +  ALLOW_SYSCALL(sched_getscheduler),
   1.182 +  ALLOW_SYSCALL(sched_setscheduler),
   1.183 +  ALLOW_SYSCALL(sigaltstack),
   1.184 +#endif
   1.185 +
   1.186 +  /* Always last and always OK calls */
   1.187 +  /* Architecture-specific very infrequently used syscalls */
   1.188 +#if SYSCALL_EXISTS(sigaction)
   1.189 +  ALLOW_SYSCALL(sigaction),
   1.190 +#endif
   1.191 +  ALLOW_SYSCALL(rt_sigaction),
   1.192 +#ifdef ALLOW_ARM_SYSCALL
   1.193 +  ALLOW_ARM_SYSCALL(breakpoint),
   1.194 +  ALLOW_ARM_SYSCALL(cacheflush),
   1.195 +  ALLOW_ARM_SYSCALL(usr26),
   1.196 +  ALLOW_ARM_SYSCALL(usr32),
   1.197 +  ALLOW_ARM_SYSCALL(set_tls),
   1.198 +#endif
   1.199 +
   1.200 +  /* restart_syscall is called internally, generally when debugging */
   1.201 +  ALLOW_SYSCALL(restart_syscall),
   1.202 +
   1.203 +  /* Linux desktop is not as performance-critical as B2G, */
   1.204 +  /* so the desktop-only syscalls can be placed at the end. */
   1.205 +#ifndef MOZ_WIDGET_GONK
   1.206 +  ALLOW_SYSCALL(stat),
   1.207 +  ALLOW_SYSCALL(getdents),
   1.208 +  ALLOW_SYSCALL(lstat),
   1.209 +  ALLOW_SYSCALL(mmap),
   1.210 +  ALLOW_SYSCALL(openat),
   1.211 +  ALLOW_SYSCALL(fcntl),
   1.212 +  ALLOW_SYSCALL(fstat),
   1.213 +  ALLOW_SYSCALL(readlink), // NOTE(review): duplicates the unconditional readlink entry above
   1.214 +  ALLOW_SYSCALL(getsockname),
   1.215 +  ALLOW_SYSCALL(getuid),
   1.216 +  ALLOW_SYSCALL(geteuid),
   1.217 +  ALLOW_SYSCALL(mkdir),
   1.218 +  ALLOW_SYSCALL(getcwd),
   1.219 +  ALLOW_SYSCALL(readahead),
   1.220 +  ALLOW_SYSCALL(pread64),
   1.221 +  ALLOW_SYSCALL(statfs),
   1.222 +  ALLOW_SYSCALL(pipe),
   1.223 +  ALLOW_SYSCALL(getrlimit),
   1.224 +  ALLOW_SYSCALL(shutdown),
   1.225 +  ALLOW_SYSCALL(getpeername),
   1.226 +  ALLOW_SYSCALL(eventfd2),
   1.227 +  ALLOW_SYSCALL(clock_getres),
   1.228 +  ALLOW_SYSCALL(sysinfo),
   1.229 +  ALLOW_SYSCALL(getresuid),
   1.230 +  ALLOW_SYSCALL(umask),
   1.231 +  ALLOW_SYSCALL(getresgid),
   1.232 +  ALLOW_SYSCALL(poll), // NOTE(review): duplicates the unconditional poll entry above
   1.233 +  ALLOW_SYSCALL(getegid),
   1.234 +  ALLOW_SYSCALL(inotify_init1),
   1.235 +  ALLOW_SYSCALL(wait4),
   1.236 +  ALLOW_SYSCALL(shmctl),
   1.237 +  ALLOW_SYSCALL(set_robust_list),
   1.238 +  ALLOW_SYSCALL(rmdir),
   1.239 +  ALLOW_SYSCALL(recvfrom),
   1.240 +  ALLOW_SYSCALL(shmdt),
   1.241 +  ALLOW_SYSCALL(pipe2),
   1.242 +  ALLOW_SYSCALL(setsockopt),
   1.243 +  ALLOW_SYSCALL(shmat),
   1.244 +  ALLOW_SYSCALL(set_tid_address),
   1.245 +  ALLOW_SYSCALL(inotify_add_watch),
   1.246 +  ALLOW_SYSCALL(rt_sigprocmask), // NOTE(review): duplicates the unconditional rt_sigprocmask entry above
   1.247 +  ALLOW_SYSCALL(shmget),
   1.248 +  ALLOW_SYSCALL(getgid),
   1.249 +  ALLOW_SYSCALL(utime),
   1.250 +  ALLOW_SYSCALL(arch_prctl),
   1.251 +  ALLOW_SYSCALL(sched_getaffinity),
   1.252 +  /* We should remove all of the following in the future (possibly even more) */
   1.253 +  ALLOW_SYSCALL(socket), // NOTE(review): comes after DENY_SYSCALL(socket, EACCES) on archs without socketcall -- confirm which entry is meant to apply
   1.254 +  ALLOW_SYSCALL(chmod),
   1.255 +  ALLOW_SYSCALL(execve),
   1.256 +  ALLOW_SYSCALL(rename),
   1.257 +  ALLOW_SYSCALL(symlink),
   1.258 +  ALLOW_SYSCALL(connect),
   1.259 +  ALLOW_SYSCALL(quotactl),
   1.260 +  ALLOW_SYSCALL(kill),
   1.261 +  ALLOW_SYSCALL(sendto),
   1.262 +#endif
   1.263 +
   1.264 +  /* nsSystemInfo uses uname (and we cache an instance, so */
   1.265 +  /* the info remains present even if we block the syscall) */
   1.266 +  ALLOW_SYSCALL(uname),
   1.267 +  ALLOW_SYSCALL(exit_group),
   1.268 +  ALLOW_SYSCALL(exit),
   1.269 +
   1.270 +#ifdef MOZ_CONTENT_SANDBOX_REPORTER
   1.271 +  TRAP_PROCESS, // final entry when the reporter is enabled; presumably applies to any syscall not matched above -- confirm in linux_seccomp.h
   1.272 +#else
   1.273 +  KILL_PROCESS, // final entry otherwise; presumably applies to any syscall not matched above -- confirm in linux_seccomp.h
   1.274 +#endif
   1.275 +};
   1.276 +
   1.277 +static struct sock_fprog seccomp_prog = { // Program descriptor wrapping seccomp_filter; returned by GetSandboxFilter()
   1.278 +  (unsigned short)MOZ_ARRAY_LENGTH(seccomp_filter), // entry count of the array, narrowed to unsigned short by the cast
   1.279 +  seccomp_filter, // the filter entries themselves
   1.280 +};
   1.281 +
   1.282 +const sock_fprog* // Returns the address of the file-static seccomp_prog; the pointee is const to callers.
   1.283 +GetSandboxFilter()
   1.284 +{
   1.285 +  return &seccomp_prog; // static storage, so the pointer stays valid for the life of the program
   1.286 +}
   1.287 +
   1.288 +}

mercurial