security/sandbox/linux/SandboxFilter.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
michael@0 3 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
michael@0 5 * You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 #include "SandboxFilter.h"
michael@0 8
michael@0 9 #include "linux_seccomp.h"
michael@0 10 #include "linux_syscalls.h"
michael@0 11
michael@0 12 #include "mozilla/ArrayUtils.h"
michael@0 13
michael@0 14 #include <errno.h>
michael@0 15
michael@0 16 namespace mozilla {
michael@0 17
michael@0 18 #define SYSCALL_EXISTS(name) defined(__NR_##name)
michael@0 19
michael@0 20 static struct sock_filter seccomp_filter[] = {
michael@0 21 VALIDATE_ARCHITECTURE,
michael@0 22 EXAMINE_SYSCALL,
michael@0 23
michael@0 24 // Some architectures went through a transition from 32-bit to
michael@0 25 // 64-bit off_t and had to version all the syscalls that referenced
michael@0 26 // it; others (newer and/or 64-bit ones) didn't. Adjust the
michael@0 27 // conditional as needed.
michael@0 28 #if SYSCALL_EXISTS(stat64)
michael@0 29 #define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(versioned)
michael@0 30 #else
michael@0 31 #define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(plain)
michael@0 32 #endif
michael@0 33
michael@0 34 /* Most used system calls should be at the top of the whitelist
michael@0 35 * for performance reasons. The whitelist BPF filter exits after
michael@0 36 * processing any ALLOW_SYSCALL macro.
michael@0 37 *
michael@0 38 * How are those syscalls found?
michael@0 39 * 1) via strace -p <child pid> or/and
michael@0 40 * 2) with MOZ_CONTENT_SANDBOX_REPORTER set, the child will report which system call
michael@0 41 * has been denied by seccomp-bpf, just before exiting, via NSPR.
michael@0 42 * System call number to name mapping is found in:
michael@0 43 * bionic/libc/kernel/arch-arm/asm/unistd.h
michael@0 44 * or your libc's unistd.h/kernel headers.
michael@0 45 *
michael@0 46 * Current list order has been optimized through manual guess-work.
michael@0 47 * It could be further optimized by analyzing the output of:
michael@0 48 * 'strace -c -p <child pid>' for most used web apps.
michael@0 49 */
michael@0 50
michael@0 51 ALLOW_SYSCALL(futex),
michael@0 52 // FIXME, bug 920372: i386 multiplexes all the socket-related
michael@0 53 // interfaces into a single syscall. We should check the selector.
michael@0 54 #if SYSCALL_EXISTS(socketcall)
michael@0 55 ALLOW_SYSCALL(socketcall),
michael@0 56 #else
michael@0 57 ALLOW_SYSCALL(recvmsg),
michael@0 58 ALLOW_SYSCALL(sendmsg),
michael@0 59 #endif
michael@0 60
michael@0 61 // mmap2 is a little different from most off_t users, because it's
michael@0 62 // passed in a register (so it's a problem for even a "new" 32-bit
michael@0 63 // arch) -- and the workaround, mmap2, passes a page offset instead.
michael@0 64 #if SYSCALL_EXISTS(mmap2)
michael@0 65 ALLOW_SYSCALL(mmap2),
michael@0 66 #else
michael@0 67 ALLOW_SYSCALL(mmap),
michael@0 68 #endif
michael@0 69
michael@0 70 /* B2G specific high-frequency syscalls */
michael@0 71 #ifdef MOZ_WIDGET_GONK
michael@0 72 ALLOW_SYSCALL(clock_gettime),
michael@0 73 ALLOW_SYSCALL(epoll_wait),
michael@0 74 ALLOW_SYSCALL(gettimeofday),
michael@0 75 #endif
michael@0 76 ALLOW_SYSCALL(read),
michael@0 77 ALLOW_SYSCALL(write),
michael@0 78 // 32-bit lseek is used, at least on Android, to implement ANSI fseek.
michael@0 79 #if SYSCALL_EXISTS(_llseek)
michael@0 80 ALLOW_SYSCALL(_llseek),
michael@0 81 #endif
michael@0 82 ALLOW_SYSCALL(lseek),
michael@0 83 // Android also uses 32-bit ftruncate.
michael@0 84 ALLOW_SYSCALL(ftruncate),
michael@0 85 #if SYSCALL_EXISTS(ftruncate64)
michael@0 86 ALLOW_SYSCALL(ftruncate64),
michael@0 87 #endif
michael@0 88
michael@0 89 /* ioctl() is for GL. Remove when GL proxy is implemented.
michael@0 90 * Additionally ioctl() might be a place where we want to have
michael@0 91 * argument filtering */
michael@0 92 ALLOW_SYSCALL(ioctl),
michael@0 93 ALLOW_SYSCALL(close),
michael@0 94 ALLOW_SYSCALL(munmap),
michael@0 95 ALLOW_SYSCALL(mprotect),
michael@0 96 ALLOW_SYSCALL(writev),
michael@0 97 ALLOW_SYSCALL(clone),
michael@0 98 ALLOW_SYSCALL(brk),
michael@0 99 #if SYSCALL_EXISTS(set_thread_area)
michael@0 100 ALLOW_SYSCALL(set_thread_area),
michael@0 101 #endif
michael@0 102
michael@0 103 ALLOW_SYSCALL(getpid),
michael@0 104 ALLOW_SYSCALL(gettid),
michael@0 105 ALLOW_SYSCALL(getrusage),
michael@0 106 ALLOW_SYSCALL(madvise),
michael@0 107 ALLOW_SYSCALL(dup),
michael@0 108 ALLOW_SYSCALL(nanosleep),
michael@0 109 ALLOW_SYSCALL(poll),
michael@0 110 // select()'s arguments used to be passed by pointer as a struct.
michael@0 111 #if SYSCALL_EXISTS(_newselect)
michael@0 112 ALLOW_SYSCALL(_newselect),
michael@0 113 #else
michael@0 114 ALLOW_SYSCALL(select),
michael@0 115 #endif
michael@0 116 // Some archs used to have 16-bit uid/gid instead of 32-bit.
michael@0 117 #if SYSCALL_EXISTS(getuid32)
michael@0 118 ALLOW_SYSCALL(getuid32),
michael@0 119 ALLOW_SYSCALL(geteuid32),
michael@0 120 #else
michael@0 121 ALLOW_SYSCALL(getuid),
michael@0 122 ALLOW_SYSCALL(geteuid),
michael@0 123 #endif
michael@0 124 // Some newer archs (e.g., x64 and x32) have only rt_sigreturn, but
michael@0 125 // ARM has and uses both syscalls -- rt_sigreturn for SA_SIGINFO
michael@0 126 // handlers and classic sigreturn otherwise.
michael@0 127 #if SYSCALL_EXISTS(sigreturn)
michael@0 128 ALLOW_SYSCALL(sigreturn),
michael@0 129 #endif
michael@0 130 ALLOW_SYSCALL(rt_sigreturn),
michael@0 131 ALLOW_SYSCALL_LARGEFILE(fcntl, fcntl64),
michael@0 132
michael@0 133 /* Must remove all of the following in the future, when no longer used */
michael@0 134 /* open() is for some legacy APIs such as font loading. */
michael@0 135 /* See bug 906996 for removing unlink(). */
michael@0 136 ALLOW_SYSCALL_LARGEFILE(fstat, fstat64),
michael@0 137 ALLOW_SYSCALL_LARGEFILE(stat, stat64),
michael@0 138 ALLOW_SYSCALL_LARGEFILE(lstat, lstat64),
michael@0 139 // FIXME, bug 920372: see above.
michael@0 140 #if !SYSCALL_EXISTS(socketcall)
michael@0 141 ALLOW_SYSCALL(socketpair),
michael@0 142 DENY_SYSCALL(socket, EACCES),
michael@0 143 #endif
michael@0 144 ALLOW_SYSCALL(open),
michael@0 145 ALLOW_SYSCALL(readlink), /* Workaround for bug 964455 */
michael@0 146 ALLOW_SYSCALL(prctl),
michael@0 147 ALLOW_SYSCALL(access),
michael@0 148 ALLOW_SYSCALL(unlink),
michael@0 149 ALLOW_SYSCALL(fsync),
michael@0 150 ALLOW_SYSCALL(msync),
michael@0 151
michael@0 152 /* Should remove all of the following in the future, if possible */
michael@0 153 ALLOW_SYSCALL(getpriority),
michael@0 154 ALLOW_SYSCALL(sched_get_priority_min),
michael@0 155 ALLOW_SYSCALL(sched_get_priority_max),
michael@0 156 ALLOW_SYSCALL(setpriority),
michael@0 157 // rt_sigprocmask is passed the sigset_t size. On older archs,
michael@0 158 // sigprocmask is a compatibility shim that assumes the pre-RT size.
michael@0 159 #if SYSCALL_EXISTS(sigprocmask)
michael@0 160 ALLOW_SYSCALL(sigprocmask),
michael@0 161 #endif
michael@0 162 ALLOW_SYSCALL(rt_sigprocmask),
michael@0 163
michael@0 164 /* System calls used by the profiler */
michael@0 165 #ifdef MOZ_PROFILING
michael@0 166 ALLOW_SYSCALL(tgkill),
michael@0 167 #endif
michael@0 168
michael@0 169 /* B2G specific low-frequency syscalls */
michael@0 170 #ifdef MOZ_WIDGET_GONK
michael@0 171 #if !SYSCALL_EXISTS(socketcall)
michael@0 172 ALLOW_SYSCALL(sendto),
michael@0 173 ALLOW_SYSCALL(recvfrom),
michael@0 174 #endif
michael@0 175 ALLOW_SYSCALL_LARGEFILE(getdents, getdents64),
michael@0 176 ALLOW_SYSCALL(epoll_ctl),
michael@0 177 ALLOW_SYSCALL(sched_yield),
michael@0 178 ALLOW_SYSCALL(sched_getscheduler),
michael@0 179 ALLOW_SYSCALL(sched_setscheduler),
michael@0 180 ALLOW_SYSCALL(sigaltstack),
michael@0 181 #endif
michael@0 182
michael@0 183 /* Always last and always OK calls */
michael@0 184 /* Architecture-specific very infrequently used syscalls */
michael@0 185 #if SYSCALL_EXISTS(sigaction)
michael@0 186 ALLOW_SYSCALL(sigaction),
michael@0 187 #endif
michael@0 188 ALLOW_SYSCALL(rt_sigaction),
michael@0 189 #ifdef ALLOW_ARM_SYSCALL
michael@0 190 ALLOW_ARM_SYSCALL(breakpoint),
michael@0 191 ALLOW_ARM_SYSCALL(cacheflush),
michael@0 192 ALLOW_ARM_SYSCALL(usr26),
michael@0 193 ALLOW_ARM_SYSCALL(usr32),
michael@0 194 ALLOW_ARM_SYSCALL(set_tls),
michael@0 195 #endif
michael@0 196
michael@0 197 /* restart_syscall is called internally, generally when debugging */
michael@0 198 ALLOW_SYSCALL(restart_syscall),
michael@0 199
michael@0 200 /* linux desktop is not as performance critical as B2G */
michael@0 201 /* we can place desktop syscalls at the end */
michael@0 202 #ifndef MOZ_WIDGET_GONK
michael@0 203 ALLOW_SYSCALL(stat),
michael@0 204 ALLOW_SYSCALL(getdents),
michael@0 205 ALLOW_SYSCALL(lstat),
michael@0 206 ALLOW_SYSCALL(mmap),
michael@0 207 ALLOW_SYSCALL(openat),
michael@0 208 ALLOW_SYSCALL(fcntl),
michael@0 209 ALLOW_SYSCALL(fstat),
michael@0 210 ALLOW_SYSCALL(readlink),
michael@0 211 ALLOW_SYSCALL(getsockname),
michael@0 212 ALLOW_SYSCALL(getuid),
michael@0 213 ALLOW_SYSCALL(geteuid),
michael@0 214 ALLOW_SYSCALL(mkdir),
michael@0 215 ALLOW_SYSCALL(getcwd),
michael@0 216 ALLOW_SYSCALL(readahead),
michael@0 217 ALLOW_SYSCALL(pread64),
michael@0 218 ALLOW_SYSCALL(statfs),
michael@0 219 ALLOW_SYSCALL(pipe),
michael@0 220 ALLOW_SYSCALL(getrlimit),
michael@0 221 ALLOW_SYSCALL(shutdown),
michael@0 222 ALLOW_SYSCALL(getpeername),
michael@0 223 ALLOW_SYSCALL(eventfd2),
michael@0 224 ALLOW_SYSCALL(clock_getres),
michael@0 225 ALLOW_SYSCALL(sysinfo),
michael@0 226 ALLOW_SYSCALL(getresuid),
michael@0 227 ALLOW_SYSCALL(umask),
michael@0 228 ALLOW_SYSCALL(getresgid),
michael@0 229 ALLOW_SYSCALL(poll),
michael@0 230 ALLOW_SYSCALL(getegid),
michael@0 231 ALLOW_SYSCALL(inotify_init1),
michael@0 232 ALLOW_SYSCALL(wait4),
michael@0 233 ALLOW_SYSCALL(shmctl),
michael@0 234 ALLOW_SYSCALL(set_robust_list),
michael@0 235 ALLOW_SYSCALL(rmdir),
michael@0 236 ALLOW_SYSCALL(recvfrom),
michael@0 237 ALLOW_SYSCALL(shmdt),
michael@0 238 ALLOW_SYSCALL(pipe2),
michael@0 239 ALLOW_SYSCALL(setsockopt),
michael@0 240 ALLOW_SYSCALL(shmat),
michael@0 241 ALLOW_SYSCALL(set_tid_address),
michael@0 242 ALLOW_SYSCALL(inotify_add_watch),
michael@0 243 ALLOW_SYSCALL(rt_sigprocmask),
michael@0 244 ALLOW_SYSCALL(shmget),
michael@0 245 ALLOW_SYSCALL(getgid),
michael@0 246 ALLOW_SYSCALL(utime),
michael@0 247 ALLOW_SYSCALL(arch_prctl),
michael@0 248 ALLOW_SYSCALL(sched_getaffinity),
michael@0 249 /* We should remove all of the following in the future (possibly even more) */
michael@0 250 ALLOW_SYSCALL(socket),
michael@0 251 ALLOW_SYSCALL(chmod),
michael@0 252 ALLOW_SYSCALL(execve),
michael@0 253 ALLOW_SYSCALL(rename),
michael@0 254 ALLOW_SYSCALL(symlink),
michael@0 255 ALLOW_SYSCALL(connect),
michael@0 256 ALLOW_SYSCALL(quotactl),
michael@0 257 ALLOW_SYSCALL(kill),
michael@0 258 ALLOW_SYSCALL(sendto),
michael@0 259 #endif
michael@0 260
michael@0 261 /* nsSystemInfo uses uname (and we cache an instance, so */
michael@0 262 /* the info remains present even if we block the syscall) */
michael@0 263 ALLOW_SYSCALL(uname),
michael@0 264 ALLOW_SYSCALL(exit_group),
michael@0 265 ALLOW_SYSCALL(exit),
michael@0 266
michael@0 267 #ifdef MOZ_CONTENT_SANDBOX_REPORTER
michael@0 268 TRAP_PROCESS,
michael@0 269 #else
michael@0 270 KILL_PROCESS,
michael@0 271 #endif
michael@0 272 };
michael@0 273 // Program descriptor returned by GetSandboxFilter(): an entry count plus the seccomp_filter array.
michael@0 274 static struct sock_fprog seccomp_prog = {
michael@0 275 (unsigned short)MOZ_ARRAY_LENGTH(seccomp_filter), // element count; NOTE(review): the cast would truncate silently beyond the unsigned short range
michael@0 276 seccomp_filter,
michael@0 277 };
michael@0 278 // Returns the address of the file-static seccomp_prog (static storage, never null).
michael@0 279 const sock_fprog*
michael@0 280 GetSandboxFilter()
michael@0 281 {
michael@0 282 return &seccomp_prog;
michael@0 283 }
michael@0 284
michael@0 285 }

mercurial