Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
michael@0 | 3 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, |
michael@0 | 5 | * You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | #include "SandboxFilter.h" |
michael@0 | 8 | |
michael@0 | 9 | #include "linux_seccomp.h" |
michael@0 | 10 | #include "linux_syscalls.h" |
michael@0 | 11 | |
michael@0 | 12 | #include "mozilla/ArrayUtils.h" |
michael@0 | 13 | |
michael@0 | 14 | #include <errno.h> |
michael@0 | 15 | |
michael@0 | 16 | namespace mozilla { |
michael@0 | 17 | /* For use in #if conditions only: true when the macro __NR_<name> is defined. NOTE(review): `defined` produced by macro expansion is formally undefined behaviour (GCC and Clang accept it) -- confirm for other toolchains. */ |
michael@0 | 18 | #define SYSCALL_EXISTS(name) defined(__NR_##name) |
michael@0 | 19 | |
michael@0 | 20 | static struct sock_filter seccomp_filter[] = { |
michael@0 | 21 | VALIDATE_ARCHITECTURE, |
michael@0 | 22 | EXAMINE_SYSCALL, |
michael@0 | 23 | |
michael@0 | 24 | // Some architectures went through a transition from 32-bit to |
michael@0 | 25 | // 64-bit off_t and had to version all the syscalls that referenced |
michael@0 | 26 | // it; others (newer and/or 64-bit ones) didn't. The presence of |
michael@0 | 27 | // stat64 is used as the probe; adjust the conditional as needed. |
michael@0 | 28 | #if SYSCALL_EXISTS(stat64) |
michael@0 | 29 | #define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(versioned) |
michael@0 | 30 | #else |
michael@0 | 31 | #define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(plain) |
michael@0 | 32 | #endif |
michael@0 | 33 | |
michael@0 | 34 | /* The most frequently used system calls should be at the top of the |
michael@0 | 35 | * whitelist for performance reasons. The whitelist BPF filter exits |
michael@0 | 36 | * as soon as an ALLOW_SYSCALL entry matches. |
michael@0 | 37 | * |
michael@0 | 38 | * How are those syscalls found? |
michael@0 | 39 | * 1) via strace -p <child pid> and/or |
michael@0 | 40 | * 2) with MOZ_CONTENT_SANDBOX_REPORTER set, the child will report which system call |
michael@0 | 41 | * has been denied by seccomp-bpf, just before exiting, via NSPR. |
michael@0 | 42 | * System call number to name mapping is found in: |
michael@0 | 43 | * bionic/libc/kernel/arch-arm/asm/unistd.h |
michael@0 | 44 | * or your libc's unistd.h/kernel headers. |
michael@0 | 45 | * |
michael@0 | 46 | * Current list order has been optimized through manual guesswork. |
michael@0 | 47 | * It could be further optimized by analyzing the output of: |
michael@0 | 48 | * 'strace -c -p <child pid>' for most used web apps. |
michael@0 | 49 | */ |
michael@0 | 50 | |
michael@0 | 51 | ALLOW_SYSCALL(futex), |
michael@0 | 52 | // FIXME, bug 920372: i386 multiplexes all the socket-related |
michael@0 | 53 | // interfaces into a single syscall. We should check the selector. |
michael@0 | 54 | #if SYSCALL_EXISTS(socketcall) |
michael@0 | 55 | ALLOW_SYSCALL(socketcall), |
michael@0 | 56 | #else |
michael@0 | 57 | ALLOW_SYSCALL(recvmsg), |
michael@0 | 58 | ALLOW_SYSCALL(sendmsg), |
michael@0 | 59 | #endif |
michael@0 | 60 | |
michael@0 | 61 | // mmap is a little different from most off_t users, because the offset |
michael@0 | 62 | // is passed in a register (so it's a problem for even a "new" 32-bit |
michael@0 | 63 | // arch) -- and the workaround, mmap2, passes a page offset instead. |
michael@0 | 64 | #if SYSCALL_EXISTS(mmap2) |
michael@0 | 65 | ALLOW_SYSCALL(mmap2), |
michael@0 | 66 | #else |
michael@0 | 67 | ALLOW_SYSCALL(mmap), |
michael@0 | 68 | #endif |
michael@0 | 69 | |
michael@0 | 70 | /* B2G specific high-frequency syscalls */ |
michael@0 | 71 | #ifdef MOZ_WIDGET_GONK |
michael@0 | 72 | ALLOW_SYSCALL(clock_gettime), |
michael@0 | 73 | ALLOW_SYSCALL(epoll_wait), |
michael@0 | 74 | ALLOW_SYSCALL(gettimeofday), |
michael@0 | 75 | #endif |
michael@0 | 76 | ALLOW_SYSCALL(read), |
michael@0 | 77 | ALLOW_SYSCALL(write), |
michael@0 | 78 | // 32-bit lseek is used, at least on Android, to implement ANSI fseek. |
michael@0 | 79 | #if SYSCALL_EXISTS(_llseek) |
michael@0 | 80 | ALLOW_SYSCALL(_llseek), |
michael@0 | 81 | #endif |
michael@0 | 82 | ALLOW_SYSCALL(lseek), |
michael@0 | 83 | // Android also uses 32-bit ftruncate. |
michael@0 | 84 | ALLOW_SYSCALL(ftruncate), |
michael@0 | 85 | #if SYSCALL_EXISTS(ftruncate64) |
michael@0 | 86 | ALLOW_SYSCALL(ftruncate64), |
michael@0 | 87 | #endif |
michael@0 | 88 | |
michael@0 | 89 | /* ioctl() is for GL. Remove when GL proxy is implemented. |
michael@0 | 90 | * Additionally ioctl() might be a place where we want to have |
michael@0 | 91 | * argument filtering. */ |
michael@0 | 92 | ALLOW_SYSCALL(ioctl), |
michael@0 | 93 | ALLOW_SYSCALL(close), |
michael@0 | 94 | ALLOW_SYSCALL(munmap), |
michael@0 | 95 | ALLOW_SYSCALL(mprotect), |
michael@0 | 96 | ALLOW_SYSCALL(writev), |
michael@0 | 97 | ALLOW_SYSCALL(clone), |
michael@0 | 98 | ALLOW_SYSCALL(brk), |
michael@0 | 99 | #if SYSCALL_EXISTS(set_thread_area) |
michael@0 | 100 | ALLOW_SYSCALL(set_thread_area), |
michael@0 | 101 | #endif |
michael@0 | 102 | |
michael@0 | 103 | ALLOW_SYSCALL(getpid), |
michael@0 | 104 | ALLOW_SYSCALL(gettid), |
michael@0 | 105 | ALLOW_SYSCALL(getrusage), |
michael@0 | 106 | ALLOW_SYSCALL(madvise), |
michael@0 | 107 | ALLOW_SYSCALL(dup), |
michael@0 | 108 | ALLOW_SYSCALL(nanosleep), |
michael@0 | 109 | ALLOW_SYSCALL(poll), |
michael@0 | 110 | // select()'s arguments used to be passed by pointer as a struct. |
michael@0 | 111 | #if SYSCALL_EXISTS(_newselect) |
michael@0 | 112 | ALLOW_SYSCALL(_newselect), |
michael@0 | 113 | #else |
michael@0 | 114 | ALLOW_SYSCALL(select), |
michael@0 | 115 | #endif |
michael@0 | 116 | // Some archs used to have 16-bit uid/gid instead of 32-bit. |
michael@0 | 117 | #if SYSCALL_EXISTS(getuid32) |
michael@0 | 118 | ALLOW_SYSCALL(getuid32), |
michael@0 | 119 | ALLOW_SYSCALL(geteuid32), |
michael@0 | 120 | #else |
michael@0 | 121 | ALLOW_SYSCALL(getuid), |
michael@0 | 122 | ALLOW_SYSCALL(geteuid), |
michael@0 | 123 | #endif |
michael@0 | 124 | // Some newer archs (e.g., x64 and x32) have only rt_sigreturn, but |
michael@0 | 125 | // ARM has and uses both syscalls -- rt_sigreturn for SA_SIGINFO |
michael@0 | 126 | // handlers and classic sigreturn otherwise. |
michael@0 | 127 | #if SYSCALL_EXISTS(sigreturn) |
michael@0 | 128 | ALLOW_SYSCALL(sigreturn), |
michael@0 | 129 | #endif |
michael@0 | 130 | ALLOW_SYSCALL(rt_sigreturn), |
michael@0 | 131 | ALLOW_SYSCALL_LARGEFILE(fcntl, fcntl64), |
michael@0 | 132 | |
michael@0 | 133 | /* Must remove all of the following in the future, when no longer used */ |
michael@0 | 134 | /* open() is for some legacy APIs such as font loading. */ |
michael@0 | 135 | /* See bug 906996 for removing unlink(). */ |
michael@0 | 136 | ALLOW_SYSCALL_LARGEFILE(fstat, fstat64), |
michael@0 | 137 | ALLOW_SYSCALL_LARGEFILE(stat, stat64), |
michael@0 | 138 | ALLOW_SYSCALL_LARGEFILE(lstat, lstat64), |
michael@0 | 139 | // FIXME, bug 920372: see above. |
michael@0 | 140 | #if !SYSCALL_EXISTS(socketcall) |
michael@0 | 141 | ALLOW_SYSCALL(socketpair), |
michael@0 | 142 | DENY_SYSCALL(socket, EACCES), |
michael@0 | 143 | #endif |
michael@0 | 144 | ALLOW_SYSCALL(open), |
michael@0 | 145 | ALLOW_SYSCALL(readlink), /* Workaround for bug 964455 */ |
michael@0 | 146 | ALLOW_SYSCALL(prctl), |
michael@0 | 147 | ALLOW_SYSCALL(access), |
michael@0 | 148 | ALLOW_SYSCALL(unlink), |
michael@0 | 149 | ALLOW_SYSCALL(fsync), |
michael@0 | 150 | ALLOW_SYSCALL(msync), |
michael@0 | 151 | |
michael@0 | 152 | /* Should remove all of the following in the future, if possible */ |
michael@0 | 153 | ALLOW_SYSCALL(getpriority), |
michael@0 | 154 | ALLOW_SYSCALL(sched_get_priority_min), |
michael@0 | 155 | ALLOW_SYSCALL(sched_get_priority_max), |
michael@0 | 156 | ALLOW_SYSCALL(setpriority), |
michael@0 | 157 | // rt_sigprocmask is passed the sigset_t size. On older archs, |
michael@0 | 158 | // sigprocmask is a compatibility shim that assumes the pre-RT size. |
michael@0 | 159 | #if SYSCALL_EXISTS(sigprocmask) |
michael@0 | 160 | ALLOW_SYSCALL(sigprocmask), |
michael@0 | 161 | #endif |
michael@0 | 162 | ALLOW_SYSCALL(rt_sigprocmask), |
michael@0 | 163 | |
michael@0 | 164 | /* System calls used by the profiler */ |
michael@0 | 165 | #ifdef MOZ_PROFILING |
michael@0 | 166 | ALLOW_SYSCALL(tgkill), |
michael@0 | 167 | #endif |
michael@0 | 168 | |
michael@0 | 169 | /* B2G specific low-frequency syscalls */ |
michael@0 | 170 | #ifdef MOZ_WIDGET_GONK |
michael@0 | 171 | #if !SYSCALL_EXISTS(socketcall) |
michael@0 | 172 | ALLOW_SYSCALL(sendto), |
michael@0 | 173 | ALLOW_SYSCALL(recvfrom), |
michael@0 | 174 | #endif |
michael@0 | 175 | ALLOW_SYSCALL_LARGEFILE(getdents, getdents64), |
michael@0 | 176 | ALLOW_SYSCALL(epoll_ctl), |
michael@0 | 177 | ALLOW_SYSCALL(sched_yield), |
michael@0 | 178 | ALLOW_SYSCALL(sched_getscheduler), |
michael@0 | 179 | ALLOW_SYSCALL(sched_setscheduler), |
michael@0 | 180 | ALLOW_SYSCALL(sigaltstack), |
michael@0 | 181 | #endif |
michael@0 | 182 | |
michael@0 | 183 | /* Always last and always OK calls */ |
michael@0 | 184 | /* Architecture-specific very infrequently used syscalls */ |
michael@0 | 185 | #if SYSCALL_EXISTS(sigaction) |
michael@0 | 186 | ALLOW_SYSCALL(sigaction), |
michael@0 | 187 | #endif |
michael@0 | 188 | ALLOW_SYSCALL(rt_sigaction), |
michael@0 | 189 | #ifdef ALLOW_ARM_SYSCALL |
michael@0 | 190 | ALLOW_ARM_SYSCALL(breakpoint), |
michael@0 | 191 | ALLOW_ARM_SYSCALL(cacheflush), |
michael@0 | 192 | ALLOW_ARM_SYSCALL(usr26), |
michael@0 | 193 | ALLOW_ARM_SYSCALL(usr32), |
michael@0 | 194 | ALLOW_ARM_SYSCALL(set_tls), |
michael@0 | 195 | #endif |
michael@0 | 196 | |
michael@0 | 197 | /* restart_syscall is called internally, generally when debugging */ |
michael@0 | 198 | ALLOW_SYSCALL(restart_syscall), |
michael@0 | 199 | |
michael@0 | 200 | /* Linux desktop is not as performance-critical as B2G, so desktop syscalls can go at the end. */ |
michael@0 | 201 | /* Note: poll, readlink and rt_sigprocmask below repeat entries already listed unconditionally above. */ |
michael@0 | 202 | #ifndef MOZ_WIDGET_GONK |
michael@0 | 203 | ALLOW_SYSCALL(stat), |
michael@0 | 204 | ALLOW_SYSCALL(getdents), |
michael@0 | 205 | ALLOW_SYSCALL(lstat), |
michael@0 | 206 | ALLOW_SYSCALL(mmap), |
michael@0 | 207 | ALLOW_SYSCALL(openat), |
michael@0 | 208 | ALLOW_SYSCALL(fcntl), |
michael@0 | 209 | ALLOW_SYSCALL(fstat), |
michael@0 | 210 | ALLOW_SYSCALL(readlink), |
michael@0 | 211 | ALLOW_SYSCALL(getsockname), |
michael@0 | 212 | ALLOW_SYSCALL(getuid), |
michael@0 | 213 | ALLOW_SYSCALL(geteuid), |
michael@0 | 214 | ALLOW_SYSCALL(mkdir), |
michael@0 | 215 | ALLOW_SYSCALL(getcwd), |
michael@0 | 216 | ALLOW_SYSCALL(readahead), |
michael@0 | 217 | ALLOW_SYSCALL(pread64), |
michael@0 | 218 | ALLOW_SYSCALL(statfs), |
michael@0 | 219 | ALLOW_SYSCALL(pipe), |
michael@0 | 220 | ALLOW_SYSCALL(getrlimit), |
michael@0 | 221 | ALLOW_SYSCALL(shutdown), |
michael@0 | 222 | ALLOW_SYSCALL(getpeername), |
michael@0 | 223 | ALLOW_SYSCALL(eventfd2), |
michael@0 | 224 | ALLOW_SYSCALL(clock_getres), |
michael@0 | 225 | ALLOW_SYSCALL(sysinfo), |
michael@0 | 226 | ALLOW_SYSCALL(getresuid), |
michael@0 | 227 | ALLOW_SYSCALL(umask), |
michael@0 | 228 | ALLOW_SYSCALL(getresgid), |
michael@0 | 229 | ALLOW_SYSCALL(poll), |
michael@0 | 230 | ALLOW_SYSCALL(getegid), |
michael@0 | 231 | ALLOW_SYSCALL(inotify_init1), |
michael@0 | 232 | ALLOW_SYSCALL(wait4), |
michael@0 | 233 | ALLOW_SYSCALL(shmctl), |
michael@0 | 234 | ALLOW_SYSCALL(set_robust_list), |
michael@0 | 235 | ALLOW_SYSCALL(rmdir), |
michael@0 | 236 | ALLOW_SYSCALL(recvfrom), |
michael@0 | 237 | ALLOW_SYSCALL(shmdt), |
michael@0 | 238 | ALLOW_SYSCALL(pipe2), |
michael@0 | 239 | ALLOW_SYSCALL(setsockopt), |
michael@0 | 240 | ALLOW_SYSCALL(shmat), |
michael@0 | 241 | ALLOW_SYSCALL(set_tid_address), |
michael@0 | 242 | ALLOW_SYSCALL(inotify_add_watch), |
michael@0 | 243 | ALLOW_SYSCALL(rt_sigprocmask), |
michael@0 | 244 | ALLOW_SYSCALL(shmget), |
michael@0 | 245 | ALLOW_SYSCALL(getgid), |
michael@0 | 246 | ALLOW_SYSCALL(utime), |
michael@0 | 247 | ALLOW_SYSCALL(arch_prctl), |
michael@0 | 248 | ALLOW_SYSCALL(sched_getaffinity), |
michael@0 | 249 | /* We should remove all of the following in the future (possibly even more). NOTE(review): where socketcall is absent, socket is already matched by the DENY_SYSCALL(socket, EACCES) entry earlier in this array, so the ALLOW below looks unreachable there -- confirm. */ |
michael@0 | 250 | ALLOW_SYSCALL(socket), |
michael@0 | 251 | ALLOW_SYSCALL(chmod), |
michael@0 | 252 | ALLOW_SYSCALL(execve), |
michael@0 | 253 | ALLOW_SYSCALL(rename), |
michael@0 | 254 | ALLOW_SYSCALL(symlink), |
michael@0 | 255 | ALLOW_SYSCALL(connect), |
michael@0 | 256 | ALLOW_SYSCALL(quotactl), |
michael@0 | 257 | ALLOW_SYSCALL(kill), |
michael@0 | 258 | ALLOW_SYSCALL(sendto), |
michael@0 | 259 | #endif |
michael@0 | 260 | |
michael@0 | 261 | /* nsSystemInfo uses uname (and we cache an instance, so */ |
michael@0 | 262 | /* the info remains present even if we block the syscall) */ |
michael@0 | 263 | ALLOW_SYSCALL(uname), |
michael@0 | 264 | ALLOW_SYSCALL(exit_group), |
michael@0 | 265 | ALLOW_SYSCALL(exit), |
michael@0 | 266 | |
michael@0 | 267 | #ifdef MOZ_CONTENT_SANDBOX_REPORTER |
michael@0 | 268 | TRAP_PROCESS, |
michael@0 | 269 | #else |
michael@0 | 270 | KILL_PROCESS, |
michael@0 | 271 | #endif |
michael@0 | 272 | }; |
michael@0 | 273 | /* Program descriptor handed out by GetSandboxFilter(): the entry count of seccomp_filter (narrowed to unsigned short by the cast) followed by the array itself. */ |
michael@0 | 274 | static struct sock_fprog seccomp_prog = { |
michael@0 | 275 | (unsigned short)MOZ_ARRAY_LENGTH(seccomp_filter), |
michael@0 | 276 | seccomp_filter, |
michael@0 | 277 | }; |
michael@0 | 278 | /* Returns the address of the file-static seccomp_prog; every call yields the same object, which has static storage and must not be freed by the caller. */ |
michael@0 | 279 | const sock_fprog* |
michael@0 | 280 | GetSandboxFilter() |
michael@0 | 281 | { |
michael@0 | 282 | return &seccomp_prog; |
michael@0 | 283 | } |
michael@0 | 284 | |
michael@0 | 285 | } |