1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/security/sandbox/linux/SandboxFilter.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,285 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this file, 1.8 + * You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#include "SandboxFilter.h" 1.11 + 1.12 +#include "linux_seccomp.h" 1.13 +#include "linux_syscalls.h" 1.14 + 1.15 +#include "mozilla/ArrayUtils.h" 1.16 + 1.17 +#include <errno.h> 1.18 + 1.19 +namespace mozilla { 1.20 + 1.21 +#define SYSCALL_EXISTS(name) defined(__NR_##name) 1.22 + 1.23 +static struct sock_filter seccomp_filter[] = { 1.24 + VALIDATE_ARCHITECTURE, 1.25 + EXAMINE_SYSCALL, 1.26 + 1.27 + // Some architectures went through a transition from 32-bit to 1.28 + // 64-bit off_t and had to version all the syscalls that referenced 1.29 + // it; others (newer and/or 64-bit ones) didn't. Adjust the 1.30 + // conditional as needed. 1.31 +#if SYSCALL_EXISTS(stat64) 1.32 +#define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(versioned) 1.33 +#else 1.34 +#define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(plain) 1.35 +#endif 1.36 + 1.37 + /* Most used system calls should be at the top of the whitelist 1.38 + * for performance reasons. The whitelist BPF filter exits after 1.39 + * processing any ALLOW_SYSCALL macro. 1.40 + * 1.41 + * How are those syscalls found? 1.42 + * 1) via strace -p <child pid> or/and 1.43 + * 2) with MOZ_CONTENT_SANDBOX_REPORTER set, the child will report which system call 1.44 + * has been denied by seccomp-bpf, just before exiting, via NSPR. 1.45 + * System call number to name mapping is found in: 1.46 + * bionic/libc/kernel/arch-arm/asm/unistd.h 1.47 + * or your libc's unistd.h/kernel headers. 1.48 + * 1.49 + * Current list order has been optimized through manual guess-work. 1.50 + * It could be further optimized by analyzing the output of: 1.51 + * 'strace -c -p <child pid>' for most used web apps. 1.52 + */ 1.53 + 1.54 + ALLOW_SYSCALL(futex), 1.55 + // FIXME, bug 920372: i386 multiplexes all the socket-related 1.56 + // interfaces into a single syscall. We should check the selector. 1.57 +#if SYSCALL_EXISTS(socketcall) 1.58 + ALLOW_SYSCALL(socketcall), 1.59 +#else 1.60 + ALLOW_SYSCALL(recvmsg), 1.61 + ALLOW_SYSCALL(sendmsg), 1.62 +#endif 1.63 + 1.64 + // mmap2 is a little different from most off_t users, because it's 1.65 + // passed in a register (so it's a problem for even a "new" 32-bit 1.66 + // arch) -- and the workaround, mmap2, passes a page offset instead. 1.67 +#if SYSCALL_EXISTS(mmap2) 1.68 + ALLOW_SYSCALL(mmap2), 1.69 +#else 1.70 + ALLOW_SYSCALL(mmap), 1.71 +#endif 1.72 + 1.73 + /* B2G specific high-frequency syscalls */ 1.74 +#ifdef MOZ_WIDGET_GONK 1.75 + ALLOW_SYSCALL(clock_gettime), 1.76 + ALLOW_SYSCALL(epoll_wait), 1.77 + ALLOW_SYSCALL(gettimeofday), 1.78 +#endif 1.79 + ALLOW_SYSCALL(read), 1.80 + ALLOW_SYSCALL(write), 1.81 + // 32-bit lseek is used, at least on Android, to implement ANSI fseek. 1.82 +#if SYSCALL_EXISTS(_llseek) 1.83 + ALLOW_SYSCALL(_llseek), 1.84 +#endif 1.85 + ALLOW_SYSCALL(lseek), 1.86 + // Android also uses 32-bit ftruncate. 1.87 + ALLOW_SYSCALL(ftruncate), 1.88 +#if SYSCALL_EXISTS(ftruncate64) 1.89 + ALLOW_SYSCALL(ftruncate64), 1.90 +#endif 1.91 + 1.92 + /* ioctl() is for GL. Remove when GL proxy is implemented. 1.93 + * Additionally ioctl() might be a place where we want to have 1.94 + * argument filtering */ 1.95 + ALLOW_SYSCALL(ioctl), 1.96 + ALLOW_SYSCALL(close), 1.97 + ALLOW_SYSCALL(munmap), 1.98 + ALLOW_SYSCALL(mprotect), 1.99 + ALLOW_SYSCALL(writev), 1.100 + ALLOW_SYSCALL(clone), 1.101 + ALLOW_SYSCALL(brk), 1.102 +#if SYSCALL_EXISTS(set_thread_area) 1.103 + ALLOW_SYSCALL(set_thread_area), 1.104 +#endif 1.105 + 1.106 + ALLOW_SYSCALL(getpid), 1.107 + ALLOW_SYSCALL(gettid), 1.108 + ALLOW_SYSCALL(getrusage), 1.109 + ALLOW_SYSCALL(madvise), 1.110 + ALLOW_SYSCALL(dup), 1.111 + ALLOW_SYSCALL(nanosleep), 1.112 + ALLOW_SYSCALL(poll), 1.113 + // select()'s arguments used to be passed by pointer as a struct. 1.114 +#if SYSCALL_EXISTS(_newselect) 1.115 + ALLOW_SYSCALL(_newselect), 1.116 +#else 1.117 + ALLOW_SYSCALL(select), 1.118 +#endif 1.119 + // Some archs used to have 16-bit uid/gid instead of 32-bit. 1.120 +#if SYSCALL_EXISTS(getuid32) 1.121 + ALLOW_SYSCALL(getuid32), 1.122 + ALLOW_SYSCALL(geteuid32), 1.123 +#else 1.124 + ALLOW_SYSCALL(getuid), 1.125 + ALLOW_SYSCALL(geteuid), 1.126 +#endif 1.127 + // Some newer archs (e.g., x64 and x32) have only rt_sigreturn, but 1.128 + // ARM has and uses both syscalls -- rt_sigreturn for SA_SIGINFO 1.129 + // handlers and classic sigreturn otherwise. 1.130 +#if SYSCALL_EXISTS(sigreturn) 1.131 + ALLOW_SYSCALL(sigreturn), 1.132 +#endif 1.133 + ALLOW_SYSCALL(rt_sigreturn), 1.134 + ALLOW_SYSCALL_LARGEFILE(fcntl, fcntl64), 1.135 + 1.136 + /* Must remove all of the following in the future, when no longer used */ 1.137 + /* open() is for some legacy APIs such as font loading. */ 1.138 + /* See bug 906996 for removing unlink(). */ 1.139 + ALLOW_SYSCALL_LARGEFILE(fstat, fstat64), 1.140 + ALLOW_SYSCALL_LARGEFILE(stat, stat64), 1.141 + ALLOW_SYSCALL_LARGEFILE(lstat, lstat64), 1.142 + // FIXME, bug 920372: see above. 1.143 +#if !SYSCALL_EXISTS(socketcall) 1.144 + ALLOW_SYSCALL(socketpair), 1.145 + DENY_SYSCALL(socket, EACCES), 1.146 +#endif 1.147 + ALLOW_SYSCALL(open), 1.148 + ALLOW_SYSCALL(readlink), /* Workaround for bug 964455 */ 1.149 + ALLOW_SYSCALL(prctl), 1.150 + ALLOW_SYSCALL(access), 1.151 + ALLOW_SYSCALL(unlink), 1.152 + ALLOW_SYSCALL(fsync), 1.153 + ALLOW_SYSCALL(msync), 1.154 + 1.155 + /* Should remove all of the following in the future, if possible */ 1.156 + ALLOW_SYSCALL(getpriority), 1.157 + ALLOW_SYSCALL(sched_get_priority_min), 1.158 + ALLOW_SYSCALL(sched_get_priority_max), 1.159 + ALLOW_SYSCALL(setpriority), 1.160 + // rt_sigprocmask is passed the sigset_t size. On older archs, 1.161 + // sigprocmask is a compatibility shim that assumes the pre-RT size. 1.162 +#if SYSCALL_EXISTS(sigprocmask) 1.163 + ALLOW_SYSCALL(sigprocmask), 1.164 +#endif 1.165 + ALLOW_SYSCALL(rt_sigprocmask), 1.166 + 1.167 + /* System calls used by the profiler */ 1.168 +#ifdef MOZ_PROFILING 1.169 + ALLOW_SYSCALL(tgkill), 1.170 +#endif 1.171 + 1.172 + /* B2G specific low-frequency syscalls */ 1.173 +#ifdef MOZ_WIDGET_GONK 1.174 +#if !SYSCALL_EXISTS(socketcall) 1.175 + ALLOW_SYSCALL(sendto), 1.176 + ALLOW_SYSCALL(recvfrom), 1.177 +#endif 1.178 + ALLOW_SYSCALL_LARGEFILE(getdents, getdents64), 1.179 + ALLOW_SYSCALL(epoll_ctl), 1.180 + ALLOW_SYSCALL(sched_yield), 1.181 + ALLOW_SYSCALL(sched_getscheduler), 1.182 + ALLOW_SYSCALL(sched_setscheduler), 1.183 + ALLOW_SYSCALL(sigaltstack), 1.184 +#endif 1.185 + 1.186 + /* Always last and always OK calls */ 1.187 + /* Architecture-specific very infrequently used syscalls */ 1.188 +#if SYSCALL_EXISTS(sigaction) 1.189 + ALLOW_SYSCALL(sigaction), 1.190 +#endif 1.191 + ALLOW_SYSCALL(rt_sigaction), 1.192 +#ifdef ALLOW_ARM_SYSCALL 1.193 + ALLOW_ARM_SYSCALL(breakpoint), 1.194 + ALLOW_ARM_SYSCALL(cacheflush), 1.195 + ALLOW_ARM_SYSCALL(usr26), 1.196 + ALLOW_ARM_SYSCALL(usr32), 1.197 + ALLOW_ARM_SYSCALL(set_tls), 1.198 +#endif 1.199 + 1.200 + /* restart_syscall is called internally, generally when debugging */ 1.201 + ALLOW_SYSCALL(restart_syscall), 1.202 + 1.203 + /* linux desktop is not as performance critical as B2G */ 1.204 + /* we can place desktop syscalls at the end */ 1.205 +#ifndef MOZ_WIDGET_GONK 1.206 + ALLOW_SYSCALL(stat), 1.207 + ALLOW_SYSCALL(getdents), 1.208 + ALLOW_SYSCALL(lstat), 1.209 + ALLOW_SYSCALL(mmap), 1.210 + ALLOW_SYSCALL(openat), 1.211 + ALLOW_SYSCALL(fcntl), 1.212 + ALLOW_SYSCALL(fstat), 1.213 + ALLOW_SYSCALL(readlink), 1.214 + ALLOW_SYSCALL(getsockname), 1.215 + ALLOW_SYSCALL(getuid), 1.216 + ALLOW_SYSCALL(geteuid), 1.217 + ALLOW_SYSCALL(mkdir), 1.218 + ALLOW_SYSCALL(getcwd), 1.219 + ALLOW_SYSCALL(readahead), 1.220 + ALLOW_SYSCALL(pread64), 1.221 + ALLOW_SYSCALL(statfs), 1.222 + ALLOW_SYSCALL(pipe), 1.223 + ALLOW_SYSCALL(getrlimit), 1.224 + ALLOW_SYSCALL(shutdown), 1.225 + ALLOW_SYSCALL(getpeername), 1.226 + ALLOW_SYSCALL(eventfd2), 1.227 + ALLOW_SYSCALL(clock_getres), 1.228 + ALLOW_SYSCALL(sysinfo), 1.229 + ALLOW_SYSCALL(getresuid), 1.230 + ALLOW_SYSCALL(umask), 1.231 + ALLOW_SYSCALL(getresgid), 1.232 + ALLOW_SYSCALL(poll), 1.233 + ALLOW_SYSCALL(getegid), 1.234 + ALLOW_SYSCALL(inotify_init1), 1.235 + ALLOW_SYSCALL(wait4), 1.236 + ALLOW_SYSCALL(shmctl), 1.237 + ALLOW_SYSCALL(set_robust_list), 1.238 + ALLOW_SYSCALL(rmdir), 1.239 + ALLOW_SYSCALL(recvfrom), 1.240 + ALLOW_SYSCALL(shmdt), 1.241 + ALLOW_SYSCALL(pipe2), 1.242 + ALLOW_SYSCALL(setsockopt), 1.243 + ALLOW_SYSCALL(shmat), 1.244 + ALLOW_SYSCALL(set_tid_address), 1.245 + ALLOW_SYSCALL(inotify_add_watch), 1.246 + ALLOW_SYSCALL(rt_sigprocmask), 1.247 + ALLOW_SYSCALL(shmget), 1.248 + ALLOW_SYSCALL(getgid), 1.249 + ALLOW_SYSCALL(utime), 1.250 + ALLOW_SYSCALL(arch_prctl), 1.251 + ALLOW_SYSCALL(sched_getaffinity), 1.252 + /* We should remove all of the following in the future (possibly even more) */ 1.253 + ALLOW_SYSCALL(socket), 1.254 + ALLOW_SYSCALL(chmod), 1.255 + ALLOW_SYSCALL(execve), 1.256 + ALLOW_SYSCALL(rename), 1.257 + ALLOW_SYSCALL(symlink), 1.258 + ALLOW_SYSCALL(connect), 1.259 + ALLOW_SYSCALL(quotactl), 1.260 + ALLOW_SYSCALL(kill), 1.261 + ALLOW_SYSCALL(sendto), 1.262 +#endif 1.263 + 1.264 + /* nsSystemInfo uses uname (and we cache an instance, so */ 1.265 + /* the info remains present even if we block the syscall) */ 1.266 + ALLOW_SYSCALL(uname), 1.267 + ALLOW_SYSCALL(exit_group), 1.268 + ALLOW_SYSCALL(exit), 1.269 + 1.270 +#ifdef MOZ_CONTENT_SANDBOX_REPORTER 1.271 + TRAP_PROCESS, 1.272 +#else 1.273 + KILL_PROCESS, 1.274 +#endif 1.275 +}; 1.276 + 1.277 +static struct sock_fprog seccomp_prog = { 1.278 + (unsigned short)MOZ_ARRAY_LENGTH(seccomp_filter), 1.279 + seccomp_filter, 1.280 +}; 1.281 + 1.282 +const sock_fprog* 1.283 +GetSandboxFilter() 1.284 +{ 1.285 + return &seccomp_prog; 1.286 +} 1.287 + 1.288 +}