michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set ts=8 sts=2 et sw=2 tw=80: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this file, michael@0: * You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "SandboxFilter.h" michael@0: michael@0: #include "linux_seccomp.h" michael@0: #include "linux_syscalls.h" michael@0: michael@0: #include "mozilla/ArrayUtils.h" michael@0: michael@0: #include michael@0: michael@0: namespace mozilla { michael@0: michael@0: #define SYSCALL_EXISTS(name) defined(__NR_##name) michael@0: michael@0: static struct sock_filter seccomp_filter[] = { michael@0: VALIDATE_ARCHITECTURE, michael@0: EXAMINE_SYSCALL, michael@0: michael@0: // Some architectures went through a transition from 32-bit to michael@0: // 64-bit off_t and had to version all the syscalls that referenced michael@0: // it; others (newer and/or 64-bit ones) didn't. Adjust the michael@0: // conditional as needed. michael@0: #if SYSCALL_EXISTS(stat64) michael@0: #define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(versioned) michael@0: #else michael@0: #define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(plain) michael@0: #endif michael@0: michael@0: /* Most used system calls should be at the top of the whitelist michael@0: * for performance reasons. The whitelist BPF filter exits after michael@0: * processing any ALLOW_SYSCALL macro. michael@0: * michael@0: * How are those syscalls found? michael@0: * 1) via strace -p or/and michael@0: * 2) with MOZ_CONTENT_SANDBOX_REPORTER set, the child will report which system call michael@0: * has been denied by seccomp-bpf, just before exiting, via NSPR. michael@0: * System call number to name mapping is found in: michael@0: * bionic/libc/kernel/arch-arm/asm/unistd.h michael@0: * or your libc's unistd.h/kernel headers. michael@0: * michael@0: * Current list order has been optimized through manual guess-work. michael@0: * It could be further optimized by analyzing the output of: michael@0: * 'strace -c -p ' for most used web apps. michael@0: */ michael@0: michael@0: ALLOW_SYSCALL(futex), michael@0: // FIXME, bug 920372: i386 multiplexes all the socket-related michael@0: // interfaces into a single syscall. We should check the selector. michael@0: #if SYSCALL_EXISTS(socketcall) michael@0: ALLOW_SYSCALL(socketcall), michael@0: #else michael@0: ALLOW_SYSCALL(recvmsg), michael@0: ALLOW_SYSCALL(sendmsg), michael@0: #endif michael@0: michael@0: // mmap2 is a little different from most off_t users, because it's michael@0: // passed in a register (so it's a problem for even a "new" 32-bit michael@0: // arch) -- and the workaround, mmap2, passes a page offset instead. michael@0: #if SYSCALL_EXISTS(mmap2) michael@0: ALLOW_SYSCALL(mmap2), michael@0: #else michael@0: ALLOW_SYSCALL(mmap), michael@0: #endif michael@0: michael@0: /* B2G specific high-frequency syscalls */ michael@0: #ifdef MOZ_WIDGET_GONK michael@0: ALLOW_SYSCALL(clock_gettime), michael@0: ALLOW_SYSCALL(epoll_wait), michael@0: ALLOW_SYSCALL(gettimeofday), michael@0: #endif michael@0: ALLOW_SYSCALL(read), michael@0: ALLOW_SYSCALL(write), michael@0: // 32-bit lseek is used, at least on Android, to implement ANSI fseek. michael@0: #if SYSCALL_EXISTS(_llseek) michael@0: ALLOW_SYSCALL(_llseek), michael@0: #endif michael@0: ALLOW_SYSCALL(lseek), michael@0: // Android also uses 32-bit ftruncate. michael@0: ALLOW_SYSCALL(ftruncate), michael@0: #if SYSCALL_EXISTS(ftruncate64) michael@0: ALLOW_SYSCALL(ftruncate64), michael@0: #endif michael@0: michael@0: /* ioctl() is for GL. Remove when GL proxy is implemented. michael@0: * Additionally ioctl() might be a place where we want to have michael@0: * argument filtering */ michael@0: ALLOW_SYSCALL(ioctl), michael@0: ALLOW_SYSCALL(close), michael@0: ALLOW_SYSCALL(munmap), michael@0: ALLOW_SYSCALL(mprotect), michael@0: ALLOW_SYSCALL(writev), michael@0: ALLOW_SYSCALL(clone), michael@0: ALLOW_SYSCALL(brk), michael@0: #if SYSCALL_EXISTS(set_thread_area) michael@0: ALLOW_SYSCALL(set_thread_area), michael@0: #endif michael@0: michael@0: ALLOW_SYSCALL(getpid), michael@0: ALLOW_SYSCALL(gettid), michael@0: ALLOW_SYSCALL(getrusage), michael@0: ALLOW_SYSCALL(madvise), michael@0: ALLOW_SYSCALL(dup), michael@0: ALLOW_SYSCALL(nanosleep), michael@0: ALLOW_SYSCALL(poll), michael@0: // select()'s arguments used to be passed by pointer as a struct. michael@0: #if SYSCALL_EXISTS(_newselect) michael@0: ALLOW_SYSCALL(_newselect), michael@0: #else michael@0: ALLOW_SYSCALL(select), michael@0: #endif michael@0: // Some archs used to have 16-bit uid/gid instead of 32-bit. michael@0: #if SYSCALL_EXISTS(getuid32) michael@0: ALLOW_SYSCALL(getuid32), michael@0: ALLOW_SYSCALL(geteuid32), michael@0: #else michael@0: ALLOW_SYSCALL(getuid), michael@0: ALLOW_SYSCALL(geteuid), michael@0: #endif michael@0: // Some newer archs (e.g., x64 and x32) have only rt_sigreturn, but michael@0: // ARM has and uses both syscalls -- rt_sigreturn for SA_SIGINFO michael@0: // handlers and classic sigreturn otherwise. michael@0: #if SYSCALL_EXISTS(sigreturn) michael@0: ALLOW_SYSCALL(sigreturn), michael@0: #endif michael@0: ALLOW_SYSCALL(rt_sigreturn), michael@0: ALLOW_SYSCALL_LARGEFILE(fcntl, fcntl64), michael@0: michael@0: /* Must remove all of the following in the future, when no longer used */ michael@0: /* open() is for some legacy APIs such as font loading. */ michael@0: /* See bug 906996 for removing unlink(). */ michael@0: ALLOW_SYSCALL_LARGEFILE(fstat, fstat64), michael@0: ALLOW_SYSCALL_LARGEFILE(stat, stat64), michael@0: ALLOW_SYSCALL_LARGEFILE(lstat, lstat64), michael@0: // FIXME, bug 920372: see above. michael@0: #if !SYSCALL_EXISTS(socketcall) michael@0: ALLOW_SYSCALL(socketpair), michael@0: DENY_SYSCALL(socket, EACCES), michael@0: #endif michael@0: ALLOW_SYSCALL(open), michael@0: ALLOW_SYSCALL(readlink), /* Workaround for bug 964455 */ michael@0: ALLOW_SYSCALL(prctl), michael@0: ALLOW_SYSCALL(access), michael@0: ALLOW_SYSCALL(unlink), michael@0: ALLOW_SYSCALL(fsync), michael@0: ALLOW_SYSCALL(msync), michael@0: michael@0: /* Should remove all of the following in the future, if possible */ michael@0: ALLOW_SYSCALL(getpriority), michael@0: ALLOW_SYSCALL(sched_get_priority_min), michael@0: ALLOW_SYSCALL(sched_get_priority_max), michael@0: ALLOW_SYSCALL(setpriority), michael@0: // rt_sigprocmask is passed the sigset_t size. On older archs, michael@0: // sigprocmask is a compatibility shim that assumes the pre-RT size. michael@0: #if SYSCALL_EXISTS(sigprocmask) michael@0: ALLOW_SYSCALL(sigprocmask), michael@0: #endif michael@0: ALLOW_SYSCALL(rt_sigprocmask), michael@0: michael@0: /* System calls used by the profiler */ michael@0: #ifdef MOZ_PROFILING michael@0: ALLOW_SYSCALL(tgkill), michael@0: #endif michael@0: michael@0: /* B2G specific low-frequency syscalls */ michael@0: #ifdef MOZ_WIDGET_GONK michael@0: #if !SYSCALL_EXISTS(socketcall) michael@0: ALLOW_SYSCALL(sendto), michael@0: ALLOW_SYSCALL(recvfrom), michael@0: #endif michael@0: ALLOW_SYSCALL_LARGEFILE(getdents, getdents64), michael@0: ALLOW_SYSCALL(epoll_ctl), michael@0: ALLOW_SYSCALL(sched_yield), michael@0: ALLOW_SYSCALL(sched_getscheduler), michael@0: ALLOW_SYSCALL(sched_setscheduler), michael@0: ALLOW_SYSCALL(sigaltstack), michael@0: #endif michael@0: michael@0: /* Always last and always OK calls */ michael@0: /* Architecture-specific very infrequently used syscalls */ michael@0: #if SYSCALL_EXISTS(sigaction) michael@0: ALLOW_SYSCALL(sigaction), michael@0: #endif michael@0: ALLOW_SYSCALL(rt_sigaction), michael@0: #ifdef ALLOW_ARM_SYSCALL michael@0: ALLOW_ARM_SYSCALL(breakpoint), michael@0: ALLOW_ARM_SYSCALL(cacheflush), michael@0: ALLOW_ARM_SYSCALL(usr26), michael@0: ALLOW_ARM_SYSCALL(usr32), michael@0: ALLOW_ARM_SYSCALL(set_tls), michael@0: #endif michael@0: michael@0: /* restart_syscall is called internally, generally when debugging */ michael@0: ALLOW_SYSCALL(restart_syscall), michael@0: michael@0: /* linux desktop is not as performance critical as B2G */ michael@0: /* we can place desktop syscalls at the end */ michael@0: #ifndef MOZ_WIDGET_GONK michael@0: ALLOW_SYSCALL(stat), michael@0: ALLOW_SYSCALL(getdents), michael@0: ALLOW_SYSCALL(lstat), michael@0: ALLOW_SYSCALL(mmap), michael@0: ALLOW_SYSCALL(openat), michael@0: ALLOW_SYSCALL(fcntl), michael@0: ALLOW_SYSCALL(fstat), michael@0: ALLOW_SYSCALL(readlink), michael@0: ALLOW_SYSCALL(getsockname), michael@0: ALLOW_SYSCALL(getuid), michael@0: ALLOW_SYSCALL(geteuid), michael@0: ALLOW_SYSCALL(mkdir), michael@0: ALLOW_SYSCALL(getcwd), michael@0: ALLOW_SYSCALL(readahead), michael@0: ALLOW_SYSCALL(pread64), michael@0: ALLOW_SYSCALL(statfs), michael@0: ALLOW_SYSCALL(pipe), michael@0: ALLOW_SYSCALL(getrlimit), michael@0: ALLOW_SYSCALL(shutdown), michael@0: ALLOW_SYSCALL(getpeername), michael@0: ALLOW_SYSCALL(eventfd2), michael@0: ALLOW_SYSCALL(clock_getres), michael@0: ALLOW_SYSCALL(sysinfo), michael@0: ALLOW_SYSCALL(getresuid), michael@0: ALLOW_SYSCALL(umask), michael@0: ALLOW_SYSCALL(getresgid), michael@0: ALLOW_SYSCALL(poll), michael@0: ALLOW_SYSCALL(getegid), michael@0: ALLOW_SYSCALL(inotify_init1), michael@0: ALLOW_SYSCALL(wait4), michael@0: ALLOW_SYSCALL(shmctl), michael@0: ALLOW_SYSCALL(set_robust_list), michael@0: ALLOW_SYSCALL(rmdir), michael@0: ALLOW_SYSCALL(recvfrom), michael@0: ALLOW_SYSCALL(shmdt), michael@0: ALLOW_SYSCALL(pipe2), michael@0: ALLOW_SYSCALL(setsockopt), michael@0: ALLOW_SYSCALL(shmat), michael@0: ALLOW_SYSCALL(set_tid_address), michael@0: ALLOW_SYSCALL(inotify_add_watch), michael@0: ALLOW_SYSCALL(rt_sigprocmask), michael@0: ALLOW_SYSCALL(shmget), michael@0: ALLOW_SYSCALL(getgid), michael@0: ALLOW_SYSCALL(utime), michael@0: ALLOW_SYSCALL(arch_prctl), michael@0: ALLOW_SYSCALL(sched_getaffinity), michael@0: /* We should remove all of the following in the future (possibly even more) */ michael@0: ALLOW_SYSCALL(socket), michael@0: ALLOW_SYSCALL(chmod), michael@0: ALLOW_SYSCALL(execve), michael@0: ALLOW_SYSCALL(rename), michael@0: ALLOW_SYSCALL(symlink), michael@0: ALLOW_SYSCALL(connect), michael@0: ALLOW_SYSCALL(quotactl), michael@0: ALLOW_SYSCALL(kill), michael@0: ALLOW_SYSCALL(sendto), michael@0: #endif michael@0: michael@0: /* nsSystemInfo uses uname (and we cache an instance, so */ michael@0: /* the info remains present even if we block the syscall) */ michael@0: ALLOW_SYSCALL(uname), michael@0: ALLOW_SYSCALL(exit_group), michael@0: ALLOW_SYSCALL(exit), michael@0: michael@0: #ifdef MOZ_CONTENT_SANDBOX_REPORTER michael@0: TRAP_PROCESS, michael@0: #else michael@0: KILL_PROCESS, michael@0: #endif michael@0: }; michael@0: michael@0: static struct sock_fprog seccomp_prog = { michael@0: (unsigned short)MOZ_ARRAY_LENGTH(seccomp_filter), michael@0: seccomp_filter, michael@0: }; michael@0: michael@0: const sock_fprog* michael@0: GetSandboxFilter() michael@0: { michael@0: return &seccomp_prog; michael@0: } michael@0: michael@0: }