|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this file, |
|
5 * You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #include "SandboxFilter.h" |
|
8 |
|
9 #include "linux_seccomp.h" |
|
10 #include "linux_syscalls.h" |
|
11 |
|
12 #include "mozilla/ArrayUtils.h" |
|
13 |
|
14 #include <errno.h> |
|
15 |
|
16 namespace mozilla { |
|
17 |
|
18 #define SYSCALL_EXISTS(name) defined(__NR_##name) |
|
19 |
|
20 static struct sock_filter seccomp_filter[] = { |
|
21 VALIDATE_ARCHITECTURE, |
|
22 EXAMINE_SYSCALL, |
|
23 |
|
24 // Some architectures went through a transition from 32-bit to |
|
25 // 64-bit off_t and had to version all the syscalls that referenced |
|
26 // it; others (newer and/or 64-bit ones) didn't. Adjust the |
|
27 // conditional as needed. |
|
28 #if SYSCALL_EXISTS(stat64) |
|
29 #define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(versioned) |
|
30 #else |
|
31 #define ALLOW_SYSCALL_LARGEFILE(plain, versioned) ALLOW_SYSCALL(plain) |
|
32 #endif |
|
33 |
|
34 /* Most used system calls should be at the top of the whitelist |
|
35 * for performance reasons. The whitelist BPF filter exits after |
|
36 * processing any ALLOW_SYSCALL macro. |
|
37 * |
|
38 * How are those syscalls found? |
|
39 * 1) via strace -p <child pid> or/and |
|
40 * 2) with MOZ_CONTENT_SANDBOX_REPORTER set, the child will report which system call |
|
41 * has been denied by seccomp-bpf, just before exiting, via NSPR. |
|
42 * System call number to name mapping is found in: |
|
43 * bionic/libc/kernel/arch-arm/asm/unistd.h |
|
44 * or your libc's unistd.h/kernel headers. |
|
45 * |
|
46 * Current list order has been optimized through manual guess-work. |
|
47 * It could be further optimized by analyzing the output of: |
|
48 * 'strace -c -p <child pid>' for most used web apps. |
|
49 */ |
|
50 |
|
51 ALLOW_SYSCALL(futex), |
|
52 // FIXME, bug 920372: i386 multiplexes all the socket-related |
|
53 // interfaces into a single syscall. We should check the selector. |
|
54 #if SYSCALL_EXISTS(socketcall) |
|
55 ALLOW_SYSCALL(socketcall), |
|
56 #else |
|
57 ALLOW_SYSCALL(recvmsg), |
|
58 ALLOW_SYSCALL(sendmsg), |
|
59 #endif |
|
60 |
|
61 // mmap2 is a little different from most off_t users, because it's |
|
62 // passed in a register (so it's a problem for even a "new" 32-bit |
|
63 // arch) -- and the workaround, mmap2, passes a page offset instead. |
|
64 #if SYSCALL_EXISTS(mmap2) |
|
65 ALLOW_SYSCALL(mmap2), |
|
66 #else |
|
67 ALLOW_SYSCALL(mmap), |
|
68 #endif |
|
69 |
|
70 /* B2G specific high-frequency syscalls */ |
|
71 #ifdef MOZ_WIDGET_GONK |
|
72 ALLOW_SYSCALL(clock_gettime), |
|
73 ALLOW_SYSCALL(epoll_wait), |
|
74 ALLOW_SYSCALL(gettimeofday), |
|
75 #endif |
|
76 ALLOW_SYSCALL(read), |
|
77 ALLOW_SYSCALL(write), |
|
78 // 32-bit lseek is used, at least on Android, to implement ANSI fseek. |
|
79 #if SYSCALL_EXISTS(_llseek) |
|
80 ALLOW_SYSCALL(_llseek), |
|
81 #endif |
|
82 ALLOW_SYSCALL(lseek), |
|
83 // Android also uses 32-bit ftruncate. |
|
84 ALLOW_SYSCALL(ftruncate), |
|
85 #if SYSCALL_EXISTS(ftruncate64) |
|
86 ALLOW_SYSCALL(ftruncate64), |
|
87 #endif |
|
88 |
|
89 /* ioctl() is for GL. Remove when GL proxy is implemented. |
|
90 * Additionally ioctl() might be a place where we want to have |
|
91 * argument filtering */ |
|
92 ALLOW_SYSCALL(ioctl), |
|
93 ALLOW_SYSCALL(close), |
|
94 ALLOW_SYSCALL(munmap), |
|
95 ALLOW_SYSCALL(mprotect), |
|
96 ALLOW_SYSCALL(writev), |
|
97 ALLOW_SYSCALL(clone), |
|
98 ALLOW_SYSCALL(brk), |
|
99 #if SYSCALL_EXISTS(set_thread_area) |
|
100 ALLOW_SYSCALL(set_thread_area), |
|
101 #endif |
|
102 |
|
103 ALLOW_SYSCALL(getpid), |
|
104 ALLOW_SYSCALL(gettid), |
|
105 ALLOW_SYSCALL(getrusage), |
|
106 ALLOW_SYSCALL(madvise), |
|
107 ALLOW_SYSCALL(dup), |
|
108 ALLOW_SYSCALL(nanosleep), |
|
109 ALLOW_SYSCALL(poll), |
|
110 // select()'s arguments used to be passed by pointer as a struct. |
|
111 #if SYSCALL_EXISTS(_newselect) |
|
112 ALLOW_SYSCALL(_newselect), |
|
113 #else |
|
114 ALLOW_SYSCALL(select), |
|
115 #endif |
|
116 // Some archs used to have 16-bit uid/gid instead of 32-bit. |
|
117 #if SYSCALL_EXISTS(getuid32) |
|
118 ALLOW_SYSCALL(getuid32), |
|
119 ALLOW_SYSCALL(geteuid32), |
|
120 #else |
|
121 ALLOW_SYSCALL(getuid), |
|
122 ALLOW_SYSCALL(geteuid), |
|
123 #endif |
|
124 // Some newer archs (e.g., x64 and x32) have only rt_sigreturn, but |
|
125 // ARM has and uses both syscalls -- rt_sigreturn for SA_SIGINFO |
|
126 // handlers and classic sigreturn otherwise. |
|
127 #if SYSCALL_EXISTS(sigreturn) |
|
128 ALLOW_SYSCALL(sigreturn), |
|
129 #endif |
|
130 ALLOW_SYSCALL(rt_sigreturn), |
|
131 ALLOW_SYSCALL_LARGEFILE(fcntl, fcntl64), |
|
132 |
|
133 /* Must remove all of the following in the future, when no longer used */ |
|
134 /* open() is for some legacy APIs such as font loading. */ |
|
135 /* See bug 906996 for removing unlink(). */ |
|
136 ALLOW_SYSCALL_LARGEFILE(fstat, fstat64), |
|
137 ALLOW_SYSCALL_LARGEFILE(stat, stat64), |
|
138 ALLOW_SYSCALL_LARGEFILE(lstat, lstat64), |
|
139 // FIXME, bug 920372: see above. |
|
140 #if !SYSCALL_EXISTS(socketcall) |
|
141 ALLOW_SYSCALL(socketpair), |
|
142 DENY_SYSCALL(socket, EACCES), |
|
143 #endif |
|
144 ALLOW_SYSCALL(open), |
|
145 ALLOW_SYSCALL(readlink), /* Workaround for bug 964455 */ |
|
146 ALLOW_SYSCALL(prctl), |
|
147 ALLOW_SYSCALL(access), |
|
148 ALLOW_SYSCALL(unlink), |
|
149 ALLOW_SYSCALL(fsync), |
|
150 ALLOW_SYSCALL(msync), |
|
151 |
|
152 /* Should remove all of the following in the future, if possible */ |
|
153 ALLOW_SYSCALL(getpriority), |
|
154 ALLOW_SYSCALL(sched_get_priority_min), |
|
155 ALLOW_SYSCALL(sched_get_priority_max), |
|
156 ALLOW_SYSCALL(setpriority), |
|
157 // rt_sigprocmask is passed the sigset_t size. On older archs, |
|
158 // sigprocmask is a compatibility shim that assumes the pre-RT size. |
|
159 #if SYSCALL_EXISTS(sigprocmask) |
|
160 ALLOW_SYSCALL(sigprocmask), |
|
161 #endif |
|
162 ALLOW_SYSCALL(rt_sigprocmask), |
|
163 |
|
164 /* System calls used by the profiler */ |
|
165 #ifdef MOZ_PROFILING |
|
166 ALLOW_SYSCALL(tgkill), |
|
167 #endif |
|
168 |
|
169 /* B2G specific low-frequency syscalls */ |
|
170 #ifdef MOZ_WIDGET_GONK |
|
171 #if !SYSCALL_EXISTS(socketcall) |
|
172 ALLOW_SYSCALL(sendto), |
|
173 ALLOW_SYSCALL(recvfrom), |
|
174 #endif |
|
175 ALLOW_SYSCALL_LARGEFILE(getdents, getdents64), |
|
176 ALLOW_SYSCALL(epoll_ctl), |
|
177 ALLOW_SYSCALL(sched_yield), |
|
178 ALLOW_SYSCALL(sched_getscheduler), |
|
179 ALLOW_SYSCALL(sched_setscheduler), |
|
180 ALLOW_SYSCALL(sigaltstack), |
|
181 #endif |
|
182 |
|
183 /* Always last and always OK calls */ |
|
184 /* Architecture-specific very infrequently used syscalls */ |
|
185 #if SYSCALL_EXISTS(sigaction) |
|
186 ALLOW_SYSCALL(sigaction), |
|
187 #endif |
|
188 ALLOW_SYSCALL(rt_sigaction), |
|
189 #ifdef ALLOW_ARM_SYSCALL |
|
190 ALLOW_ARM_SYSCALL(breakpoint), |
|
191 ALLOW_ARM_SYSCALL(cacheflush), |
|
192 ALLOW_ARM_SYSCALL(usr26), |
|
193 ALLOW_ARM_SYSCALL(usr32), |
|
194 ALLOW_ARM_SYSCALL(set_tls), |
|
195 #endif |
|
196 |
|
197 /* restart_syscall is called internally, generally when debugging */ |
|
198 ALLOW_SYSCALL(restart_syscall), |
|
199 |
|
200 /* linux desktop is not as performance critical as B2G */ |
|
201 /* we can place desktop syscalls at the end */ |
|
202 #ifndef MOZ_WIDGET_GONK |
|
203 ALLOW_SYSCALL(stat), |
|
204 ALLOW_SYSCALL(getdents), |
|
205 ALLOW_SYSCALL(lstat), |
|
206 ALLOW_SYSCALL(mmap), |
|
207 ALLOW_SYSCALL(openat), |
|
208 ALLOW_SYSCALL(fcntl), |
|
209 ALLOW_SYSCALL(fstat), |
|
210 ALLOW_SYSCALL(readlink), |
|
211 ALLOW_SYSCALL(getsockname), |
|
212 ALLOW_SYSCALL(getuid), |
|
213 ALLOW_SYSCALL(geteuid), |
|
214 ALLOW_SYSCALL(mkdir), |
|
215 ALLOW_SYSCALL(getcwd), |
|
216 ALLOW_SYSCALL(readahead), |
|
217 ALLOW_SYSCALL(pread64), |
|
218 ALLOW_SYSCALL(statfs), |
|
219 ALLOW_SYSCALL(pipe), |
|
220 ALLOW_SYSCALL(getrlimit), |
|
221 ALLOW_SYSCALL(shutdown), |
|
222 ALLOW_SYSCALL(getpeername), |
|
223 ALLOW_SYSCALL(eventfd2), |
|
224 ALLOW_SYSCALL(clock_getres), |
|
225 ALLOW_SYSCALL(sysinfo), |
|
226 ALLOW_SYSCALL(getresuid), |
|
227 ALLOW_SYSCALL(umask), |
|
228 ALLOW_SYSCALL(getresgid), |
|
229 ALLOW_SYSCALL(poll), |
|
230 ALLOW_SYSCALL(getegid), |
|
231 ALLOW_SYSCALL(inotify_init1), |
|
232 ALLOW_SYSCALL(wait4), |
|
233 ALLOW_SYSCALL(shmctl), |
|
234 ALLOW_SYSCALL(set_robust_list), |
|
235 ALLOW_SYSCALL(rmdir), |
|
236 ALLOW_SYSCALL(recvfrom), |
|
237 ALLOW_SYSCALL(shmdt), |
|
238 ALLOW_SYSCALL(pipe2), |
|
239 ALLOW_SYSCALL(setsockopt), |
|
240 ALLOW_SYSCALL(shmat), |
|
241 ALLOW_SYSCALL(set_tid_address), |
|
242 ALLOW_SYSCALL(inotify_add_watch), |
|
243 ALLOW_SYSCALL(rt_sigprocmask), |
|
244 ALLOW_SYSCALL(shmget), |
|
245 ALLOW_SYSCALL(getgid), |
|
246 ALLOW_SYSCALL(utime), |
|
247 ALLOW_SYSCALL(arch_prctl), |
|
248 ALLOW_SYSCALL(sched_getaffinity), |
|
249 /* We should remove all of the following in the future (possibly even more) */ |
|
250 ALLOW_SYSCALL(socket), |
|
251 ALLOW_SYSCALL(chmod), |
|
252 ALLOW_SYSCALL(execve), |
|
253 ALLOW_SYSCALL(rename), |
|
254 ALLOW_SYSCALL(symlink), |
|
255 ALLOW_SYSCALL(connect), |
|
256 ALLOW_SYSCALL(quotactl), |
|
257 ALLOW_SYSCALL(kill), |
|
258 ALLOW_SYSCALL(sendto), |
|
259 #endif |
|
260 |
|
261 /* nsSystemInfo uses uname (and we cache an instance, so */ |
|
262 /* the info remains present even if we block the syscall) */ |
|
263 ALLOW_SYSCALL(uname), |
|
264 ALLOW_SYSCALL(exit_group), |
|
265 ALLOW_SYSCALL(exit), |
|
266 |
|
267 #ifdef MOZ_CONTENT_SANDBOX_REPORTER |
|
268 TRAP_PROCESS, |
|
269 #else |
|
270 KILL_PROCESS, |
|
271 #endif |
|
272 }; |
|
273 |
|
274 static struct sock_fprog seccomp_prog = { |
|
275 (unsigned short)MOZ_ARRAY_LENGTH(seccomp_filter), |
|
276 seccomp_filter, |
|
277 }; |
|
278 |
|
279 const sock_fprog* |
|
280 GetSandboxFilter() |
|
281 { |
|
282 return &seccomp_prog; |
|
283 } |
|
284 |
|
285 } |