|
1 /* $OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $ */ |
|
2 |
|
3 /* |
|
4 * Copyright 2000-2007 Niels Provos <provos@citi.umich.edu> |
|
5 * Copyright 2007-2012 Niels Provos and Nick Mathewson |
|
6 * |
|
7 * Redistribution and use in source and binary forms, with or without |
|
8 * modification, are permitted provided that the following conditions |
|
9 * are met: |
|
10 * 1. Redistributions of source code must retain the above copyright |
|
11 * notice, this list of conditions and the following disclaimer. |
|
12 * 2. Redistributions in binary form must reproduce the above copyright |
|
13 * notice, this list of conditions and the following disclaimer in the |
|
14 * documentation and/or other materials provided with the distribution. |
|
15 * 3. The name of the author may not be used to endorse or promote products |
|
16 * derived from this software without specific prior written permission. |
|
17 * |
|
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
|
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
|
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
|
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
|
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
|
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
28 */ |
|
#include "event2/event-config.h"

#define _GNU_SOURCE

#include <sys/types.h>
#ifdef _EVENT_HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <sys/queue.h>
#include <sys/event.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#ifdef _EVENT_HAVE_INTTYPES_H
#include <inttypes.h>
#endif
|
48 |
|
/*
 * The udata member of struct kevent is declared as an intptr_t on some
 * platforms and as a void * on others.  There is no easy autoconf test that
 * tells the two apart, so the choice is keyed off OS macros instead.
 */
#if defined(_EVENT_HAVE_INTTYPES_H) && !defined(__OpenBSD__) && \
    !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__)
/* Integer-typed udata. */
#define PTR_TO_UDATA(x)	((intptr_t)(x))
#define INT_TO_UDATA(x)	((intptr_t)(x))
#else
/* Pointer-typed udata. */
#define PTR_TO_UDATA(x)	(x)
#define INT_TO_UDATA(x)	((void*)(x))
#endif
|
59 |
|
60 #include "event-internal.h" |
|
61 #include "log-internal.h" |
|
62 #include "evmap-internal.h" |
|
63 #include "event2/thread.h" |
|
64 #include "evthread-internal.h" |
|
65 #include "changelist-internal.h" |
|
66 |
|
/* Initial capacity, in entries, of the 'changes' and 'events' arrays. */
#define NEVENT	64

/* State kept by the kqueue backend for one event_base. */
struct kqop {
	/* Array of changes handed to kevent(); grown on demand. */
	struct kevent *changes;
	/* Number of entries allocated in 'changes'. */
	int changes_size;

	/* Array that kevent() fills with triggered events and errors. */
	struct kevent *events;
	/* Number of entries allocated in 'events'. */
	int events_size;
	/* Descriptor returned by kqueue(). */
	int kq;
	/* Process that created 'kq'; kqop_free() closes 'kq' only when the
	 * calling process still has this pid. */
	pid_t pid;
};
|
78 |
|
/* Releases a kqop together with the arrays and descriptor it holds. */
static void kqop_free(struct kqop *kqop);

/* Entry points referenced from the kqops table. */
static void *kq_init(struct event_base *base);
static int kq_dispatch(struct event_base *base, struct timeval *tv);
static void kq_dealloc(struct event_base *base);

/* Entry points referenced from the kqsigops table. */
static int kq_sig_add(struct event_base *base, int nsignal, short old,
    short events, void *p);
static int kq_sig_del(struct event_base *base, int nsignal, short old,
    short events, void *p);
|
86 |
|
87 const struct eventop kqops = { |
|
88 "kqueue", |
|
89 kq_init, |
|
90 event_changelist_add, |
|
91 event_changelist_del, |
|
92 kq_dispatch, |
|
93 kq_dealloc, |
|
94 1 /* need reinit */, |
|
95 EV_FEATURE_ET|EV_FEATURE_O1|EV_FEATURE_FDS, |
|
96 EVENT_CHANGELIST_FDINFO_SIZE |
|
97 }; |
|
98 |
|
99 static const struct eventop kqsigops = { |
|
100 "kqueue_signal", |
|
101 NULL, |
|
102 kq_sig_add, |
|
103 kq_sig_del, |
|
104 NULL, |
|
105 NULL, |
|
106 1 /* need reinit */, |
|
107 0, |
|
108 0 |
|
109 }; |
|
110 |
|
111 static void * |
|
112 kq_init(struct event_base *base) |
|
113 { |
|
114 int kq = -1; |
|
115 struct kqop *kqueueop = NULL; |
|
116 |
|
117 if (!(kqueueop = mm_calloc(1, sizeof(struct kqop)))) |
|
118 return (NULL); |
|
119 |
|
120 /* Initialize the kernel queue */ |
|
121 |
|
122 if ((kq = kqueue()) == -1) { |
|
123 event_warn("kqueue"); |
|
124 goto err; |
|
125 } |
|
126 |
|
127 kqueueop->kq = kq; |
|
128 |
|
129 kqueueop->pid = getpid(); |
|
130 |
|
131 /* Initialize fields */ |
|
132 kqueueop->changes = mm_calloc(NEVENT, sizeof(struct kevent)); |
|
133 if (kqueueop->changes == NULL) |
|
134 goto err; |
|
135 kqueueop->events = mm_calloc(NEVENT, sizeof(struct kevent)); |
|
136 if (kqueueop->events == NULL) |
|
137 goto err; |
|
138 kqueueop->events_size = kqueueop->changes_size = NEVENT; |
|
139 |
|
140 /* Check for Mac OS X kqueue bug. */ |
|
141 memset(&kqueueop->changes[0], 0, sizeof kqueueop->changes[0]); |
|
142 kqueueop->changes[0].ident = -1; |
|
143 kqueueop->changes[0].filter = EVFILT_READ; |
|
144 kqueueop->changes[0].flags = EV_ADD; |
|
145 /* |
|
146 * If kqueue works, then kevent will succeed, and it will |
|
147 * stick an error in events[0]. If kqueue is broken, then |
|
148 * kevent will fail. |
|
149 */ |
|
150 if (kevent(kq, |
|
151 kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 || |
|
152 (int)kqueueop->events[0].ident != -1 || |
|
153 kqueueop->events[0].flags != EV_ERROR) { |
|
154 event_warn("%s: detected broken kqueue; not using.", __func__); |
|
155 goto err; |
|
156 } |
|
157 |
|
158 base->evsigsel = &kqsigops; |
|
159 |
|
160 return (kqueueop); |
|
161 err: |
|
162 if (kqueueop) |
|
163 kqop_free(kqueueop); |
|
164 |
|
165 return (NULL); |
|
166 } |
|
167 |
|
168 #define ADD_UDATA 0x30303 |
|
169 |
|
170 static void |
|
171 kq_setup_kevent(struct kevent *out, evutil_socket_t fd, int filter, short change) |
|
172 { |
|
173 memset(out, 0, sizeof(struct kevent)); |
|
174 out->ident = fd; |
|
175 out->filter = filter; |
|
176 |
|
177 if (change & EV_CHANGE_ADD) { |
|
178 out->flags = EV_ADD; |
|
179 /* We set a magic number here so that we can tell 'add' |
|
180 * errors from 'del' errors. */ |
|
181 out->udata = INT_TO_UDATA(ADD_UDATA); |
|
182 if (change & EV_ET) |
|
183 out->flags |= EV_CLEAR; |
|
184 #ifdef NOTE_EOF |
|
185 /* Make it behave like select() and poll() */ |
|
186 if (filter == EVFILT_READ) |
|
187 out->fflags = NOTE_EOF; |
|
188 #endif |
|
189 } else { |
|
190 EVUTIL_ASSERT(change & EV_CHANGE_DEL); |
|
191 out->flags = EV_DELETE; |
|
192 } |
|
193 } |
|
194 |
|
195 static int |
|
196 kq_build_changes_list(const struct event_changelist *changelist, |
|
197 struct kqop *kqop) |
|
198 { |
|
199 int i; |
|
200 int n_changes = 0; |
|
201 |
|
202 for (i = 0; i < changelist->n_changes; ++i) { |
|
203 struct event_change *in_ch = &changelist->changes[i]; |
|
204 struct kevent *out_ch; |
|
205 if (n_changes >= kqop->changes_size - 1) { |
|
206 int newsize = kqop->changes_size * 2; |
|
207 struct kevent *newchanges; |
|
208 |
|
209 newchanges = mm_realloc(kqop->changes, |
|
210 newsize * sizeof(struct kevent)); |
|
211 if (newchanges == NULL) { |
|
212 event_warn("%s: realloc", __func__); |
|
213 return (-1); |
|
214 } |
|
215 kqop->changes = newchanges; |
|
216 kqop->changes_size = newsize; |
|
217 } |
|
218 if (in_ch->read_change) { |
|
219 out_ch = &kqop->changes[n_changes++]; |
|
220 kq_setup_kevent(out_ch, in_ch->fd, EVFILT_READ, |
|
221 in_ch->read_change); |
|
222 } |
|
223 if (in_ch->write_change) { |
|
224 out_ch = &kqop->changes[n_changes++]; |
|
225 kq_setup_kevent(out_ch, in_ch->fd, EVFILT_WRITE, |
|
226 in_ch->write_change); |
|
227 } |
|
228 } |
|
229 return n_changes; |
|
230 } |
|
231 |
|
232 static int |
|
233 kq_grow_events(struct kqop *kqop, size_t new_size) |
|
234 { |
|
235 struct kevent *newresult; |
|
236 |
|
237 newresult = mm_realloc(kqop->events, |
|
238 new_size * sizeof(struct kevent)); |
|
239 |
|
240 if (newresult) { |
|
241 kqop->events = newresult; |
|
242 kqop->events_size = new_size; |
|
243 return 0; |
|
244 } else { |
|
245 return -1; |
|
246 } |
|
247 } |
|
248 |
|
249 static int |
|
250 kq_dispatch(struct event_base *base, struct timeval *tv) |
|
251 { |
|
252 struct kqop *kqop = base->evbase; |
|
253 struct kevent *events = kqop->events; |
|
254 struct kevent *changes; |
|
255 struct timespec ts, *ts_p = NULL; |
|
256 int i, n_changes, res; |
|
257 |
|
258 if (tv != NULL) { |
|
259 TIMEVAL_TO_TIMESPEC(tv, &ts); |
|
260 ts_p = &ts; |
|
261 } |
|
262 |
|
263 /* Build "changes" from "base->changes" */ |
|
264 EVUTIL_ASSERT(kqop->changes); |
|
265 n_changes = kq_build_changes_list(&base->changelist, kqop); |
|
266 if (n_changes < 0) |
|
267 return -1; |
|
268 |
|
269 event_changelist_remove_all(&base->changelist, base); |
|
270 |
|
271 /* steal the changes array in case some broken code tries to call |
|
272 * dispatch twice at once. */ |
|
273 changes = kqop->changes; |
|
274 kqop->changes = NULL; |
|
275 |
|
276 /* Make sure that 'events' is at least as long as the list of changes: |
|
277 * otherwise errors in the changes can get reported as a -1 return |
|
278 * value from kevent() rather than as EV_ERROR events in the events |
|
279 * array. |
|
280 * |
|
281 * (We could instead handle -1 return values from kevent() by |
|
282 * retrying with a smaller changes array or a larger events array, |
|
283 * but this approach seems less risky for now.) |
|
284 */ |
|
285 if (kqop->events_size < n_changes) { |
|
286 int new_size = kqop->events_size; |
|
287 do { |
|
288 new_size *= 2; |
|
289 } while (new_size < n_changes); |
|
290 |
|
291 kq_grow_events(kqop, new_size); |
|
292 events = kqop->events; |
|
293 } |
|
294 |
|
295 EVBASE_RELEASE_LOCK(base, th_base_lock); |
|
296 |
|
297 res = kevent(kqop->kq, changes, n_changes, |
|
298 events, kqop->events_size, ts_p); |
|
299 |
|
300 EVBASE_ACQUIRE_LOCK(base, th_base_lock); |
|
301 |
|
302 EVUTIL_ASSERT(kqop->changes == NULL); |
|
303 kqop->changes = changes; |
|
304 |
|
305 if (res == -1) { |
|
306 if (errno != EINTR) { |
|
307 event_warn("kevent"); |
|
308 return (-1); |
|
309 } |
|
310 |
|
311 return (0); |
|
312 } |
|
313 |
|
314 event_debug(("%s: kevent reports %d", __func__, res)); |
|
315 |
|
316 for (i = 0; i < res; i++) { |
|
317 int which = 0; |
|
318 |
|
319 if (events[i].flags & EV_ERROR) { |
|
320 switch (events[i].data) { |
|
321 |
|
322 /* Can occur on delete if we are not currently |
|
323 * watching any events on this fd. That can |
|
324 * happen when the fd was closed and another |
|
325 * file was opened with that fd. */ |
|
326 case ENOENT: |
|
327 /* Can occur for reasons not fully understood |
|
328 * on FreeBSD. */ |
|
329 case EINVAL: |
|
330 continue; |
|
331 |
|
332 /* Can occur on a delete if the fd is closed. */ |
|
333 case EBADF: |
|
334 /* XXXX On NetBSD, we can also get EBADF if we |
|
335 * try to add the write side of a pipe, but |
|
336 * the read side has already been closed. |
|
337 * Other BSDs call this situation 'EPIPE'. It |
|
338 * would be good if we had a way to report |
|
339 * this situation. */ |
|
340 continue; |
|
341 /* These two can occur on an add if the fd was one side |
|
342 * of a pipe, and the other side was closed. */ |
|
343 case EPERM: |
|
344 case EPIPE: |
|
345 /* Report read events, if we're listening for |
|
346 * them, so that the user can learn about any |
|
347 * add errors. (If the operation was a |
|
348 * delete, then udata should be cleared.) */ |
|
349 if (events[i].udata) { |
|
350 /* The operation was an add: |
|
351 * report the error as a read. */ |
|
352 which |= EV_READ; |
|
353 break; |
|
354 } else { |
|
355 /* The operation was a del: |
|
356 * report nothing. */ |
|
357 continue; |
|
358 } |
|
359 |
|
360 /* Other errors shouldn't occur. */ |
|
361 default: |
|
362 errno = events[i].data; |
|
363 return (-1); |
|
364 } |
|
365 } else if (events[i].filter == EVFILT_READ) { |
|
366 which |= EV_READ; |
|
367 } else if (events[i].filter == EVFILT_WRITE) { |
|
368 which |= EV_WRITE; |
|
369 } else if (events[i].filter == EVFILT_SIGNAL) { |
|
370 which |= EV_SIGNAL; |
|
371 } |
|
372 |
|
373 if (!which) |
|
374 continue; |
|
375 |
|
376 if (events[i].filter == EVFILT_SIGNAL) { |
|
377 evmap_signal_active(base, events[i].ident, 1); |
|
378 } else { |
|
379 evmap_io_active(base, events[i].ident, which | EV_ET); |
|
380 } |
|
381 } |
|
382 |
|
383 if (res == kqop->events_size) { |
|
384 /* We used all the events space that we have. Maybe we should |
|
385 make it bigger. */ |
|
386 kq_grow_events(kqop, kqop->events_size * 2); |
|
387 } |
|
388 |
|
389 return (0); |
|
390 } |
|
391 |
|
392 static void |
|
393 kqop_free(struct kqop *kqop) |
|
394 { |
|
395 if (kqop->changes) |
|
396 mm_free(kqop->changes); |
|
397 if (kqop->events) |
|
398 mm_free(kqop->events); |
|
399 if (kqop->kq >= 0 && kqop->pid == getpid()) |
|
400 close(kqop->kq); |
|
401 memset(kqop, 0, sizeof(struct kqop)); |
|
402 mm_free(kqop); |
|
403 } |
|
404 |
|
405 static void |
|
406 kq_dealloc(struct event_base *base) |
|
407 { |
|
408 struct kqop *kqop = base->evbase; |
|
409 evsig_dealloc(base); |
|
410 kqop_free(kqop); |
|
411 } |
|
412 |
|
413 /* signal handling */ |
|
414 static int |
|
415 kq_sig_add(struct event_base *base, int nsignal, short old, short events, void *p) |
|
416 { |
|
417 struct kqop *kqop = base->evbase; |
|
418 struct kevent kev; |
|
419 struct timespec timeout = { 0, 0 }; |
|
420 (void)p; |
|
421 |
|
422 EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG); |
|
423 |
|
424 memset(&kev, 0, sizeof(kev)); |
|
425 kev.ident = nsignal; |
|
426 kev.filter = EVFILT_SIGNAL; |
|
427 kev.flags = EV_ADD; |
|
428 |
|
429 /* Be ready for the signal if it is sent any |
|
430 * time between now and the next call to |
|
431 * kq_dispatch. */ |
|
432 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) |
|
433 return (-1); |
|
434 |
|
435 /* Backported from |
|
436 * https://github.com/nmathewson/Libevent/commit/148458e0a1fd25e167aa2ef229d1c9a70b27c3e9 */ |
|
437 /* We can set the handler for most signals to SIG_IGN and |
|
438 * still have them reported to us in the queue. However, |
|
439 * if the handler for SIGCHLD is SIG_IGN, the system reaps |
|
440 * zombie processes for us, and we don't get any notification. |
|
441 * This appears to be the only signal with this quirk. */ |
|
442 if (_evsig_set_handler(base, nsignal, |
|
443 nsignal == SIGCHLD ? SIG_DFL : SIG_IGN) == -1) { |
|
444 return (-1); |
|
445 } |
|
446 |
|
447 return (0); |
|
448 } |
|
449 |
|
450 static int |
|
451 kq_sig_del(struct event_base *base, int nsignal, short old, short events, void *p) |
|
452 { |
|
453 struct kqop *kqop = base->evbase; |
|
454 struct kevent kev; |
|
455 |
|
456 struct timespec timeout = { 0, 0 }; |
|
457 (void)p; |
|
458 |
|
459 EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG); |
|
460 |
|
461 memset(&kev, 0, sizeof(kev)); |
|
462 kev.ident = nsignal; |
|
463 kev.filter = EVFILT_SIGNAL; |
|
464 kev.flags = EV_DELETE; |
|
465 |
|
466 /* Because we insert signal events |
|
467 * immediately, we need to delete them |
|
468 * immediately, too */ |
|
469 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) |
|
470 return (-1); |
|
471 |
|
472 if (_evsig_restore_handler(base, nsignal) == -1) |
|
473 return (-1); |
|
474 |
|
475 return (0); |
|
476 } |