ipc/chromium/src/third_party/libevent/epoll.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

michael@0 1 /*
michael@0 2 * Copyright 2000-2007 Niels Provos <provos@citi.umich.edu>
michael@0 3 * Copyright 2007-2012 Niels Provos, Nick Mathewson
michael@0 4 *
michael@0 5 * Redistribution and use in source and binary forms, with or without
michael@0 6 * modification, are permitted provided that the following conditions
michael@0 7 * are met:
michael@0 8 * 1. Redistributions of source code must retain the above copyright
michael@0 9 * notice, this list of conditions and the following disclaimer.
michael@0 10 * 2. Redistributions in binary form must reproduce the above copyright
michael@0 11 * notice, this list of conditions and the following disclaimer in the
michael@0 12 * documentation and/or other materials provided with the distribution.
michael@0 13 * 3. The name of the author may not be used to endorse or promote products
michael@0 14 * derived from this software without specific prior written permission.
michael@0 15 *
michael@0 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
michael@0 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
michael@0 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
michael@0 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
michael@0 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
michael@0 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
michael@0 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
michael@0 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
michael@0 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0 26 */
michael@0 27 #include "event2/event-config.h"
michael@0 28
michael@0 29 #include <stdint.h>
michael@0 30 #include <sys/types.h>
michael@0 31 #include <sys/resource.h>
michael@0 32 #ifdef _EVENT_HAVE_SYS_TIME_H
michael@0 33 #include <sys/time.h>
michael@0 34 #endif
michael@0 35 #include <sys/queue.h>
michael@0 36 #include <sys/epoll.h>
michael@0 37 #include <signal.h>
michael@0 38 #include <limits.h>
michael@0 39 #include <stdio.h>
michael@0 40 #include <stdlib.h>
michael@0 41 #include <string.h>
michael@0 42 #include <unistd.h>
michael@0 43 #include <errno.h>
michael@0 44 #ifdef _EVENT_HAVE_FCNTL_H
michael@0 45 #include <fcntl.h>
michael@0 46 #endif
michael@0 47
michael@0 48 #include "event-internal.h"
michael@0 49 #include "evsignal-internal.h"
michael@0 50 #include "event2/thread.h"
michael@0 51 #include "evthread-internal.h"
michael@0 52 #include "log-internal.h"
michael@0 53 #include "evmap-internal.h"
michael@0 54 #include "changelist-internal.h"
michael@0 55
/* Per-event_base state owned by the epoll backend. */
struct epollop {
	/* Buffer handed to epoll_wait() to receive ready events. */
	struct epoll_event *events;
	/* Number of entries that `events` can hold. */
	int nevents;
	/* Descriptor returned by epoll_create(). */
	int epfd;
};
michael@0 61
/* Backend entry points shared by both eventop tables defined in this file. */
static void *epoll_init(struct event_base *base);
static int epoll_dispatch(struct event_base *base, struct timeval *tv);
static void epoll_dealloc(struct event_base *base);
michael@0 65
michael@0 66 static const struct eventop epollops_changelist = {
michael@0 67 "epoll (with changelist)",
michael@0 68 epoll_init,
michael@0 69 event_changelist_add,
michael@0 70 event_changelist_del,
michael@0 71 epoll_dispatch,
michael@0 72 epoll_dealloc,
michael@0 73 1, /* need reinit */
michael@0 74 EV_FEATURE_ET|EV_FEATURE_O1,
michael@0 75 EVENT_CHANGELIST_FDINFO_SIZE
michael@0 76 };
michael@0 77
michael@0 78
michael@0 79 static int epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
michael@0 80 short old, short events, void *p);
michael@0 81 static int epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
michael@0 82 short old, short events, void *p);
michael@0 83
michael@0 84 const struct eventop epollops = {
michael@0 85 "epoll",
michael@0 86 epoll_init,
michael@0 87 epoll_nochangelist_add,
michael@0 88 epoll_nochangelist_del,
michael@0 89 epoll_dispatch,
michael@0 90 epoll_dealloc,
michael@0 91 1, /* need reinit */
michael@0 92 EV_FEATURE_ET|EV_FEATURE_O1,
michael@0 93 0
michael@0 94 };
michael@0 95
/* Number of epoll_event slots allocated by epoll_init(). */
#define INITIAL_NEVENT 32
/* epoll_dispatch() stops doubling the event buffer once it reaches this. */
#define MAX_NEVENT 4096

/*
 * Upper bound, in milliseconds, on the timeout handed to epoll_wait().
 *
 * Linux kernels at least up to 2.6.24.4 mishandle epoll timeouts larger
 * than (LONG_MAX - 999ULL)/HZ.  HZ can be as large as 1000 in the wild
 * and LONG_MAX can be as small as (1<<31)-1, which makes 2147482 msec
 * the largest value that is safe everywhere.  That figure is rounded
 * down by 47 seconds to give an even 35 minutes.
 */
#define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
michael@0 106
michael@0 107 static void *
michael@0 108 epoll_init(struct event_base *base)
michael@0 109 {
michael@0 110 int epfd;
michael@0 111 struct epollop *epollop;
michael@0 112
michael@0 113 /* Initialize the kernel queue. (The size field is ignored since
michael@0 114 * 2.6.8.) */
michael@0 115 if ((epfd = epoll_create(32000)) == -1) {
michael@0 116 if (errno != ENOSYS)
michael@0 117 event_warn("epoll_create");
michael@0 118 return (NULL);
michael@0 119 }
michael@0 120
michael@0 121 evutil_make_socket_closeonexec(epfd);
michael@0 122
michael@0 123 if (!(epollop = mm_calloc(1, sizeof(struct epollop)))) {
michael@0 124 close(epfd);
michael@0 125 return (NULL);
michael@0 126 }
michael@0 127
michael@0 128 epollop->epfd = epfd;
michael@0 129
michael@0 130 /* Initialize fields */
michael@0 131 epollop->events = mm_calloc(INITIAL_NEVENT, sizeof(struct epoll_event));
michael@0 132 if (epollop->events == NULL) {
michael@0 133 mm_free(epollop);
michael@0 134 close(epfd);
michael@0 135 return (NULL);
michael@0 136 }
michael@0 137 epollop->nevents = INITIAL_NEVENT;
michael@0 138
michael@0 139 if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0 ||
michael@0 140 ((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
michael@0 141 evutil_getenv("EVENT_EPOLL_USE_CHANGELIST") != NULL))
michael@0 142 base->evsel = &epollops_changelist;
michael@0 143
michael@0 144 evsig_init(base);
michael@0 145
michael@0 146 return (epollop);
michael@0 147 }
michael@0 148
michael@0 149 static const char *
michael@0 150 change_to_string(int change)
michael@0 151 {
michael@0 152 change &= (EV_CHANGE_ADD|EV_CHANGE_DEL);
michael@0 153 if (change == EV_CHANGE_ADD) {
michael@0 154 return "add";
michael@0 155 } else if (change == EV_CHANGE_DEL) {
michael@0 156 return "del";
michael@0 157 } else if (change == 0) {
michael@0 158 return "none";
michael@0 159 } else {
michael@0 160 return "???";
michael@0 161 }
michael@0 162 }
michael@0 163
/*
 * Map an epoll_ctl() operation code to a short name for log messages.
 * Returns "???" for any value that is not ADD, DEL or MOD.
 */
static const char *
epoll_op_to_string(int op)
{
	switch (op) {
	case EPOLL_CTL_ADD:
		return "ADD";
	case EPOLL_CTL_DEL:
		return "DEL";
	case EPOLL_CTL_MOD:
		return "MOD";
	default:
		return "???";
	}
}
michael@0 172
michael@0 173 static int
michael@0 174 epoll_apply_one_change(struct event_base *base,
michael@0 175 struct epollop *epollop,
michael@0 176 const struct event_change *ch)
michael@0 177 {
michael@0 178 struct epoll_event epev;
michael@0 179 int op, events = 0;
michael@0 180
michael@0 181 if (1) {
michael@0 182 /* The logic here is a little tricky. If we had no events set
michael@0 183 on the fd before, we need to set op="ADD" and set
michael@0 184 events=the events we want to add. If we had any events set
michael@0 185 on the fd before, and we want any events to remain on the
michael@0 186 fd, we need to say op="MOD" and set events=the events we
michael@0 187 want to remain. But if we want to delete the last event,
michael@0 188 we say op="DEL" and set events=the remaining events. What
michael@0 189 fun!
michael@0 190 */
michael@0 191
michael@0 192 /* TODO: Turn this into a switch or a table lookup. */
michael@0 193
michael@0 194 if ((ch->read_change & EV_CHANGE_ADD) ||
michael@0 195 (ch->write_change & EV_CHANGE_ADD)) {
michael@0 196 /* If we are adding anything at all, we'll want to do
michael@0 197 * either an ADD or a MOD. */
michael@0 198 events = 0;
michael@0 199 op = EPOLL_CTL_ADD;
michael@0 200 if (ch->read_change & EV_CHANGE_ADD) {
michael@0 201 events |= EPOLLIN;
michael@0 202 } else if (ch->read_change & EV_CHANGE_DEL) {
michael@0 203 ;
michael@0 204 } else if (ch->old_events & EV_READ) {
michael@0 205 events |= EPOLLIN;
michael@0 206 }
michael@0 207 if (ch->write_change & EV_CHANGE_ADD) {
michael@0 208 events |= EPOLLOUT;
michael@0 209 } else if (ch->write_change & EV_CHANGE_DEL) {
michael@0 210 ;
michael@0 211 } else if (ch->old_events & EV_WRITE) {
michael@0 212 events |= EPOLLOUT;
michael@0 213 }
michael@0 214 if ((ch->read_change|ch->write_change) & EV_ET)
michael@0 215 events |= EPOLLET;
michael@0 216
michael@0 217 if (ch->old_events) {
michael@0 218 /* If MOD fails, we retry as an ADD, and if
michael@0 219 * ADD fails we will retry as a MOD. So the
michael@0 220 * only hard part here is to guess which one
michael@0 221 * will work. As a heuristic, we'll try
michael@0 222 * MOD first if we think there were old
michael@0 223 * events and ADD if we think there were none.
michael@0 224 *
michael@0 225 * We can be wrong about the MOD if the file
michael@0 226 * has in fact been closed and re-opened.
michael@0 227 *
michael@0 228 * We can be wrong about the ADD if the
michael@0 229 * the fd has been re-created with a dup()
michael@0 230 * of the same file that it was before.
michael@0 231 */
michael@0 232 op = EPOLL_CTL_MOD;
michael@0 233 }
michael@0 234 } else if ((ch->read_change & EV_CHANGE_DEL) ||
michael@0 235 (ch->write_change & EV_CHANGE_DEL)) {
michael@0 236 /* If we're deleting anything, we'll want to do a MOD
michael@0 237 * or a DEL. */
michael@0 238 op = EPOLL_CTL_DEL;
michael@0 239
michael@0 240 if (ch->read_change & EV_CHANGE_DEL) {
michael@0 241 if (ch->write_change & EV_CHANGE_DEL) {
michael@0 242 events = EPOLLIN|EPOLLOUT;
michael@0 243 } else if (ch->old_events & EV_WRITE) {
michael@0 244 events = EPOLLOUT;
michael@0 245 op = EPOLL_CTL_MOD;
michael@0 246 } else {
michael@0 247 events = EPOLLIN;
michael@0 248 }
michael@0 249 } else if (ch->write_change & EV_CHANGE_DEL) {
michael@0 250 if (ch->old_events & EV_READ) {
michael@0 251 events = EPOLLIN;
michael@0 252 op = EPOLL_CTL_MOD;
michael@0 253 } else {
michael@0 254 events = EPOLLOUT;
michael@0 255 }
michael@0 256 }
michael@0 257 }
michael@0 258
michael@0 259 if (!events)
michael@0 260 return 0;
michael@0 261
michael@0 262 memset(&epev, 0, sizeof(epev));
michael@0 263 epev.data.fd = ch->fd;
michael@0 264 epev.events = events;
michael@0 265 if (epoll_ctl(epollop->epfd, op, ch->fd, &epev) == -1) {
michael@0 266 if (op == EPOLL_CTL_MOD && errno == ENOENT) {
michael@0 267 /* If a MOD operation fails with ENOENT, the
michael@0 268 * fd was probably closed and re-opened. We
michael@0 269 * should retry the operation as an ADD.
michael@0 270 */
michael@0 271 if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, ch->fd, &epev) == -1) {
michael@0 272 event_warn("Epoll MOD(%d) on %d retried as ADD; that failed too",
michael@0 273 (int)epev.events, ch->fd);
michael@0 274 return -1;
michael@0 275 } else {
michael@0 276 event_debug(("Epoll MOD(%d) on %d retried as ADD; succeeded.",
michael@0 277 (int)epev.events,
michael@0 278 ch->fd));
michael@0 279 }
michael@0 280 } else if (op == EPOLL_CTL_ADD && errno == EEXIST) {
michael@0 281 /* If an ADD operation fails with EEXIST,
michael@0 282 * either the operation was redundant (as with a
michael@0 283 * precautionary add), or we ran into a fun
michael@0 284 * kernel bug where using dup*() to duplicate the
michael@0 285 * same file into the same fd gives you the same epitem
michael@0 286 * rather than a fresh one. For the second case,
michael@0 287 * we must retry with MOD. */
michael@0 288 if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
michael@0 289 event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
michael@0 290 (int)epev.events, ch->fd);
michael@0 291 return -1;
michael@0 292 } else {
michael@0 293 event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
michael@0 294 (int)epev.events,
michael@0 295 ch->fd));
michael@0 296 }
michael@0 297 } else if (op == EPOLL_CTL_DEL &&
michael@0 298 (errno == ENOENT || errno == EBADF ||
michael@0 299 errno == EPERM)) {
michael@0 300 /* If a delete fails with one of these errors,
michael@0 301 * that's fine too: we closed the fd before we
michael@0 302 * got around to calling epoll_dispatch. */
michael@0 303 event_debug(("Epoll DEL(%d) on fd %d gave %s: DEL was unnecessary.",
michael@0 304 (int)epev.events,
michael@0 305 ch->fd,
michael@0 306 strerror(errno)));
michael@0 307 } else {
michael@0 308 event_warn("Epoll %s(%d) on fd %d failed. Old events were %d; read change was %d (%s); write change was %d (%s)",
michael@0 309 epoll_op_to_string(op),
michael@0 310 (int)epev.events,
michael@0 311 ch->fd,
michael@0 312 ch->old_events,
michael@0 313 ch->read_change,
michael@0 314 change_to_string(ch->read_change),
michael@0 315 ch->write_change,
michael@0 316 change_to_string(ch->write_change));
michael@0 317 return -1;
michael@0 318 }
michael@0 319 } else {
michael@0 320 event_debug(("Epoll %s(%d) on fd %d okay. [old events were %d; read change was %d; write change was %d]",
michael@0 321 epoll_op_to_string(op),
michael@0 322 (int)epev.events,
michael@0 323 (int)ch->fd,
michael@0 324 ch->old_events,
michael@0 325 ch->read_change,
michael@0 326 ch->write_change));
michael@0 327 }
michael@0 328 }
michael@0 329 return 0;
michael@0 330 }
michael@0 331
michael@0 332 static int
michael@0 333 epoll_apply_changes(struct event_base *base)
michael@0 334 {
michael@0 335 struct event_changelist *changelist = &base->changelist;
michael@0 336 struct epollop *epollop = base->evbase;
michael@0 337 struct event_change *ch;
michael@0 338
michael@0 339 int r = 0;
michael@0 340 int i;
michael@0 341
michael@0 342 for (i = 0; i < changelist->n_changes; ++i) {
michael@0 343 ch = &changelist->changes[i];
michael@0 344 if (epoll_apply_one_change(base, epollop, ch) < 0)
michael@0 345 r = -1;
michael@0 346 }
michael@0 347
michael@0 348 return (r);
michael@0 349 }
michael@0 350
michael@0 351 static int
michael@0 352 epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
michael@0 353 short old, short events, void *p)
michael@0 354 {
michael@0 355 struct event_change ch;
michael@0 356 ch.fd = fd;
michael@0 357 ch.old_events = old;
michael@0 358 ch.read_change = ch.write_change = 0;
michael@0 359 if (events & EV_WRITE)
michael@0 360 ch.write_change = EV_CHANGE_ADD |
michael@0 361 (events & EV_ET);
michael@0 362 if (events & EV_READ)
michael@0 363 ch.read_change = EV_CHANGE_ADD |
michael@0 364 (events & EV_ET);
michael@0 365
michael@0 366 return epoll_apply_one_change(base, base->evbase, &ch);
michael@0 367 }
michael@0 368
michael@0 369 static int
michael@0 370 epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
michael@0 371 short old, short events, void *p)
michael@0 372 {
michael@0 373 struct event_change ch;
michael@0 374 ch.fd = fd;
michael@0 375 ch.old_events = old;
michael@0 376 ch.read_change = ch.write_change = 0;
michael@0 377 if (events & EV_WRITE)
michael@0 378 ch.write_change = EV_CHANGE_DEL;
michael@0 379 if (events & EV_READ)
michael@0 380 ch.read_change = EV_CHANGE_DEL;
michael@0 381
michael@0 382 return epoll_apply_one_change(base, base->evbase, &ch);
michael@0 383 }
michael@0 384
michael@0 385 static int
michael@0 386 epoll_dispatch(struct event_base *base, struct timeval *tv)
michael@0 387 {
michael@0 388 struct epollop *epollop = base->evbase;
michael@0 389 struct epoll_event *events = epollop->events;
michael@0 390 int i, res;
michael@0 391 long timeout = -1;
michael@0 392
michael@0 393 if (tv != NULL) {
michael@0 394 timeout = evutil_tv_to_msec(tv);
michael@0 395 if (timeout < 0 || timeout > MAX_EPOLL_TIMEOUT_MSEC) {
michael@0 396 /* Linux kernels can wait forever if the timeout is
michael@0 397 * too big; see comment on MAX_EPOLL_TIMEOUT_MSEC. */
michael@0 398 timeout = MAX_EPOLL_TIMEOUT_MSEC;
michael@0 399 }
michael@0 400 }
michael@0 401
michael@0 402 epoll_apply_changes(base);
michael@0 403 event_changelist_remove_all(&base->changelist, base);
michael@0 404
michael@0 405 EVBASE_RELEASE_LOCK(base, th_base_lock);
michael@0 406
michael@0 407 res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
michael@0 408
michael@0 409 EVBASE_ACQUIRE_LOCK(base, th_base_lock);
michael@0 410
michael@0 411 if (res == -1) {
michael@0 412 if (errno != EINTR) {
michael@0 413 event_warn("epoll_wait");
michael@0 414 return (-1);
michael@0 415 }
michael@0 416
michael@0 417 return (0);
michael@0 418 }
michael@0 419
michael@0 420 event_debug(("%s: epoll_wait reports %d", __func__, res));
michael@0 421 EVUTIL_ASSERT(res <= epollop->nevents);
michael@0 422
michael@0 423 for (i = 0; i < res; i++) {
michael@0 424 int what = events[i].events;
michael@0 425 short ev = 0;
michael@0 426
michael@0 427 if (what & (EPOLLHUP|EPOLLERR)) {
michael@0 428 ev = EV_READ | EV_WRITE;
michael@0 429 } else {
michael@0 430 if (what & EPOLLIN)
michael@0 431 ev |= EV_READ;
michael@0 432 if (what & EPOLLOUT)
michael@0 433 ev |= EV_WRITE;
michael@0 434 }
michael@0 435
michael@0 436 if (!ev)
michael@0 437 continue;
michael@0 438
michael@0 439 evmap_io_active(base, events[i].data.fd, ev | EV_ET);
michael@0 440 }
michael@0 441
michael@0 442 if (res == epollop->nevents && epollop->nevents < MAX_NEVENT) {
michael@0 443 /* We used all of the event space this time. We should
michael@0 444 be ready for more events next time. */
michael@0 445 int new_nevents = epollop->nevents * 2;
michael@0 446 struct epoll_event *new_events;
michael@0 447
michael@0 448 new_events = mm_realloc(epollop->events,
michael@0 449 new_nevents * sizeof(struct epoll_event));
michael@0 450 if (new_events) {
michael@0 451 epollop->events = new_events;
michael@0 452 epollop->nevents = new_nevents;
michael@0 453 }
michael@0 454 }
michael@0 455
michael@0 456 return (0);
michael@0 457 }
michael@0 458
michael@0 459
michael@0 460 static void
michael@0 461 epoll_dealloc(struct event_base *base)
michael@0 462 {
michael@0 463 struct epollop *epollop = base->evbase;
michael@0 464
michael@0 465 evsig_dealloc(base);
michael@0 466 if (epollop->events)
michael@0 467 mm_free(epollop->events);
michael@0 468 if (epollop->epfd >= 0)
michael@0 469 close(epollop->epfd);
michael@0 470
michael@0 471 memset(epollop, 0, sizeof(struct epollop));
michael@0 472 mm_free(epollop);
michael@0 473 }

mercurial