ipc/chromium/src/third_party/libevent/epoll.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /*
     2  * Copyright 2000-2007 Niels Provos <provos@citi.umich.edu>
     3  * Copyright 2007-2012 Niels Provos, Nick Mathewson
     4  *
     5  * Redistribution and use in source and binary forms, with or without
     6  * modification, are permitted provided that the following conditions
     7  * are met:
     8  * 1. Redistributions of source code must retain the above copyright
     9  *    notice, this list of conditions and the following disclaimer.
    10  * 2. Redistributions in binary form must reproduce the above copyright
    11  *    notice, this list of conditions and the following disclaimer in the
    12  *    documentation and/or other materials provided with the distribution.
    13  * 3. The name of the author may not be used to endorse or promote products
    14  *    derived from this software without specific prior written permission.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
    17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
    18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
    19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
    20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
    21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
    25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    26  */
    27 #include "event2/event-config.h"
    29 #include <stdint.h>
    30 #include <sys/types.h>
    31 #include <sys/resource.h>
    32 #ifdef _EVENT_HAVE_SYS_TIME_H
    33 #include <sys/time.h>
    34 #endif
    35 #include <sys/queue.h>
    36 #include <sys/epoll.h>
    37 #include <signal.h>
    38 #include <limits.h>
    39 #include <stdio.h>
    40 #include <stdlib.h>
    41 #include <string.h>
    42 #include <unistd.h>
    43 #include <errno.h>
    44 #ifdef _EVENT_HAVE_FCNTL_H
    45 #include <fcntl.h>
    46 #endif
    48 #include "event-internal.h"
    49 #include "evsignal-internal.h"
    50 #include "event2/thread.h"
    51 #include "evthread-internal.h"
    52 #include "log-internal.h"
    53 #include "evmap-internal.h"
    54 #include "changelist-internal.h"
/* Private state of the epoll backend.  epoll_init() allocates one of these
 * and returns it; the other functions in this file read it back from
 * base->evbase, and epoll_dealloc() releases it. */
struct epollop {
	/* Result buffer handed to epoll_wait().  epoll_dispatch() doubles it
	 * (while nevents is below MAX_NEVENT) whenever a wait fills it
	 * completely. */
	struct epoll_event *events;
	/* Capacity of 'events', counted in struct epoll_event entries. */
	int nevents;
	/* Descriptor obtained from epoll_create(). */
	int epfd;
};
/* Backend entry points shared by both method tables below. */
static void *epoll_init(struct event_base *);
static int epoll_dispatch(struct event_base *, struct timeval *);
static void epoll_dealloc(struct event_base *);

/* Method table that epoll_init() installs into base->evsel when changelist
 * mode is requested.  Its add/del entries are the generic
 * event_changelist_add/event_changelist_del routines; epoll_dispatch()
 * applies whatever is recorded in base->changelist before it waits.
 * NOTE(review): the initializer is positional, and the declaration of
 * struct eventop is not visible in this file -- confirm the slot order
 * there before editing. */
static const struct eventop epollops_changelist = {
	"epoll (with changelist)",
	epoll_init,
	event_changelist_add,
	event_changelist_del,
	epoll_dispatch,
	epoll_dealloc,
	1, /* need reinit */
	EV_FEATURE_ET|EV_FEATURE_O1,
	EVENT_CHANGELIST_FDINFO_SIZE
};
/* Add/del entry points that talk to epoll_ctl() immediately instead of
 * going through the changelist. */
static int epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
    short old, short events, void *p);
static int epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
    short old, short events, void *p);

/* Method table for the epoll backend without a changelist.  Unlike
 * epollops_changelist this object is not static, so it is visible to other
 * translation units.
 * NOTE(review): the initializer is positional, and the declaration of
 * struct eventop is not visible in this file -- confirm the slot order
 * there before editing. */
const struct eventop epollops = {
	"epoll",
	epoll_init,
	epoll_nochangelist_add,
	epoll_nochangelist_del,
	epoll_dispatch,
	epoll_dealloc,
	1, /* need reinit */
	EV_FEATURE_ET|EV_FEATURE_O1,
	0
};
/* Number of struct epoll_event slots that epoll_init() allocates. */
#define INITIAL_NEVENT 32
/* epoll_dispatch() stops doubling the event array once it has this many
 * slots. */
#define MAX_NEVENT 4096

/* On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
 * values bigger than (LONG_MAX - 999ULL)/HZ.  HZ in the wild can be
 * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
 * largest number of msec we can support here is 2147482.  Let's
 * round that down by 47 seconds, which gives 35 minutes (2100000 msec).
 */
#define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
   107 static void *
   108 epoll_init(struct event_base *base)
   109 {
   110 	int epfd;
   111 	struct epollop *epollop;
   113 	/* Initialize the kernel queue.  (The size field is ignored since
   114 	 * 2.6.8.) */
   115 	if ((epfd = epoll_create(32000)) == -1) {
   116 		if (errno != ENOSYS)
   117 			event_warn("epoll_create");
   118 		return (NULL);
   119 	}
   121 	evutil_make_socket_closeonexec(epfd);
   123 	if (!(epollop = mm_calloc(1, sizeof(struct epollop)))) {
   124 		close(epfd);
   125 		return (NULL);
   126 	}
   128 	epollop->epfd = epfd;
   130 	/* Initialize fields */
   131 	epollop->events = mm_calloc(INITIAL_NEVENT, sizeof(struct epoll_event));
   132 	if (epollop->events == NULL) {
   133 		mm_free(epollop);
   134 		close(epfd);
   135 		return (NULL);
   136 	}
   137 	epollop->nevents = INITIAL_NEVENT;
   139 	if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0 ||
   140 	    ((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
   141 		evutil_getenv("EVENT_EPOLL_USE_CHANGELIST") != NULL))
   142 		base->evsel = &epollops_changelist;
   144 	evsig_init(base);
   146 	return (epollop);
   147 }
   149 static const char *
   150 change_to_string(int change)
   151 {
   152 	change &= (EV_CHANGE_ADD|EV_CHANGE_DEL);
   153 	if (change == EV_CHANGE_ADD) {
   154 		return "add";
   155 	} else if (change == EV_CHANGE_DEL) {
   156 		return "del";
   157 	} else if (change == 0) {
   158 		return "none";
   159 	} else {
   160 		return "???";
   161 	}
   162 }
   164 static const char *
   165 epoll_op_to_string(int op)
   166 {
   167 	return op == EPOLL_CTL_ADD?"ADD":
   168 	    op == EPOLL_CTL_DEL?"DEL":
   169 	    op == EPOLL_CTL_MOD?"MOD":
   170 	    "???";
   171 }
   173 static int
   174 epoll_apply_one_change(struct event_base *base,
   175     struct epollop *epollop,
   176     const struct event_change *ch)
   177 {
   178 	struct epoll_event epev;
   179 	int op, events = 0;
   181 	if (1) {
   182 		/* The logic here is a little tricky.  If we had no events set
   183 		   on the fd before, we need to set op="ADD" and set
   184 		   events=the events we want to add.  If we had any events set
   185 		   on the fd before, and we want any events to remain on the
   186 		   fd, we need to say op="MOD" and set events=the events we
   187 		   want to remain.  But if we want to delete the last event,
   188 		   we say op="DEL" and set events=the remaining events.  What
   189 		   fun!
   190 		*/
   192 		/* TODO: Turn this into a switch or a table lookup. */
   194 		if ((ch->read_change & EV_CHANGE_ADD) ||
   195 		    (ch->write_change & EV_CHANGE_ADD)) {
   196 			/* If we are adding anything at all, we'll want to do
   197 			 * either an ADD or a MOD. */
   198 			events = 0;
   199 			op = EPOLL_CTL_ADD;
   200 			if (ch->read_change & EV_CHANGE_ADD) {
   201 				events |= EPOLLIN;
   202 			} else if (ch->read_change & EV_CHANGE_DEL) {
   203 				;
   204 			} else if (ch->old_events & EV_READ) {
   205 				events |= EPOLLIN;
   206 			}
   207 			if (ch->write_change & EV_CHANGE_ADD) {
   208 				events |= EPOLLOUT;
   209 			} else if (ch->write_change & EV_CHANGE_DEL) {
   210 				;
   211 			} else if (ch->old_events & EV_WRITE) {
   212 				events |= EPOLLOUT;
   213 			}
   214 			if ((ch->read_change|ch->write_change) & EV_ET)
   215 				events |= EPOLLET;
   217 			if (ch->old_events) {
   218 				/* If MOD fails, we retry as an ADD, and if
   219 				 * ADD fails we will retry as a MOD.  So the
   220 				 * only hard part here is to guess which one
   221 				 * will work.  As a heuristic, we'll try
   222 				 * MOD first if we think there were old
   223 				 * events and ADD if we think there were none.
   224 				 *
   225 				 * We can be wrong about the MOD if the file
   226 				 * has in fact been closed and re-opened.
   227 				 *
   228 				 * We can be wrong about the ADD if the
   229 				 * the fd has been re-created with a dup()
   230 				 * of the same file that it was before.
   231 				 */
   232 				op = EPOLL_CTL_MOD;
   233 			}
   234 		} else if ((ch->read_change & EV_CHANGE_DEL) ||
   235 		    (ch->write_change & EV_CHANGE_DEL)) {
   236 			/* If we're deleting anything, we'll want to do a MOD
   237 			 * or a DEL. */
   238 			op = EPOLL_CTL_DEL;
   240 			if (ch->read_change & EV_CHANGE_DEL) {
   241 				if (ch->write_change & EV_CHANGE_DEL) {
   242 					events = EPOLLIN|EPOLLOUT;
   243 				} else if (ch->old_events & EV_WRITE) {
   244 					events = EPOLLOUT;
   245 					op = EPOLL_CTL_MOD;
   246 				} else {
   247 					events = EPOLLIN;
   248 				}
   249 			} else if (ch->write_change & EV_CHANGE_DEL) {
   250 				if (ch->old_events & EV_READ) {
   251 					events = EPOLLIN;
   252 					op = EPOLL_CTL_MOD;
   253 				} else {
   254 					events = EPOLLOUT;
   255 				}
   256 			}
   257 		}
   259 		if (!events)
   260 			return 0;
   262 		memset(&epev, 0, sizeof(epev));
   263 		epev.data.fd = ch->fd;
   264 		epev.events = events;
   265 		if (epoll_ctl(epollop->epfd, op, ch->fd, &epev) == -1) {
   266 			if (op == EPOLL_CTL_MOD && errno == ENOENT) {
   267 				/* If a MOD operation fails with ENOENT, the
   268 				 * fd was probably closed and re-opened.  We
   269 				 * should retry the operation as an ADD.
   270 				 */
   271 				if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, ch->fd, &epev) == -1) {
   272 					event_warn("Epoll MOD(%d) on %d retried as ADD; that failed too",
   273 					    (int)epev.events, ch->fd);
   274 					return -1;
   275 				} else {
   276 					event_debug(("Epoll MOD(%d) on %d retried as ADD; succeeded.",
   277 						(int)epev.events,
   278 						ch->fd));
   279 				}
   280 			} else if (op == EPOLL_CTL_ADD && errno == EEXIST) {
   281 				/* If an ADD operation fails with EEXIST,
   282 				 * either the operation was redundant (as with a
   283 				 * precautionary add), or we ran into a fun
   284 				 * kernel bug where using dup*() to duplicate the
   285 				 * same file into the same fd gives you the same epitem
   286 				 * rather than a fresh one.  For the second case,
   287 				 * we must retry with MOD. */
   288 				if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
   289 					event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
   290 					    (int)epev.events, ch->fd);
   291 					return -1;
   292 				} else {
   293 					event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
   294 						(int)epev.events,
   295 						ch->fd));
   296 				}
   297 			} else if (op == EPOLL_CTL_DEL &&
   298 			    (errno == ENOENT || errno == EBADF ||
   299 				errno == EPERM)) {
   300 				/* If a delete fails with one of these errors,
   301 				 * that's fine too: we closed the fd before we
   302 				 * got around to calling epoll_dispatch. */
   303 				event_debug(("Epoll DEL(%d) on fd %d gave %s: DEL was unnecessary.",
   304 					(int)epev.events,
   305 					ch->fd,
   306 					strerror(errno)));
   307 			} else {
   308 				event_warn("Epoll %s(%d) on fd %d failed.  Old events were %d; read change was %d (%s); write change was %d (%s)",
   309 				    epoll_op_to_string(op),
   310 				    (int)epev.events,
   311 				    ch->fd,
   312 				    ch->old_events,
   313 				    ch->read_change,
   314 				    change_to_string(ch->read_change),
   315 				    ch->write_change,
   316 				    change_to_string(ch->write_change));
   317 				return -1;
   318 			}
   319 		} else {
   320 			event_debug(("Epoll %s(%d) on fd %d okay. [old events were %d; read change was %d; write change was %d]",
   321 				epoll_op_to_string(op),
   322 				(int)epev.events,
   323 				(int)ch->fd,
   324 				ch->old_events,
   325 				ch->read_change,
   326 				ch->write_change));
   327 		}
   328 	}
   329 	return 0;
   330 }
   332 static int
   333 epoll_apply_changes(struct event_base *base)
   334 {
   335 	struct event_changelist *changelist = &base->changelist;
   336 	struct epollop *epollop = base->evbase;
   337 	struct event_change *ch;
   339 	int r = 0;
   340 	int i;
   342 	for (i = 0; i < changelist->n_changes; ++i) {
   343 		ch = &changelist->changes[i];
   344 		if (epoll_apply_one_change(base, epollop, ch) < 0)
   345 			r = -1;
   346 	}
   348 	return (r);
   349 }
   351 static int
   352 epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
   353     short old, short events, void *p)
   354 {
   355 	struct event_change ch;
   356 	ch.fd = fd;
   357 	ch.old_events = old;
   358 	ch.read_change = ch.write_change = 0;
   359 	if (events & EV_WRITE)
   360 		ch.write_change = EV_CHANGE_ADD |
   361 		    (events & EV_ET);
   362 	if (events & EV_READ)
   363 		ch.read_change = EV_CHANGE_ADD |
   364 		    (events & EV_ET);
   366 	return epoll_apply_one_change(base, base->evbase, &ch);
   367 }
   369 static int
   370 epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
   371     short old, short events, void *p)
   372 {
   373 	struct event_change ch;
   374 	ch.fd = fd;
   375 	ch.old_events = old;
   376 	ch.read_change = ch.write_change = 0;
   377 	if (events & EV_WRITE)
   378 		ch.write_change = EV_CHANGE_DEL;
   379 	if (events & EV_READ)
   380 		ch.read_change = EV_CHANGE_DEL;
   382 	return epoll_apply_one_change(base, base->evbase, &ch);
   383 }
   385 static int
   386 epoll_dispatch(struct event_base *base, struct timeval *tv)
   387 {
   388 	struct epollop *epollop = base->evbase;
   389 	struct epoll_event *events = epollop->events;
   390 	int i, res;
   391 	long timeout = -1;
   393 	if (tv != NULL) {
   394 		timeout = evutil_tv_to_msec(tv);
   395 		if (timeout < 0 || timeout > MAX_EPOLL_TIMEOUT_MSEC) {
   396 			/* Linux kernels can wait forever if the timeout is
   397 			 * too big; see comment on MAX_EPOLL_TIMEOUT_MSEC. */
   398 			timeout = MAX_EPOLL_TIMEOUT_MSEC;
   399 		}
   400 	}
   402 	epoll_apply_changes(base);
   403 	event_changelist_remove_all(&base->changelist, base);
   405 	EVBASE_RELEASE_LOCK(base, th_base_lock);
   407 	res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
   409 	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
   411 	if (res == -1) {
   412 		if (errno != EINTR) {
   413 			event_warn("epoll_wait");
   414 			return (-1);
   415 		}
   417 		return (0);
   418 	}
   420 	event_debug(("%s: epoll_wait reports %d", __func__, res));
   421 	EVUTIL_ASSERT(res <= epollop->nevents);
   423 	for (i = 0; i < res; i++) {
   424 		int what = events[i].events;
   425 		short ev = 0;
   427 		if (what & (EPOLLHUP|EPOLLERR)) {
   428 			ev = EV_READ | EV_WRITE;
   429 		} else {
   430 			if (what & EPOLLIN)
   431 				ev |= EV_READ;
   432 			if (what & EPOLLOUT)
   433 				ev |= EV_WRITE;
   434 		}
   436 		if (!ev)
   437 			continue;
   439 		evmap_io_active(base, events[i].data.fd, ev | EV_ET);
   440 	}
   442 	if (res == epollop->nevents && epollop->nevents < MAX_NEVENT) {
   443 		/* We used all of the event space this time.  We should
   444 		   be ready for more events next time. */
   445 		int new_nevents = epollop->nevents * 2;
   446 		struct epoll_event *new_events;
   448 		new_events = mm_realloc(epollop->events,
   449 		    new_nevents * sizeof(struct epoll_event));
   450 		if (new_events) {
   451 			epollop->events = new_events;
   452 			epollop->nevents = new_nevents;
   453 		}
   454 	}
   456 	return (0);
   457 }
   460 static void
   461 epoll_dealloc(struct event_base *base)
   462 {
   463 	struct epollop *epollop = base->evbase;
   465 	evsig_dealloc(base);
   466 	if (epollop->events)
   467 		mm_free(epollop->events);
   468 	if (epollop->epfd >= 0)
   469 		close(epollop->epfd);
   471 	memset(epollop, 0, sizeof(struct epollop));
   472 	mm_free(epollop);
   473 }

mercurial