michael@0: /* michael@0: * Submitted by David Pacheco (dp.spambait@gmail.com) michael@0: * michael@0: * Copyright 2006-2007 Niels Provos michael@0: * Copyright 2007-2012 Niels Provos and Nick Mathewson michael@0: * michael@0: * Redistribution and use in source and binary forms, with or without michael@0: * modification, are permitted provided that the following conditions michael@0: * are met: michael@0: * 1. Redistributions of source code must retain the above copyright michael@0: * notice, this list of conditions and the following disclaimer. michael@0: * 2. Redistributions in binary form must reproduce the above copyright michael@0: * notice, this list of conditions and the following disclaimer in the michael@0: * documentation and/or other materials provided with the distribution. michael@0: * 3. The name of the author may not be used to endorse or promote products michael@0: * derived from this software without specific prior written permission. michael@0: * michael@0: * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY michael@0: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED michael@0: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE michael@0: * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY michael@0: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES michael@0: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; michael@0: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND michael@0: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT michael@0: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS michael@0: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. michael@0: */ michael@0: michael@0: /* michael@0: * Copyright (c) 2007 Sun Microsystems. All rights reserved. michael@0: * Use is subject to license terms. michael@0: */ michael@0: michael@0: /* michael@0: * evport.c: event backend using Solaris 10 event ports. See port_create(3C). michael@0: * This implementation is loosely modeled after the one used for select(2) (in michael@0: * select.c). michael@0: * michael@0: * The outstanding events are tracked in a data structure called evport_data. michael@0: * Each entry in the ed_fds array corresponds to a file descriptor, and contains michael@0: * pointers to the read and write events that correspond to that fd. (That is, michael@0: * when the file is readable, the "read" event should handle it, etc.) michael@0: * michael@0: * evport_add and evport_del update this data structure. evport_dispatch uses it michael@0: * to determine where to callback when an event occurs (which it gets from michael@0: * port_getn). michael@0: * michael@0: * Helper functions are used: grow() grows the file descriptor array as michael@0: * necessary when large fd's come in. reassociate() takes care of maintaining michael@0: * the proper file-descriptor/event-port associations. michael@0: * michael@0: * As in the select(2) implementation, signals are handled by evsignal. michael@0: */ michael@0: michael@0: #include "event2/event-config.h" michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "event2/thread.h" michael@0: michael@0: #include "evthread-internal.h" michael@0: #include "event-internal.h" michael@0: #include "log-internal.h" michael@0: #include "evsignal-internal.h" michael@0: #include "evmap-internal.h" michael@0: michael@0: /* michael@0: * Default value for ed_nevents, which is the maximum file descriptor number we michael@0: * can handle. If an event comes in for a file descriptor F > nevents, we will michael@0: * grow the array of file descriptors, doubling its size. michael@0: */ michael@0: #define DEFAULT_NFDS 16 michael@0: michael@0: michael@0: /* michael@0: * EVENTS_PER_GETN is the maximum number of events to retrieve from port_getn on michael@0: * any particular call. You can speed things up by increasing this, but it will michael@0: * (obviously) require more memory. michael@0: */ michael@0: #define EVENTS_PER_GETN 8 michael@0: michael@0: /* michael@0: * Per-file-descriptor information about what events we're subscribed to. These michael@0: * fields are NULL if no event is subscribed to either of them. michael@0: */ michael@0: michael@0: struct fd_info { michael@0: short fdi_what; /* combinations of EV_READ and EV_WRITE */ michael@0: }; michael@0: michael@0: #define FDI_HAS_READ(fdi) ((fdi)->fdi_what & EV_READ) michael@0: #define FDI_HAS_WRITE(fdi) ((fdi)->fdi_what & EV_WRITE) michael@0: #define FDI_HAS_EVENTS(fdi) (FDI_HAS_READ(fdi) || FDI_HAS_WRITE(fdi)) michael@0: #define FDI_TO_SYSEVENTS(fdi) (FDI_HAS_READ(fdi) ? POLLIN : 0) | \ michael@0: (FDI_HAS_WRITE(fdi) ? POLLOUT : 0) michael@0: michael@0: struct evport_data { michael@0: int ed_port; /* event port for system events */ michael@0: int ed_nevents; /* number of allocated fdi's */ michael@0: struct fd_info *ed_fds; /* allocated fdi table */ michael@0: /* fdi's that we need to reassoc */ michael@0: int ed_pending[EVENTS_PER_GETN]; /* fd's with pending events */ michael@0: }; michael@0: michael@0: static void* evport_init(struct event_base *); michael@0: static int evport_add(struct event_base *, int fd, short old, short events, void *); michael@0: static int evport_del(struct event_base *, int fd, short old, short events, void *); michael@0: static int evport_dispatch(struct event_base *, struct timeval *); michael@0: static void evport_dealloc(struct event_base *); michael@0: michael@0: const struct eventop evportops = { michael@0: "evport", michael@0: evport_init, michael@0: evport_add, michael@0: evport_del, michael@0: evport_dispatch, michael@0: evport_dealloc, michael@0: 1, /* need reinit */ michael@0: 0, /* features */ michael@0: 0, /* fdinfo length */ michael@0: }; michael@0: michael@0: /* michael@0: * Initialize the event port implementation. michael@0: */ michael@0: michael@0: static void* michael@0: evport_init(struct event_base *base) michael@0: { michael@0: struct evport_data *evpd; michael@0: int i; michael@0: michael@0: if (!(evpd = mm_calloc(1, sizeof(struct evport_data)))) michael@0: return (NULL); michael@0: michael@0: if ((evpd->ed_port = port_create()) == -1) { michael@0: mm_free(evpd); michael@0: return (NULL); michael@0: } michael@0: michael@0: /* michael@0: * Initialize file descriptor structure michael@0: */ michael@0: evpd->ed_fds = mm_calloc(DEFAULT_NFDS, sizeof(struct fd_info)); michael@0: if (evpd->ed_fds == NULL) { michael@0: close(evpd->ed_port); michael@0: mm_free(evpd); michael@0: return (NULL); michael@0: } michael@0: evpd->ed_nevents = DEFAULT_NFDS; michael@0: for (i = 0; i < EVENTS_PER_GETN; i++) michael@0: evpd->ed_pending[i] = -1; michael@0: michael@0: evsig_init(base); michael@0: michael@0: return (evpd); michael@0: } michael@0: michael@0: #ifdef CHECK_INVARIANTS michael@0: /* michael@0: * Checks some basic properties about the evport_data structure. Because it michael@0: * checks all file descriptors, this function can be expensive when the maximum michael@0: * file descriptor ever used is rather large. michael@0: */ michael@0: michael@0: static void michael@0: check_evportop(struct evport_data *evpd) michael@0: { michael@0: EVUTIL_ASSERT(evpd); michael@0: EVUTIL_ASSERT(evpd->ed_nevents > 0); michael@0: EVUTIL_ASSERT(evpd->ed_port > 0); michael@0: EVUTIL_ASSERT(evpd->ed_fds > 0); michael@0: } michael@0: michael@0: /* michael@0: * Verifies very basic integrity of a given port_event. michael@0: */ michael@0: static void michael@0: check_event(port_event_t* pevt) michael@0: { michael@0: /* michael@0: * We've only registered for PORT_SOURCE_FD events. The only michael@0: * other thing we can legitimately receive is PORT_SOURCE_ALERT, michael@0: * but since we're not using port_alert either, we can assume michael@0: * PORT_SOURCE_FD. michael@0: */ michael@0: EVUTIL_ASSERT(pevt->portev_source == PORT_SOURCE_FD); michael@0: EVUTIL_ASSERT(pevt->portev_user == NULL); michael@0: } michael@0: michael@0: #else michael@0: #define check_evportop(epop) michael@0: #define check_event(pevt) michael@0: #endif /* CHECK_INVARIANTS */ michael@0: michael@0: /* michael@0: * Doubles the size of the allocated file descriptor array. michael@0: */ michael@0: static int michael@0: grow(struct evport_data *epdp, int factor) michael@0: { michael@0: struct fd_info *tmp; michael@0: int oldsize = epdp->ed_nevents; michael@0: int newsize = factor * oldsize; michael@0: EVUTIL_ASSERT(factor > 1); michael@0: michael@0: check_evportop(epdp); michael@0: michael@0: tmp = mm_realloc(epdp->ed_fds, sizeof(struct fd_info) * newsize); michael@0: if (NULL == tmp) michael@0: return -1; michael@0: epdp->ed_fds = tmp; michael@0: memset((char*) (epdp->ed_fds + oldsize), 0, michael@0: (newsize - oldsize)*sizeof(struct fd_info)); michael@0: epdp->ed_nevents = newsize; michael@0: michael@0: check_evportop(epdp); michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: michael@0: /* michael@0: * (Re)associates the given file descriptor with the event port. The OS events michael@0: * are specified (implicitly) from the fd_info struct. michael@0: */ michael@0: static int michael@0: reassociate(struct evport_data *epdp, struct fd_info *fdip, int fd) michael@0: { michael@0: int sysevents = FDI_TO_SYSEVENTS(fdip); michael@0: michael@0: if (sysevents != 0) { michael@0: if (port_associate(epdp->ed_port, PORT_SOURCE_FD, michael@0: fd, sysevents, NULL) == -1) { michael@0: event_warn("port_associate"); michael@0: return (-1); michael@0: } michael@0: } michael@0: michael@0: check_evportop(epdp); michael@0: michael@0: return (0); michael@0: } michael@0: michael@0: /* michael@0: * Main event loop - polls port_getn for some number of events, and processes michael@0: * them. michael@0: */ michael@0: michael@0: static int michael@0: evport_dispatch(struct event_base *base, struct timeval *tv) michael@0: { michael@0: int i, res; michael@0: struct evport_data *epdp = base->evbase; michael@0: port_event_t pevtlist[EVENTS_PER_GETN]; michael@0: michael@0: /* michael@0: * port_getn will block until it has at least nevents events. It will michael@0: * also return how many it's given us (which may be more than we asked michael@0: * for, as long as it's less than our maximum (EVENTS_PER_GETN)) in michael@0: * nevents. michael@0: */ michael@0: int nevents = 1; michael@0: michael@0: /* michael@0: * We have to convert a struct timeval to a struct timespec michael@0: * (only difference is nanoseconds vs. microseconds). If no time-based michael@0: * events are active, we should wait for I/O (and tv == NULL). michael@0: */ michael@0: struct timespec ts; michael@0: struct timespec *ts_p = NULL; michael@0: if (tv != NULL) { michael@0: ts.tv_sec = tv->tv_sec; michael@0: ts.tv_nsec = tv->tv_usec * 1000; michael@0: ts_p = &ts; michael@0: } michael@0: michael@0: /* michael@0: * Before doing anything else, we need to reassociate the events we hit michael@0: * last time which need reassociation. See comment at the end of the michael@0: * loop below. michael@0: */ michael@0: for (i = 0; i < EVENTS_PER_GETN; ++i) { michael@0: struct fd_info *fdi = NULL; michael@0: if (epdp->ed_pending[i] != -1) { michael@0: fdi = &(epdp->ed_fds[epdp->ed_pending[i]]); michael@0: } michael@0: michael@0: if (fdi != NULL && FDI_HAS_EVENTS(fdi)) { michael@0: int fd = epdp->ed_pending[i]; michael@0: reassociate(epdp, fdi, fd); michael@0: epdp->ed_pending[i] = -1; michael@0: } michael@0: } michael@0: michael@0: EVBASE_RELEASE_LOCK(base, th_base_lock); michael@0: michael@0: res = port_getn(epdp->ed_port, pevtlist, EVENTS_PER_GETN, michael@0: (unsigned int *) &nevents, ts_p); michael@0: michael@0: EVBASE_ACQUIRE_LOCK(base, th_base_lock); michael@0: michael@0: if (res == -1) { michael@0: if (errno == EINTR || errno == EAGAIN) { michael@0: return (0); michael@0: } else if (errno == ETIME) { michael@0: if (nevents == 0) michael@0: return (0); michael@0: } else { michael@0: event_warn("port_getn"); michael@0: return (-1); michael@0: } michael@0: } michael@0: michael@0: event_debug(("%s: port_getn reports %d events", __func__, nevents)); michael@0: michael@0: for (i = 0; i < nevents; ++i) { michael@0: struct fd_info *fdi; michael@0: port_event_t *pevt = &pevtlist[i]; michael@0: int fd = (int) pevt->portev_object; michael@0: michael@0: check_evportop(epdp); michael@0: check_event(pevt); michael@0: epdp->ed_pending[i] = fd; michael@0: michael@0: /* michael@0: * Figure out what kind of event it was michael@0: * (because we have to pass this to the callback) michael@0: */ michael@0: res = 0; michael@0: if (pevt->portev_events & (POLLERR|POLLHUP)) { michael@0: res = EV_READ | EV_WRITE; michael@0: } else { michael@0: if (pevt->portev_events & POLLIN) michael@0: res |= EV_READ; michael@0: if (pevt->portev_events & POLLOUT) michael@0: res |= EV_WRITE; michael@0: } michael@0: michael@0: /* michael@0: * Check for the error situations or a hangup situation michael@0: */ michael@0: if (pevt->portev_events & (POLLERR|POLLHUP|POLLNVAL)) michael@0: res |= EV_READ|EV_WRITE; michael@0: michael@0: EVUTIL_ASSERT(epdp->ed_nevents > fd); michael@0: fdi = &(epdp->ed_fds[fd]); michael@0: michael@0: evmap_io_active(base, fd, res); michael@0: } /* end of all events gotten */ michael@0: michael@0: check_evportop(epdp); michael@0: michael@0: return (0); michael@0: } michael@0: michael@0: michael@0: /* michael@0: * Adds the given event (so that you will be notified when it happens via michael@0: * the callback function). michael@0: */ michael@0: michael@0: static int michael@0: evport_add(struct event_base *base, int fd, short old, short events, void *p) michael@0: { michael@0: struct evport_data *evpd = base->evbase; michael@0: struct fd_info *fdi; michael@0: int factor; michael@0: (void)p; michael@0: michael@0: check_evportop(evpd); michael@0: michael@0: /* michael@0: * If necessary, grow the file descriptor info table michael@0: */ michael@0: michael@0: factor = 1; michael@0: while (fd >= factor * evpd->ed_nevents) michael@0: factor *= 2; michael@0: michael@0: if (factor > 1) { michael@0: if (-1 == grow(evpd, factor)) { michael@0: return (-1); michael@0: } michael@0: } michael@0: michael@0: fdi = &evpd->ed_fds[fd]; michael@0: fdi->fdi_what |= events; michael@0: michael@0: return reassociate(evpd, fdi, fd); michael@0: } michael@0: michael@0: /* michael@0: * Removes the given event from the list of events to wait for. michael@0: */ michael@0: michael@0: static int michael@0: evport_del(struct event_base *base, int fd, short old, short events, void *p) michael@0: { michael@0: struct evport_data *evpd = base->evbase; michael@0: struct fd_info *fdi; michael@0: int i; michael@0: int associated = 1; michael@0: (void)p; michael@0: michael@0: check_evportop(evpd); michael@0: michael@0: if (evpd->ed_nevents < fd) { michael@0: return (-1); michael@0: } michael@0: michael@0: for (i = 0; i < EVENTS_PER_GETN; ++i) { michael@0: if (evpd->ed_pending[i] == fd) { michael@0: associated = 0; michael@0: break; michael@0: } michael@0: } michael@0: michael@0: fdi = &evpd->ed_fds[fd]; michael@0: if (events & EV_READ) michael@0: fdi->fdi_what &= ~EV_READ; michael@0: if (events & EV_WRITE) michael@0: fdi->fdi_what &= ~EV_WRITE; michael@0: michael@0: if (associated) { michael@0: if (!FDI_HAS_EVENTS(fdi) && michael@0: port_dissociate(evpd->ed_port, PORT_SOURCE_FD, fd) == -1) { michael@0: /* michael@0: * Ignore EBADFD error the fd could have been closed michael@0: * before event_del() was called. michael@0: */ michael@0: if (errno != EBADFD) { michael@0: event_warn("port_dissociate"); michael@0: return (-1); michael@0: } michael@0: } else { michael@0: if (FDI_HAS_EVENTS(fdi)) { michael@0: return (reassociate(evpd, fdi, fd)); michael@0: } michael@0: } michael@0: } else { michael@0: if ((fdi->fdi_what & (EV_READ|EV_WRITE)) == 0) { michael@0: evpd->ed_pending[i] = -1; michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: michael@0: static void michael@0: evport_dealloc(struct event_base *base) michael@0: { michael@0: struct evport_data *evpd = base->evbase; michael@0: michael@0: evsig_dealloc(base); michael@0: michael@0: close(evpd->ed_port); michael@0: michael@0: if (evpd->ed_fds) michael@0: mm_free(evpd->ed_fds); michael@0: mm_free(evpd); michael@0: }