diff --git a/bufferevent_sock.c b/bufferevent_sock.c index bf9e35e05f..431ece0588 100644 --- a/bufferevent_sock.c +++ b/bufferevent_sock.c @@ -365,11 +365,10 @@ bufferevent_socket_connect(struct bufferevent *bev, if (fd < 0) { if (!sa) goto done; - fd = socket(sa->sa_family, SOCK_STREAM, 0); + fd = evutil_socket(sa->sa_family, + SOCK_STREAM|EVUTIL_SOCK_NONBLOCK, 0); if (fd < 0) goto done; - if (evutil_make_socket_nonblocking(fd)<0) - goto done; ownfd = 1; } if (sa) { diff --git a/configure.in b/configure.in index 31eabdb9cf..0bf5b5c6e6 100644 --- a/configure.in +++ b/configure.in @@ -311,10 +311,12 @@ AC_HEADER_TIME dnl Checks for library functions. AC_CHECK_FUNCS([ \ + accept4 \ arc4random \ arc4random_buf \ clock_gettime \ eventfd \ + epoll_create1 \ fcntl \ getegid \ geteuid \ @@ -329,6 +331,7 @@ AC_CHECK_FUNCS([ \ mmap \ nanosleep \ pipe \ + pipe2 \ putenv \ sendfile \ setenv \ diff --git a/epoll.c b/epoll.c index 4c84f56985..ab9cff4c03 100644 --- a/epoll.c +++ b/epoll.c @@ -110,19 +110,24 @@ const struct eventop epollops = { static void * epoll_init(struct event_base *base) { - int epfd; + int epfd = -1; struct epollop *epollop; - /* Initialize the kernel queue. (The size field is ignored since - * 2.6.8.) */ - if ((epfd = epoll_create(32000)) == -1) { - if (errno != ENOSYS) - event_warn("epoll_create"); - return (NULL); +#ifdef _EVENT_HAVE_EPOLL_CREATE1 + /* First, try the shiny new epoll_create1 interface, if we have it. */ + epfd = epoll_create1(EPOLL_CLOEXEC); +#endif + if (epfd == -1) { + /* Initialize the kernel queue using the old interface. (The + size field is ignored since 2.6.8.) 
*/ + if ((epfd = epoll_create(32000)) == -1) { + if (errno != ENOSYS) + event_warn("epoll_create"); + return (NULL); + } + evutil_make_socket_closeonexec(epfd); } - evutil_make_socket_closeonexec(epfd); - if (!(epollop = mm_calloc(1, sizeof(struct epollop)))) { close(epfd); return (NULL); diff --git a/evdns.c b/evdns.c index c8d96c4c3f..7ad3d09db9 100644 --- a/evdns.c +++ b/evdns.c @@ -2458,10 +2458,9 @@ _evdns_nameserver_add_impl(struct evdns_base *base, const struct sockaddr *addre evtimer_assign(&ns->timeout_event, ns->base->event_base, nameserver_prod_callback, ns); - ns->socket = socket(address->sa_family, SOCK_DGRAM, 0); + ns->socket = evutil_socket(address->sa_family, + SOCK_DGRAM|EVUTIL_SOCK_NONBLOCK|EVUTIL_SOCK_CLOEXEC, 0); if (ns->socket < 0) { err = 1; goto out1; } - evutil_make_socket_closeonexec(ns->socket); - evutil_make_socket_nonblocking(ns->socket); if (base->global_outgoing_addrlen && !evutil_sockaddr_is_loopback(address)) { diff --git a/event.c b/event.c index 9f9b0915b8..8da5546376 100644 --- a/event.c +++ b/event.c @@ -46,9 +46,6 @@ #ifdef _EVENT_HAVE_UNISTD_H #include #endif -#ifdef _EVENT_HAVE_SYS_EVENTFD_H -#include -#endif #include #include #include @@ -2118,7 +2115,6 @@ evthread_notify_base_default(struct event_base *base) return (r < 0 && errno != EAGAIN) ? -1 : 0; } -#if defined(_EVENT_HAVE_EVENTFD) && defined(_EVENT_HAVE_SYS_EVENTFD_H) /* Helper callback: wake an event_base from another thread. This version * assumes that you have a working eventfd() implementation. */ static int @@ -2132,7 +2128,6 @@ evthread_notify_base_eventfd(struct event_base *base) return (r < 0) ? 
-1 : 0; } -#endif /** Tell the thread currently running the event_loop for base (if any) that it * needs to stop waiting in its dispatch function (if it is) and process all @@ -2910,7 +2905,6 @@ event_set_mem_functions(void *(*malloc_fn)(size_t sz), } #endif -#if defined(_EVENT_HAVE_EVENTFD) && defined(_EVENT_HAVE_SYS_EVENTFD_H) static void evthread_notify_drain_eventfd(evutil_socket_t fd, short what, void *arg) { @@ -2926,7 +2920,6 @@ evthread_notify_drain_eventfd(evutil_socket_t fd, short what, void *arg) base->is_notify_pending = 0; EVBASE_RELEASE_LOCK(base, th_base_lock); } -#endif static void evthread_notify_drain_default(evutil_socket_t fd, short what, void *arg) @@ -2949,8 +2942,8 @@ evthread_notify_drain_default(evutil_socket_t fd, short what, void *arg) int evthread_make_base_notifiable(struct event_base *base) { - void (*cb)(evutil_socket_t, short, void *) = evthread_notify_drain_default; - int (*notify)(struct event_base *) = evthread_notify_base_default; + void (*cb)(evutil_socket_t, short, void *); + int (*notify)(struct event_base *); /* XXXX grab the lock here? 
*/ if (!base) @@ -2961,61 +2954,21 @@ evthread_make_base_notifiable(struct event_base *base) return 0; } -#if defined(_EVENT_HAVE_EVENTFD) && defined(_EVENT_HAVE_SYS_EVENTFD_H) -#ifndef EFD_CLOEXEC -#define EFD_CLOEXEC 0 -#endif - base->th_notify_fd[0] = eventfd(0, EFD_CLOEXEC); + base->th_notify_fd[0] = evutil_eventfd(0, + EVUTIL_EFD_CLOEXEC|EVUTIL_EFD_NONBLOCK); if (base->th_notify_fd[0] >= 0) { - evutil_make_socket_closeonexec(base->th_notify_fd[0]); + base->th_notify_fd[1] = -1; notify = evthread_notify_base_eventfd; cb = evthread_notify_drain_eventfd; - } -#endif -#if defined(_EVENT_HAVE_PIPE) - if (base->th_notify_fd[0] < 0) { - if ((base->evsel->features & EV_FEATURE_FDS)) { - if (pipe(base->th_notify_fd) < 0) { - event_warn("%s: pipe", __func__); - } else { - evutil_make_socket_closeonexec(base->th_notify_fd[0]); - evutil_make_socket_closeonexec(base->th_notify_fd[1]); - } - } - } -#endif - -#ifdef _WIN32 -#define LOCAL_SOCKETPAIR_AF AF_INET -#else -#define LOCAL_SOCKETPAIR_AF AF_UNIX -#endif - if (base->th_notify_fd[0] < 0) { - if (evutil_socketpair(LOCAL_SOCKETPAIR_AF, SOCK_STREAM, 0, - base->th_notify_fd) == -1) { - event_sock_warn(-1, "%s: socketpair", __func__); - return (-1); - } else { - evutil_make_socket_closeonexec(base->th_notify_fd[0]); - evutil_make_socket_closeonexec(base->th_notify_fd[1]); - } + } else if (evutil_make_internal_pipe(base->th_notify_fd) == 0) { + notify = evthread_notify_base_default; + cb = evthread_notify_drain_default; + } else { + return -1; } - evutil_make_socket_nonblocking(base->th_notify_fd[0]); - base->th_notify_fn = notify; - /* - Making the second socket nonblocking is a bit subtle, given that we - ignore any EAGAIN returns when writing to it, and you don't usally - do that for a nonblocking socket. But if the kernel gives us EAGAIN, - then there's no need to add any more data to the buffer, since - the main thread is already either about to wake up and drain it, - or woken up and in the process of draining it. 
-	*/
 	if (base->th_notify_fd[1] > 0)
 		evutil_make_socket_nonblocking(base->th_notify_fd[1]);
-
 	/* prepare an event that we can use for wakeup */
 	event_assign(&base->th_notify, base, base->th_notify_fd[0],
 				 EV_READ|EV_PERSIST, cb, base);
diff --git a/evutil.c b/evutil.c
index de27e03658..bc81a093cc 100644
--- a/evutil.c
+++ b/evutil.c
@@ -331,6 +331,24 @@ evutil_make_socket_nonblocking(evutil_socket_t fd)
 	return 0;
 }
 
+/* Faster version of evutil_make_socket_nonblocking for internal use.
+ *
+ * Requires that no F_SETFL flags were previously set on the fd: the flag
+ * word is written as exactly O_NONBLOCK with a single F_SETFL call, so
+ * anything set earlier would be overwritten.
+ *
+ * Returns 0 on success.  If fcntl() fails, logs a warning and returns -1.
+ * On _WIN32 builds this simply calls evutil_make_socket_nonblocking().
+ */
+static int
+evutil_fast_socket_nonblocking(evutil_socket_t fd)
+{
+#ifdef _WIN32
+	return evutil_make_socket_nonblocking(fd);
+#else
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
+		event_warn("fcntl(%d, F_SETFL)", fd);
+		return -1;
+	}
+	return 0;
+#endif
+}
+
 int
 evutil_make_listen_socket_reuseable(evutil_socket_t sock)
 {
@@ -363,6 +381,22 @@ evutil_make_socket_closeonexec(evutil_socket_t fd)
 	return 0;
 }
 
+/* Faster version of evutil_make_socket_closeonexec for internal use.
+ *
+ * Requires that no F_SETFD flags were previously set on the fd: the flag
+ * word is written as exactly FD_CLOEXEC with a single F_SETFD call.
+ *
+ * Returns 0 on success, and also when the fcntl() call is compiled out
+ * (on _WIN32, or when _EVENT_HAVE_SETFD is not defined).  If fcntl() fails,
+ * logs a warning and returns -1.
+ */
+static int
+evutil_fast_socket_closeonexec(evutil_socket_t fd)
+{
+#if !defined(_WIN32) && defined(_EVENT_HAVE_SETFD)
+	if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) {
+		event_warn("fcntl(%d, F_SETFD)", fd);
+		return -1;
+	}
+#endif
+	return 0;
+}
+
 int
 evutil_closesocket(evutil_socket_t sock)
 {
@@ -2323,3 +2357,168 @@ evutil_usleep(const struct timeval *tv)
 	select(0, NULL, NULL, NULL, tv);
 #endif
 }
+
+/* Internal wrapper around 'socket' to provide Linux-style support for
+ * syscall-saving methods where available.
+ *
+ * In addition to regular socket behavior, you can use a bitwise or to set the
+ * flags EVUTIL_SOCK_NONBLOCK and EVUTIL_SOCK_CLOEXEC in the 'type' argument,
+ * to make the socket nonblocking or close-on-exec with as few syscalls as
+ * possible.
+ *
+ * Returns the new socket on success, or -1 on failure.
+ */
+evutil_socket_t
+evutil_socket(int domain, int type, int protocol)
+{
+	evutil_socket_t r;
+#if defined(SOCK_NONBLOCK) && defined(SOCK_CLOEXEC)
+	/* Fast path: ask the kernel to apply the flags inside socket(). */
+	r = socket(domain, type, protocol);
+	if (r >= 0)
+		return r;	/* done; falling through would leak this fd */
+	/* With no flags in 'type', the call below would repeat the very same
+	 * request, so report the failure now. */
+	if ((type & (SOCK_NONBLOCK|SOCK_CLOEXEC)) == 0)
+		return -1;
+#endif
+#define SOCKET_TYPE_MASK (~(EVUTIL_SOCK_NONBLOCK|EVUTIL_SOCK_CLOEXEC))
+	/* Slow path: create a plain socket, then set each requested flag. */
+	r = socket(domain, type & SOCKET_TYPE_MASK, protocol);
+	if (r < 0)
+		return -1;
+	if (type & EVUTIL_SOCK_NONBLOCK) {
+		if (evutil_fast_socket_nonblocking(r) < 0) {
+			evutil_closesocket(r);
+			return -1;
+		}
+	}
+	if (type & EVUTIL_SOCK_CLOEXEC) {
+		if (evutil_fast_socket_closeonexec(r) < 0) {
+			evutil_closesocket(r);
+			return -1;
+		}
+	}
+	return r;
+}
+
+/* Internal wrapper around 'accept' or 'accept4' to provide Linux-style
+ * support for syscall-saving methods where available.
+ *
+ * In addition to regular accept behavior, you can set one or more of flags
+ * EVUTIL_SOCK_NONBLOCK and EVUTIL_SOCK_CLOEXEC in the 'flags' argument, to
+ * make the socket nonblocking or close-on-exec with as few syscalls as
+ * possible.
+ *
+ * Returns the accepted socket, or a negative value on failure.  If accept4()
+ * is compiled in but fails with ENOSYS or EINVAL, plain accept() followed by
+ * the fcntl helpers is used instead, matching the runtime fallbacks of the
+ * other wrappers in this file.
+ */
+evutil_socket_t
+evutil_accept4(evutil_socket_t sockfd, struct sockaddr *addr,
+    socklen_t *addrlen, int flags)
+{
+	evutil_socket_t result;
+#if defined(_EVENT_HAVE_ACCEPT4) && defined(SOCK_CLOEXEC) && defined(SOCK_NONBLOCK)
+	result = accept4(sockfd, addr, addrlen, flags);
+	if (result >= 0 || (errno != EINVAL && errno != ENOSYS)) {
+		/* Either we succeeded, or accept4 failed for a reason that
+		 * plain accept() would hit as well. */
+		return result;
+	}
+#endif
+	result = accept(sockfd, addr, addrlen);
+	if (result < 0)
+		return result;
+
+	if (flags & EVUTIL_SOCK_CLOEXEC) {
+		if (evutil_fast_socket_closeonexec(result) < 0) {
+			evutil_closesocket(result);
+			return -1;
+		}
+	}
+	if (flags & EVUTIL_SOCK_NONBLOCK) {
+		if (evutil_fast_socket_nonblocking(result) < 0) {
+			evutil_closesocket(result);
+			return -1;
+		}
+	}
+	return result;
+}
+
+/* Internal function: Set fd[0] and fd[1] to a pair of fds such that writes on
+ * fd[1] get read from fd[0].  Make both fds nonblocking and close-on-exec.
+ * Return 0 on success, -1 on failure.
+ */
+int
+evutil_make_internal_pipe(evutil_socket_t fd[2])
+{
+	/*
+	  Making the second socket nonblocking is a bit subtle, given that we
+	  ignore any EAGAIN returns when writing to it, and you don't usually
+	  do that for a nonblocking socket. But if the kernel gives us EAGAIN,
+	  then there's no need to add any more data to the buffer, since
+	  the main thread is already either about to wake up and drain it,
+	  or woken up and in the process of draining it.
+
+	  The attempts below run in order: pipe2(), then pipe() followed by
+	  the fast fcntl helpers, then evutil_socketpair() followed by the
+	  same helpers.  Each stage is only compiled in where available.
+	*/
+
+#if defined(_EVENT_HAVE_PIPE2)
+	if (pipe2(fd, O_NONBLOCK|O_CLOEXEC) == 0) /* flags applied by pipe2 itself */
+		return 0;
+#endif
+#if defined(_EVENT_HAVE_PIPE)
+	if (pipe(fd) == 0) {
+		if (evutil_fast_socket_nonblocking(fd[0]) < 0 ||
+		    evutil_fast_socket_nonblocking(fd[1]) < 0 ||
+		    evutil_fast_socket_closeonexec(fd[0]) < 0 ||
+		    evutil_fast_socket_closeonexec(fd[1]) < 0) {
+			close(fd[0]);
+			close(fd[1]);
+			fd[0] = fd[1] = -1; /* don't hand closed fds back to the caller */
+			return -1; /* note: socketpair is not tried in this case */
+		}
+		return 0;
+	} else {
+		event_warn("%s: pipe", __func__); /* not fatal: fall through to socketpair */
+	}
+#endif
+
+#ifdef _WIN32
+#define LOCAL_SOCKETPAIR_AF AF_INET
+#else
+#define LOCAL_SOCKETPAIR_AF AF_UNIX
+#endif
+	if (evutil_socketpair(LOCAL_SOCKETPAIR_AF, SOCK_STREAM, 0, fd) == 0) {
+		if (evutil_fast_socket_nonblocking(fd[0]) < 0 ||
+		    evutil_fast_socket_nonblocking(fd[1]) < 0 ||
+		    evutil_fast_socket_closeonexec(fd[0]) < 0 ||
+		    evutil_fast_socket_closeonexec(fd[1]) < 0) {
+			evutil_closesocket(fd[0]);
+			evutil_closesocket(fd[1]);
+			fd[0] = fd[1] = -1;
+			return -1;
+		}
+		return 0;
+	}
+	fd[0] = fd[1] = -1; /* socketpair failed as well: nothing to return */
+	return -1;
+}
+
+/* Wrapper around eventfd on systems that provide it.  Unlike the system
+ * eventfd, it always supports EVUTIL_EFD_CLOEXEC and EVUTIL_EFD_NONBLOCK as
+ * flags.  Returns -1 on error or if eventfd is not supported.
+ */
+evutil_socket_t
+evutil_eventfd(unsigned initval, int flags)
+{
+#if defined(_EVENT_HAVE_EVENTFD) && defined(_EVENT_HAVE_SYS_EVENTFD_H)
+	int r;
+#if defined(EFD_CLOEXEC) && defined(EFD_NONBLOCK)
+	r = eventfd(initval, flags); /* first try: have eventfd() apply the flags itself */
+	if (r >= 0 || flags == 0) /* success, or no flags that a retry could drop */
+		return r;
+#endif
+	r = eventfd(initval, 0); /* retry without flags; they are set by hand below */
+	if (r < 0)
+		return r;
+	if (flags & EVUTIL_EFD_CLOEXEC) {
+		if (evutil_fast_socket_closeonexec(r) < 0) {
+			evutil_closesocket(r); /* don't leak the fd on partial failure */
+			return -1;
+		}
+	}
+	if (flags & EVUTIL_EFD_NONBLOCK) {
+		if (evutil_fast_socket_nonblocking(r) < 0) {
+			evutil_closesocket(r);
+			return -1;
+		}
+	}
+	return r;
+#else
+	return -1; /* built without eventfd support */
+#endif
+}
+
diff --git a/http.c b/http.c
index cf088927b7..632735c998 100644
--- a/http.c
+++ b/http.c
@@ -3984,17 +3984,13 @@ bind_socket_ai(struct evutil_addrinfo *ai, int reuse)
 	int serrno;
 
 	/* Create listen socket */
-	fd = socket(ai ? ai->ai_family : AF_INET, SOCK_STREAM, 0);
+	fd = evutil_socket(ai ? ai->ai_family : AF_INET,
+	    SOCK_STREAM|EVUTIL_SOCK_NONBLOCK|EVUTIL_SOCK_CLOEXEC, 0);
 	if (fd == -1) {
 			event_sock_warn(-1, "socket");
 			return (-1);
 	}
 
-	if (evutil_make_socket_nonblocking(fd) < 0)
-		goto out;
-	if (evutil_make_socket_closeonexec(fd) < 0)
-		goto out;
-
 	setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (void *)&on, sizeof(on));
 	if (reuse)
 		evutil_make_listen_socket_reuseable(fd);
diff --git a/listener.c b/listener.c
index 34be349754..a24ba0734b 100644
--- a/listener.c
+++ b/listener.c
@@ -80,6 +80,7 @@ struct evconnlistener {
 	void *user_data;
 	unsigned flags;
 	short refcnt;
+	int accept4_flags;
 	unsigned enabled : 1;
 };
 
@@ -186,6 +187,12 @@ evconnlistener_new(struct event_base *base,
 	lev->base.flags = flags;
 	lev->base.refcnt = 1;
 
+	lev->base.accept4_flags = 0;
+	if (!(flags & LEV_OPT_LEAVE_SOCKETS_BLOCKING))
+		lev->base.accept4_flags |= EVUTIL_SOCK_NONBLOCK;
+	if (flags & LEV_OPT_CLOSE_ON_EXEC)
+		lev->base.accept4_flags |= EVUTIL_SOCK_CLOEXEC;
+
 	if (flags & LEV_OPT_THREADSAFE) {
 		EVTHREAD_ALLOC_LOCK(lev->base.lock, EVTHREAD_LOCKTYPE_RECURSIVE);
 	}
@@
-193,8 +200,8 @@ evconnlistener_new(struct event_base *base, event_assign(&lev->listener, base, fd, EV_READ|EV_PERSIST, listener_read_cb, lev); - if (!(flags & LEV_OPT_DISABLED)) - evconnlistener_enable(&lev->base); + if (!(flags & LEV_OPT_DISABLED)) + evconnlistener_enable(&lev->base); return &lev->base; } @@ -208,25 +215,17 @@ evconnlistener_new_bind(struct event_base *base, evconnlistener_cb cb, evutil_socket_t fd; int on = 1; int family = sa ? sa->sa_family : AF_UNSPEC; + int socktype = SOCK_STREAM | EVUTIL_SOCK_NONBLOCK; if (backlog == 0) return NULL; - fd = socket(family, SOCK_STREAM, 0); - if (fd == -1) - return NULL; + if (flags & LEV_OPT_CLOSE_ON_EXEC) + socktype |= EVUTIL_SOCK_CLOEXEC; - if (evutil_make_socket_nonblocking(fd) < 0) { - evutil_closesocket(fd); + fd = evutil_socket(family, socktype, 0); + if (fd == -1) return NULL; - } - - if (flags & LEV_OPT_CLOSE_ON_EXEC) { - if (evutil_make_socket_closeonexec(fd) < 0) { - evutil_closesocket(fd); - return NULL; - } - } setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (void*)&on, sizeof(on)); if (flags & LEV_OPT_REUSEABLE) { @@ -389,7 +388,7 @@ listener_read_cb(evutil_socket_t fd, short what, void *p) #else socklen_t socklen = sizeof(ss); #endif - evutil_socket_t new_fd = accept(fd, (struct sockaddr*)&ss, &socklen); + evutil_socket_t new_fd = evutil_accept4(fd, (struct sockaddr*)&ss, &socklen, lev->accept4_flags); if (new_fd < 0) break; if (socklen == 0) { @@ -399,9 +398,6 @@ listener_read_cb(evutil_socket_t fd, short what, void *p) continue; } - if (!(lev->flags & LEV_OPT_LEAVE_SOCKETS_BLOCKING)) - evutil_make_socket_nonblocking(new_fd); - if (lev->cb == NULL) { UNLOCK(lev); return; diff --git a/signal.c b/signal.c index 6c19b4af6e..5d73ea80c5 100644 --- a/signal.c +++ b/signal.c @@ -122,7 +122,7 @@ evsig_set_base(struct event_base *base) EVSIGBASE_LOCK(); evsig_base = base; evsig_base_n_signals_added = base->sig.ev_n_signals_added; - evsig_base_fd = base->sig.ev_signal_pair[0]; + evsig_base_fd = 
base->sig.ev_signal_pair[1]; EVSIGBASE_UNLOCK(); } @@ -141,7 +141,11 @@ evsig_cb(evutil_socket_t fd, short what, void *arg) memset(&ncaught, 0, sizeof(ncaught)); while (1) { +#ifdef _WIN32 n = recv(fd, signals, sizeof(signals), 0); +#else + n = read(fd, signals, sizeof(signals)); +#endif if (n == -1) { int err = evutil_socket_geterror(fd); if (! EVUTIL_ERR_RW_RETRIABLE(err)) @@ -174,8 +178,7 @@ evsig_init(struct event_base *base) * pair to wake up our event loop. The event loop then scans for * signals that got delivered. */ - if (evutil_socketpair( - AF_UNIX, SOCK_STREAM, 0, base->sig.ev_signal_pair) == -1) { + if (evutil_make_internal_pipe(base->sig.ev_signal_pair) == -1) { #ifdef _WIN32 /* Make this nonfatal on win32, where sometimes people have localhost firewalled. */ @@ -186,18 +189,13 @@ evsig_init(struct event_base *base) return -1; } - evutil_make_socket_closeonexec(base->sig.ev_signal_pair[0]); - evutil_make_socket_closeonexec(base->sig.ev_signal_pair[1]); if (base->sig.sh_old) { mm_free(base->sig.sh_old); } base->sig.sh_old = NULL; base->sig.sh_old_max = 0; - evutil_make_socket_nonblocking(base->sig.ev_signal_pair[0]); - evutil_make_socket_nonblocking(base->sig.ev_signal_pair[1]); - - event_assign(&base->sig.ev_signal, base, base->sig.ev_signal_pair[1], + event_assign(&base->sig.ev_signal, base, base->sig.ev_signal_pair[0], EV_READ | EV_PERSIST, evsig_cb, base); base->sig.ev_signal.ev_flags |= EVLIST_INTERNAL; @@ -297,7 +295,7 @@ evsig_add(struct event_base *base, evutil_socket_t evsignal, short old, short ev } evsig_base = base; evsig_base_n_signals_added = ++sig->ev_n_signals_added; - evsig_base_fd = base->sig.ev_signal_pair[0]; + evsig_base_fd = base->sig.ev_signal_pair[1]; EVSIGBASE_UNLOCK(); event_debug(("%s: %d: changing signal handler", __func__, (int)evsignal)); @@ -396,7 +394,11 @@ evsig_handler(int sig) /* Wake up our notification mechanism */ msg = sig; +#ifdef _WIN32 send(evsig_base_fd, (char*)&msg, 1, 0); +#else + write(evsig_base_fd, 
(char*)&msg, 1); +#endif errno = save_errno; #ifdef _WIN32 EVUTIL_SET_SOCKET_ERROR(socket_errno); diff --git a/util-internal.h b/util-internal.h index 8139fb984a..162dd04078 100644 --- a/util-internal.h +++ b/util-internal.h @@ -38,6 +38,9 @@ #ifdef _EVENT_HAVE_SYS_SOCKET_H #include #endif +#ifdef _EVENT_HAVE_SYS_EVENTFD_H +#include +#endif #include "event2/util.h" #include "ipv6-internal.h" @@ -316,6 +319,34 @@ HANDLE evutil_load_windows_system_library(const TCHAR *library_name); #endif #endif +evutil_socket_t evutil_socket(int domain, int type, int protocol); +evutil_socket_t evutil_accept4(evutil_socket_t sockfd, struct sockaddr *addr, + socklen_t *addrlen, int flags); +int evutil_make_internal_pipe(evutil_socket_t fd[2]); +evutil_socket_t evutil_eventfd(unsigned initval, int flags); + +#ifdef SOCK_NONBLOCK +#define EVUTIL_SOCK_NONBLOCK SOCK_NONBLOCK +#else +#define EVUTIL_SOCK_NONBLOCK 0x4000000 +#endif +#ifdef SOCK_CLOEXEC +#define EVUTIL_SOCK_CLOEXEC SOCK_CLOEXEC +#else +#define EVUTIL_SOCK_CLOEXEC 0x80000000 +#endif +#ifdef EFD_NONBLOCK +#define EVUTIL_EFD_NONBLOCK EFD_NONBLOCK +#else +#define EVUTIL_EFD_NONBLOCK 0x4000 +#endif +#ifdef EFD_CLOEXEC +#define EVUTIL_EFD_CLOEXEC EFD_CLOEXEC +#else +#define EVUTIL_EFD_CLOEXEC 0x8000 +#endif + + #ifdef __cplusplus } #endif