Skip to content

Commit

Permalink
Fix a nasty bug related to use of dup() with epoll on Linux
Browse files Browse the repository at this point in the history
Current versions of the Linux kernel don't seem to remove the struct
epitem for a given (file,fd) combo when the fd is closed unless the
file itself is also completely closed.  This means that if you do:
   fd = dup(fd_orig);
   add(fd);
   close(fd);
   dup2(fd_orig, fd);
   add(fd);
you will get an EEXIST when you should have gotten a success.  This
could cause warnings and dropped events when using dup and epoll.

The solution is pretty simple: when we get an EEXIST from
EPOLL_CTL_ADD, we retry with EPOLL_CTL_MOD.

Unit test included to demonstrate the bug.

Found due to the patient efforts of Gilad Benjamini; diagnosed with
help from Nicholas Marriott.
  • Loading branch information
nmathewson committed Oct 24, 2010
1 parent bf11e7d commit c281aba
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 13 deletions.
32 changes: 19 additions & 13 deletions epoll.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ epoll_apply_changes(struct event_base *base)
int op, events;

for (i = 0; i < changelist->n_changes; ++i) {
int precautionary_add = 0;
ch = &changelist->changes[i];
events = 0;

Expand Down Expand Up @@ -203,13 +202,12 @@ epoll_apply_changes(struct event_base *base)
on the fd, we need to try the ADD anyway, in
case the fd was closed at some in the
middle. If it wasn't, the ADD operation
will fail with; that's okay.
*/
precautionary_add = 1;
will fail with EEXIST; and we retry it as a
MOD. */
op = EPOLL_CTL_ADD;
} else if (ch->old_events) {
op = EPOLL_CTL_MOD;
}

} else if ((ch->read_change & EV_CHANGE_DEL) ||
(ch->write_change & EV_CHANGE_DEL)) {
/* If we're deleting anything, we'll want to do a MOD
Expand Down Expand Up @@ -255,14 +253,22 @@ epoll_apply_changes(struct event_base *base)
(int)epev.events,
ch->fd));
}
} else if (op == EPOLL_CTL_ADD && errno == EEXIST &&
precautionary_add) {
/* If a precautionary ADD operation fails with
EEXIST, that's fine too.
*/
event_debug(("Epoll ADD(%d) on fd %d gave %s: ADD was redundant",
(int)epev.events,
ch->fd, strerror(errno)));
} else if (op == EPOLL_CTL_ADD && errno == EEXIST) {
/* If an ADD operation fails with EEXIST,
* either the operation was redundant (as with a
* precautionary add), or we ran into a fun
* kernel bug where using dup*() to duplicate the
* same file into the same fd gives you the same epitem
* rather than a fresh one. For the second case,
* we must retry with MOD. */
if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
(int)epev.events, ch->fd);
} else {
event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
(int)epev.events,
ch->fd));
}
} else if (op == EPOLL_CTL_DEL &&
(errno == ENOENT || errno == EBADF ||
errno == EPERM)) {
Expand Down
73 changes: 73 additions & 0 deletions test/regress.c
Original file line number Diff line number Diff line change
Expand Up @@ -2061,6 +2061,76 @@ test_event_pending(void *ptr)
}
}

#ifndef WIN32
/* You can't do this test on windows, since dup2 doesn't work on sockets */

static void
dfd_cb(evutil_socket_t fd, short e, void *data)
{
*(int*)data = (int)e;
}

/* Regression test for our workaround for a fun epoll/linux related bug
* where fd2 = dup(fd1); add(fd2); close(fd2); dup2(fd1,fd2); add(fd2)
* will get you an EEXIST */
static void
test_dup_fd(void *arg)
{
struct basic_test_data *data = arg;
struct event_base *base = data->base;
struct event *ev1=NULL, *ev2=NULL;
int fd, dfd=-1;
int ev1_got, ev2_got;

tt_int_op(write(data->pair[0], "Hello world",
strlen("Hello world")), >, 0);
fd = data->pair[1];

dfd = dup(fd);
tt_int_op(dfd, >=, 0);

ev1 = event_new(base, fd, EV_READ|EV_PERSIST, dfd_cb, &ev1_got);
ev2 = event_new(base, dfd, EV_READ|EV_PERSIST, dfd_cb, &ev2_got);
ev1_got = ev2_got = 0;
event_add(ev1, NULL);
event_add(ev2, NULL);
event_base_loop(base, EVLOOP_ONCE);
tt_int_op(ev1_got, ==, EV_READ);
tt_int_op(ev2_got, ==, EV_READ);

/* Now close and delete dfd then dispatch. We need to do the
* dispatch here so that when we add it later, we think there
* was an intermediate delete. */
close(dfd);
event_del(ev2);
ev1_got = ev2_got = 0;
event_base_loop(base, EVLOOP_ONCE);
tt_want_int_op(ev1_got, ==, EV_READ);
tt_int_op(ev2_got, ==, 0);

/* Re-duplicate the fd. We need to get the same duplicated
* value that we closed to provoke the epoll quirk. Also, we
* need to change the events to write, or else the old lingering
* read event will make the test pass whether the change was
* successful or not. */
tt_int_op(dup2(fd, dfd), ==, dfd);
event_free(ev2);
ev2 = event_new(base, dfd, EV_WRITE|EV_PERSIST, dfd_cb, &ev2_got);
event_add(ev2, NULL);
ev1_got = ev2_got = 0;
event_base_loop(base, EVLOOP_ONCE);
tt_want_int_op(ev1_got, ==, EV_READ);
tt_int_op(ev2_got, ==, EV_WRITE);

end:
if (ev1)
event_free(ev1);
if (ev2)
event_free(ev2);
close(dfd);
}
#endif

#ifdef _EVENT_DISABLE_MM_REPLACEMENT
static void
test_mm_functions(void *arg)
Expand Down Expand Up @@ -2229,6 +2299,9 @@ struct testcase_t main_testcases[] = {
{ "event_once", test_event_once, TT_ISOLATED, &basic_setup, NULL },
{ "event_pending", test_event_pending, TT_ISOLATED, &basic_setup,
NULL },
#ifndef WIN32
{ "dup_fd", test_dup_fd, TT_ISOLATED, &basic_setup, NULL },
#endif
{ "mm_functions", test_mm_functions, TT_FORK, NULL, NULL },
BASIC(many_events, TT_ISOLATED),

Expand Down

0 comments on commit c281aba

Please sign in to comment.