Skip to content

Commit 66be626

Browse files
committed
linux: fix a hang if there are no reads from the tty
avoid a "crun exec" hang when the the other end of the terminal stopped reading. That happened because `copy_fd_to_fd` tried to write everything that it has received from the source fd, so it would hang the current process. Prevent that using non blocking file descriptors and using epoll to detect when the file descriptor is available for write. Fixes: https://issues.redhat.com/browse/OCPBUGS-45632 Signed-off-by: Giuseppe Scrivano <[email protected]>
1 parent c68f412 commit 66be626

File tree

3 files changed

+201
-27
lines changed

3 files changed

+201
-27
lines changed

src/libcrun/container.c

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,21 +1968,34 @@ struct wait_for_process_args
19681968
static int
19691969
wait_for_process (struct wait_for_process_args *args, libcrun_error_t *err)
19701970
{
1971+
cleanup_channel_fd_pair struct channel_fd_pair *from_terminal = NULL;
1972+
cleanup_channel_fd_pair struct channel_fd_pair *to_terminal = NULL;
1973+
int ret, container_exit_code = 0, last_process;
1974+
cleanup_close int terminal_fd_from = -1;
1975+
cleanup_close int terminal_fd_to = -1;
1976+
const size_t max_events = 10;
19711977
cleanup_close int epollfd = -1;
19721978
cleanup_close int signalfd = -1;
1973-
int ret, container_exit_code = 0, last_process;
19741979
sigset_t mask;
1975-
int in_fds[10];
1976-
int in_levelfds[10];
1977-
int in_levelfds_len = 0;
1980+
int in_fds[max_events];
19781981
int in_fds_len = 0;
1982+
int out_fds[max_events];
1983+
int out_fds_len = 0;
1984+
size_t i;
1985+
19791986
cleanup_seccomp_notify_context struct seccomp_notify_context_s *seccomp_notify_ctx = NULL;
19801987

19811988
container_exit_code = 0;
19821989

19831990
if (args == NULL || args->context == NULL)
19841991
return crun_make_error (err, 0, "internal error: context is empty");
19851992

1993+
for (i = 0; i < max_events; i++)
1994+
{
1995+
in_fds[i] = -1;
1996+
out_fds[i] = -1;
1997+
}
1998+
19861999
if (args->context->pid_file)
19872000
{
19882001
char buf[32];
@@ -2054,62 +2067,78 @@ wait_for_process (struct wait_for_process_args *args, libcrun_error_t *err)
20542067
in_fds[in_fds_len++] = args->seccomp_notify_fd;
20552068
}
20562069

2070+
if (args->terminal_fd >= 0)
2071+
{
2072+
/* The terminal_fd is dup()ed so that it can be registered with
2073+
epoll multiple times using different masks. */
2074+
terminal_fd_from = dup (args->terminal_fd);
2075+
if (UNLIKELY (terminal_fd_from < 0))
2076+
return crun_make_error (err, errno, "dup terminal fd");
2077+
terminal_fd_to = dup (args->terminal_fd);
2078+
if (UNLIKELY (terminal_fd_to < 0))
2079+
return crun_make_error (err, errno, "dup terminal fd");
2080+
2081+
int i, non_blocking_fds[] = { terminal_fd_from, terminal_fd_to, 0, 1, -1 };
2082+
for (i = 0; non_blocking_fds[i] >= 0; i++)
2083+
{
2084+
ret = set_blocking_fd (non_blocking_fds[i], false, err);
2085+
if (UNLIKELY (ret < 0))
2086+
return ret;
2087+
}
2088+
2089+
from_terminal = channel_fd_pair_new (terminal_fd_from, 1, BUFSIZ);
2090+
to_terminal = channel_fd_pair_new (0, terminal_fd_to, BUFSIZ);
2091+
}
2092+
20572093
in_fds[in_fds_len++] = signalfd;
20582094
if (args->notify_socket >= 0)
20592095
in_fds[in_fds_len++] = args->notify_socket;
20602096
if (args->terminal_fd >= 0)
20612097
{
20622098
in_fds[in_fds_len++] = 0;
2063-
in_levelfds[in_levelfds_len++] = args->terminal_fd;
2099+
out_fds[out_fds_len++] = terminal_fd_to;
2100+
2101+
in_fds[in_fds_len++] = terminal_fd_from;
2102+
out_fds[out_fds_len++] = 1;
20642103
}
2065-
in_fds[in_fds_len++] = -1;
2066-
in_levelfds[in_levelfds_len++] = -1;
20672104

2068-
epollfd = epoll_helper (in_fds, in_levelfds, NULL, NULL, err);
2105+
epollfd = epoll_helper (in_fds, NULL, out_fds, NULL, err);
20692106
if (UNLIKELY (epollfd < 0))
20702107
return epollfd;
20712108

20722109
while (1)
20732110
{
2111+
struct epoll_event events[max_events];
20742112
struct signalfd_siginfo si;
20752113
struct winsize ws;
2076-
ssize_t res;
2077-
struct epoll_event events[10];
20782114
int i, nr_events;
2115+
ssize_t res;
20792116

2080-
nr_events = TEMP_FAILURE_RETRY (epoll_wait (epollfd, events, 10, -1));
2117+
nr_events = TEMP_FAILURE_RETRY (epoll_wait (epollfd, events, max_events, -1));
20812118
if (UNLIKELY (nr_events < 0))
20822119
return crun_make_error (err, errno, "epoll_wait");
20832120

20842121
for (i = 0; i < nr_events; i++)
20852122
{
2086-
if (events[i].data.fd == 0)
2123+
if (events[i].data.fd == 0 || events[i].data.fd == terminal_fd_to)
20872124
{
2088-
ret = copy_from_fd_to_fd (0, args->terminal_fd, 0, err);
2125+
ret = channel_fd_pair_process (to_terminal, epollfd, err);
20892126
if (UNLIKELY (ret < 0))
20902127
return crun_error_wrap (err, "copy to terminal fd");
20912128
}
2129+
else if (events[i].data.fd == 1 || events[i].data.fd == terminal_fd_from)
2130+
{
2131+
ret = channel_fd_pair_process (from_terminal, epollfd, err);
2132+
if (UNLIKELY (ret < 0))
2133+
return crun_error_wrap (err, "copy from terminal fd");
2134+
}
20922135
else if (events[i].data.fd == args->seccomp_notify_fd)
20932136
{
20942137
ret = libcrun_seccomp_notify_plugins (seccomp_notify_ctx,
20952138
args->seccomp_notify_fd, err);
20962139
if (UNLIKELY (ret < 0))
20972140
return ret;
20982141
}
2099-
else if (events[i].data.fd == args->terminal_fd)
2100-
{
2101-
ret = set_blocking_fd (args->terminal_fd, false, err);
2102-
if (UNLIKELY (ret < 0))
2103-
return crun_error_wrap (err, "set terminal fd not blocking");
2104-
2105-
ret = copy_from_fd_to_fd (args->terminal_fd, 1, 1, err);
2106-
if (UNLIKELY (ret < 0))
2107-
return crun_error_wrap (err, "copy from terminal fd");
2108-
2109-
ret = set_blocking_fd (args->terminal_fd, true, err);
2110-
if (UNLIKELY (ret < 0))
2111-
return crun_error_wrap (err, "set terminal fd blocking");
2112-
}
21132142
else if (events[i].data.fd == args->notify_socket)
21142143
{
21152144
ret = handle_notify_socket (args->notify_socket, err);

src/libcrun/utils.c

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#define _GNU_SOURCE
2020
#include <config.h>
2121
#include "utils.h"
22+
#include "ring_buffer.h"
2223
#include <stdarg.h>
2324
#include <unistd.h>
2425
#include <string.h>
@@ -1241,6 +1242,25 @@ create_signalfd (sigset_t *mask, libcrun_error_t *err)
12411242
return ret;
12421243
}
12431244

1245+
static int
1246+
epoll_helper_toggle (int epollfd, int fd, int events, libcrun_error_t *err)
1247+
{
1248+
struct epoll_event ev = {};
1249+
bool add = events != 0;
1250+
int ret;
1251+
1252+
ev.events = events;
1253+
ev.data.fd = fd;
1254+
ret = epoll_ctl (epollfd, add ? EPOLL_CTL_ADD : EPOLL_CTL_DEL, fd, &ev);
1255+
if (UNLIKELY (ret < 0))
1256+
{
1257+
if (errno == EEXIST || errno == ENOENT)
1258+
return 0;
1259+
return crun_make_error (err, errno, "epoll_ctl `%s` `%d`", add ? "add" : "del", fd);
1260+
}
1261+
return 0;
1262+
}
1263+
12441264
int
12451265
epoll_helper (int *in_fds, int *in_levelfds, int *out_fds, int *out_levelfds, libcrun_error_t *err)
12461266
{
@@ -2650,3 +2670,95 @@ cpuset_string_to_bitmask (const char *str, char **out, size_t *out_size, libcrun
26502670
invalid_input:
26512671
return crun_make_error (err, 0, "cannot parse input `%s`", str);
26522672
}
2673+
2674+
struct channel_fd_pair
2675+
{
2676+
struct ring_buffer rb;
2677+
2678+
int in_fd;
2679+
int out_fd;
2680+
2681+
int infd_epoll_events;
2682+
int outfd_epoll_events;
2683+
};
2684+
2685+
struct channel_fd_pair *
2686+
channel_fd_pair_new (int in_fd, int out_fd, size_t size)
2687+
{
2688+
struct channel_fd_pair *channel = xmalloc (sizeof (struct channel_fd_pair));
2689+
channel->in_fd = in_fd;
2690+
channel->out_fd = out_fd;
2691+
ring_buffer_make (&channel->rb, size);
2692+
channel->infd_epoll_events = -1;
2693+
channel->outfd_epoll_events = -1;
2694+
return channel;
2695+
}
2696+
2697+
void
2698+
channel_fd_pair_free (struct channel_fd_pair *channel)
2699+
{
2700+
if (channel == NULL)
2701+
return;
2702+
2703+
ring_buffer_free (&channel->rb);
2704+
free (channel);
2705+
}
2706+
2707+
int
2708+
channel_fd_pair_process (struct channel_fd_pair *channel, int epollfd, libcrun_error_t *err)
2709+
{
2710+
bool is_input_eagain = false, is_output_eagain = false, repeat;
2711+
int ret, i;
2712+
2713+
/* This function is called from an epoll loop. Use a hard limit to avoid infinite loops
2714+
and prevent other events from being processed. */
2715+
for (i = 0, repeat = true; i < 1000 && repeat; i++)
2716+
{
2717+
repeat = false;
2718+
if (ring_buffer_get_space_available (&(channel->rb)) >= channel->rb.size / 2)
2719+
{
2720+
ret = ring_buffer_read (&(channel->rb), channel->in_fd, &is_input_eagain, err);
2721+
if (UNLIKELY (ret < 0))
2722+
return ret;
2723+
if (ret > 0)
2724+
repeat = true;
2725+
}
2726+
if (ring_buffer_get_data_available (&(channel->rb)) > 0)
2727+
{
2728+
ret = ring_buffer_write (&(channel->rb), channel->out_fd, &is_output_eagain, err);
2729+
if (UNLIKELY (ret < 0))
2730+
return ret;
2731+
if (ret > 0)
2732+
repeat = true;
2733+
}
2734+
}
2735+
2736+
if (epollfd >= 0)
2737+
{
2738+
size_t available = ring_buffer_get_space_available (&(channel->rb));
2739+
size_t used = ring_buffer_get_data_available (&(channel->rb));
2740+
int events;
2741+
2742+
/* If there is space available in the buffer, we want to read more. */
2743+
events = (available > 0) ? (EPOLLIN | (is_input_eagain ? EPOLLET : 0)) : 0;
2744+
if (events != channel->infd_epoll_events)
2745+
{
2746+
ret = epoll_helper_toggle (epollfd, channel->in_fd, events, err);
2747+
if (UNLIKELY (ret < 0))
2748+
return ret;
2749+
channel->infd_epoll_events = events;
2750+
}
2751+
2752+
/* If there is data available in the buffer, we want to write as soon as
2753+
it is possible. */
2754+
events = (used > 0) ? (EPOLLOUT | (is_output_eagain ? EPOLLET : 0)) : 0;
2755+
if (events != channel->outfd_epoll_events)
2756+
{
2757+
ret = epoll_helper_toggle (epollfd, channel->out_fd, events, err);
2758+
if (UNLIKELY (ret < 0))
2759+
return ret;
2760+
channel->outfd_epoll_events = events;
2761+
}
2762+
}
2763+
return 0;
2764+
}

src/libcrun/utils.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,4 +475,37 @@ validate_options (unsigned int specified_options, unsigned int supported_options
475475

476476
extern int cpuset_string_to_bitmask (const char *str, char **out, size_t *out_size, libcrun_error_t *err);
477477

478+
/*
479+
* A channel_fd_pair takes care of copying data between two file descriptors.
480+
* The two file descriptors are expected to be set to non-blocking mode.
481+
* The channel_fd_pair will buffer data read from the input file descriptor and
482+
* write it to the output file descriptor. If the output file descriptor is not
483+
* ready to accept the data, the channel_fd_pair will buffer the data until it
484+
* can be written.
485+
*/
486+
struct channel_fd_pair;
487+
488+
struct channel_fd_pair *channel_fd_pair_new (int in_fd, int out_fd, size_t size);
489+
490+
void channel_fd_pair_free (struct channel_fd_pair *channel);
491+
492+
/* Process the data in the channel_fd_pair. This function will read data from
493+
* the input file descriptor and write it to the output file descriptor. If
494+
* the output file descriptor is not ready to accept the data, the data will be
495+
* buffered. If epollfd is provided, the in_fd and out_fd will be registered
496+
* and unregistered as necessary.
497+
*/
498+
int channel_fd_pair_process (struct channel_fd_pair *channel, int epollfd, libcrun_error_t *err);
499+
500+
static inline void
501+
cleanup_channel_fd_pairp (void *p)
502+
{
503+
struct channel_fd_pair **pp = (struct channel_fd_pair **) p;
504+
if (*pp == NULL)
505+
return;
506+
507+
channel_fd_pair_free (*pp);
508+
}
509+
#define cleanup_channel_fd_pair __attribute__ ((cleanup (cleanup_channel_fd_pairp)))
510+
478511
#endif

0 commit comments

Comments
 (0)