diff --git a/Makefile.am b/Makefile.am index 8980797164..c088e456e5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -35,6 +35,7 @@ endif check_LTLIBRARIES = libcrun_testing.la libcrun_SOURCES = src/libcrun/utils.c \ + src/libcrun/ring_buffer.c \ src/libcrun/blake3/blake3.c \ src/libcrun/blake3/blake3_portable.c \ src/libcrun/cgroup-cgroupfs.c \ @@ -154,12 +155,12 @@ EXTRA_DIST = COPYING COPYING.libcrun README.md NEWS SECURITY.md rpm/crun.spec au src/libcrun/handlers/handler-utils.h \ src/libcrun/linux.h src/libcrun/utils.h src/libcrun/error.h src/libcrun/criu.h \ src/libcrun/scheduler.h src/libcrun/status.h src/libcrun/terminal.h \ - src/libcrun/mount_flags.h src/libcrun/intelrdt.h \ + src/libcrun/mount_flags.h src/libcrun/intelrdt.h src/libcrun/ring_buffer.h \ crun.1.md crun.1 libcrun.lds \ krun.1.md krun.1 \ lua/luacrun.rockspec -UNIT_TESTS = tests/tests_libcrun_utils tests/tests_libcrun_errors tests/tests_libcrun_intelrdt +UNIT_TESTS = tests/tests_libcrun_utils tests/tests_libcrun_ring_buffer tests/tests_libcrun_errors tests/tests_libcrun_intelrdt if ENABLE_CRUN bin_PROGRAMS = crun @@ -181,6 +182,11 @@ tests_tests_libcrun_utils_SOURCES = tests/tests_libcrun_utils.c tests_tests_libcrun_utils_LDADD = $(TESTS_LDADD) tests_tests_libcrun_utils_LDFLAGS = $(crun_LDFLAGS) +tests_tests_libcrun_ring_buffer_CFLAGS = -I $(abs_top_builddir)/libocispec/src -I $(abs_top_srcdir)/libocispec/src -I $(abs_top_builddir)/src -I $(abs_top_srcdir)/src +tests_tests_libcrun_ring_buffer_SOURCES = tests/tests_libcrun_ring_buffer.c +tests_tests_libcrun_ring_buffer_LDADD = $(TESTS_LDADD) +tests_tests_libcrun_ring_buffer_LDFLAGS = $(crun_LDFLAGS) + tests_tests_libcrun_intelrdt_CFLAGS = -I $(abs_top_builddir)/libocispec/src -I $(abs_top_srcdir)/libocispec/src -I $(abs_top_builddir)/src -I $(abs_top_srcdir)/src tests_tests_libcrun_intelrdt_SOURCES = tests/tests_libcrun_intelrdt.c tests_tests_libcrun_intelrdt_LDADD = $(TESTS_LDADD) diff --git a/src/libcrun/container.c b/src/libcrun/container.c 
index 6d549980d0..ecefc7e766 100644 --- a/src/libcrun/container.c +++ b/src/libcrun/container.c @@ -1968,14 +1968,21 @@ struct wait_for_process_args static int wait_for_process (struct wait_for_process_args *args, libcrun_error_t *err) { + cleanup_channel_fd_pair struct channel_fd_pair *from_terminal = NULL; + cleanup_channel_fd_pair struct channel_fd_pair *to_terminal = NULL; + int ret, container_exit_code = 0, last_process; + cleanup_close int terminal_fd_from = -1; + cleanup_close int terminal_fd_to = -1; + const size_t max_events = 10; cleanup_close int epollfd = -1; cleanup_close int signalfd = -1; - int ret, container_exit_code = 0, last_process; sigset_t mask; - int fds[10]; - int levelfds[10]; - int levelfds_len = 0; - int fds_len = 0; + int in_fds[max_events]; + int in_fds_len = 0; + int out_fds[max_events]; + int out_fds_len = 0; + size_t i; + cleanup_seccomp_notify_context struct seccomp_notify_context_s *seccomp_notify_ctx = NULL; container_exit_code = 0; @@ -1983,6 +1990,12 @@ wait_for_process (struct wait_for_process_args *args, libcrun_error_t *err) if (args == NULL || args->context == NULL) return crun_make_error (err, 0, "internal error: context is empty"); + for (i = 0; i < max_events; i++) + { + in_fds[i] = -1; + out_fds[i] = -1; + } + if (args->context->pid_file) { char buf[32]; @@ -2041,7 +2054,7 @@ wait_for_process (struct wait_for_process_args *args, libcrun_error_t *err) conf.bundle_path = args->context->bundle; conf.oci_config_path = oci_config_path; - ret = set_blocking_fd (args->seccomp_notify_fd, 0, err); + ret = set_blocking_fd (args->seccomp_notify_fd, false, err); if (UNLIKELY (ret < 0)) return ret; @@ -2051,44 +2064,74 @@ wait_for_process (struct wait_for_process_args *args, libcrun_error_t *err) if (UNLIKELY (ret < 0)) return ret; - fds[fds_len++] = args->seccomp_notify_fd; + in_fds[in_fds_len++] = args->seccomp_notify_fd; + } + + if (args->terminal_fd >= 0) + { + /* The terminal_fd is dup()ed so that it can be registered with + 
epoll multiple times using different masks. */ + terminal_fd_from = dup (args->terminal_fd); + if (UNLIKELY (terminal_fd_from < 0)) + return crun_make_error (err, errno, "dup terminal fd"); + terminal_fd_to = dup (args->terminal_fd); + if (UNLIKELY (terminal_fd_to < 0)) + return crun_make_error (err, errno, "dup terminal fd"); + + int i, non_blocking_fds[] = { terminal_fd_from, terminal_fd_to, 0, 1, -1 }; + for (i = 0; non_blocking_fds[i] >= 0; i++) + { + ret = set_blocking_fd (non_blocking_fds[i], false, err); + if (UNLIKELY (ret < 0)) + return ret; + } + + from_terminal = channel_fd_pair_new (terminal_fd_from, 1, BUFSIZ); + to_terminal = channel_fd_pair_new (0, terminal_fd_to, BUFSIZ); } - fds[fds_len++] = signalfd; + in_fds[in_fds_len++] = signalfd; if (args->notify_socket >= 0) - fds[fds_len++] = args->notify_socket; + in_fds[in_fds_len++] = args->notify_socket; if (args->terminal_fd >= 0) { - fds[fds_len++] = 0; - levelfds[levelfds_len++] = args->terminal_fd; + in_fds[in_fds_len++] = 0; + out_fds[out_fds_len++] = terminal_fd_to; + + in_fds[in_fds_len++] = terminal_fd_from; + out_fds[out_fds_len++] = 1; } - fds[fds_len++] = -1; - levelfds[levelfds_len++] = -1; - epollfd = epoll_helper (fds, levelfds, err); + epollfd = epoll_helper (in_fds, NULL, out_fds, NULL, err); if (UNLIKELY (epollfd < 0)) return epollfd; while (1) { + struct epoll_event events[max_events]; struct signalfd_siginfo si; struct winsize ws; - ssize_t res; - struct epoll_event events[10]; int i, nr_events; + ssize_t res; - nr_events = TEMP_FAILURE_RETRY (epoll_wait (epollfd, events, 10, -1)); + nr_events = TEMP_FAILURE_RETRY (epoll_wait (epollfd, events, max_events, -1)); if (UNLIKELY (nr_events < 0)) return crun_make_error (err, errno, "epoll_wait"); for (i = 0; i < nr_events; i++) { - if (events[i].data.fd == 0) + if (events[i].data.fd == 0 || events[i].data.fd == terminal_fd_to) { - ret = copy_from_fd_to_fd (0, args->terminal_fd, 0, err); + ret = channel_fd_pair_process (to_terminal, epollfd, 
err); if (UNLIKELY (ret < 0)) return crun_error_wrap (err, "copy to terminal fd"); } + else if (events[i].data.fd == 1 || events[i].data.fd == terminal_fd_from) + { + ret = channel_fd_pair_process (from_terminal, epollfd, err); + if (UNLIKELY (ret < 0)) + return crun_error_wrap (err, "copy from terminal fd"); + } else if (events[i].data.fd == args->seccomp_notify_fd) { ret = libcrun_seccomp_notify_plugins (seccomp_notify_ctx, @@ -2096,20 +2139,6 @@ wait_for_process (struct wait_for_process_args *args, libcrun_error_t *err) if (UNLIKELY (ret < 0)) return ret; } - else if (events[i].data.fd == args->terminal_fd) - { - ret = set_blocking_fd (args->terminal_fd, 0, err); - if (UNLIKELY (ret < 0)) - return crun_error_wrap (err, "set terminal fd not blocking"); - - ret = copy_from_fd_to_fd (args->terminal_fd, 1, 1, err); - if (UNLIKELY (ret < 0)) - return crun_error_wrap (err, "copy from terminal fd"); - - ret = set_blocking_fd (args->terminal_fd, 1, err); - if (UNLIKELY (ret < 0)) - return crun_error_wrap (err, "set terminal fd blocking"); - } else if (events[i].data.fd == args->notify_socket) { ret = handle_notify_socket (args->notify_socket, err); diff --git a/src/libcrun/ring_buffer.c b/src/libcrun/ring_buffer.c new file mode 100644 index 0000000000..cb2f61b372 --- /dev/null +++ b/src/libcrun/ring_buffer.c @@ -0,0 +1,234 @@ +/* + * crun - OCI runtime written in C + * + * Copyright (C) 2024 Giuseppe Scrivano + * crun is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * crun is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with crun.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#define _GNU_SOURCE
+#include <config.h> /* NOTE(review): header name lost in extraction; <config.h> assumed -- confirm.  */
+#include <sys/uio.h> /* NOTE(review): header name lost in extraction; needed for readv/writev/struct iovec -- confirm.  */
+
+#include "ring_buffer.h"
+#include "utils.h"
+
+struct ring_buffer
+{
+  char *buffer;
+  size_t size;
+  size_t head;
+  size_t tail;
+};
+
+/*
+ * It returns up to two regions in `iov` that hold buffered data, in the order it must be consumed.
+ */
+static int
+ring_buffer_get_read_iov (struct ring_buffer *rb, struct iovec *iov)
+{
+  int iov_count = 0;
+
+  /* Buffer is empty.  */
+  if (rb->head == rb->tail)
+    return 0;
+
+  /* Head before tail.  There is only one region to read from, up to tail.  */
+  if (rb->tail > rb->head)
+    {
+      iov[iov_count].iov_base = rb->buffer + rb->head;
+      iov[iov_count].iov_len = rb->tail - rb->head;
+      iov_count++;
+    }
+  /* Head after tail.  There are two regions to read from, up to the
+   * end of the buffer and from the beginning of the buffer to tail.  */
+  else
+    {
+      iov[iov_count].iov_base = rb->buffer + rb->head;
+      iov[iov_count].iov_len = rb->size - rb->head;
+      iov_count++;
+
+      if (rb->tail > 0)
+        {
+          iov[iov_count].iov_base = rb->buffer;
+          iov[iov_count].iov_len = rb->tail;
+          iov_count++;
+        }
+    }
+  return iov_count;
+}
+
+/*
+ * It returns up to two regions in `iov` that can be written to without overwriting
+ * existing data.  One byte is always left unused, so that head == tail only ever means "empty".
+ */
+static int
+ring_buffer_get_write_iov (struct ring_buffer *rb, struct iovec *iov)
+{
+  int iov_count = 0;
+
+  /* Buffer is full: advancing tail by one, wrapping at the end, would reach head.  */
+  if ((rb->tail + 1) % rb->size == rb->head)
+    return 0;
+
+  /* Tail before head.  There is only one region to write to, up to head.  */
+  if (rb->head > rb->tail + 1)
+    {
+      iov[iov_count].iov_base = rb->buffer + rb->tail;
+      iov[iov_count].iov_len = rb->head - rb->tail - 1;
+      iov_count++;
+    }
+  /* Tail after or equal to head.  Write up to the end of the buffer (minus the reserved byte
+   * when head is 0, or tail would wrap onto head), then from the beginning up to head - 1.  */
+  else
+    {
+      iov[iov_count].iov_base = rb->buffer + rb->tail;
+      iov[iov_count].iov_len = rb->size - rb->tail - (rb->head == 0 ? 1 : 0);
+      iov_count++;
+
+      if (rb->head > 1)
+        {
+          iov[iov_count].iov_base = rb->buffer;
+          iov[iov_count].iov_len = rb->head - 1;
+          iov_count++;
+        }
+    }
+  return iov_count;
+}
+
+/* manually advance the head after data has been consumed from the buffer.  */
+static void
+ring_buffer_advance_nocheck_head (struct ring_buffer *rb, size_t amount)
+{
+  rb->head = (rb->head + amount) % rb->size;
+}
+
+/* manually advance the tail after data has been stored into the buffer.  */
+static void
+ring_buffer_advance_nocheck_tail (struct ring_buffer *rb, size_t amount)
+{
+  rb->tail = (rb->tail + amount) % rb->size;
+}
+
+size_t
+ring_buffer_get_data_available (struct ring_buffer *rb)
+{
+  if (rb->head <= rb->tail)
+    return rb->tail - rb->head;
+
+  return rb->size - rb->head + rb->tail;
+}
+
+size_t
+ring_buffer_get_size (struct ring_buffer *rb)
+{
+  return rb->size - 1;
+}
+
+size_t
+ring_buffer_get_space_available (struct ring_buffer *rb)
+{
+  return rb->size - ring_buffer_get_data_available (rb) - 1;
+}
+
+int
+ring_buffer_read (struct ring_buffer *rb, int fd, bool *is_eagain, libcrun_error_t *err)
+{
+  struct iovec iov[2];
+  int iov_count = 0;
+  ssize_t ret;
+
+  *is_eagain = false;
+
+  iov_count = ring_buffer_get_write_iov (rb, iov);
+  if (iov_count == 0)
+    {
+      *is_eagain = true;
+      return 0;
+    }
+
+  ret = readv (fd, iov, iov_count);
+  if (UNLIKELY (ret < 0))
+    {
+      if (errno == EIO)
+        return 0;
+      if (errno == EAGAIN || errno == EWOULDBLOCK)
+        {
+          *is_eagain = true;
+          return 0;
+        }
+      return crun_make_error (err, errno, "readv");
+    }
+  ring_buffer_advance_nocheck_tail (rb, ret);
+  return ret;
+}
+
+int
+ring_buffer_write (struct ring_buffer *rb, int fd, bool *is_eagain, libcrun_error_t *err)
+{
+  ssize_t ret;
+  struct iovec iov[2];
+  int iov_count = 0;
+
+  *is_eagain = false;
+
+  iov_count = ring_buffer_get_read_iov (rb, iov);
+  if (iov_count == 0)
+    {
+      *is_eagain = true;
+      return 0;
+    }
+
+  ret = writev (fd, iov, iov_count);
+  if (UNLIKELY (ret < 0))
+    {
+      if (errno == EIO)
+        return 0;
+      if (errno == EAGAIN || errno == EWOULDBLOCK)
+        {
+          *is_eagain = true;
+          return 0;
+        }
+      return crun_make_error (err, errno, "writev");
+    }
+  ring_buffer_advance_nocheck_head (rb, ret);
+  /* If the buffer is empty, reset the head and tail.  */
+  if (rb->head == rb->tail)
+    {
+      rb->head = 0;
+      rb->tail = 0;
+    }
+  return ret;
+}
+
+struct ring_buffer *
+ring_buffer_make (size_t size)
+{
+  struct ring_buffer *rb = xmalloc (sizeof (struct ring_buffer));
+
+  /* The extra byte is used to distinguish between full and empty buffer.  */
+  rb->size = size + 1;
+  rb->buffer = xmalloc (rb->size);
+  rb->head = 0;
+  rb->tail = 0;
+
+  return rb;
+}
+
+void
+ring_buffer_free (struct ring_buffer *rb)
+{
+  if (rb == NULL)
+    return;
+  free (rb->buffer);
+  free (rb);
+}
diff --git a/src/libcrun/ring_buffer.h b/src/libcrun/ring_buffer.h
new file mode 100644
index 0000000000..1b3e19851d
--- /dev/null
+++ b/src/libcrun/ring_buffer.h
@@ -0,0 +1,52 @@
+/*
+ * crun - OCI runtime written in C
+ *
+ * Copyright (C) 2024 Giuseppe Scrivano
+ * crun is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * crun is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with crun.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef RING_BUFFER_H
+#define RING_BUFFER_H
+
+#include <config.h> /* NOTE(review): header name lost in extraction; <config.h> assumed -- confirm.  */
+
+#include "error.h"
+#include "utils.h"
+
+struct ring_buffer;
+
+size_t ring_buffer_get_data_available (struct ring_buffer *rb);
+
+size_t ring_buffer_get_space_available (struct ring_buffer *rb);
+
+size_t ring_buffer_get_size (struct ring_buffer *rb);
+
+int ring_buffer_read (struct ring_buffer *rb, int fd, bool *is_eagain, libcrun_error_t *err);
+
+int ring_buffer_write (struct ring_buffer *rb, int fd, bool *is_eagain, libcrun_error_t *err);
+
+struct ring_buffer *ring_buffer_make (size_t size);
+
+void ring_buffer_free (struct ring_buffer *rb);
+
+#define cleanup_ring_buffer __attribute__ ((cleanup (cleanup_ring_bufferp)))
+
+static inline void
+cleanup_ring_bufferp (struct ring_buffer **p)
+{
+  struct ring_buffer *rb = *p;
+  if (rb)
+    ring_buffer_free (rb);
+}
+
+#endif
diff --git a/src/libcrun/utils.c b/src/libcrun/utils.c
index a133769d6c..55b64eb3ac 100644
--- a/src/libcrun/utils.c
+++ b/src/libcrun/utils.c
@@ -19,6 +19,7 @@
 #define _GNU_SOURCE
 #include
 #include "utils.h"
+#include "ring_buffer.h"
 #include
 #include
 #include
@@ -1241,8 +1242,34 @@ create_signalfd (sigset_t *mask, libcrun_error_t *err)
   return ret;
 }
 
+/* Register FD on EPOLLFD for EVENTS, or remove it when EVENTS is 0.  When FD is
+   already registered its event mask is replaced; removing an FD that is not
+   registered is not an error.  */
+static int
+epoll_helper_toggle (int epollfd, int fd, int events, libcrun_error_t *err)
+{
+  struct epoll_event ev = {};
+  bool add = events != 0;
+  int ret;
+
+  ev.events = events;
+  ev.data.fd = fd;
+  ret = epoll_ctl (epollfd, add ? EPOLL_CTL_ADD : EPOLL_CTL_DEL, fd, &ev);
+  /* EPOLL_CTL_ADD does not touch an existing registration, so a change of mask
+     (e.g. edge- to level-triggered) must be applied with EPOLL_CTL_MOD.  */
+  if (ret < 0 && add && errno == EEXIST)
+    ret = epoll_ctl (epollfd, EPOLL_CTL_MOD, fd, &ev);
+  if (UNLIKELY (ret < 0))
+    {
+      if (errno == EEXIST || errno == ENOENT)
+        return 0;
+      return crun_make_error (err, errno, "epoll_ctl `%s` `%d`", add ? "add" : "del", fd);
+    }
+  return 0;
+}
+
 int
-epoll_helper (int *fds, int *levelfds, libcrun_error_t *err)
+epoll_helper (int *in_fds, int *in_levelfds, int *out_fds, int *out_levelfds, libcrun_error_t *err)
 {
   struct epoll_event ev;
   cleanup_close int epollfd = -1;
@@ -1253,22 +1280,24 @@ epoll_helper (int *fds, int *levelfds, libcrun_error_t *err)
   if (UNLIKELY (epollfd < 0))
     return crun_make_error (err, errno, "epoll_create1");
 
-  for (it = fds; *it >= 0; it++)
-    {
-      ev.events = EPOLLIN;
-      ev.data.fd = *it;
-      ret = epoll_ctl (epollfd, EPOLL_CTL_ADD, *it, &ev);
-      if (UNLIKELY (ret < 0))
-        return crun_make_error (err, errno, "epoll_ctl add `%d`", *it);
-    }
-  for (it = levelfds; *it >= 0; it++)
-    {
-      ev.events = EPOLLIN | EPOLLET;
-      ev.data.fd = *it;
-      ret = epoll_ctl (epollfd, EPOLL_CTL_ADD, *it, &ev);
-      if (UNLIKELY (ret < 0))
-        return crun_make_error (err, errno, "epoll_ctl add `%d`", *it);
-    }
+#define ADD_FDS(FDS, EVENTS) \
+  for (it = FDS; *it >= 0; it++) \
+    { \
+      ev.events = EVENTS; \
+      ev.data.fd = *it; \
+      ret = epoll_ctl (epollfd, EPOLL_CTL_ADD, *it, &ev); \
+      if (UNLIKELY (ret < 0)) \
+        return crun_make_error (err, errno, "epoll_ctl add `%d`", *it); \
+    }
+
+  if (in_fds)
+    ADD_FDS (in_fds, EPOLLIN);
+  if (in_levelfds)
+    ADD_FDS (in_levelfds, EPOLLIN | EPOLLET);
+  if (out_fds)
+    ADD_FDS (out_fds, EPOLLOUT);
+  if (out_levelfds)
+    ADD_FDS (out_levelfds, EPOLLOUT | EPOLLET);
 
   ret = epollfd;
   epollfd = -1;
@@ -1281,23 +1310,31 @@ copy_from_fd_to_fd (int src, int dst, int consume, libcrun_error_t *err)
   int ret;
   ssize_t nread;
   size_t pagesize = get_page_size ();
+#ifdef HAVE_COPY_FILE_RANGE
+  bool can_copy_file_range = true;
+#endif
 
   do
     {
       cleanup_free char *buffer = NULL;
       ssize_t remaining;
 #ifdef HAVE_COPY_FILE_RANGE
-      nread = copy_file_range (src, NULL, dst, NULL, pagesize, 0);
-      if (nread < 0 && (errno == EINVAL || errno == EXDEV))
-        goto fallback;
-      if (consume && nread < 0 && errno == EAGAIN)
-        return 0;
-      if (nread < 0 && errno == EIO)
-        return 0;
-      if (UNLIKELY (nread < 0))
-        return crun_make_error (err, errno, "copy_file_range");
-      continue;
-
+      if (can_copy_file_range)
+        {
+          nread = copy_file_range (src, NULL, dst, NULL, pagesize, 0);
+          if (nread < 0 && (errno == EINVAL || errno == EXDEV))
+            {
+              can_copy_file_range = false;
+              goto fallback;
+            }
+          if (consume && nread < 0 && errno == EAGAIN)
+            return 0;
+          if (nread < 0 && errno == EIO)
+            return 0;
+          if (UNLIKELY (nread < 0))
+            return crun_make_error (err, errno, "copy_file_range");
+          continue;
+        }
     fallback:
 #endif
 
@@ -1823,7 +1860,7 @@ get_current_timestamp (char *out, size_t len)
 }
 
 int
-set_blocking_fd (int fd, int blocking, libcrun_error_t *err)
+set_blocking_fd (int fd, bool blocking, libcrun_error_t *err)
 {
   int ret, flags = fcntl (fd, F_GETFL, 0);
   if (UNLIKELY (flags < 0))
@@ -2640,3 +2677,95 @@ cpuset_string_to_bitmask (const char *str, char **out, size_t *out_size, libcrun
 invalid_input:
   return crun_make_error (err, 0, "cannot parse input `%s`", str);
 }
+
+struct channel_fd_pair
+{
+  struct ring_buffer *rb;
+
+  int in_fd;
+  int out_fd;
+
+  int infd_epoll_events;
+  int outfd_epoll_events;
+};
+
+struct channel_fd_pair *
+channel_fd_pair_new (int in_fd, int out_fd, size_t size)
+{
+  struct channel_fd_pair *channel = xmalloc (sizeof (struct channel_fd_pair));
+  channel->in_fd = in_fd;
+  channel->out_fd = out_fd;
+  channel->infd_epoll_events = -1;
+  channel->outfd_epoll_events = -1;
+  channel->rb = ring_buffer_make (size);
+  return channel;
+}
+
+void
+channel_fd_pair_free (struct channel_fd_pair *channel)
+{
+  if (channel == NULL)
+    return;
+
+  ring_buffer_free (channel->rb);
+  free (channel);
+}
+
+int
+channel_fd_pair_process (struct channel_fd_pair *channel, int epollfd, libcrun_error_t *err)
+{
+  bool is_input_eagain = false, is_output_eagain = false, repeat;
+  int ret, i;
+
+  /* This function is called from an epoll loop.  Use a hard limit to avoid infinite loops
+     and prevent other events from being processed.  */
+  for (i = 0, repeat = true; i < 1000 && repeat; i++)
+    {
+      repeat = false;
+      if (ring_buffer_get_space_available (channel->rb) >= ring_buffer_get_size (channel->rb))
+        {
+          ret = ring_buffer_read (channel->rb, channel->in_fd, &is_input_eagain, err);
+          if (UNLIKELY (ret < 0))
+            return ret;
+          if (ret > 0)
+            repeat = true;
+        }
+      if (ring_buffer_get_data_available (channel->rb) > 0)
+        {
+          ret = ring_buffer_write (channel->rb, channel->out_fd, &is_output_eagain, err);
+          if (UNLIKELY (ret < 0))
+            return ret;
+          if (ret > 0)
+            repeat = true;
+        }
+    }
+
+  if (epollfd >= 0)
+    {
+      size_t available = ring_buffer_get_space_available (channel->rb);
+      size_t used = ring_buffer_get_data_available (channel->rb);
+      int events;
+
+      /* If there is space available in the buffer, we want to read more.  */
+      events = (available > 0) ? (EPOLLIN | (is_input_eagain ? EPOLLET : 0)) : 0;
+      if (events != channel->infd_epoll_events)
+        {
+          ret = epoll_helper_toggle (epollfd, channel->in_fd, events, err);
+          if (UNLIKELY (ret < 0))
+            return ret;
+          channel->infd_epoll_events = events;
+        }
+
+      /* If there is data available in the buffer, we want to write as soon as
+         it is possible.  */
+      events = (used > 0) ? (EPOLLOUT | (is_output_eagain ? EPOLLET : 0)) : 0;
+      if (events != channel->outfd_epoll_events)
+        {
+          ret = epoll_helper_toggle (epollfd, channel->out_fd, events, err);
+          if (UNLIKELY (ret < 0))
+            return ret;
+          channel->outfd_epoll_events = events;
+        }
+    }
+  return 0;
+}
diff --git a/src/libcrun/utils.h b/src/libcrun/utils.h
index 584b9ea47b..f41dd5aed5 100644
--- a/src/libcrun/utils.h
+++ b/src/libcrun/utils.h
@@ -325,7 +325,7 @@ int receive_fd_from_socket_with_payload (int from, char *payload, size_t payload
 
 int create_signalfd (sigset_t *mask, libcrun_error_t *err);
 
-int epoll_helper (int *fds, int *levelfds, libcrun_error_t *err);
+int epoll_helper (int *in_fds, int *in_levelfds, int *out_fds, int *out_levelfds, libcrun_error_t *err);
 
 int copy_from_fd_to_fd (int src, int dst, int consume, libcrun_error_t *err);
 
@@ -340,7 +340,7 @@ int mark_or_close_fds_ge_than (int n, bool close_now, libcrun_error_t *err);
 
 void get_current_timestamp (char *out, size_t len);
 
-int set_blocking_fd (int fd, int blocking, libcrun_error_t *err);
+int set_blocking_fd (int fd, bool blocking, libcrun_error_t *err);
 
 int parse_json_file (yajl_val *out, const char *jsondata, struct parser_context *ctx, libcrun_error_t *err);
 
@@ -475,4 +475,37 @@ validate_options (unsigned int specified_options, unsigned int supported_options
 
 extern int cpuset_string_to_bitmask (const char *str, char **out, size_t *out_size, libcrun_error_t *err);
 
+/*
+ * A channel_fd_pair takes care of copying data between two file descriptors.
+ * The two file descriptors are expected to be set to non-blocking mode.
+ * The channel_fd_pair will buffer data read from the input file descriptor and
+ * write it to the output file descriptor.  If the output file descriptor is not
+ * ready to accept the data, the channel_fd_pair will buffer the data until it
+ * can be written.
+ */
+struct channel_fd_pair;
+
+struct channel_fd_pair *channel_fd_pair_new (int in_fd, int out_fd, size_t size);
+
+void channel_fd_pair_free (struct channel_fd_pair *channel);
+
+/* Process the data in the channel_fd_pair.  This function will read data from
+ * the input file descriptor and write it to the output file descriptor.  If
+ * the output file descriptor is not ready to accept the data, the data will be
+ * buffered.  If epollfd is provided, the in_fd and out_fd will be registered
+ * and unregistered as necessary.
+ */
+int channel_fd_pair_process (struct channel_fd_pair *channel, int epollfd, libcrun_error_t *err);
+
+static inline void
+cleanup_channel_fd_pairp (void *p)
+{
+  struct channel_fd_pair **pp = (struct channel_fd_pair **) p;
+  if (*pp == NULL)
+    return;
+
+  channel_fd_pair_free (*pp);
+}
+#define cleanup_channel_fd_pair __attribute__ ((cleanup (cleanup_channel_fd_pairp)))
+
 #endif
diff --git a/tests/test_mounts.py b/tests/test_mounts.py
index 3f88255ca3..45547226fd 100755
--- a/tests/test_mounts.py
+++ b/tests/test_mounts.py
@@ -591,11 +591,14 @@ def test_cgroup_mount_without_netns():
         out, _ = run_and_get_output(conf)
         print(out)
 
+        # validate there are two mounts
+        count = 0
         for i in out.split("\n"):
             if i.find("/sys/fs/cgroup") >= 0:
-                if i.find("tmpfs") >= 0:
-                    print("tmpfs temporary mount still present with cgroupns=%s %s" % (cgroupns, i))
-                    return -1
+                count = count + 1
+        if count < 2:
+            print("fail with cgroupns=%s, got %s" % (cgroupns, out))
+            return -1
     return 0
 
 all_tests = {
diff --git a/tests/tests_libcrun_ring_buffer.c b/tests/tests_libcrun_ring_buffer.c
new file mode 100644
index 0000000000..4507377cf6
--- /dev/null
+++ b/tests/tests_libcrun_ring_buffer.c
@@ -0,0 +1,282 @@
+/*
+ * crun - OCI runtime written in C
+ *
+ * Copyright (C) 2017, 2018, 2019, 2024 Giuseppe Scrivano
+ * crun is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free
Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * crun is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with crun.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define _GNU_SOURCE
+
+#include <config.h> /* NOTE(review): the seven header names below were lost in extraction and are reconstructed from the symbols used -- confirm.  */
+#include <libcrun/ring_buffer.h>
+#include <libcrun/utils.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+typedef int (*test) ();
+
+static void
+fill_data (char *buffer, size_t size)
+{
+  size_t i;
+  buffer[0] = rand () % 256;
+  for (i = 1; i < size; i++)
+    buffer[i] = buffer[i - 1] + 13;
+}
+
+static int
+do_test_ring_buffer_read_write (int max_data_size, int rb_size)
+{
+  const int repeat = 2048;
+  cleanup_free char *buffer_w = xmalloc (max_data_size);
+  cleanup_free char *buffer_r = xmalloc (max_data_size);
+  libcrun_error_t err = NULL;
+  int fds_to_close[5] = {
+    -1,
+  };
+  int fds_to_close_n = 0;
+  cleanup_close_vec int *autocleanup_fds = fds_to_close;
+  cleanup_ring_buffer struct ring_buffer *rb = NULL;
+  int ret = 0;
+  int fd_w[2];
+  int fd_r[2];
+  size_t i;
+
+  if (max_data_size > rb_size)
+    {
+      fprintf (stderr, "max_data_size must be smaller than rb_size\n");
+      return 1;
+    }
+  if (pipe2 (fd_w, O_NONBLOCK) < 0)
+    {
+      fprintf (stderr, "failed to create pipe\n");
+      return 1;
+    }
+  if (pipe2 (fd_r, O_NONBLOCK) < 0)
+    {
+      fprintf (stderr, "failed to create pipe\n");
+      return 1;
+    }
+
+  /* use a bigger buffer size for the pipe to be sure synchronization
+   * between reads and writes is not just a side effect of the
+   * underlying buffer size.  */
+  ret = fcntl (fd_w[0], F_SETPIPE_SZ, max_data_size * 2);
+  if (ret < 0)
+    {
+      fprintf (stderr, "failed to set pipe size\n");
+      return 1;
+    }
+  ret = fcntl (fd_r[0], F_SETPIPE_SZ, max_data_size * 2);
+  if (ret < 0)
+    {
+      fprintf (stderr, "failed to set pipe size\n");
+      return 1;
+    }
+
+  fds_to_close[fds_to_close_n++] = fd_w[0];
+  fds_to_close[fds_to_close_n++] = fd_w[1];
+  fds_to_close[fds_to_close_n++] = fd_r[0];
+  fds_to_close[fds_to_close_n++] = fd_r[1];
+  fds_to_close[fds_to_close_n++] = -1;
+
+  rb = ring_buffer_make (rb_size);
+
+  fill_data (buffer_w, max_data_size);
+
+  for (i = 0; i < repeat; i++)
+    {
+      bool is_eagain = false;
+      size_t avail;
+      size_t data_size = 1 + (i % max_data_size);
+
+      memset (buffer_r, 0, max_data_size);
+
+      fill_data (buffer_w, data_size);
+      avail = ring_buffer_get_size (rb);
+      if (avail != rb_size)
+        {
+          fprintf (stderr, "wrong get_size\n");
+          return 1;
+        }
+
+      avail = ring_buffer_get_data_available (rb);
+      if (avail != 0)
+        {
+          fprintf (stderr, "wrong get_data_available for empty ring buffer\n");
+          return 1;
+        }
+
+      ret = write (fd_r[1], buffer_w, data_size);
+      if (ret != data_size)
+        {
+          fprintf (stderr, "write failed\n");
+          return 1;
+        }
+
+      ret = ring_buffer_read (rb, fd_r[0], &is_eagain, &err);
+      if (ret < 0)
+        {
+          libcrun_error_release (&err);
+          fprintf (stderr, "read from ring_buffer failed\n");
+          return 1;
+        }
+      if (is_eagain)
+        {
+          fprintf (stderr, "read from ring_buffer failed with EAGAIN\n");
+          return 1;
+        }
+      avail = ring_buffer_get_data_available (rb);
+      if (avail != ret)
+        {
+          fprintf (stderr, "wrong get_data_available got %zu instead of %zu\n", avail, (size_t) ret);
+          return 1;
+        }
+      avail = ring_buffer_get_space_available (rb);
+      if (avail != rb_size - ret)
+        {
+          fprintf (stderr, "wrong get_space_available got %zu instead of %zu\n", avail, (size_t) (rb_size - ret));
+          return 1;
+        }
+
+      ret = ring_buffer_write (rb, fd_w[1], &is_eagain, &err);
+      if (ret < 0)
+        {
+          libcrun_error_release (&err);
+          fprintf (stderr, "write to ring_buffer failed\n");
+          return 1;
+        }
+      if (is_eagain)
+        {
+          fprintf (stderr, "write failed with EAGAIN\n");
+          return 1;
+        }
+      if (ret != data_size)
+        {
+          fprintf (stderr, "write to ring_buffer wrong size\n");
+          return 1;
+        }
+      avail = ring_buffer_get_data_available (rb);
+      if (avail != 0)
+        {
+          fprintf (stderr, "wrong get_data_available got %zu instead of 0\n", avail);
+          return 1;
+        }
+      avail = ring_buffer_get_space_available (rb);
+      if (avail != rb_size)
+        {
+          fprintf (stderr, "wrong get_space_available got %zu instead of %zu\n", avail, (size_t) rb_size);
+          return 1;
+        }
+
+      ret = read (fd_w[0], buffer_r, data_size);
+      if (ret != data_size)
+        {
+          fprintf (stderr, "read wrong size\n");
+          return 1;
+        }
+      if (memcmp (buffer_w, buffer_r, data_size) != 0)
+        {
+          fprintf (stderr, "data mismatch\n");
+          return 1;
+        }
+
+      /* Try again with an empty fd and an empty ring buffer.  */
+      is_eagain = false;
+      ret = ring_buffer_read (rb, fd_r[0], &is_eagain, &err);
+      if (ret < 0)
+        {
+          libcrun_error_release (&err);
+          fprintf (stderr, "read to ring_buffer failed\n");
+          return 1;
+        }
+      if (! is_eagain)
+        {
+          fprintf (stderr, "read should have returned EAGAIN\n");
+          return 1;
+        }
+
+      is_eagain = false;
+      ret = ring_buffer_write (rb, fd_w[1], &is_eagain, &err);
+      if (ret < 0)
+        {
+          libcrun_error_release (&err);
+          fprintf (stderr, "write to ring_buffer failed\n");
+          return 1;
+        }
+      if (! is_eagain)
+        {
+          fprintf (stderr, "write should have returned EAGAIN\n");
+          return 1;
+        }
+    }
+
+  return 0;
+}
+
+static int
+test_ring_buffer_read_write ()
+{
+  int max_data_sizes[] = { 1, 7, 10, 101, 1024, 4096, 4096, 7919, 8191, 8192 };
+  int rb_sizes[] = { 11, 16, 128, 512, 2048, 4096, 4096, 8192, 8192, 8192 };
+  int ret;
+  int i;
+
+  if (sizeof (max_data_sizes) != sizeof (rb_sizes))
+    {
+      fprintf (stderr, "internal error: max_data_sizes and rb_sizes must have the same length\n");
+      return 1;
+    }
+
+  for (i = 0; i < sizeof (max_data_sizes) / sizeof (max_data_sizes[0]); i++)
+    {
+      ret = do_test_ring_buffer_read_write (max_data_sizes[i], rb_sizes[i]);
+      if (ret != 0) /* the helper reports failure with 1, never with a negative value.  */
+        {
+          fprintf (stderr, "test failed with data_size=%d, rb_size=%d\n", max_data_sizes[i], rb_sizes[i]);
+          return ret;
+        }
+    }
+  return 0;
+}
+
+static void
+run_and_print_test_result (const char *name, int id, test t)
+{
+  int ret = t ();
+  if (ret == 0)
+    printf ("ok %d - %s\n", id, name);
+  else if (ret == 77)
+    printf ("ok %d - %s #SKIP\n", id, name);
+  else
+    printf ("not ok %d - %s\n", id, name);
+}
+
+#define RUN_TEST(T) \
+  do \
+    { \
+      run_and_print_test_result (#T, id++, T); \
+    } while (0)
+
+int
+main ()
+{
+  int id = 1;
+  printf ("1..1\n");
+
+  RUN_TEST (test_ring_buffer_read_write);
+  return 0;
+}