Skip to content

Commit

Permalink
Allow preads from non-owner processes
Browse files Browse the repository at this point in the history
At the C API level pread [1] can be called from multiple threads. In Erlang,
however, all the calls must be made from a single owner process.

In a busy concurrent environment that means all operations for a file
descriptor must be serialized, and could pile up in the controller's mailbox
having to wait for potentially slower operations to complete.

To fix it, add the ability to make raw concurrent pread calls from Erlang as
well. File descriptor lifetime is still tied to the owner process, so it gets
closed when owner dies. Other processes are only allowed to make pread calls,
all others still require going through the owner.

Other file layers, like compression and delayed IO, still keep the previous
behavior. They have their own `get_fd_data/1` functions per layer which check
controller ownership. Concurrent preads are not allowed in those layers.

In unix_prim_file.c the seek+read fallback would have required exposing a flag
in Erlang in order to keep the old behavior since another process could see the
temparily changed position. However, before adding a new flag looked where
pread might not be supported, and it seems most Unix-like OSes since Sun0S
5(Solaris 2.5) and AT&T Sys V Rel4 (so all modern BSD) seem to have it. So
perhaps, it's safe to remove the fallback altogether and simplify the code? As
a precaution kept a configure check with an early failure and a clear message
about it.

This necessitates updating preloaded beam files so an OTP team member would
have to take over the PR, if the idea is acceptable to start with. I didn't
commit the beam files so any CI tests might not run either?

Issue: erlang#9239

[1] https://www.man7.org/linux/man-pages/man2/pread.2.html
  • Loading branch information
nickva committed Dec 26, 2024
1 parent f4caf91 commit a74c067
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 59 deletions.
6 changes: 0 additions & 6 deletions erts/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -916,9 +916,6 @@
/* Define to 1 if you have the 'ppoll' function. */
#undef HAVE_PPOLL

/* Define to 1 if you have the 'pread' function. */
#undef HAVE_PREAD

/* Define if you have processor_bind functionality */
#undef HAVE_PROCESSOR_BIND

Expand All @@ -934,9 +931,6 @@
/* Define if you have putc_unlocked */
#undef HAVE_PUTC_UNLOCKED

/* Define to 1 if you have the 'pwrite' function. */
#undef HAVE_PWRITE

/* Define to 1 if you have the 'res_gethostbyname' function. */
#undef HAVE_RES_GETHOSTBYNAME

Expand Down
12 changes: 6 additions & 6 deletions erts/configure
Original file line number Diff line number Diff line change
Expand Up @@ -20756,16 +20756,16 @@ then :

fi
ac_fn_c_check_func "$LINENO" "pread" "ac_cv_func_pread"
if test "x$ac_cv_func_pread" = xyes
if test "x$ac_cv_func_pread" != xyes
then :
printf "%s\n" "#define HAVE_PREAD 1" >>confdefs.h

printf "%s\n" "$0: pread function not available"
exit 1
fi
ac_fn_c_check_func "$LINENO" "pwrite" "ac_cv_func_pwrite"
if test "x$ac_cv_func_pwrite" = xyes
if test "x$ac_cv_func_pwrite" != xyes
then :
printf "%s\n" "#define HAVE_PWRITE 1" >>confdefs.h

printf "%s\n" "$0: pwrite function not available"
exit 1
fi
ac_fn_c_check_func "$LINENO" "memmove" "ac_cv_func_memmove"
if test "x$ac_cv_func_memmove" = xyes
Expand Down
47 changes: 2 additions & 45 deletions erts/emulator/nifs/unix/unix_prim_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -393,18 +393,6 @@ Sint64 efile_preadv(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen) {
Uint64 bytes_read;
Sint64 result;

#if !defined(HAVE_PREADV) && !defined(HAVE_PREAD)
/* This function is documented as leaving the file position undefined, but
* the old driver always reset it so there's probably code in the wild that
* relies on this behavior. */
off_t original_position = lseek(u->fd, 0, SEEK_CUR);

if(original_position < 0 || lseek(u->fd, offset, SEEK_SET) < 0) {
u->common.posix_errno = errno;
return -1;
}
#endif

bytes_read = 0;

do {
Expand All @@ -415,10 +403,8 @@ Sint64 efile_preadv(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen) {

#if defined(HAVE_PREADV)
result = preadv(u->fd, iov, MIN(IOV_MAX, iovlen), offset);
#elif defined(HAVE_PREAD)
result = pread(u->fd, iov->iov_base, iov->iov_len, offset);
#else
result = read(u->fd, iov->iov_base, iov->iov_len);
result = pread(u->fd, iov->iov_base, iov->iov_len, offset);
#endif

if(result > 0) {
Expand All @@ -430,15 +416,6 @@ Sint64 efile_preadv(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen) {

u->common.posix_errno = errno;

#if !defined(HAVE_PREADV) && !defined(HAVE_PREAD)
if(result >= 0) {
if(lseek(u->fd, original_position, SEEK_SET) < 0) {
u->common.posix_errno = errno;
return -1;
}
}
#endif

if(result == 0 && bytes_read > 0) {
return bytes_read;
}
Expand All @@ -452,15 +429,6 @@ Sint64 efile_pwritev(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen)
Sint64 bytes_written;
ssize_t result;

#if !defined(HAVE_PWRITEV) && !defined(HAVE_PWRITE)
off_t original_position = lseek(u->fd, 0, SEEK_CUR);

if(original_position < 0 || lseek(u->fd, offset, SEEK_SET) < 0) {
u->common.posix_errno = errno;
return -1;
}
#endif

bytes_written = 0;

do {
Expand All @@ -471,10 +439,8 @@ Sint64 efile_pwritev(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen)

#if defined(HAVE_PWRITEV)
result = pwritev(u->fd, iov, MIN(IOV_MAX, iovlen), offset);
#elif defined(HAVE_PWRITE)
result = pwrite(u->fd, iov->iov_base, iov->iov_len, offset);
#else
result = write(u->fd, iov->iov_base, iov->iov_len);
result = pwrite(u->fd, iov->iov_base, iov->iov_len, offset);
#endif

if(result > 0) {
Expand All @@ -486,15 +452,6 @@ Sint64 efile_pwritev(efile_data_t *d, Sint64 offset, SysIOVec *iov, int iovlen)

u->common.posix_errno = errno;

#if !defined(HAVE_PWRITEV) && !defined(HAVE_PWRITE)
if(result >= 0) {
if(lseek(u->fd, original_position, SEEK_SET) < 0) {
u->common.posix_errno = errno;
return -1;
}
}
#endif

if(result == 0 && bytes_written > 0) {
return bytes_written;
}
Expand Down
4 changes: 2 additions & 2 deletions erts/preloaded/src/prim_file.erl
Original file line number Diff line number Diff line change
Expand Up @@ -325,15 +325,15 @@ position_1(Fd, Mark, Offset) ->

pread(Fd, Offset, Size) ->
try
#{ handle := FRef } = get_fd_data(Fd),
#{ handle := FRef } = Fd#file_descriptor.data,
pread_nif(FRef, Offset, Size)
catch
error:badarg -> {error, badarg}
end.

pread(Fd, LocNums) ->
try
#{ handle := FRef } = get_fd_data(Fd),
#{ handle := FRef } = Fd#file_descriptor.data,
pread_list(FRef, LocNums, [])
catch
error:badarg -> {error, badarg}
Expand Down

0 comments on commit a74c067

Please sign in to comment.