Skip to content

Commit 0804417

Browse files
committed
std: optimize dlsym! macro and add a test for it
The `dlsym!` macro always ensures that the name string is nul-terminated, so there is no need to perform the check at runtime. Also, an acquire load is generally faster than a relaxed load followed by an acquire fence, so use an acquire load instead. The only case where this is not faster is when the symbol is missing (the old code skipped the fence on that path, so it only paid for a relaxed load), but that shouldn't matter too much.
1 parent fe55364 commit 0804417

File tree

2 files changed

+66
-45
lines changed

2 files changed

+66
-45
lines changed

library/std/src/sys/pal/unix/weak.rs

Lines changed: 46 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@
2222
#![allow(dead_code, unused_macros)]
2323
#![forbid(unsafe_op_in_unsafe_fn)]
2424

25-
use crate::ffi::CStr;
25+
use crate::ffi::{CStr, c_char, c_void};
2626
use crate::marker::PhantomData;
27-
use crate::sync::atomic::{self, Atomic, AtomicPtr, Ordering};
27+
use crate::sync::atomic::{Atomic, AtomicPtr, Ordering};
2828
use crate::{mem, ptr};
2929

30+
#[cfg(test)]
31+
mod tests;
32+
3033
// We can use true weak linkage on ELF targets.
3134
#[cfg(all(unix, not(target_vendor = "apple")))]
3235
pub(crate) macro weak {
@@ -64,7 +67,7 @@ impl<F: Copy> ExternWeak<F> {
6467

6568
pub(crate) macro dlsym {
6669
(fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => (
67-
dlsym!(
70+
dlsym!(
6871
#[link_name = stringify!($name)]
6972
fn $name($($param : $t),*) -> $ret;
7073
);
@@ -78,62 +81,65 @@ pub(crate) macro dlsym {
7881
let $name = &DLSYM;
7982
)
8083
}
84+
8185
pub(crate) struct DlsymWeak<F> {
82-
name: &'static str,
86+
/// A pointer to the nul-terminated name of the symbol.
87+
// Use a pointer instead of `&'static CStr` to save space.
88+
name: *const c_char,
8389
func: Atomic<*mut libc::c_void>,
8490
_marker: PhantomData<F>,
8591
}
8692

8793
impl<F> DlsymWeak<F> {
8894
pub(crate) const fn new(name: &'static str) -> Self {
95+
let Ok(name) = CStr::from_bytes_with_nul(name.as_bytes()) else {
96+
panic!("not a nul-terminated string")
97+
};
98+
8999
DlsymWeak {
90-
name,
100+
name: name.as_ptr(),
91101
func: AtomicPtr::new(ptr::without_provenance_mut(1)),
92102
_marker: PhantomData,
93103
}
94104
}
95105

96106
#[inline]
97107
pub(crate) fn get(&self) -> Option<F> {
98-
unsafe {
99-
// Relaxed is fine here because we fence before reading through the
100-
// pointer (see the comment below).
101-
match self.func.load(Ordering::Relaxed) {
102-
func if func.addr() == 1 => self.initialize(),
103-
func if func.is_null() => None,
104-
func => {
105-
let func = mem::transmute_copy::<*mut libc::c_void, F>(&func);
106-
// The caller is presumably going to read through this value
107-
// (by calling the function we've dlsymed). This means we'd
108-
// need to have loaded it with at least C11's consume
109-
// ordering in order to be guaranteed that the data we read
110-
// from the pointer isn't from before the pointer was
111-
// stored. Rust has no equivalent to memory_order_consume,
112-
// so we use an acquire fence (sorry, ARM).
113-
//
114-
// Now, in practice this likely isn't needed even on CPUs
115-
// where relaxed and consume mean different things. The
116-
// symbols we're loading are probably present (or not) at
117-
// init, and even if they aren't the runtime dynamic loader
118-
// is extremely likely have sufficient barriers internally
119-
// (possibly implicitly, for example the ones provided by
120-
// invoking `mprotect`).
121-
//
122-
// That said, none of that's *guaranteed*, and so we fence.
123-
atomic::fence(Ordering::Acquire);
124-
Some(func)
125-
}
126-
}
108+
// The caller is presumably going to read through this value
109+
// (by calling the function we've dlsymed). This means we'd
110+
// need to have loaded it with at least C11's consume
111+
// ordering in order to be guaranteed that the data we read
112+
// from the pointer isn't from before the pointer was
113+
// stored. Rust has no equivalent to memory_order_consume,
114+
// so we use an acquire load (sorry, ARM).
115+
//
116+
// Now, in practice this likely isn't needed even on CPUs
117+
// where relaxed and consume mean different things. The
118+
// symbols we're loading are probably present (or not) at
119+
// init, and even if they aren't the runtime dynamic loader
120+
// is extremely likely to have sufficient barriers internally
121+
// (possibly implicitly, for example the ones provided by
122+
// invoking `mprotect`).
123+
//
124+
// That said, none of that's *guaranteed*, so we use acquire.
125+
match self.func.load(Ordering::Acquire) {
126+
func if func.addr() == 1 => self.initialize(),
127+
func if func.is_null() => None,
128+
func => Some(unsafe { mem::transmute_copy::<*mut c_void, F>(&func) }),
127129
}
128130
}
129131

130132
// Cold because it should only happen during first-time initialization.
131133
#[cold]
132-
unsafe fn initialize(&self) -> Option<F> {
133-
assert_eq!(size_of::<F>(), size_of::<*mut libc::c_void>());
134+
fn initialize(&self) -> Option<F> {
135+
const {
136+
if size_of::<F>() != size_of::<*mut libc::c_void>() {
137+
panic!("not a function pointer")
138+
}
139+
}
134140

135-
let val = unsafe { fetch(self.name) };
136-
// This synchronizes with the acquire fence in `get`.
141+
let val = unsafe { libc::dlsym(libc::RTLD_DEFAULT, self.name) };
142+
// This synchronizes with the acquire load in `get`.
137143
self.func.store(val, Ordering::Release);
138144

139145
if val.is_null() {
@@ -144,13 +150,8 @@ impl<F> DlsymWeak<F> {
144150
}
145151
}
146152

147-
unsafe fn fetch(name: &str) -> *mut libc::c_void {
148-
let name = match CStr::from_bytes_with_nul(name.as_bytes()) {
149-
Ok(cstr) => cstr,
150-
Err(..) => return ptr::null_mut(),
151-
};
152-
unsafe { libc::dlsym(libc::RTLD_DEFAULT, name.as_ptr()) }
153-
}
153+
unsafe impl<F> Send for DlsymWeak<F> {}
154+
unsafe impl<F> Sync for DlsymWeak<F> {}
154155

155156
#[cfg(not(any(target_os = "linux", target_os = "android")))]
156157
pub(crate) macro syscall {
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
use super::*;
2+
use crate::ffi::c_int;
3+
4+
#[test]
5+
fn dlsym() {
6+
// Try to find a symbol that definitely exists.
7+
dlsym! {
8+
fn abs(i: c_int) -> c_int;
9+
}
10+
11+
let abs = abs.get().unwrap();
12+
assert_eq!(unsafe { abs(-1) }, 1);
13+
14+
// Try to find a symbol that definitely does not exist.
15+
dlsym! {
16+
fn test_symbol_that_does_not_exist() -> c_int;
17+
}
18+
19+
assert!(test_symbol_that_does_not_exist.get().is_none());
20+
}

0 commit comments

Comments
 (0)