diff --git a/Cargo.lock b/Cargo.lock index 395c37e9ad..5bd49a867a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,6 +89,26 @@ dependencies = [ "serde", ] +[[package]] +name = "bindgen" +version = "0.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + [[package]] name = "bitflags" version = "2.10.0" @@ -176,6 +196,15 @@ dependencies = [ "shlex", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -217,6 +246,17 @@ dependencies = [ "inout", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading 0.8.9", +] + [[package]] name = "clap" version = "4.5.51" @@ -446,6 +486,12 @@ dependencies = [ "syn", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "encode_unicode" version = "1.0.0" @@ -576,6 +622,12 @@ dependencies = [ "url", ] +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "hashbrown" version = "0.16.0" @@ -740,6 +792,15 @@ dependencies = [ "windows", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -816,6 +877,16 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link 0.2.1", +] + [[package]] name = "libloading" version = "0.9.0" @@ -826,6 +897,17 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "libproc" +version = "0.14.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78a09b56be5adbcad5aa1197371688dc6bb249a26da3bca2011ee2fb987ebfb" +dependencies = [ + "bindgen", + "errno", + "libc", +] + [[package]] name = "libredox" version = "0.1.10" @@ -884,6 +966,15 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +[[package]] +name = "mach2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d640282b302c0bb0a2a8e0233ead9035e3bed871f0b7e81fe4a1ec829765db44" +dependencies = [ + "libc", +] + [[package]] name = "measureme" version = "12.0.3" @@ -913,6 +1004,12 @@ dependencies = [ "libc", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -949,9 +1046,10 @@ dependencies = [ "ipc-channel", "libc", "libffi", - "libloading", + "libloading 0.9.0", "measureme", "nix", + "proc-maps", "rand", "regex", "rustc_version", @@ -974,6 +1072,16 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "7.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1135,6 +1243,16 @@ dependencies = [ "owo-colors", ] +[[package]] +name = "prettyplease" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.103" @@ -1144,6 +1262,20 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-maps" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db44c5aa60e193a25fcd93bb9ed27423827e8f118897866f946e2cf936c44fb" +dependencies = [ + "anyhow", + "bindgen", + "libc", + "libproc", + "mach2", + "winapi", +] + [[package]] name = "quote" version = "1.0.42" @@ -1715,6 +1847,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -1724,6 +1872,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows" version = "0.61.3" diff --git a/Cargo.toml b/Cargo.toml index 
4a54a7e0eb..593d1b991d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ serde = { version = "1.0.219", features = ["derive"], optional = true } nix = { version = "0.30.1", features = ["mman", "ptrace", "signal"], optional = true } ipc-channel = { version = "0.20.0", optional = true } capstone = { version = "0.13", optional = true } +proc-maps = { version = "0.4.0", optional = true } [target.'cfg(all(target_os = "linux", target_pointer_width = "64", target_endian = "little"))'.dependencies] genmc-sys = { path = "./genmc-sys/", version = "0.1.0", optional = true } @@ -66,8 +67,8 @@ genmc = ["dep:genmc-sys"] stack-cache = [] expensive-consistency-checks = ["stack-cache"] tracing = ["serde_json"] -native-lib = ["dep:libffi", "dep:libloading", "dep:capstone", "dep:ipc-channel", "dep:nix", "dep:serde"] jemalloc = [] +native-lib = ["dep:libffi", "dep:libloading", "dep:capstone", "dep:ipc-channel", "dep:nix", "dep:serde", "dep:proc-maps"] [lints.rust.unexpected_cfgs] level = "warn" diff --git a/src/shims/native_lib/mod.rs b/src/shims/native_lib/mod.rs index 12abe841c0..e6ace1bee7 100644 --- a/src/shims/native_lib/mod.rs +++ b/src/shims/native_lib/mod.rs @@ -27,6 +27,7 @@ mod ffi; pub mod trace; use self::ffi::OwnedArg; +use self::trace::EvalContextExt as _; use crate::*; /// The final results of an FFI trace, containing every relevant event detected @@ -85,13 +86,9 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { libffi_args: &mut [OwnedArg], ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option)> { let this = self.eval_context_mut(); - #[cfg(target_os = "linux")] - let alloc = this.machine.allocator.as_ref().unwrap(); - #[cfg(not(target_os = "linux"))] - // Placeholder value. 
- let alloc = (); - trace::Supervisor::do_ffi(alloc, || { + let ty_is_sized = dest.layout.ty.is_sized(*this.tcx, this.typing_env()); + this.do_ffi(|| { // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value // as the specified primitive integer type let scalar = match dest.layout.ty.kind() { @@ -117,7 +114,12 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { } ty::Int(IntTy::Isize) => { let x = unsafe { ffi::call::(fun, libffi_args) }; - Scalar::from_target_isize(x.try_into().unwrap(), this) + // We already know native-lib mode means target == host, so + // this is ok. + Scalar::from_int( + i128::try_from(x).unwrap(), + Size::from_bytes(size_of::()), + ) } // uints ty::Uint(UintTy::U8) => { @@ -138,7 +140,10 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { } ty::Uint(UintTy::Usize) => { let x = unsafe { ffi::call::(fun, libffi_args) }; - Scalar::from_target_usize(x.try_into().unwrap(), this) + Scalar::from_uint( + u128::try_from(x).unwrap(), + Size::from_bytes(size_of::()), + ) } ty::Float(FloatTy::F32) => { let x = unsafe { ffi::call::(fun, libffi_args) }; @@ -154,10 +159,10 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { unsafe { ffi::call::<()>(fun, libffi_args) }; return interp_ok(ImmTy::uninit(dest.layout)); } - ty::RawPtr(ty, ..) if ty.is_sized(*this.tcx, this.typing_env()) => { + ty::RawPtr(ty, ..) if ty_is_sized => { let x = unsafe { ffi::call::<*const ()>(fun, libffi_args) }; let ptr = StrictPointer::new(Provenance::Wildcard, Size::from_bytes(x.addr())); - Scalar::from_pointer(ptr, this) + Scalar::Ptr(ptr, u8::try_from(size_of::<*const ()>()).unwrap()) } _ => return Err(err_unsup_format!( @@ -225,7 +230,6 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { /// assumed to be exact. 
fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - for evt in events.acc_events { let evt_rg = evt.get_range(); // LLVM at least permits vectorising accesses to adjacent allocations, @@ -235,7 +239,11 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { let Some(alloc_id) = this.alloc_id_from_addr(curr.to_u64(), rg.len().try_into().unwrap()) else { - throw_ub_format!("Foreign code did an out-of-bounds access!") + throw_ub_format!( + "Foreign code did an out-of-bounds access at {:#0x} for {:#0x} bytes!", + curr, + rg.len(), + ); }; let alloc = this.get_alloc_raw(alloc_id)?; // The logical and physical address of the allocation coincide, so we can use @@ -526,7 +534,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { this.call_native_with_args(link_name, dest, code_ptr, &mut libffi_args)?; if tracing { - this.tracing_apply_accesses(maybe_memevents.unwrap())?; + let mm = maybe_memevents.unwrap(); + this.tracing_apply_accesses(mm)?; } this.write_immediate(*ret, dest)?; diff --git a/src/shims/native_lib/trace/child.rs b/src/shims/native_lib/trace/child.rs index 795ad4a320..e3f7342656 100644 --- a/src/shims/native_lib/trace/child.rs +++ b/src/shims/native_lib/trace/child.rs @@ -1,6 +1,5 @@ -use std::cell::RefCell; use std::ptr::NonNull; -use std::rc::Rc; +use std::sync::atomic::Ordering; use ipc_channel::ipc; use nix::sys::{mman, ptrace, signal}; @@ -10,8 +9,9 @@ use rustc_const_eval::interpret::InterpResult; use super::CALLBACK_STACK_SIZE; use super::messages::{Confirmation, StartFfiInfo, TraceRequest}; use super::parent::{ChildListener, sv_loop}; -use crate::alloc::isolated_alloc::IsolatedAlloc; use crate::shims::native_lib::MemEvents; +use crate::shims::native_lib::trace::parent::{PAGE_ADDR, PAGE_COUNT, PAGE_SIZE}; +use crate::*; /// A handle to the single, shared supervisor process across all `MiriMachine`s. 
/// Since it would be very difficult to trace multiple FFI calls in parallel, we @@ -32,7 +32,7 @@ pub struct Supervisor { /// parent process has handled the request from `message_tx`. confirm_rx: ipc::IpcReceiver, /// Receiver for memory acceses that ocurred during the FFI call. - event_rx: ipc::IpcReceiver, + event_rx: Option>, } /// Marker representing that an error occurred during creation of the supervisor. @@ -45,7 +45,7 @@ impl Supervisor { SUPERVISOR.lock().unwrap().is_some() } - unsafe fn protect_pages( + pub unsafe fn protect_pages( pages: impl Iterator, usize)>, prot: mman::ProtFlags, ) -> Result<(), nix::errno::Errno> { @@ -54,14 +54,19 @@ impl Supervisor { } Ok(()) } +} +impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} +pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { /// Performs an arbitrary FFI call, enabling tracing from the supervisor. /// As this locks the supervisor via a mutex, no other threads may enter FFI /// until this function returns. - pub fn do_ffi<'tcx>( - alloc: &Rc>, + fn do_ffi( + &mut self, f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>, ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option)> { + let this = self.eval_context_mut(); + let machine_ptr = &raw mut *this; let mut sv_guard = SUPERVISOR.lock().unwrap(); // If the supervisor is not initialised for whatever reason, fast-return. // As a side-effect, even on platforms where ptracing @@ -69,9 +74,17 @@ impl Supervisor { // happens at a time. let Some(sv) = sv_guard.as_mut() else { return f().map(|v| (v, None)) }; + // Save the machine pointer to a location where the libc interceptors can use it, + // since we can't pass in arguments. + super::parent::MACHINE_PTR.store(machine_ptr.cast(), std::sync::atomic::Ordering::Relaxed); + // Give the libc interceptors the event channel. 
+ let mut e_rx = super::parent::EVT_RX.lock().unwrap(); + e_rx.replace(sv.event_rx.take().unwrap()); + drop(e_rx); + // Get pointers to all the pages the supervisor must allow accesses in // and prepare the callback stack. - let alloc = alloc.borrow(); + let alloc = this.machine.allocator.as_ref().unwrap().borrow(); let page_size = alloc.page_size(); let page_ptrs = alloc .pages() @@ -87,6 +100,9 @@ impl Supervisor { let stack_ptr = raw_stack_ptr.expose_provenance(); let start_info = StartFfiInfo { page_ptrs, stack_ptr }; + let pages: Vec<_> = alloc.pages().collect(); + // If native code allocates, we'll need to get access to the machine's allocator. + drop(alloc); // Unwinding might be messed up due to partly protected memory, so let's abort if something // breaks inside here. let res = std::panic::abort_unwind(|| { @@ -106,14 +122,19 @@ impl Supervisor { // working as normal, just with extra tracing. So even if the compiler moves memory // accesses down to after the `mprotect`, they won't actually segfault. unsafe { - Self::protect_pages(alloc.pages(), mman::ProtFlags::PROT_NONE).unwrap(); + Supervisor::protect_pages(pages.into_iter(), mman::ProtFlags::PROT_NONE).unwrap(); } let res = f(); + // The original `this` was used during the FFI call, so + // acquire a new mutable reference from the used pointer. + let this = unsafe { &mut *machine_ptr }; + + let alloc = this.machine.allocator.as_ref().unwrap().borrow(); // SAFETY: We set memory back to normal, so this is safe. unsafe { - Self::protect_pages( + Supervisor::protect_pages( alloc.pages(), mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE, ) @@ -129,14 +150,21 @@ impl Supervisor { res }); - // SAFETY: Caller upholds that this pointer was allocated as a box with - // this type. + // SAFETY: We know that this pointer was allocated as a box with this type. unsafe { drop(Box::from_raw(raw_stack_ptr)); } + + // Now get the event channel back. 
+ let mut e_rx = super::parent::EVT_RX.lock().unwrap(); + sv.event_rx = Some(e_rx.take().unwrap()); + drop(e_rx); + // On the off-chance something really weird happens, don't block forever. let events = sv .event_rx + .as_ref() + .unwrap() .try_recv_timeout(std::time::Duration::from_secs(5)) .map_err(|e| { match e { @@ -245,6 +273,7 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> { // First make sure the parent succeeded with ptracing us! signal::raise(signal::SIGSTOP).unwrap(); // If we're the child process, save the supervisor info. + let event_rx = Some(event_rx); *lock = Some(Supervisor { message_tx, confirm_rx, event_rx }); } } @@ -261,3 +290,52 @@ pub fn register_retcode_sv(code: i32) { sv.confirm_rx.recv().unwrap(); } } + +// These are functions that should not be called directly, and can only be reached +// by offsetting the instruction pointer into them. However, they are here because +// they execute in the child process. + +/// Disables protections on the page whose address is currently in `PAGE_ADDR`. +/// +/// SAFETY: `PAGE_ADDR` should be set to a page-aligned pointer to an owned page, +/// `PAGE_SIZE` should be the host pagesize, and the range from `PAGE_ADDR` to +/// `PAGE_SIZE` * `PAGE_COUNT` must be owned and allocated memory. No other threads +/// should be running. +pub unsafe extern "C" fn mempr_off() { + // Again, cannot allow unwinds to happen here. + let len = PAGE_SIZE.load(Ordering::SeqCst).saturating_mul(PAGE_COUNT.load(Ordering::SeqCst)); + // SAFETY: Upheld by "caller". + unsafe { + // It's up to the caller to make sure this doesn't actually overflow, but + // we mustn't unwind from here, so... + if libc::mprotect( + PAGE_ADDR.load(Ordering::SeqCst).cast(), + len, + libc::PROT_READ | libc::PROT_WRITE, + ) != 0 + { + // Can't return or unwind, but we can do this. + std::process::exit(-1); + } + } + // If this fails somehow we're doomed.
+ if signal::raise(signal::SIGSTOP).is_err() { + std::process::exit(-1); + } +} + +/// Reenables protection on the page set by `PAGE_ADDR`. +/// +/// SAFETY: See `mempr_off()`. +pub unsafe extern "C" fn mempr_on() { + let len = PAGE_SIZE.load(Ordering::SeqCst).wrapping_mul(PAGE_COUNT.load(Ordering::SeqCst)); + // SAFETY: Upheld by "caller". + unsafe { + if libc::mprotect(PAGE_ADDR.load(Ordering::SeqCst).cast(), len, libc::PROT_NONE) != 0 { + std::process::exit(-1); + } + } + if signal::raise(signal::SIGSTOP).is_err() { + std::process::exit(-1); + } +} diff --git a/src/shims/native_lib/trace/mod.rs b/src/shims/native_lib/trace/mod.rs index c8abacfb5e..77185edc08 100644 --- a/src/shims/native_lib/trace/mod.rs +++ b/src/shims/native_lib/trace/mod.rs @@ -2,7 +2,7 @@ mod child; pub mod messages; mod parent; -pub use self::child::{Supervisor, init_sv, register_retcode_sv}; +pub use self::child::{EvalContextExt, Supervisor, init_sv, register_retcode_sv}; /// The size of the temporary stack we use for callbacks that the server executes in the client. /// This should be big enough that `mempr_on` and `mempr_off` can safely be jumped into with the diff --git a/src/shims/native_lib/trace/parent.rs b/src/shims/native_lib/trace/parent.rs index 5476cccc02..b367b222cf 100644 --- a/src/shims/native_lib/trace/parent.rs +++ b/src/shims/native_lib/trace/parent.rs @@ -1,4 +1,5 @@ -use std::sync::atomic::{AtomicPtr, AtomicUsize}; +use std::sync; +use std::sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}; use ipc_channel::ipc; use nix::sys::{ptrace, signal, wait}; @@ -23,14 +24,42 @@ const ARCH_WORD_SIZE: usize = 8; // See vol. 3B section 24.25. const ARCH_MAX_INSTR_SIZE: usize = 15; -/// The address of the page set to be edited, initialised to a sentinel null -/// pointer. -static PAGE_ADDR: AtomicPtr = AtomicPtr::new(std::ptr::null_mut()); +/// Opcode for an instruction to raise SIGTRAP, to be written in the child process. 
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +const BREAKPT_INSTR: i16 = 0xCC; + +/// The size of the breakpoint-triggering instruction, in bytes. +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +const BREAKPT_INSTR_SIZE: usize = 1; + /// The host pagesize, initialised to a sentinel zero value. pub static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0); +/// The address of the page set to be edited, initialised to a sentinel null +/// pointer. +pub(super) static PAGE_ADDR: AtomicPtr<()> = AtomicPtr::new(std::ptr::null_mut()); /// How many consecutive pages to unprotect. 1 by default, unlikely to be set /// higher than 2. -static PAGE_COUNT: AtomicUsize = AtomicUsize::new(1); +pub(super) static PAGE_COUNT: AtomicUsize = AtomicUsize::new(1); +/// A pointer to the `MiriInterpCx` for use within the libc shims. +pub(super) static MACHINE_PTR: AtomicPtr<()> = AtomicPtr::new(std::ptr::null_mut()); +/// Is the return address within the libc-mapped area(s)? +pub(super) static RET_IS_LIBC: AtomicBool = AtomicBool::new(false); + +/// Information about which pages were allocated/deallocated after a single +/// libc intercepted event. After use, these are reset to 0. +/// +/// INVARIANT: A single libc event can only allocate/deallocate one contiguous +/// block of pages (as would be the case in a large `realloc`). +pub(super) static NEW_PAGES_ADDR: AtomicPtr<()> = AtomicPtr::new(std::ptr::null_mut()); +pub(super) static NEW_PAGES_COUNT: AtomicUsize = AtomicUsize::new(0); +pub(super) static DEL_PAGES_ADDR: AtomicPtr<()> = AtomicPtr::new(std::ptr::null_mut()); +pub(super) static DEL_PAGES_COUNT: AtomicUsize = AtomicUsize::new(0); + +/// The `event_rx` channel from the supervisor struct. The libc interceptors must +/// know which accesses happened before they were triggered, so e.g. an access in +/// an allocation that was later freed before the FFI call returned doesn't mistakenly +/// get marked as incorrect. 
+pub(super) static EVT_RX: sync::Mutex>> = sync::Mutex::new(None); /// Allows us to get common arguments from the `user_regs_t` across architectures. /// Normally this would land us ABI hell, but thankfully all of our usecases @@ -39,6 +68,8 @@ static PAGE_COUNT: AtomicUsize = AtomicUsize::new(1); trait ArchIndependentRegs { /// Gets the address of the instruction pointer. fn ip(&self) -> usize; + /// Gets the address of the stack pointer. + fn sp(&self) -> usize; /// Set the instruction pointer; remember to also set the stack pointer, or /// else the stack might get messed up! fn set_ip(&mut self, ip: usize); @@ -54,6 +85,8 @@ impl ArchIndependentRegs for libc::user_regs_struct { #[inline] fn ip(&self) -> usize { self.rip.try_into().unwrap() } #[inline] + fn sp(&self) -> usize { self.rsp.try_into().unwrap() } + #[inline] fn set_ip(&mut self, ip: usize) { self.rip = ip.try_into().unwrap() } #[inline] fn set_sp(&mut self, sp: usize) { self.rsp = sp.try_into().unwrap() } @@ -65,6 +98,8 @@ impl ArchIndependentRegs for libc::user_regs_struct { #[inline] fn ip(&self) -> usize { self.eip.cast_unsigned().try_into().unwrap() } #[inline] + fn sp(&self) -> usize { self.esp.cast_unsigned().try_into().unwrap() } + #[inline] fn set_ip(&mut self, ip: usize) { self.eip = ip.cast_signed().try_into().unwrap() } #[inline] fn set_sp(&mut self, sp: usize) { self.esp = sp.cast_signed().try_into().unwrap() } @@ -83,7 +118,7 @@ pub enum ExecEvent { Status(unistd::Pid, signal::Signal), /// The child process with the specified pid entered or existed a syscall. Syscall(unistd::Pid), - /// A child process exited or was killed; if we have a return code, it is + /// The child exited or was killed; if we have a return code, it is /// specified. Died(Option), } @@ -204,7 +239,7 @@ pub fn sv_loop( confirm_tx: ipc::IpcSender, ) -> Result { // Get the pagesize set and make sure it isn't still on the zero sentinel value! 
- let page_size = PAGE_SIZE.load(std::sync::atomic::Ordering::Relaxed); + let page_size = PAGE_SIZE.load(Ordering::Relaxed); assert_ne!(page_size, 0); // Things that we return to the child process. @@ -222,7 +257,7 @@ pub fn sv_loop( let mut curr_pid = init_pid; // There's an initial sigstop we need to deal with. - wait_for_signal(Some(curr_pid), signal::SIGSTOP, InitialCont::No)?; + wait_for_signal(Some(curr_pid), signal::SIGSTOP, InitialCont::No, None)?; ptrace::cont(curr_pid, None).unwrap(); for evt in listener { @@ -241,12 +276,16 @@ pub fn sv_loop( confirm_tx.send(Confirmation).unwrap(); // We can't trust simply calling `Pid::this()` in the child process to give the right // PID for us, so we get it this way. - curr_pid = wait_for_signal(None, signal::SIGSTOP, InitialCont::No).unwrap(); + curr_pid = wait_for_signal(None, signal::SIGSTOP, InitialCont::No, None).unwrap(); + // Intercept libc events we care about. + trap_libc(curr_pid); // Continue until next syscall. ptrace::syscall(curr_pid, None).unwrap(); } // Child wants to end tracing. ExecEvent::End => { + // Stop intercepting libc events. + fixup_libc(curr_pid); // Hand over the access info we traced. event_tx.send(MemEvents { acc_events }).unwrap(); // And reset our values. @@ -262,13 +301,15 @@ pub fn sv_loop( // If it was a segfault, check if it was an artificial one // caused by it trying to access the MiriMachine memory. signal::SIGSEGV => - handle_segfault( + handle_segfault(pid, &ch_pages, ch_stack.unwrap(), &cs, &mut acc_events)?, + signal::SIGTRAP => + handle_sigtrap( pid, - &ch_pages, + &mut ch_pages, + &event_tx, + &mut acc_events, ch_stack.unwrap(), - page_size, &cs, - &mut acc_events, )?, // Something weird happened. 
_ => { @@ -285,15 +326,63 @@ pub fn sv_loop( ExecEvent::Syscall(pid) => { ptrace::syscall(pid, None).unwrap(); } - ExecEvent::Died(code) => { - return Err(ExecEnd(code)); - } + ExecEvent::Died(code) => return Err(ExecEnd(code)), } } unreachable!() } +/// Set up SIGTRAPs on the first few bytes of malloc/free/etc. +#[expect(clippy::as_conversions)] +fn trap_libc(pid: unistd::Pid) { + ptrace::write(pid, libc::malloc as *mut _, BREAKPT_INSTR.into()).unwrap(); + ptrace::write(pid, libc::calloc as *mut _, BREAKPT_INSTR.into()).unwrap(); + ptrace::write(pid, libc::posix_memalign as *mut _, BREAKPT_INSTR.into()).unwrap(); + ptrace::write(pid, libc::aligned_alloc as *mut _, BREAKPT_INSTR.into()).unwrap(); + ptrace::write(pid, libc::realloc as *mut _, BREAKPT_INSTR.into()).unwrap(); + ptrace::write(pid, libc::free as *mut _, BREAKPT_INSTR.into()).unwrap(); +} + +/// Fix up the libc values. +#[expect(clippy::as_conversions)] +fn fixup_libc(pid: unistd::Pid) { + unsafe { + ptrace::write( + pid, + libc::malloc as *mut _, + (libc::malloc as *mut libc::c_long).read_volatile(), + ) + .unwrap(); + ptrace::write( + pid, + libc::calloc as *mut _, + (libc::calloc as *mut libc::c_long).read_volatile(), + ) + .unwrap(); + ptrace::write( + pid, + libc::posix_memalign as *mut _, + (libc::posix_memalign as *mut libc::c_long).read_volatile(), + ) + .unwrap(); + ptrace::write( + pid, + libc::aligned_alloc as *mut _, + (libc::aligned_alloc as *mut libc::c_long).read_volatile(), + ) + .unwrap(); + ptrace::write( + pid, + libc::realloc as *mut _, + (libc::realloc as *mut libc::c_long).read_volatile(), + ) + .unwrap(); + ptrace::write(pid, libc::free as *mut _, (libc::free as *mut libc::c_long).read_volatile()) + .unwrap(); + } +} + /// Spawns a Capstone disassembler for the host architecture. 
#[rustfmt::skip] fn get_disasm() -> capstone::Capstone { @@ -310,6 +399,13 @@ fn get_disasm() -> capstone::Capstone { .unwrap() } +struct SegfaultCatchingStuff<'a, 'b, 'c> { + ch_pages: &'a [usize], + ch_stack: usize, + cs: &'b capstone::Capstone, + acc_events: &'c mut Vec, +} + /// Waits for `wait_signal`. If `init_cont`, it will first do a `ptrace::cont`. /// We want to avoid that in some cases, like at the beginning of FFI. /// @@ -318,6 +414,7 @@ fn wait_for_signal( pid: Option, wait_signal: signal::Signal, init_cont: InitialCont, + mut catch_segfaults: Option>, ) -> Result { if matches!(init_cont, InitialCont::Yes) { ptrace::cont(pid.unwrap(), None).unwrap(); @@ -346,6 +443,11 @@ fn wait_for_signal( }; if signal == wait_signal { return Ok(pid); + } else if let Some(ref mut sf) = catch_segfaults + && signal == signal::SIGSEGV + { + // Segfaults occurring during a wait should still be logged. + handle_segfault(pid, sf.ch_pages, sf.ch_stack, sf.cs, sf.acc_events)?; } else { ptrace::cont(pid, signal).map_err(|_| ExecEnd(None))?; } @@ -430,16 +532,19 @@ fn capstone_disassemble( Ok(()) } +// TODO: This needs to somehow catch segfaults that occur inside it, and in the full one it +// also needs to get them to log those accesses. + /// Grabs the access that caused a segfault and logs it down if it's to our memory, /// or kills the child and returns the appropriate error otherwise. fn handle_segfault( pid: unistd::Pid, ch_pages: &[usize], ch_stack: usize, - page_size: usize, cs: &capstone::Capstone, acc_events: &mut Vec, ) -> Result<(), ExecEnd> { + let page_size = PAGE_SIZE.load(Ordering::Relaxed); // Get information on what caused the segfault. This contains the address // that triggered it. let siginfo = ptrace::getsiginfo(pid).unwrap(); @@ -501,7 +606,7 @@ fn handle_segfault( // Move the instr ptr into the deprotection code.
#[expect(clippy::as_conversions)] - new_regs.set_ip(mempr_off as *const () as usize); + new_regs.set_ip(super::child::mempr_off as *const () as usize); // Don't mess up the stack by accident! new_regs.set_sp(stack_ptr); @@ -522,7 +627,7 @@ fn handle_segfault( ptrace::setregs(pid, new_regs).unwrap(); // Our mempr_* functions end with a raise(SIGSTOP). - wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?; + wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes, None)?; // Step 1 instruction. ptrace::setregs(pid, regs_bak).unwrap(); @@ -553,64 +658,163 @@ fn handle_segfault( // Reprotect everything and continue. #[expect(clippy::as_conversions)] - new_regs.set_ip(mempr_on as *const () as usize); + new_regs.set_ip(super::child::mempr_on as *const () as usize); new_regs.set_sp(stack_ptr); ptrace::setregs(pid, new_regs).unwrap(); - wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?; + wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes, None)?; ptrace::setregs(pid, regs_bak).unwrap(); ptrace::syscall(pid, None).unwrap(); Ok(()) } -// We only get dropped into these functions via offsetting the instr pointer -// manually, so we *must not ever* unwind from them. +/// Determines what libc function was called that caused a sigtrap, giving control +/// to our shims to handle it instead. +fn handle_sigtrap( + pid: unistd::Pid, + pages: &mut Vec, + _event_tx: &ipc::IpcSender, + acc_events: &mut Vec, + ch_stack: usize, + cs: &capstone::Capstone, +) -> Result<(), ExecEnd> { + /// The libc functions we shim. + enum LibcFn { + Malloc, + Calloc, + AlignedAlloc, + PosixMemalign, + Realloc, + Free, + } -/// Disables protections on the page whose address is currently in `PAGE_ADDR`. -/// -/// SAFETY: `PAGE_ADDR` should be set to a page-aligned pointer to an owned page, -/// `PAGE_SIZE` should be the host pagesize, and the range from `PAGE_ADDR` to -/// `PAGE_SIZE` * `PAGE_COUNT` must be owned and allocated memory. 
No other threads -/// should be running. -pub unsafe extern "C" fn mempr_off() { - use std::sync::atomic::Ordering; - - // Again, cannot allow unwinds to happen here. - let len = PAGE_SIZE.load(Ordering::Relaxed).saturating_mul(PAGE_COUNT.load(Ordering::Relaxed)); - // SAFETY: Upheld by "caller". - unsafe { - // It's up to the caller to make sure this doesn't actually overflow, but - // we mustn't unwind from here, so... - if libc::mprotect( - PAGE_ADDR.load(Ordering::Relaxed).cast(), - len, - libc::PROT_READ | libc::PROT_WRITE, - ) != 0 - { - // Can't return or unwind, but we can do this. - std::process::exit(-1); + /// Gets the libc function that a given instruction pointer corresponds to. + fn get_libc_fn(addr: usize) -> Option { + // We'll be one instruction past the start + #[expect(clippy::as_conversions)] + match addr.strict_sub(BREAKPT_INSTR_SIZE) { + a if a == (libc::malloc as *const () as usize) => Some(LibcFn::Malloc), + a if a == (libc::calloc as *const () as usize) => Some(LibcFn::Calloc), + a if a == (libc::aligned_alloc as *const () as usize) => Some(LibcFn::AlignedAlloc), + a if a == (libc::posix_memalign as *const () as usize) => Some(LibcFn::PosixMemalign), + a if a == (libc::realloc as *const () as usize) => Some(LibcFn::Realloc), + a if a == (libc::free as *const () as usize) => Some(LibcFn::Free), + _ => None, } } - // If this fails somehow we're doomed. - if signal::raise(signal::SIGSTOP).is_err() { - std::process::exit(-1); - } -} -/// Reenables protection on the page set by `PAGE_ADDR`. -/// -/// SAFETY: See `mempr_off()`. -pub unsafe extern "C" fn mempr_on() { - use std::sync::atomic::Ordering; + let page_size = PAGE_SIZE.load(Ordering::Relaxed); + let regs = ptrace::getregs(pid).unwrap(); + match get_libc_fn(regs.ip()) { + Some(_) => { + // We'll possibly want to call libc functions in the interceptor shims, + // so make sure they're working. + fixup_libc(pid); + // On x86, the return address will be the last item on the stack. 
+ let ret_addr: usize = ptrace::read(pid, std::ptr::without_provenance_mut(regs.sp())) + .unwrap() + .cast_unsigned() + .try_into() + .unwrap(); + + // When libc is calling its own functions, we explicitly need to not + // intercept them; therefore, we parse the process maps to determine + // whether this is happening. + let child_mappings = proc_maps::get_process_maps(pid.as_raw()).unwrap(); + // We know for sure libc functions are mapped *somewhere*, and they will be in a file + // (unless something has gone awfully wrong). + let libc_name = child_mappings + .iter() + .find(|&mp| { + // We use exit and not malloc since it seems malloc can be + // reported as being inside of the Miri binary's address space. + #[expect(clippy::as_conversions)] + (mp.start()..mp.start().strict_add(mp.size())) + .contains(&(libc::exit as *const () as usize)) + }) + .unwrap() + .filename() + .unwrap(); + // Is the return address inside of a block mapped from the same + // file as libc functions? + let ret_is_libc = child_mappings.iter().any(|mp| { + if mp.filename().iter().any(|&name| name == libc_name) { + (mp.start()..mp.start().strict_add(mp.size())).contains(&ret_addr) + } else { + false + } + }); + ptrace::write(pid, RET_IS_LIBC.as_ptr().cast(), ret_is_libc.into()).unwrap(); + + // Override the return address to give us another sigtrap + // (but save the original bytes). + let ret_addr_bytes = + ptrace::read(pid, std::ptr::without_provenance_mut(ret_addr)).unwrap(); + ptrace::write(pid, std::ptr::without_provenance_mut(ret_addr), BREAKPT_INSTR.into()) + .unwrap(); + let catch_segfaults = + SegfaultCatchingStuff { ch_pages: &*pages, ch_stack, cs, acc_events }; + // TODO: This should probably only log writes & not reads, since reads + // in these functions will never expose provenance to the rest of the native + // code. However, these functions likely won't even do any reads, so... 
+ wait_for_signal(Some(pid), signal::SIGTRAP, InitialCont::Yes, Some(catch_segfaults)) + .unwrap(); + + // Unset the breakpoint stuff and move the ip back an instruction to compensate. + ptrace::write(pid, std::ptr::without_provenance_mut(ret_addr), ret_addr_bytes).unwrap(); + let mut regs = ptrace::getregs(pid).unwrap(); + regs.set_ip(regs.ip().strict_sub(BREAKPT_INSTR_SIZE)); + ptrace::setregs(pid, regs).unwrap(); + + // If the intercept modified the list of pages we need to monitor, + // update our list accordingly. + let new_pg_addr: usize = ptrace::read(pid, NEW_PAGES_ADDR.as_ptr().cast()) + .unwrap() + .cast_unsigned() + .try_into() + .unwrap(); + if new_pg_addr != 0 { + let new_pg_count: usize = ptrace::read(pid, NEW_PAGES_COUNT.as_ptr().cast()) + .unwrap() + .cast_unsigned() + .try_into() + .unwrap(); + for add_fac in 0..new_pg_count { + pages.push(new_pg_addr.strict_add(add_fac.strict_mul(page_size))); + } + } - let len = PAGE_SIZE.load(Ordering::Relaxed).wrapping_mul(PAGE_COUNT.load(Ordering::Relaxed)); - // SAFETY: Upheld by "caller". - unsafe { - if libc::mprotect(PAGE_ADDR.load(Ordering::Relaxed).cast(), len, libc::PROT_NONE) != 0 { - std::process::exit(-1); + let del_pg_addr: usize = ptrace::read(pid, DEL_PAGES_ADDR.as_ptr().cast()) + .unwrap() + .cast_unsigned() + .try_into() + .unwrap(); + if del_pg_addr != 0 { + let del_pg_count: usize = ptrace::read(pid, DEL_PAGES_COUNT.as_ptr().cast()) + .unwrap() + .cast_unsigned() + .try_into() + .unwrap(); + for add_fac in 0..del_pg_count { + let pos = pages + .iter() + .position(|&pg| pg == del_pg_addr.strict_add(add_fac.strict_mul(page_size))) + .unwrap(); + pages.remove(pos); + } + } + // Now reenable stopping the process on libc calls. + trap_libc(pid); } - } - if signal::raise(signal::SIGSTOP).is_err() { - std::process::exit(-1); - } + // This is a random sigtrap unrelated to our code. 
+ None => { + eprintln!( + "Process got an unexpected SIGTRAP at addr {:#0x?}; continuing...", + regs.ip() + ); + } + }; + // Continue the process. + ptrace::syscall(pid, None).unwrap(); + Ok(()) } diff --git a/src/shims/native_lib/trace/stub.rs b/src/shims/native_lib/trace/stub.rs index 22787a6f6f..92e7747c29 100644 --- a/src/shims/native_lib/trace/stub.rs +++ b/src/shims/native_lib/trace/stub.rs @@ -12,9 +12,12 @@ impl Supervisor { pub fn is_enabled() -> bool { false } +} - pub fn do_ffi<'tcx, T>( - _: T, +impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} +pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { + fn do_ffi( + &mut self, f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>, ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option)> { // We acquire the lock to ensure that no two FFI calls run concurrently.