Skip to content

Commit

Permalink
procfs: add support for fsopen() and open_tree()
Browse files Browse the repository at this point in the history
Because we depend on procfs to be correct when operating on other
filesystems, having a safe procfs handle is vital. Unfortunately, if we
are an administrative program running inside a container that can modify
its mount configuration, our current checks are not sufficient to avoid
being tricked into thinking a path is real.

Luckily, with fsopen() and open_tree() it is possible to create a
completely private procfs instance that an attacker cannot modify. Note
that while they are both safe, they are safe for different reasons:

 1. fsopen() is safe because the created mount is completely separate to
    any other procfs mount, so any changes to mounts on the host are
    irrelevant. fsopen() can fail if we trip the mnt_too_revealing()
    check, so we may have to fall back to open_tree() in some cases.

 2. open_tree() creates a clone of a snapshot of the mount tree (or just
    the top mount if we can avoid using AT_RECURSIVE, but
    mnt_too_revealing() may force us to use AT_RECURSIVE). While the
    tree we clone might have been messed with by an attacker, after
    cloning there is no way for the attacker to affect our clone (even
    mount propagation won't propagate into a clone[1]).

    The only risk is whether there are over-mounts. I haven't yet added
    logic to check for this because it's quite ugly, but at least there
    is no risk of racing attackers for now. Unfortunately, we cannot use
    mountinfo because anonymous mounts do not show up in mountinfo
    (instead, we would need to use statx to get the mount IDs, or use
    listmount(2)).

This is based on similar logic I'm working on for libpathrs.

[1]: This is true since at least Linux 5.12. See commit ee2e3f50629f
     ("mount: fix mounting of detached mounts onto targets that reside
     on shared mounts").

Signed-off-by: Aleksa Sarai <[email protected]>
  • Loading branch information
cyphar committed Jun 27, 2024
1 parent 9bc1b05 commit b08169a
Showing 1 changed file with 115 additions and 8 deletions.
123 changes: 115 additions & 8 deletions procfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,112 @@ func verifyProcRoot(procRoot *os.File) error {
return nil
}

// Cached result of the one-time probe performed by hasPrivateMounts.
var (
// hasPrivateMountsBool is set to true by hasPrivateMounts when its
// open_tree(2) probe succeeds; it stays false otherwise.
hasPrivateMountsBool bool
// hasPrivateMountsOnce ensures the probe in hasPrivateMounts runs at most
// once per process.
hasPrivateMountsOnce sync.Once
)

func hasPrivateMounts() bool {
hasPrivateMountsOnce.Do(func() {
// Just try to use open_tree to open a file without OPEN_TREE_CLONE.
// This is equivalent to openat, but checks if the new mount syscalls
// are available.
fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC)
if err == nil {
hasPrivateMountsBool = true
_ = unix.Close(fd)
}
})
return hasPrivateMountsBool
}

// fsopen wraps unix.Fsopen, returning the new filesystem context as an
// *os.File named "fscontext:<fsName>". FSOPEN_CLOEXEC is always added to
// flags. Failures are reported as an *os.SyscallError labelled
// "fsopen <fsName>".
func fsopen(fsName string, flags int) (*os.File, error) {
	// Close-on-exec is forced regardless of what the caller passed.
	ctxFd, err := unix.Fsopen(fsName, flags|unix.FSOPEN_CLOEXEC)
	if err != nil {
		return nil, os.NewSyscallError("fsopen "+fsName, err)
	}
	ctxFile := os.NewFile(uintptr(ctxFd), "fscontext:"+fsName)
	return ctxFile, nil
}

// fsmount wraps unix.Fsmount, turning the filesystem context ctx into a
// mount handle returned as an *os.File named "fsmount:<ctx name>".
// FSMOUNT_CLOEXEC is always added to flags; mountAttrs is passed through
// unchanged. Failures are reported as an *os.SyscallError labelled
// "fsmount <ctx name>".
func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) {
	// Close-on-exec is forced regardless of what the caller passed.
	cloexecFlags := flags | unix.FSMOUNT_CLOEXEC
	mountFd, err := unix.Fsmount(int(ctx.Fd()), cloexecFlags, mountAttrs)
	if err != nil {
		return nil, os.NewSyscallError("fsmount "+ctx.Name(), err)
	}
	mountFile := os.NewFile(uintptr(mountFd), "fsmount:"+ctx.Name())
	return mountFile, nil
}

// newPrivateProcMount creates a brand-new procfs instance through
// fsopen/fsconfig/fsmount and returns a handle to it. The filesystem context
// is closed before returning; only the mount handle is handed to the caller,
// who is responsible for closing it.
func newPrivateProcMount() (*os.File, error) {
	procfsCtx, err := fsopen("proc", unix.FSOPEN_CLOEXEC)
	if err != nil {
		return nil, err
	}
	defer procfsCtx.Close()

	// Try to configure hidepid=ptraceable,subset=pid if possible. These are
	// deliberately best-effort: a failure to set either option is ignored
	// and the mount is created without it.
	_ = unix.FsconfigSetString(int(procfsCtx.Fd()), "hidepid", "ptraceable")
	_ = unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid")

	// Get an actual handle.
	if err := unix.FsconfigCreate(int(procfsCtx.Fd())); err != nil {
		return nil, os.NewSyscallError("fsconfig create procfs", err)
	}
	// fsmount(2) takes MOUNT_ATTR_* attribute flags, not the MS_* flags used
	// by mount(2). For these four attributes the numeric values are the
	// same, so this only corrects which constant family is named.
	const mountAttrs = unix.MOUNT_ATTR_RDONLY |
		unix.MOUNT_ATTR_NODEV |
		unix.MOUNT_ATTR_NOEXEC |
		unix.MOUNT_ATTR_NOSUID
	return fsmount(procfsCtx, unix.FSMOUNT_CLOEXEC, mountAttrs)
}

// openTree wraps unix.OpenTree and returns the resulting descriptor as an
// *os.File.
//
// dir is the directory that path is resolved relative to. It may be nil, in
// which case an invalid directory fd (-EBADF) is passed, so only absolute
// paths can be resolved. OPEN_TREE_CLOEXEC is always added to flags.
//
// On failure an *os.PathError with Op "open_tree" is returned.
func openTree(dir *os.File, path string, flags uint) (*os.File, error) {
	dirFd := -int(unix.EBADF)
	// Only consult dir when it is non-nil: calling dir.Name() on a nil
	// *os.File panics, and callers legitimately pass nil for absolute paths.
	dirName := "."
	if dir != nil {
		dirFd = int(dir.Fd())
		dirName = dir.Name()
	}
	// Make sure we always set O_CLOEXEC.
	flags |= unix.OPEN_TREE_CLOEXEC
	fd, err := unix.OpenTree(dirFd, path, flags)
	if err != nil {
		return nil, &os.PathError{Op: "open_tree", Path: path, Err: err}
	}
	return os.NewFile(uintptr(fd), dirName+"/"+path), nil
}

// clonePrivateProcMount returns a detached clone of the existing /proc mount
// created with open_tree(OPEN_TREE_CLONE). The clone is checked with
// verifyProcRoot before being returned; on verification failure the handle is
// closed and the verification error is returned.
func clonePrivateProcMount() (*os.File, error) {
	// First attempt a clone without AT_RECURSIVE. If that works, we can be
	// sure there are no over-mounts, so a valid root means we're golden.
	// Only when it fails do we retry recursively, in which case over-mounts
	// may be present in the clone.
	clone, err := openTree(nil, "/proc", unix.OPEN_TREE_CLONE)
	if err != nil {
		clone, err = openTree(nil, "/proc", unix.OPEN_TREE_CLONE|unix.AT_RECURSIVE)
		if err != nil {
			return nil, fmt.Errorf("creating a detached procfs clone: %w", err)
		}
	}
	if verifyErr := verifyProcRoot(clone); verifyErr != nil {
		_ = clone.Close()
		return nil, verifyErr
	}
	// TODO: Add support for checking for overmounts inside our /proc clone by
	// using listmount(2) or checking stx_mnt_id from statx() when doing
	// procSelfFdReadlink().
	return clone, nil
}

// privateProcRoot returns a handle to a procfs mount obtained through the new
// mount API. It prefers a freshly created procfs instance and falls back to a
// detached clone of /proc. If the new mount API is unavailable, the returned
// error wraps unix.ENOTSUP.
func privateProcRoot() (*os.File, error) {
	if !hasPrivateMounts() {
		return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP)
	}
	// A from-scratch procfs mount works even if /proc is fake (for whatever
	// reason), so try that first.
	if freshMount, err := newPrivateProcMount(); err == nil {
		return freshMount, nil
	}
	// Otherwise, try to clone /proc instead.
	return clonePrivateProcMount()
}

var (
procRootHandle *os.File
procRootError error
Expand All @@ -63,15 +169,16 @@ var (
)

func doGetProcRoot() (*os.File, error) {
// TODO: Use fsopen or open_tree to get a safe handle that cannot be
// over-mounted and we can absolutely verify.

procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
procRoot, err := privateProcRoot()
if err != nil {
return nil, err
}
if err := verifyProcRoot(procRoot); err != nil {
return nil, err
// Fall back to using a /proc handle if making a private mount failed.
procRoot, err = os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
if err := verifyProcRoot(procRoot); err != nil {
return nil, err
}
}
return procRoot, nil
}
Expand Down

0 comments on commit b08169a

Please sign in to comment.