diff options
| author | Valentin Popov <valentin@popov.link> | 2024-01-08 00:21:28 +0300 | 
|---|---|---|
| committer | Valentin Popov <valentin@popov.link> | 2024-01-08 00:21:28 +0300 | 
| commit | 1b6a04ca5504955c571d1c97504fb45ea0befee4 (patch) | |
| tree | 7579f518b23313e8a9748a88ab6173d5e030b227 /vendor/rustix/src/thread | |
| parent | 5ecd8cf2cba827454317368b68571df0d13d7842 (diff) | |
| download | fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.tar.xz fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.zip  | |
Initial vendor packages
Signed-off-by: Valentin Popov <valentin@popov.link>
Diffstat (limited to 'vendor/rustix/src/thread')
| -rw-r--r-- | vendor/rustix/src/thread/clock.rs | 100 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/futex.rs | 38 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/id.rs | 115 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/libcap.rs | 185 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/mod.rs | 30 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/prctl.rs | 1007 | ||||
| -rw-r--r-- | vendor/rustix/src/thread/setns.rs | 137 | 
7 files changed, 1612 insertions, 0 deletions
diff --git a/vendor/rustix/src/thread/clock.rs b/vendor/rustix/src/thread/clock.rs new file mode 100644 index 0000000..986e1f9 --- /dev/null +++ b/vendor/rustix/src/thread/clock.rs @@ -0,0 +1,100 @@ +use crate::{backend, io}; + +pub use crate::timespec::Timespec; + +#[cfg(not(any( +    apple, +    target_os = "dragonfly", +    target_os = "espidf", +    target_os = "freebsd", // FreeBSD 12 has clock_nanosleep, but libc targets FreeBSD 11. +    target_os = "openbsd", +    target_os = "redox", +    target_os = "vita", +    target_os = "wasi", +)))] +pub use crate::clockid::ClockId; + +/// `clock_nanosleep(id, 0, request, remain)`—Sleeps for a duration on a +/// given clock. +/// +/// This is `clock_nanosleep` specialized for the case of a relative sleep +/// interval. See [`clock_nanosleep_absolute`] for absolute intervals. +/// +/// # References +///  - [POSIX] +///  - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/clock_nanosleep.html +/// [Linux]: https://man7.org/linux/man-pages/man2/clock_nanosleep.2.html +#[cfg(not(any( +    apple, +    target_os = "dragonfly", +    target_os = "emscripten", +    target_os = "espidf", +    target_os = "freebsd", // FreeBSD 12 has clock_nanosleep, but libc targets FreeBSD 11. +    target_os = "haiku", +    target_os = "openbsd", +    target_os = "redox", +    target_os = "vita", +    target_os = "wasi", +)))] +#[inline] +pub fn clock_nanosleep_relative(id: ClockId, request: &Timespec) -> NanosleepRelativeResult { +    backend::thread::syscalls::clock_nanosleep_relative(id, request) +} + +/// `clock_nanosleep(id, TIMER_ABSTIME, request, NULL)`—Sleeps until an +/// absolute time on a given clock. +/// +/// This is `clock_nanosleep` specialized for the case of an absolute sleep +/// interval. See [`clock_nanosleep_relative`] for relative intervals. +/// +/// # References +///  - [POSIX] +///  - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/clock_nanosleep.html +/// [Linux]: https://man7.org/linux/man-pages/man2/clock_nanosleep.2.html +#[cfg(not(any( +    apple, +    target_os = "dragonfly", +    target_os = "emscripten", +    target_os = "espidf", +    target_os = "freebsd", // FreeBSD 12 has clock_nanosleep, but libc targets FreeBSD 11. +    target_os = "haiku", +    target_os = "openbsd", +    target_os = "redox", +    target_os = "vita", +    target_os = "wasi", +)))] +#[inline] +pub fn clock_nanosleep_absolute(id: ClockId, request: &Timespec) -> io::Result<()> { +    backend::thread::syscalls::clock_nanosleep_absolute(id, request) +} + +/// `nanosleep(request, remain)`—Sleeps for a duration. +/// +/// This effectively uses the system monotonic clock. +/// +/// # References +///  - [POSIX] +///  - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/nanosleep.html +/// [Linux]: https://man7.org/linux/man-pages/man2/nanosleep.2.html +#[inline] +pub fn nanosleep(request: &Timespec) -> NanosleepRelativeResult { +    backend::thread::syscalls::nanosleep(request) +} + +/// A return type for `nanosleep` and `clock_nanosleep_relative`. +#[derive(Debug, Clone)] +#[must_use] +pub enum NanosleepRelativeResult { +    /// The sleep completed normally. +    Ok, +    /// The sleep was interrupted, the remaining time is returned. +    Interrupted(Timespec), +    /// An invalid time value was provided. +    Err(io::Errno), +} diff --git a/vendor/rustix/src/thread/futex.rs b/vendor/rustix/src/thread/futex.rs new file mode 100644 index 0000000..47947c8 --- /dev/null +++ b/vendor/rustix/src/thread/futex.rs @@ -0,0 +1,38 @@ +//! Linux `futex`. +//! +//! # Safety +//! +//! Futex is a very low-level mechanism for implementing concurrency +//! primitives. +#![allow(unsafe_code)] + +use crate::thread::Timespec; +use crate::{backend, io}; + +pub use backend::thread::futex::{FutexFlags, FutexOperation}; + +/// `futex(uaddr, op, val, utime, uaddr2, val3)` +/// +/// # References +///  - [Linux `futex` system call] +///  - [Linux `futex` feature] +/// +/// # Safety +/// +/// This is a very low-level feature for implementing synchronization +/// primitives. See the references links above. +/// +/// [Linux `futex` system call]: https://man7.org/linux/man-pages/man2/futex.2.html +/// [Linux `futex` feature]: https://man7.org/linux/man-pages/man7/futex.7.html +#[inline] +pub unsafe fn futex( +    uaddr: *mut u32, +    op: FutexOperation, +    flags: FutexFlags, +    val: u32, +    utime: *const Timespec, +    uaddr2: *mut u32, +    val3: u32, +) -> io::Result<usize> { +    backend::thread::syscalls::futex(uaddr, op, flags, val, utime, uaddr2, val3) +} diff --git a/vendor/rustix/src/thread/id.rs b/vendor/rustix/src/thread/id.rs new file mode 100644 index 0000000..11193c9 --- /dev/null +++ b/vendor/rustix/src/thread/id.rs @@ -0,0 +1,115 @@ +use crate::{backend, io}; + +pub use crate::pid::{Pid, RawPid}; +pub use crate::ugid::{Gid, RawGid, RawUid, Uid}; + +/// `gettid()`—Returns the thread ID. +/// +/// This returns the OS thread ID, which is not necessarily the same as the +/// `rust::thread::Thread::id` or the pthread ID. +/// +/// # References +///  - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/gettid.2.html +#[inline] +#[must_use] +pub fn gettid() -> Pid { +    backend::thread::syscalls::gettid() +} + +/// `setuid(uid)` +/// +/// # Warning +/// +/// This is not the setxid you are looking for… POSIX requires xids to be +/// process granular, but on Linux they are per-thread. Thus, this call only +/// changes the xid for the current *thread*, not the entire process even +/// though that is in violation of the POSIX standard. +/// +/// For details on this distinction, see the C library vs. kernel differences +/// in the [manual page][linux_notes]. This call implements the kernel +/// behavior. +/// +/// # References +///  - [POSIX] +///  - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/setuid.html +/// [Linux]: https://man7.org/linux/man-pages/man2/setuid.2.html +/// [linux_notes]: https://man7.org/linux/man-pages/man2/setuid.2.html#NOTES +#[inline] +pub fn set_thread_uid(uid: Uid) -> io::Result<()> { +    backend::thread::syscalls::setuid_thread(uid) +} + +/// `setresuid(ruid, euid, suid)` +/// +/// # Warning +/// +/// This is not the setresxid you are looking for… POSIX requires xids to be +/// process granular, but on Linux they are per-thread. Thus, this call only +/// changes the xid for the current *thread*, not the entire process even +/// though that is in violation of the POSIX standard. +/// +/// For details on this distinction, see the C library vs. kernel differences +/// in the [manual page][linux_notes] and the notes in [`set_thread_uid`]. This +/// call implements the kernel behavior. +/// +/// # References +///  - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/setresuid.2.html +/// [linux_notes]: https://man7.org/linux/man-pages/man2/setresuid.2.html#NOTES +#[inline] +pub fn set_thread_res_uid(ruid: Uid, euid: Uid, suid: Uid) -> io::Result<()> { +    backend::thread::syscalls::setresuid_thread(ruid, euid, suid) +} + +/// `setgid(gid)` +/// +/// # Warning +/// +/// This is not the setxid you are looking for… POSIX requires xids to be +/// process granular, but on Linux they are per-thread. Thus, this call only +/// changes the xid for the current *thread*, not the entire process even +/// though that is in violation of the POSIX standard. +/// +/// For details on this distinction, see the C library vs. kernel differences +/// in the [manual page][linux_notes]. This call implements the kernel +/// behavior. +/// +/// # References +///  - [POSIX] +///  - [Linux] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/setgid.html +/// [Linux]: https://man7.org/linux/man-pages/man2/setgid.2.html +/// [linux_notes]: https://man7.org/linux/man-pages/man2/setgid.2.html#NOTES +#[inline] +pub fn set_thread_gid(gid: Gid) -> io::Result<()> { +    backend::thread::syscalls::setgid_thread(gid) +} + +/// `setresgid(rgid, egid, sgid)` +/// +/// # Warning +/// +/// This is not the setresxid you are looking for… POSIX requires xids to be +/// process granular, but on Linux they are per-thread. Thus, this call only +/// changes the xid for the current *thread*, not the entire process even +/// though that is in violation of the POSIX standard. +/// +/// For details on this distinction, see the C library vs. kernel differences +/// in the [manual page][linux_notes] and the notes in [`set_thread_gid`]. This +/// call implements the kernel behavior. +/// +/// # References +///  - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/setresgid.2.html +/// [linux_notes]: https://man7.org/linux/man-pages/man2/setresgid.2.html#NOTES +#[inline] +pub fn set_thread_res_gid(rgid: Gid, egid: Gid, sgid: Gid) -> io::Result<()> { +    backend::thread::syscalls::setresgid_thread(rgid, egid, sgid) +} diff --git a/vendor/rustix/src/thread/libcap.rs b/vendor/rustix/src/thread/libcap.rs new file mode 100644 index 0000000..0a0fbb4 --- /dev/null +++ b/vendor/rustix/src/thread/libcap.rs @@ -0,0 +1,185 @@ +use bitflags::bitflags; +use core::mem::MaybeUninit; + +use crate::pid::Pid; +use crate::{backend, io}; + +/// `__user_cap_data_struct` +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct CapabilitySets { +    /// `__user_cap_data_struct.effective` +    pub effective: CapabilityFlags, +    /// `__user_cap_data_struct.permitted` +    pub permitted: CapabilityFlags, +    /// `__user_cap_data_struct.inheritable` +    pub inheritable: CapabilityFlags, +} + +bitflags! { +    /// `CAP_*` constants. +    #[repr(transparent)] +    #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +    pub struct CapabilityFlags: u64 { +        /// `CAP_CHOWN` +        const CHOWN = 1 << linux_raw_sys::general::CAP_CHOWN; +        /// `CAP_DAC_OVERRIDE` +        const DAC_OVERRIDE = 1 << linux_raw_sys::general::CAP_DAC_OVERRIDE; +        /// `CAP_DAC_READ_SEARCH` +        const DAC_READ_SEARCH = 1 << linux_raw_sys::general::CAP_DAC_READ_SEARCH; +        /// `CAP_FOWNER` +        const FOWNER = 1 << linux_raw_sys::general::CAP_FOWNER; +        /// `CAP_FSETID` +        const FSETID = 1 << linux_raw_sys::general::CAP_FSETID; +        /// `CAP_KILL` +        const KILL = 1 << linux_raw_sys::general::CAP_KILL; +        /// `CAP_SETGID` +        const SETGID = 1 << linux_raw_sys::general::CAP_SETGID; +        /// `CAP_SETUID` +        const SETUID = 1 << linux_raw_sys::general::CAP_SETUID; +        /// `CAP_SETPCAP` +        const SETPCAP = 1 << linux_raw_sys::general::CAP_SETPCAP; +        /// `CAP_LINUX_IMMUTABLE` +        const LINUX_IMMUTABLE = 1 << linux_raw_sys::general::CAP_LINUX_IMMUTABLE; +        /// `CAP_NET_BIND_SERVICE` +        const NET_BIND_SERVICE = 1 << linux_raw_sys::general::CAP_NET_BIND_SERVICE; +        /// `CAP_NET_BROADCAST` +        const NET_BROADCAST = 1 << linux_raw_sys::general::CAP_NET_BROADCAST; +        /// `CAP_NET_ADMIN` +        const NET_ADMIN = 1 << linux_raw_sys::general::CAP_NET_ADMIN; +        /// `CAP_NET_RAW` +        const NET_RAW = 1 << linux_raw_sys::general::CAP_NET_RAW; +        /// `CAP_IPC_LOCK` +        const IPC_LOCK = 1 << linux_raw_sys::general::CAP_IPC_LOCK; +        /// `CAP_IPC_OWNER` +        const IPC_OWNER = 1 << linux_raw_sys::general::CAP_IPC_OWNER; +        /// `CAP_SYS_MODULE` +        const SYS_MODULE = 1 << linux_raw_sys::general::CAP_SYS_MODULE; +        /// `CAP_SYS_RAWIO` +        const SYS_RAWIO = 1 << linux_raw_sys::general::CAP_SYS_RAWIO; +        /// `CAP_SYS_CHROOT` +        const SYS_CHROOT = 1 << linux_raw_sys::general::CAP_SYS_CHROOT; +        /// `CAP_SYS_PTRACE` +        const SYS_PTRACE = 1 << linux_raw_sys::general::CAP_SYS_PTRACE; +        /// `CAP_SYS_PACCT` +        const SYS_PACCT = 1 << linux_raw_sys::general::CAP_SYS_PACCT; +        /// `CAP_SYS_ADMIN` +        const SYS_ADMIN = 1 << linux_raw_sys::general::CAP_SYS_ADMIN; +        /// `CAP_SYS_BOOT` +        const SYS_BOOT = 1 << linux_raw_sys::general::CAP_SYS_BOOT; +        /// `CAP_SYS_NICE` +        const SYS_NICE = 1 << linux_raw_sys::general::CAP_SYS_NICE; +        /// `CAP_SYS_RESOURCE` +        const SYS_RESOURCE = 1 << linux_raw_sys::general::CAP_SYS_RESOURCE; +        /// `CAP_SYS_TIME` +        const SYS_TIME = 1 << linux_raw_sys::general::CAP_SYS_TIME; +        /// `CAP_SYS_TTY_CONFIG` +        const SYS_TTY_CONFIG = 1 << linux_raw_sys::general::CAP_SYS_TTY_CONFIG; +        /// `CAP_MKNOD` +        const MKNOD = 1 << linux_raw_sys::general::CAP_MKNOD; +        /// `CAP_LEASE` +        const LEASE = 1 << linux_raw_sys::general::CAP_LEASE; +        /// `CAP_AUDIT_WRITE` +        const AUDIT_WRITE = 1 << linux_raw_sys::general::CAP_AUDIT_WRITE; +        /// `CAP_AUDIT_CONTROL` +        const AUDIT_CONTROL = 1 << linux_raw_sys::general::CAP_AUDIT_CONTROL; +        /// `CAP_SETFCAP` +        const SETFCAP = 1 << linux_raw_sys::general::CAP_SETFCAP; +        /// `CAP_MAC_OVERRIDE` +        const MAC_OVERRIDE = 1 << linux_raw_sys::general::CAP_MAC_OVERRIDE; +        /// `CAP_MAC_ADMIN` +        const MAC_ADMIN = 1 << linux_raw_sys::general::CAP_MAC_ADMIN; +        /// `CAP_SYSLOG` +        const SYSLOG = 1 << linux_raw_sys::general::CAP_SYSLOG; +        /// `CAP_WAKE_ALARM` +        const WAKE_ALARM = 1 << linux_raw_sys::general::CAP_WAKE_ALARM; +        /// `CAP_BLOCK_SUSPEND` +        const BLOCK_SUSPEND = 1 << linux_raw_sys::general::CAP_BLOCK_SUSPEND; +        /// `CAP_AUDIT_READ` +        const AUDIT_READ = 1 << linux_raw_sys::general::CAP_AUDIT_READ; +        /// `CAP_PERFMON` +        const PERFMON = 1 << linux_raw_sys::general::CAP_PERFMON; +        /// `CAP_BPF` +        const BPF = 1 << linux_raw_sys::general::CAP_BPF; +        /// `CAP_CHECKPOINT_RESTORE` +        const CHECKPOINT_RESTORE = 1 << linux_raw_sys::general::CAP_CHECKPOINT_RESTORE; + +        /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> +        const _ = !0; +    } +} + +/// `capget(_LINUX_CAPABILITY_VERSION_3, pid)` +/// +/// # References +///  - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/capget.2.html +#[inline] +#[doc(alias = "capget")] +pub fn capabilities(pid: Option<Pid>) -> io::Result<CapabilitySets> { +    capget(pid) +} + +/// `capset(_LINUX_CAPABILITY_VERSION_3, pid, effective, permitted, +/// inheritable)` +/// +/// # References +///  - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/capget.2.html +#[inline] +#[doc(alias = "capset")] +pub fn set_capabilities(pid: Option<Pid>, sets: CapabilitySets) -> io::Result<()> { +    capset(pid, sets) +} + +#[inline] +#[allow(unsafe_code)] +fn capget(pid: Option<Pid>) -> io::Result<CapabilitySets> { +    let mut data = [MaybeUninit::<linux_raw_sys::general::__user_cap_data_struct>::uninit(); 2]; + +    let data = { +        let mut header = linux_raw_sys::general::__user_cap_header_struct { +            version: linux_raw_sys::general::_LINUX_CAPABILITY_VERSION_3, +            pid: Pid::as_raw(pid) as backend::c::c_int, +        }; + +        backend::thread::syscalls::capget(&mut header, &mut data)?; +        // SAFETY: v3 is a 64-bit implementation, so the kernel filled in both +        // data structs. +        unsafe { (data[0].assume_init(), data[1].assume_init()) } +    }; + +    let effective = u64::from(data.0.effective) | (u64::from(data.1.effective) << u32::BITS); +    let permitted = u64::from(data.0.permitted) | (u64::from(data.1.permitted) << u32::BITS); +    let inheritable = u64::from(data.0.inheritable) | (u64::from(data.1.inheritable) << u32::BITS); + +    // The kernel returns a partitioned bitset that we just combined above. +    Ok(CapabilitySets { +        effective: CapabilityFlags::from_bits_retain(effective), +        permitted: CapabilityFlags::from_bits_retain(permitted), +        inheritable: CapabilityFlags::from_bits_retain(inheritable), +    }) +} + +#[inline] +fn capset(pid: Option<Pid>, sets: CapabilitySets) -> io::Result<()> { +    let mut header = linux_raw_sys::general::__user_cap_header_struct { +        version: linux_raw_sys::general::_LINUX_CAPABILITY_VERSION_3, +        pid: Pid::as_raw(pid) as backend::c::c_int, +    }; +    let data = [ +        linux_raw_sys::general::__user_cap_data_struct { +            effective: sets.effective.bits() as u32, +            permitted: sets.permitted.bits() as u32, +            inheritable: sets.inheritable.bits() as u32, +        }, +        linux_raw_sys::general::__user_cap_data_struct { +            effective: (sets.effective.bits() >> u32::BITS) as u32, +            permitted: (sets.permitted.bits() >> u32::BITS) as u32, +            inheritable: (sets.inheritable.bits() >> u32::BITS) as u32, +        }, +    ]; + +    backend::thread::syscalls::capset(&mut header, &data) +} diff --git a/vendor/rustix/src/thread/mod.rs b/vendor/rustix/src/thread/mod.rs new file mode 100644 index 0000000..6f19635 --- /dev/null +++ b/vendor/rustix/src/thread/mod.rs @@ -0,0 +1,30 @@ +//! Thread-associated operations. + +#[cfg(not(target_os = "redox"))] +mod clock; +#[cfg(linux_kernel)] +mod futex; +#[cfg(linux_kernel)] +mod id; +#[cfg(linux_kernel)] +mod libcap; +#[cfg(linux_kernel)] +mod prctl; +#[cfg(linux_kernel)] +mod setns; + +#[cfg(not(target_os = "redox"))] +pub use clock::*; +#[cfg(linux_kernel)] +pub use futex::{futex, FutexFlags, FutexOperation}; +#[cfg(linux_kernel)] +pub use id::{ +    gettid, set_thread_gid, set_thread_res_gid, set_thread_res_uid, set_thread_uid, Gid, Pid, +    RawGid, RawPid, RawUid, Uid, +}; +#[cfg(linux_kernel)] +pub use libcap::{capabilities, set_capabilities, CapabilityFlags, CapabilitySets}; +#[cfg(linux_kernel)] +pub use prctl::*; +#[cfg(linux_kernel)] +pub use setns::*; diff --git a/vendor/rustix/src/thread/prctl.rs b/vendor/rustix/src/thread/prctl.rs new file mode 100644 index 0000000..9d9e6b2 --- /dev/null +++ b/vendor/rustix/src/thread/prctl.rs @@ -0,0 +1,1007 @@ +//! Linux `prctl` wrappers. +//! +//! Rustix wraps variadic/dynamic-dispatch functions like `prctl` in +//! type-safe wrappers. +//! +//! # Safety +//! +//! The inner `prctl` calls are dynamically typed and must be called +//! correctly. +#![allow(unsafe_code)] + +use core::mem::MaybeUninit; +use core::num::NonZeroU64; +use core::ptr; +use core::ptr::NonNull; +use core::sync::atomic::AtomicU8; + +use bitflags::bitflags; + +use crate::backend::c::{c_int, c_uint, c_void}; +use crate::backend::prctl::syscalls; +use crate::ffi::CStr; +#[cfg(feature = "alloc")] +use crate::ffi::CString; +use crate::io; +use crate::pid::Pid; +use crate::prctl::{ +    prctl_1arg, prctl_2args, prctl_3args, prctl_get_at_arg2_optional, PointerAuthenticationKeys, +}; +use crate::utils::as_ptr; + +// +// PR_GET_KEEPCAPS/PR_SET_KEEPCAPS +// + +const PR_GET_KEEPCAPS: c_int = 7; + +/// Get the current state of the calling thread's `keep capabilities` flag. +/// +/// # References +///  - [`prctl(PR_GET_KEEPCAPS,...)`] +/// +/// [`prctl(PR_GET_KEEPCAPS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn get_keep_capabilities() -> io::Result<bool> { +    unsafe { prctl_1arg(PR_GET_KEEPCAPS) }.map(|r| r != 0) +} + +const PR_SET_KEEPCAPS: c_int = 8; + +/// Set the state of the calling thread's `keep capabilities` flag. +/// +/// # References +///  - [`prctl(PR_SET_KEEPCAPS,...)`] +/// +/// [`prctl(PR_SET_KEEPCAPS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_keep_capabilities(enable: bool) -> io::Result<()> { +    unsafe { prctl_2args(PR_SET_KEEPCAPS, usize::from(enable) as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_NAME/PR_SET_NAME +// + +#[cfg(feature = "alloc")] +const PR_GET_NAME: c_int = 16; + +/// Get the name of the calling thread. +/// +/// # References +///  - [`prctl(PR_GET_NAME,...)`] +/// +/// [`prctl(PR_GET_NAME,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +#[cfg(feature = "alloc")] +pub fn name() -> io::Result<CString> { +    let mut buffer = [0_u8; 16]; +    unsafe { prctl_2args(PR_GET_NAME, buffer.as_mut_ptr().cast())? }; + +    let len = buffer.iter().position(|&x| x == 0_u8).unwrap_or(0); +    CString::new(&buffer[..len]).map_err(|_r| io::Errno::ILSEQ) +} + +const PR_SET_NAME: c_int = 15; + +/// Set the name of the calling thread. +/// +/// Unlike `pthread_setname_np`, this function silently truncates the name to +/// 16 bytes, as the Linux syscall does. +/// +/// # References +///  - [`prctl(PR_SET_NAME,...)`] +/// +/// [`prctl(PR_SET_NAME,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_name(name: &CStr) -> io::Result<()> { +    unsafe { prctl_2args(PR_SET_NAME, name.as_ptr() as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_SECCOMP/PR_SET_SECCOMP +// + +//const PR_GET_SECCOMP: c_int = 21; + +const SECCOMP_MODE_DISABLED: i32 = 0; +const SECCOMP_MODE_STRICT: i32 = 1; +const SECCOMP_MODE_FILTER: i32 = 2; + +/// `SECCOMP_MODE_*`. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(i32)] +pub enum SecureComputingMode { +    /// Secure computing is not in use. +    Disabled = SECCOMP_MODE_DISABLED, +    /// Use hard-coded filter. +    Strict = SECCOMP_MODE_STRICT, +    /// Use user-supplied filter. +    Filter = SECCOMP_MODE_FILTER, +} + +impl TryFrom<i32> for SecureComputingMode { +    type Error = io::Errno; + +    fn try_from(value: i32) -> Result<Self, Self::Error> { +        match value { +            SECCOMP_MODE_DISABLED => Ok(Self::Disabled), +            SECCOMP_MODE_STRICT => Ok(Self::Strict), +            SECCOMP_MODE_FILTER => Ok(Self::Filter), +            _ => Err(io::Errno::RANGE), +        } +    } +} + +/* +/// Get the secure computing mode of the calling thread. +/// +/// If the caller is not in secure computing mode, this returns +/// [`SecureComputingMode::Disabled`]. If the caller is in strict secure +/// computing mode, then this call will cause a [`Signal::Kill`] signal to be +/// sent to the process. If the caller is in filter mode, and this system call +/// is allowed by the seccomp filters, it returns +/// [`SecureComputingMode::Filter`]; otherwise, the process is killed with a +/// [`Signal::Kill`] signal. +/// +/// Since Linux 3.8, the Seccomp field of the `/proc/[pid]/status` file +/// provides a method of obtaining the same information, without the risk that +/// the process is killed; see [the `proc` manual page]. +/// +/// # References +///  - [`prctl(PR_GET_SECCOMP,...)`] +/// +/// [`prctl(PR_GET_SECCOMP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +/// [the `proc` manual page]: https://man7.org/linux/man-pages/man5/proc.5.html +#[inline] +pub fn secure_computing_mode() -> io::Result<SecureComputingMode> { +    unsafe { prctl_1arg(PR_GET_SECCOMP) }.and_then(TryInto::try_into) +} +*/ + +const PR_SET_SECCOMP: c_int = 22; + +/// Set the secure computing mode for the calling thread, to limit the +/// available system calls. +/// +/// # References +///  - [`prctl(PR_SET_SECCOMP,...)`] +/// +/// [`prctl(PR_SET_SECCOMP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_secure_computing_mode(mode: SecureComputingMode) -> io::Result<()> { +    unsafe { prctl_2args(PR_SET_SECCOMP, mode as usize as *mut _) }.map(|_r| ()) +} + +// +// PR_CAPBSET_READ/PR_CAPBSET_DROP +// + +const PR_CAPBSET_READ: c_int = 23; + +/// Linux per-thread capability. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(u32)] +pub enum Capability { +    /// In a system with the `_POSIX_CHOWN_RESTRICTED` option defined, this +    /// overrides the restriction of changing file ownership and group +    /// ownership. +    ChangeOwnership = linux_raw_sys::general::CAP_CHOWN, +    /// Override all DAC access, including ACL execute access if `_POSIX_ACL` +    /// is defined. Excluding DAC access covered by +    /// [`Capability::LinuxImmutable`]. +    DACOverride = linux_raw_sys::general::CAP_DAC_OVERRIDE, +    /// Overrides all DAC restrictions regarding read and search on files and +    /// directories, including ACL restrictions if `_POSIX_ACL` is defined. +    /// Excluding DAC access covered by [`Capability::LinuxImmutable`]. +    DACReadSearch = linux_raw_sys::general::CAP_DAC_READ_SEARCH, +    /// Overrides all restrictions about allowed operations on files, where +    /// file owner ID must be equal to the user ID, except where +    /// [`Capability::FileSetID`] is applicable. It doesn't override MAC +    /// and DAC restrictions. +    FileOwner = linux_raw_sys::general::CAP_FOWNER, +    /// Overrides the following restrictions that the effective user ID shall +    /// match the file owner ID when setting the `S_ISUID` and `S_ISGID` +    /// bits on that file; that the effective group ID (or one of the +    /// supplementary group IDs) shall match the file owner ID when setting the +    /// `S_ISGID` bit on that file; that the `S_ISUID` and `S_ISGID` bits are +    /// cleared on successful return from `chown` (not implemented). +    FileSetID = linux_raw_sys::general::CAP_FSETID, +    /// Overrides the restriction that the real or effective user ID of a +    /// process sending a signal must match the real or effective user ID of +    /// the process receiving the signal. +    Kill = linux_raw_sys::general::CAP_KILL, +    /// Allows `setgid` manipulation. Allows `setgroups`. Allows forged gids on +    /// socket credentials passing. +    SetGroupID = linux_raw_sys::general::CAP_SETGID, +    /// Allows `set*uid` manipulation (including fsuid). Allows forged pids on +    /// socket credentials passing. +    SetUserID = linux_raw_sys::general::CAP_SETUID, +    /// Without VFS support for capabilities: +    ///  - Transfer any capability in your permitted set to any pid. +    ///  - remove any capability in your permitted set from any pid. With VFS +    ///    support for capabilities (neither of above, but) +    ///  - Add any capability from current's capability bounding set to the +    ///    current process' inheritable set. +    ///  - Allow taking bits out of capability bounding set. +    ///  - Allow modification of the securebits for a process. +    SetPermittedCapabilities = linux_raw_sys::general::CAP_SETPCAP, +    /// Allow modification of `S_IMMUTABLE` and `S_APPEND` file attributes. +    LinuxImmutable = linux_raw_sys::general::CAP_LINUX_IMMUTABLE, +    /// Allows binding to TCP/UDP sockets below 1024. Allows binding to ATM +    /// VCIs below 32. +    NetBindService = linux_raw_sys::general::CAP_NET_BIND_SERVICE, +    /// Allow broadcasting, listen to multicast. +    NetBroadcast = linux_raw_sys::general::CAP_NET_BROADCAST, +    /// Allow interface configuration. Allow administration of IP firewall, +    /// masquerading and accounting. Allow setting debug option on sockets. +    /// Allow modification of routing tables. Allow setting arbitrary +    /// process / process group ownership on sockets. Allow binding to any +    /// address for transparent proxying (also via [`Capability::NetRaw`]). +    /// Allow setting TOS (type of service). Allow setting promiscuous +    /// mode. Allow clearing driver statistics. Allow multicasting. Allow +    /// read/write of device-specific registers. Allow activation of ATM +    /// control sockets. +    NetAdmin = linux_raw_sys::general::CAP_NET_ADMIN, +    /// Allow use of `RAW` sockets. Allow use of `PACKET` sockets. Allow +    /// binding to any address for transparent proxying (also via +    /// [`Capability::NetAdmin`]). +    NetRaw = linux_raw_sys::general::CAP_NET_RAW, +    /// Allow locking of shared memory segments. Allow mlock and mlockall +    /// (which doesn't really have anything to do with IPC). +    IPCLock = linux_raw_sys::general::CAP_IPC_LOCK, +    /// Override IPC ownership checks. +    IPCOwner = linux_raw_sys::general::CAP_IPC_OWNER, +    /// Insert and remove kernel modules - modify kernel without limit. +    SystemModule = linux_raw_sys::general::CAP_SYS_MODULE, +    /// Allow ioperm/iopl access. Allow sending USB messages to any device via +    /// `/dev/bus/usb`. +    SystemRawIO = linux_raw_sys::general::CAP_SYS_RAWIO, +    /// Allow use of `chroot`. +    SystemChangeRoot = linux_raw_sys::general::CAP_SYS_CHROOT, +    /// Allow `ptrace` of any process. +    SystemProcessTrace = linux_raw_sys::general::CAP_SYS_PTRACE, +    /// Allow configuration of process accounting. +    SystemProcessAccounting = linux_raw_sys::general::CAP_SYS_PACCT, +    /// Allow configuration of the secure attention key. Allow administration +    /// of the random device. Allow examination and configuration of disk +    /// quotas. Allow setting the domainname. Allow setting the hostname. +    /// Allow `mount` and `umount`, setting up new smb connection. +    /// Allow some autofs root ioctls. Allow nfsservctl. Allow +    /// `VM86_REQUEST_IRQ`. Allow to read/write pci config on alpha. Allow +    /// `irix_prctl` on mips (setstacksize). Allow flushing all cache on +    /// m68k (`sys_cacheflush`). Allow removing semaphores. Used instead of +    /// [`Capability::ChangeOwnership`] to "chown" IPC message queues, +    /// semaphores and shared memory. Allow locking/unlocking of shared +    /// memory segment. Allow turning swap on/off. Allow forged pids on +    /// socket credentials passing. Allow setting readahead and +    /// flushing buffers on block devices. Allow setting geometry in floppy +    /// driver. Allow turning DMA on/off in `xd` driver. Allow +    /// administration of md devices (mostly the above, but some +    /// extra ioctls). Allow tuning the ide driver. Allow access to the nvram +    /// device. Allow administration of `apm_bios`, serial and bttv (TV) +    /// device. Allow manufacturer commands in isdn CAPI support driver. +    /// Allow reading non-standardized portions of pci configuration space. +    /// Allow DDI debug ioctl on sbpcd driver. Allow setting up serial ports. +    /// Allow sending raw qic-117 commands. Allow enabling/disabling tagged +    /// queuing on SCSI controllers and sending arbitrary SCSI commands. +    /// Allow setting encryption key on loopback filesystem. Allow setting +    /// zone reclaim policy. Allow everything under +    /// [`Capability::BerkeleyPacketFilters`] and +    /// [`Capability::PerformanceMonitoring`] for backward compatibility. +    SystemAdmin = linux_raw_sys::general::CAP_SYS_ADMIN, +    /// Allow use of `reboot`. +    SystemBoot = linux_raw_sys::general::CAP_SYS_BOOT, +    /// Allow raising priority and setting priority on other (different UID) +    /// processes. Allow use of FIFO and round-robin (realtime) scheduling +    /// on own processes and setting the scheduling algorithm used by +    /// another process. Allow setting cpu affinity on other processes. +    /// Allow setting realtime ioprio class. Allow setting ioprio class on +    /// other processes. +    SystemNice = linux_raw_sys::general::CAP_SYS_NICE, +    /// Override resource limits. Set resource limits. Override quota limits. +    /// Override reserved space on ext2 filesystem. Modify data journaling +    /// mode on ext3 filesystem (uses journaling resources). NOTE: ext2 +    /// honors fsuid when checking for resource overrides, so you can +    /// override using fsuid too. Override size restrictions on IPC message +    /// queues. Allow more than 64hz interrupts from the real-time clock. +    /// Override max number of consoles on console allocation. Override max +    /// number of keymaps. Control memory reclaim behavior. +    SystemResource = linux_raw_sys::general::CAP_SYS_RESOURCE, +    /// Allow manipulation of system clock. Allow `irix_stime` on mips. Allow +    /// setting the real-time clock. +    SystemTime = linux_raw_sys::general::CAP_SYS_TIME, +    /// Allow configuration of tty devices. Allow `vhangup` of tty. +    SystemTTYConfig = linux_raw_sys::general::CAP_SYS_TTY_CONFIG, +    /// Allow the privileged aspects of `mknod`. +    MakeNode = linux_raw_sys::general::CAP_MKNOD, +    /// Allow taking of leases on files. +    Lease = linux_raw_sys::general::CAP_LEASE, +    /// Allow writing the audit log via unicast netlink socket. +    AuditWrite = linux_raw_sys::general::CAP_AUDIT_WRITE, +    /// Allow configuration of audit via unicast netlink socket. +    AuditControl = linux_raw_sys::general::CAP_AUDIT_CONTROL, +    /// Set or remove capabilities on files. Map `uid=0` into a child user +    /// namespace. +    SetFileCapabilities = linux_raw_sys::general::CAP_SETFCAP, +    /// Override MAC access. The base kernel enforces no MAC policy. An LSM may +    /// enforce a MAC policy, and if it does and it chooses to implement +    /// capability based overrides of that policy, this is the capability +    /// it should use to do so. +    MACOverride = linux_raw_sys::general::CAP_MAC_OVERRIDE, +    /// Allow MAC configuration or state changes. The base kernel requires no +    /// MAC configuration. An LSM may enforce a MAC policy, and if it does +    /// and it chooses to implement capability based +    /// checks on modifications to that policy or the data required to maintain +    /// it, this is the capability it should use to do so. +    MACAdmin = linux_raw_sys::general::CAP_MAC_ADMIN, +    /// Allow configuring the kernel's `syslog` (`printk` behaviour). +    SystemLog = linux_raw_sys::general::CAP_SYSLOG, +    /// Allow triggering something that will wake the system. +    WakeAlarm = linux_raw_sys::general::CAP_WAKE_ALARM, +    /// Allow preventing system suspends. +    BlockSuspend = linux_raw_sys::general::CAP_BLOCK_SUSPEND, +    /// Allow reading the audit log via multicast netlink socket. +    AuditRead = linux_raw_sys::general::CAP_AUDIT_READ, +    /// Allow system performance and observability privileged operations using +    /// `perf_events`, `i915_perf` and other kernel subsystems. +    PerformanceMonitoring = linux_raw_sys::general::CAP_PERFMON, +    /// This capability allows the following BPF operations: +    ///  - Creating all types of BPF maps +    ///  - Advanced verifier features +    ///     - Indirect variable access +    ///     - Bounded loops +    ///     - BPF to BPF function calls +    ///     - Scalar precision tracking +    ///     - Larger complexity limits +    ///     - Dead code elimination +    ///     - And potentially other features +    ///  - Loading BPF Type Format (BTF) data +    ///  - Retrieve `xlated` and JITed code of BPF programs +    ///  - Use `bpf_spin_lock` helper +    /// +    /// [`Capability::PerformanceMonitoring`] relaxes the verifier checks +    /// further: +    ///  - BPF progs can use of pointer-to-integer conversions +    ///  - speculation attack hardening measures are bypassed +    ///  - `bpf_probe_read` to read arbitrary kernel memory is allowed +    ///  - `bpf_trace_printk` to print kernel memory is allowed +    /// +    /// [`Capability::SystemAdmin`] is required to use bpf_probe_write_user. +    /// +    /// [`Capability::SystemAdmin`] is required to iterate system-wide loaded +    /// programs, maps, links, and BTFs, and convert their IDs to file +    /// descriptors. +    /// +    /// [`Capability::PerformanceMonitoring`] and +    /// [`Capability::BerkeleyPacketFilters`] are required to load tracing +    /// programs. [`Capability::NetAdmin`] and +    /// [`Capability::BerkeleyPacketFilters`] are required to load +    /// networking programs. +    BerkeleyPacketFilters = linux_raw_sys::general::CAP_BPF, +    /// Allow checkpoint/restore related operations. Allow PID selection during +    /// `clone3`. Allow writing to `ns_last_pid`. +    CheckpointRestore = linux_raw_sys::general::CAP_CHECKPOINT_RESTORE, +} + +/// Check if the specified capability is in the calling thread's capability +/// bounding set. +/// +/// # References +///  - [`prctl(PR_CAPBSET_READ,...)`] +/// +/// [`prctl(PR_CAPBSET_READ,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn capability_is_in_bounding_set(capability: Capability) -> io::Result<bool> { +    unsafe { prctl_2args(PR_CAPBSET_READ, capability as usize as *mut _) }.map(|r| r != 0) +} + +const PR_CAPBSET_DROP: c_int = 24; + +/// If the calling thread has the [`Capability::SetPermittedCapabilities`] +/// capability within its user namespace, then drop the specified capability +/// from the thread's capability bounding set. +/// +/// # References +///  - [`prctl(PR_CAPBSET_DROP,...)`] +/// +/// [`prctl(PR_CAPBSET_DROP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn remove_capability_from_bounding_set(capability: Capability) -> io::Result<()> { +    unsafe { prctl_2args(PR_CAPBSET_DROP, capability as usize as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_SECUREBITS/PR_SET_SECUREBITS +// + +const PR_GET_SECUREBITS: c_int = 27; + +bitflags! { +    /// `SECBIT_*`. +    #[repr(transparent)] +    #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +    pub struct CapabilitiesSecureBits: u32 { +        /// If this bit is set, then the kernel does not grant capabilities +        /// when a `set-user-ID-root` program is executed, or when a process +        /// with an effective or real UID of 0 calls `execve`. +        const NO_ROOT = 1_u32 << 0; +        /// Set [`NO_ROOT`] irreversibly. +        const NO_ROOT_LOCKED = 1_u32 << 1; +        /// Setting this flag stops the kernel from adjusting the process' +        /// permitted, effective, and ambient capability sets when the thread's +        /// effective and filesystem UIDs are switched between zero and nonzero +        /// values. +        const NO_SETUID_FIXUP = 1_u32 << 2; +        /// Set [`NO_SETUID_FIXUP`] irreversibly. +        const NO_SETUID_FIXUP_LOCKED = 1_u32 << 3; +        /// Setting this flag allows a thread that has one or more 0 UIDs to +        /// retain capabilities in its permitted set when it switches all of +        /// its UIDs to nonzero values. +        const KEEP_CAPS = 1_u32 << 4; +        /// Set [`KEEP_CAPS`] irreversibly. +        const KEEP_CAPS_LOCKED = 1_u32 << 5; +        /// Setting this flag disallows raising ambient capabilities via the +        /// `prctl`'s `PR_CAP_AMBIENT_RAISE` operation. +        const NO_CAP_AMBIENT_RAISE = 1_u32 << 6; +        /// Set [`NO_CAP_AMBIENT_RAISE`] irreversibly. +        const NO_CAP_AMBIENT_RAISE_LOCKED = 1_u32 << 7; + +        /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> +        const _ = !0; +    } +} + +/// Get the `securebits` flags of the calling thread. +/// +/// # References +///  - [`prctl(PR_GET_SECUREBITS,...)`] +/// +/// [`prctl(PR_GET_SECUREBITS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn capabilities_secure_bits() -> io::Result<CapabilitiesSecureBits> { +    let r = unsafe { prctl_1arg(PR_GET_SECUREBITS)? } as c_uint; +    CapabilitiesSecureBits::from_bits(r).ok_or(io::Errno::RANGE) +} + +const PR_SET_SECUREBITS: c_int = 28; + +/// Set the `securebits` flags of the calling thread. +/// +/// # References +///  - [`prctl(PR_SET_SECUREBITS,...)`] +/// +/// [`prctl(PR_SET_SECUREBITS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_capabilities_secure_bits(bits: CapabilitiesSecureBits) -> io::Result<()> { +    unsafe { prctl_2args(PR_SET_SECUREBITS, bits.bits() as usize as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_TIMERSLACK/PR_SET_TIMERSLACK +// + +const PR_GET_TIMERSLACK: c_int = 30; + +/// Get the `current` timer slack value of the calling thread. +/// +/// # References +///  - [`prctl(PR_GET_TIMERSLACK,...)`] +/// +/// [`prctl(PR_GET_TIMERSLACK,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn current_timer_slack() -> io::Result<u64> { +    unsafe { prctl_1arg(PR_GET_TIMERSLACK) }.map(|r| r as u64) +} + +const PR_SET_TIMERSLACK: c_int = 29; + +/// Sets the `current` timer slack value for the calling thread. +/// +/// # References +///  - [`prctl(PR_SET_TIMERSLACK,...)`] +/// +/// [`prctl(PR_SET_TIMERSLACK,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_current_timer_slack(value: Option<NonZeroU64>) -> io::Result<()> { +    let value = usize::try_from(value.map_or(0, NonZeroU64::get)).map_err(|_r| io::Errno::RANGE)?; +    unsafe { prctl_2args(PR_SET_TIMERSLACK, value as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_NO_NEW_PRIVS/PR_SET_NO_NEW_PRIVS +// + +const PR_GET_NO_NEW_PRIVS: c_int = 39; + +/// Get the value of the `no_new_privs` attribute for the calling thread. +/// +/// # References +///  - [`prctl(PR_GET_NO_NEW_PRIVS,...)`] +/// +/// [`prctl(PR_GET_NO_NEW_PRIVS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn no_new_privs() -> io::Result<bool> { +    unsafe { prctl_1arg(PR_GET_NO_NEW_PRIVS) }.map(|r| r != 0) +} + +const PR_SET_NO_NEW_PRIVS: c_int = 38; + +/// Set the calling thread's `no_new_privs` attribute. +/// +/// # References +///  - [`prctl(PR_SET_NO_NEW_PRIVS,...)`] +/// +/// [`prctl(PR_SET_NO_NEW_PRIVS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn set_no_new_privs(no_new_privs: bool) -> io::Result<()> { +    unsafe { prctl_2args(PR_SET_NO_NEW_PRIVS, usize::from(no_new_privs) as *mut _) }.map(|_r| ()) +} + +// +// PR_GET_TID_ADDRESS +// + +const PR_GET_TID_ADDRESS: c_int = 40; + +/// Get the `clear_child_tid` address set by `set_tid_address` +/// and `clone`'s `CLONE_CHILD_CLEARTID` flag. +/// +/// # References +///  - [`prctl(PR_GET_TID_ADDRESS,...)`] +/// +/// [`prctl(PR_GET_TID_ADDRESS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn get_clear_child_tid_address() -> io::Result<Option<NonNull<c_void>>> { +    unsafe { prctl_get_at_arg2_optional::<*mut c_void>(PR_GET_TID_ADDRESS) }.map(NonNull::new) +} + +// +// PR_GET_THP_DISABLE/PR_SET_THP_DISABLE +// + +const PR_GET_THP_DISABLE: c_int = 42; + +/// Get the current setting of the `THP disable` flag for the calling thread. +/// +/// # References +///  - [`prctl(PR_GET_THP_DISABLE,...)`] +/// +/// [`prctl(PR_GET_THP_DISABLE,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn transparent_huge_pages_are_disabled() -> io::Result<bool> { +    unsafe { prctl_1arg(PR_GET_THP_DISABLE) }.map(|r| r != 0) +} + +const PR_SET_THP_DISABLE: c_int = 41; + +/// Set the state of the `THP disable` flag for the calling thread. +/// +/// # References +///  - [`prctl(PR_SET_THP_DISABLE,...)`] +/// +/// [`prctl(PR_SET_THP_DISABLE,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn disable_transparent_huge_pages(thp_disable: bool) -> io::Result<()> { +    unsafe { prctl_2args(PR_SET_THP_DISABLE, usize::from(thp_disable) as *mut _) }.map(|_r| ()) +} + +// +// PR_CAP_AMBIENT +// + +const PR_CAP_AMBIENT: c_int = 47; + +const PR_CAP_AMBIENT_IS_SET: usize = 1; + +/// Check if the specified capability is in the ambient set. +/// +/// # References +///  - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,...)`] +/// +/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn capability_is_in_ambient_set(capability: Capability) -> io::Result<bool> { +    let cap = capability as usize as *mut _; +    unsafe { prctl_3args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET as *mut _, cap) }.map(|r| r != 0) +} + +const PR_CAP_AMBIENT_CLEAR_ALL: usize = 4; + +/// Remove all capabilities from the ambient set. +/// +/// # References +///  - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,...)`] +/// +/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn clear_ambient_capability_set() -> io::Result<()> { +    unsafe { prctl_2args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL as *mut _) }.map(|_r| ()) +} + +const PR_CAP_AMBIENT_RAISE: usize = 2; +const PR_CAP_AMBIENT_LOWER: usize = 3; + +/// Add or remove the specified capability to the ambient set. +/// +/// # References +///  - [`prctl(PR_CAP_AMBIENT,...)`] +/// +/// [`prctl(PR_CAP_AMBIENT,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn configure_capability_in_ambient_set(capability: Capability, enable: bool) -> io::Result<()> { +    let sub_operation = if enable { +        PR_CAP_AMBIENT_RAISE +    } else { +        PR_CAP_AMBIENT_LOWER +    }; +    let cap = capability as usize as *mut _; + +    unsafe { prctl_3args(PR_CAP_AMBIENT, sub_operation as *mut _, cap) }.map(|_r| ()) +} + +// +// PR_SVE_GET_VL/PR_SVE_SET_VL +// + +const PR_SVE_GET_VL: c_int = 51; + +const PR_SVE_VL_LEN_MASK: u32 = 0xffff; +const PR_SVE_VL_INHERIT: u32 = 1_u32 << 17; + +/// Scalable Vector Extension vector length configuration. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct SVEVectorLengthConfig { +    /// Vector length in bytes. +    pub vector_length_in_bytes: u32, +    /// Vector length inherited across `execve`. +    pub vector_length_inherited_across_execve: bool, +} + +/// Get the thread's current SVE vector length configuration. +/// +/// # References +///  - [`prctl(PR_SVE_GET_VL,...)`] +/// +/// [`prctl(PR_SVE_GET_VL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn sve_vector_length_configuration() -> io::Result<SVEVectorLengthConfig> { +    let bits = unsafe { prctl_1arg(PR_SVE_GET_VL)? } as c_uint; +    Ok(SVEVectorLengthConfig { +        vector_length_in_bytes: bits & PR_SVE_VL_LEN_MASK, +        vector_length_inherited_across_execve: (bits & PR_SVE_VL_INHERIT) != 0, +    }) +} + +const PR_SVE_SET_VL: c_int = 50; + +const PR_SVE_SET_VL_ONEXEC: u32 = 1_u32 << 18; + +/// Configure the thread's vector length of Scalable Vector Extension. +/// +/// # References +///  - [`prctl(PR_SVE_SET_VL,...)`] +/// +/// # Safety +/// +/// Please ensure the conditions necessary to safely call this function, +/// as detailed in the references above. +/// +/// [`prctl(PR_SVE_SET_VL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub unsafe fn set_sve_vector_length_configuration( +    vector_length_in_bytes: usize, +    vector_length_inherited_across_execve: bool, +    defer_change_to_next_execve: bool, +) -> io::Result<()> { +    let vector_length_in_bytes = +        u32::try_from(vector_length_in_bytes).map_err(|_r| io::Errno::RANGE)?; + +    let mut bits = vector_length_in_bytes & PR_SVE_VL_LEN_MASK; + +    if vector_length_inherited_across_execve { +        bits |= PR_SVE_VL_INHERIT; +    } + +    if defer_change_to_next_execve { +        bits |= PR_SVE_SET_VL_ONEXEC; +    } + +    prctl_2args(PR_SVE_SET_VL, bits as usize as *mut _).map(|_r| ()) +} + +// +// PR_PAC_RESET_KEYS +// + +const PR_PAC_RESET_KEYS: c_int = 54; + +/// Securely reset the thread's pointer authentication keys to fresh random +/// values generated by the kernel. +/// +/// # References +///  - [`prctl(PR_PAC_RESET_KEYS,...)`] +/// +/// # Safety +/// +/// Please ensure the conditions necessary to safely call this function, +/// as detailed in the references above. +/// +/// [`prctl(PR_PAC_RESET_KEYS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub unsafe fn reset_pointer_authentication_keys( +    keys: Option<PointerAuthenticationKeys>, +) -> io::Result<()> { +    let keys = keys.as_ref().map_or(0_u32, PointerAuthenticationKeys::bits); +    prctl_2args(PR_PAC_RESET_KEYS, keys as usize as *mut _).map(|_r| ()) +} + +// +// PR_GET_TAGGED_ADDR_CTRL/PR_SET_TAGGED_ADDR_CTRL +// + +const PR_GET_TAGGED_ADDR_CTRL: c_int = 56; + +const PR_MTE_TAG_SHIFT: u32 = 3; +const PR_MTE_TAG_MASK: u32 = 0xffff_u32 << PR_MTE_TAG_SHIFT; + +bitflags! { +    /// Zero means addresses that are passed for the purpose of being +    /// dereferenced by the kernel must be untagged. +    #[repr(transparent)] +    #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +    pub struct TaggedAddressMode: u32 { +        /// Addresses that are passed for the purpose of being dereferenced by +        /// the kernel may be tagged. +        const ENABLED = 1_u32 << 0; +        /// Synchronous tag check fault mode. +        const TCF_SYNC = 1_u32 << 1; +        /// Asynchronous tag check fault mode. +        const TCF_ASYNC = 1_u32 << 2; + +        /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> +        const _ = !0; +    } +} + +/// Get the current tagged address mode for the calling thread. +/// +/// # References +///  - [`prctl(PR_GET_TAGGED_ADDR_CTRL,...)`] +/// +/// [`prctl(PR_GET_TAGGED_ADDR_CTRL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub fn current_tagged_address_mode() -> io::Result<(Option<TaggedAddressMode>, u32)> { +    let r = unsafe { prctl_1arg(PR_GET_TAGGED_ADDR_CTRL)? } as c_uint; +    let mode = r & 0b111_u32; +    let mte_tag = (r & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT; +    Ok((TaggedAddressMode::from_bits(mode), mte_tag)) +} + +const PR_SET_TAGGED_ADDR_CTRL: c_int = 55; + +/// Controls support for passing tagged user-space addresses to the kernel. +/// +/// # References +///  - [`prctl(PR_SET_TAGGED_ADDR_CTRL,...)`] +/// +/// # Safety +/// +/// Please ensure the conditions necessary to safely call this function, as +/// detailed in the references above. +/// +/// [`prctl(PR_SET_TAGGED_ADDR_CTRL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub unsafe fn set_current_tagged_address_mode( +    mode: Option<TaggedAddressMode>, +    mte_tag: u32, +) -> io::Result<()> { +    let config = mode.as_ref().map_or(0_u32, TaggedAddressMode::bits) +        | ((mte_tag << PR_MTE_TAG_SHIFT) & PR_MTE_TAG_MASK); +    prctl_2args(PR_SET_TAGGED_ADDR_CTRL, config as usize as *mut _).map(|_r| ()) +} + +// +// PR_SET_SYSCALL_USER_DISPATCH +// + +const PR_SET_SYSCALL_USER_DISPATCH: c_int = 59; + +const PR_SYS_DISPATCH_OFF: usize = 0; + +/// Disable Syscall User Dispatch mechanism. +/// +/// # References +///  - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,...)`] +/// +/// # Safety +/// +/// Please ensure the conditions necessary to safely call this function, as +/// detailed in the references above. +/// +/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub unsafe fn disable_syscall_user_dispatch() -> io::Result<()> { +    prctl_2args(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF as *mut _).map(|_r| ()) +} + +const PR_SYS_DISPATCH_ON: usize = 1; + +/// Allow system calls to be executed. +const SYSCALL_DISPATCH_FILTER_ALLOW: u8 = 0; +/// Block system calls from executing. +const SYSCALL_DISPATCH_FILTER_BLOCK: u8 = 1; + +/// Value of the fast switch flag controlling system calls user dispatch +/// mechanism without the need to issue a syscall. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(u8)] +pub enum SysCallUserDispatchFastSwitch { +    /// System calls are allowed to execute. +    Allow = SYSCALL_DISPATCH_FILTER_ALLOW, +    /// System calls are blocked from executing. +    Block = SYSCALL_DISPATCH_FILTER_BLOCK, +} + +impl TryFrom<u8> for SysCallUserDispatchFastSwitch { +    type Error = io::Errno; + +    fn try_from(value: u8) -> Result<Self, Self::Error> { +        match value { +            SYSCALL_DISPATCH_FILTER_ALLOW => Ok(Self::Allow), +            SYSCALL_DISPATCH_FILTER_BLOCK => Ok(Self::Block), +            _ => Err(io::Errno::RANGE), +        } +    } +} + +/// Enable Syscall User Dispatch mechanism. +/// +/// # References +///  - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,...)`] +/// +/// # Safety +/// +/// Please ensure the conditions necessary to safely call this function, as +/// detailed in the references above. +/// +/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html +#[inline] +pub unsafe fn enable_syscall_user_dispatch( +    always_allowed_region: &[u8], +    fast_switch_flag: &AtomicU8, +) -> io::Result<()> { +    syscalls::prctl( +        PR_SET_SYSCALL_USER_DISPATCH, +        PR_SYS_DISPATCH_ON as *mut _, +        always_allowed_region.as_ptr() as *mut _, +        always_allowed_region.len() as *mut _, +        as_ptr(fast_switch_flag) as *mut _, +    ) +    .map(|_r| ()) +} + +// +// PR_SCHED_CORE +// + +const PR_SCHED_CORE: c_int = 62; + +const PR_SCHED_CORE_GET: usize = 0; + +const PR_SCHED_CORE_SCOPE_THREAD: u32 = 0; +const PR_SCHED_CORE_SCOPE_THREAD_GROUP: u32 = 1; +const PR_SCHED_CORE_SCOPE_PROCESS_GROUP: u32 = 2; + +/// `PR_SCHED_CORE_SCOPE_*`. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(u32)] +pub enum CoreSchedulingScope { +    /// Operation will be performed for the thread. +    Thread = PR_SCHED_CORE_SCOPE_THREAD, +    /// Operation will be performed for all tasks in the task group of the +    /// process. +    ThreadGroup = PR_SCHED_CORE_SCOPE_THREAD_GROUP, +    /// Operation will be performed for all processes in the process group. +    ProcessGroup = PR_SCHED_CORE_SCOPE_PROCESS_GROUP, +} + +impl TryFrom<u32> for CoreSchedulingScope { +    type Error = io::Errno; + +    fn try_from(value: u32) -> Result<Self, Self::Error> { +        match value { +            PR_SCHED_CORE_SCOPE_THREAD => Ok(Self::Thread), +            PR_SCHED_CORE_SCOPE_THREAD_GROUP => Ok(Self::ThreadGroup), +            PR_SCHED_CORE_SCOPE_PROCESS_GROUP => Ok(Self::ProcessGroup), +            _ => Err(io::Errno::RANGE), +        } +    } +} + +/// Get core scheduling cookie of a process. +/// +/// # References +///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,...)`] +/// +/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html +#[inline] +pub fn core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<u64> { +    let mut value: MaybeUninit<u64> = MaybeUninit::uninit(); +    unsafe { +        syscalls::prctl( +            PR_SCHED_CORE, +            PR_SCHED_CORE_GET as *mut _, +            pid.as_raw_nonzero().get() as usize as *mut _, +            scope as usize as *mut _, +            value.as_mut_ptr().cast(), +        )?; +        Ok(value.assume_init()) +    } +} + +const PR_SCHED_CORE_CREATE: usize = 1; + +/// Create unique core scheduling cookie. +/// +/// # References +///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,...)`] +/// +/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html +#[inline] +pub fn create_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> { +    unsafe { +        syscalls::prctl( +            PR_SCHED_CORE, +            PR_SCHED_CORE_CREATE as *mut _, +            pid.as_raw_nonzero().get() as usize as *mut _, +            scope as usize as *mut _, +            ptr::null_mut(), +        ) +        .map(|_r| ()) +    } +} + +const PR_SCHED_CORE_SHARE_TO: usize = 2; + +/// Push core scheduling cookie to a process. +/// +/// # References +///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,...)`] +/// +/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html +#[inline] +pub fn push_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> { +    unsafe { +        syscalls::prctl( +            PR_SCHED_CORE, +            PR_SCHED_CORE_SHARE_TO as *mut _, +            pid.as_raw_nonzero().get() as usize as *mut _, +            scope as usize as *mut _, +            ptr::null_mut(), +        ) +        .map(|_r| ()) +    } +} + +const PR_SCHED_CORE_SHARE_FROM: usize = 3; + +/// Pull core scheduling cookie from a process. +/// +/// # References +///  - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,...)`] +/// +/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html +#[inline] +pub fn pull_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> { +    unsafe { +        syscalls::prctl( +            PR_SCHED_CORE, +            PR_SCHED_CORE_SHARE_FROM as *mut _, +            pid.as_raw_nonzero().get() as usize as *mut _, +            scope as usize as *mut _, +            ptr::null_mut(), +        ) +        .map(|_r| ()) +    } +} diff --git a/vendor/rustix/src/thread/setns.rs b/vendor/rustix/src/thread/setns.rs new file mode 100644 index 0000000..ef61d11 --- /dev/null +++ b/vendor/rustix/src/thread/setns.rs @@ -0,0 +1,137 @@ +use bitflags::bitflags; +use linux_raw_sys::general::{ +    CLONE_FILES, CLONE_FS, CLONE_NEWCGROUP, CLONE_NEWIPC, CLONE_NEWNET, CLONE_NEWNS, CLONE_NEWPID, +    CLONE_NEWTIME, CLONE_NEWUSER, CLONE_NEWUTS, CLONE_SYSVSEM, +}; + +use crate::backend::c::c_int; +use crate::backend::thread::syscalls; +use crate::fd::BorrowedFd; +use crate::io; + +bitflags! { +    /// Thread name space type. +    #[repr(transparent)] +    #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +    pub struct ThreadNameSpaceType: u32 { +        /// Time name space. +        const TIME = CLONE_NEWTIME; +        /// Mount name space. +        const MOUNT = CLONE_NEWNS; +        /// Control group (CGroup) name space. +        const CONTROL_GROUP = CLONE_NEWCGROUP; +        /// `Host name` and `NIS domain name` (UTS) name space. +        const HOST_NAME_AND_NIS_DOMAIN_NAME = CLONE_NEWUTS; +        /// Inter-process communication (IPC) name space. +        const INTER_PROCESS_COMMUNICATION = CLONE_NEWIPC; +        /// User name space. +        const USER = CLONE_NEWUSER; +        /// Process ID name space. +        const PROCESS_ID = CLONE_NEWPID; +        /// Network name space. +        const NETWORK = CLONE_NEWNET; + +        /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> +        const _ = !0; +    } +} + +/// Type of name space referred to by a link. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(u32)] +pub enum LinkNameSpaceType { +    /// Time name space. +    Time = CLONE_NEWTIME, +    /// Mount name space. +    Mount = CLONE_NEWNS, +    /// Control group (CGroup) name space. +    ControlGroup = CLONE_NEWCGROUP, +    /// `Host name` and `NIS domain name` (UTS) name space. +    HostNameAndNISDomainName = CLONE_NEWUTS, +    /// Inter-process communication (IPC) name space. +    InterProcessCommunication = CLONE_NEWIPC, +    /// User name space. +    User = CLONE_NEWUSER, +    /// Process ID name space. +    ProcessID = CLONE_NEWPID, +    /// Network name space. +    Network = CLONE_NEWNET, +} + +bitflags! { +    /// `CLONE_*` for use with [`unshare`]. +    #[repr(transparent)] +    #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +    pub struct UnshareFlags: u32 { +        /// `CLONE_FILES`. +        const FILES = CLONE_FILES; +        /// `CLONE_FS`. +        const FS = CLONE_FS; +        /// `CLONE_NEWCGROUP`. +        const NEWCGROUP = CLONE_NEWCGROUP; +        /// `CLONE_NEWIPC`. +        const NEWIPC = CLONE_NEWIPC; +        /// `CLONE_NEWNET`. +        const NEWNET = CLONE_NEWNET; +        /// `CLONE_NEWNS`. +        const NEWNS = CLONE_NEWNS; +        /// `CLONE_NEWPID`. +        const NEWPID = CLONE_NEWPID; +        /// `CLONE_NEWTIME`. +        const NEWTIME = CLONE_NEWTIME; +        /// `CLONE_NEWUSER`. +        const NEWUSER = CLONE_NEWUSER; +        /// `CLONE_NEWUTS` +        const NEWUTS = CLONE_NEWUTS; +        /// `CLONE_SYSVSEM`. +        const SYSVSEM = CLONE_SYSVSEM; + +        /// <https://docs.rs/bitflags/*/bitflags/#externally-defined-flags> +        const _ = !0; +    } +} + +/// Reassociate the calling thread with the namespace associated with link +/// referred to by `fd`. +/// +/// `fd` must refer to one of the magic links in a `/proc/[pid]/ns/` directory, +/// or a bind mount to such a link. +/// +/// # References +///  - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/setns.2.html +pub fn move_into_link_name_space( +    fd: BorrowedFd<'_>, +    allowed_type: Option<LinkNameSpaceType>, +) -> io::Result<()> { +    let allowed_type = allowed_type.map_or(0, |t| t as c_int); +    syscalls::setns(fd, allowed_type).map(|_r| ()) +} + +/// Atomically move the calling thread into one or more of the same namespaces +/// as the thread referred to by `fd`. +/// +/// `fd` must refer to a thread ID. See: `pidfd_open` and `clone`. +/// +/// # References +///  - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/setns.2.html +pub fn move_into_thread_name_spaces( +    fd: BorrowedFd<'_>, +    allowed_types: ThreadNameSpaceType, +) -> io::Result<()> { +    syscalls::setns(fd, allowed_types.bits() as c_int).map(|_r| ()) +} + +/// `unshare(flags)`—Disassociate parts of the current thread's execution +/// context with other threads. +/// +/// # References +///  - [Linux] +/// +/// [Linux]: https://man7.org/linux/man-pages/man2/unshare.2.html +pub fn unshare(flags: UnshareFlags) -> io::Result<()> { +    syscalls::unshare(flags) +}  | 
