| field | value | date |
|---|---|---|
| author | Valentin Popov <valentin@popov.link> | 2024-01-08 00:21:28 +0300 |
| committer | Valentin Popov <valentin@popov.link> | 2024-01-08 00:21:28 +0300 |
| commit | 1b6a04ca5504955c571d1c97504fb45ea0befee4 (patch) | |
| tree | 7579f518b23313e8a9748a88ab6173d5e030b227 /vendor/portable-atomic/src/imp/fallback | |
| parent | 5ecd8cf2cba827454317368b68571df0d13d7842 (diff) | |
| download | fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.tar.xz fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.zip | |
Initial vendor packages
Signed-off-by: Valentin Popov <valentin@popov.link>
Diffstat (limited to 'vendor/portable-atomic/src/imp/fallback')
| mode | path | lines |
|---|---|---|
| -rw-r--r-- | vendor/portable-atomic/src/imp/fallback/mod.rs | 412 |
| -rw-r--r-- | vendor/portable-atomic/src/imp/fallback/outline_atomics.rs | 172 |
| -rw-r--r-- | vendor/portable-atomic/src/imp/fallback/seq_lock.rs | 147 |
| -rw-r--r-- | vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs | 180 |
| -rw-r--r-- | vendor/portable-atomic/src/imp/fallback/utils.rs | 141 |
5 files changed, 1052 insertions, 0 deletions
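
The vendored files below implement portable-atomic's lock-based fallback, used on targets without native double-width (and, on some targets, 64-bit) atomic instructions. As context only — not part of this commit — the following sketch shows how downstream code typically reaches this fallback; the `portable_atomic` dependency and the example values are assumptions for illustration.

```rust
// Illustrative sketch: code depending on portable-atomic transparently uses
// the seqlock fallback vendored here when the target lacks native
// double-width atomics (e.g., no cmpxchg16b on x86_64).
use portable_atomic::{AtomicU128, Ordering};

static COUNTER: AtomicU128 = AtomicU128::new(0);

fn main() {
    COUNTER.store(1u128 << 64, Ordering::SeqCst);
    let prev = COUNTER.fetch_add(1, Ordering::SeqCst);
    assert_eq!(prev, 1u128 << 64);

    // Reports false whenever the lock-based fallback implementation is in use.
    println!("lock-free: {}", AtomicU128::is_lock_free());
}
```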
| diff --git a/vendor/portable-atomic/src/imp/fallback/mod.rs b/vendor/portable-atomic/src/imp/fallback/mod.rs new file mode 100644 index 0000000..283c98c --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/mod.rs @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Fallback implementation using global locks. +// +// This implementation uses seqlock for global locks. +// +// This is basically based on global locks in crossbeam-utils's `AtomicCell`, +// but seqlock is implemented in a way that does not depend on UB +// (see comments in optimistic_read method in atomic! macro for details). +// +// Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic +// type and the value type must be the same. + +#![cfg_attr( +    any( +        all( +            target_arch = "x86_64", +            not(portable_atomic_no_cmpxchg16b_target_feature), +            not(portable_atomic_no_outline_atomics), +            not(any(target_env = "sgx", miri)), +        ), +        all( +            target_arch = "powerpc64", +            feature = "fallback", +            not(portable_atomic_no_outline_atomics), +            portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default +            any( +                all( +                    target_os = "linux", +                    any( +                        target_env = "gnu", +                        all( +                            any(target_env = "musl", target_env = "ohos"), +                            not(target_feature = "crt-static"), +                        ), +                        portable_atomic_outline_atomics, +                    ), +                ), +                target_os = "android", +                target_os = "freebsd", +            ), +            not(any(miri, portable_atomic_sanitize_thread)), +        ), +        all( +            target_arch = "arm", +            not(portable_atomic_no_asm), +            any(target_os = "linux", target_os = "android"), +            not(portable_atomic_no_outline_atomics), +        ), +    ), +    allow(dead_code) +)] + +#[macro_use] +pub(crate) mod utils; + +// Use "wide" sequence lock if the pointer width <= 32 for preventing its counter against wrap +// around. +// +// In narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be +// vulnerable to wrap around. But it's mostly okay, since in such a primitive hardware, the +// counter will not be increased that fast. +// +// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI, +// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast, +// so use it to implement normal sequence lock. +cfg_has_fast_atomic_64! { +    mod seq_lock; +} +cfg_no_fast_atomic_64! { +    #[path = "seq_lock_wide.rs"] +    mod seq_lock; +} + +use core::{cell::UnsafeCell, mem, sync::atomic::Ordering}; + +use seq_lock::{SeqLock, SeqLockWriteGuard}; +use utils::CachePadded; + +// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI, +// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast, +// so use it to reduce chunks of byte-wise atomic memcpy. +use seq_lock::{AtomicChunk, Chunk}; + +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016. 
+#[inline] +#[must_use] +fn lock(addr: usize) -> &'static SeqLock { +    // The number of locks is a prime number because we want to make sure `addr % LEN` gets +    // dispersed across all locks. +    // +    // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded, +    // so the actual concurrency level will be smaller. +    const LEN: usize = 67; +    #[allow(clippy::declare_interior_mutable_const)] +    const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new()); +    static LOCKS: [CachePadded<SeqLock>; LEN] = [ +        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, +        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, +        L, L, L, L, L, L, L, +    ]; + +    // If the modulus is a constant number, the compiler will use crazy math to transform this into +    // a sequence of cheap arithmetic operations rather than using the slow modulo instruction. +    &LOCKS[addr % LEN] +} + +macro_rules! atomic { +    ($atomic_type:ident, $int_type:ident, $align:literal) => { +        #[repr(C, align($align))] +        pub(crate) struct $atomic_type { +            v: UnsafeCell<$int_type>, +        } + +        impl $atomic_type { +            const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>(); + +            #[inline] +            unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] { +                static_assert!($atomic_type::LEN > 1); +                static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0); + +                // SAFETY: the caller must uphold the safety contract for `chunks`. +                unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) } +            } + +            #[inline] +            fn optimistic_read(&self) -> $int_type { +                // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M +                let mut dst: [Chunk; Self::LEN] = [0; Self::LEN]; +                // SAFETY: +                // - There are no threads that perform non-atomic concurrent write operations. +                // - There is no writer that updates the value using atomic operations of different granularity. +                // +                // If the atomic operation is not used here, it will cause a data race +                // when `write` performs concurrent write operation. +                // Such a data race is sometimes considered virtually unproblematic +                // in SeqLock implementations: +                // +                // - https://github.com/Amanieu/seqlock/issues/2 +                // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116 +                // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F +                // +                // However, in our use case, the implementation that loads/stores value as +                // chunks of usize is enough fast and sound, so we use that implementation. 
+                // +                // See also atomic-memcpy crate, a generic implementation of this pattern: +                // https://github.com/taiki-e/atomic-memcpy +                let chunks = unsafe { self.chunks() }; +                for i in 0..Self::LEN { +                    dst[i] = chunks[i].load(Ordering::Relaxed); +                } +                // SAFETY: integers are plain old data types so we can always transmute to them. +                unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) } +            } + +            #[inline] +            fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type { +                // This calls optimistic_read that can return teared value, but the resulting value +                // is guaranteed not to be teared because we hold the lock to write. +                self.optimistic_read() +            } + +            #[inline] +            fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) { +                // SAFETY: integers are plain old data types so we can always transmute them to arrays of integers. +                let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) }; +                // SAFETY: +                // - The guard guarantees that we hold the lock to write. +                // - There are no threads that perform non-atomic concurrent read or write operations. +                // +                // See optimistic_read for the reason that atomic operations are used here. +                let chunks = unsafe { self.chunks() }; +                for i in 0..Self::LEN { +                    chunks[i].store(val[i], Ordering::Relaxed); +                } +            } +        } + +        // Send is implicitly implemented. +        // SAFETY: any data races are prevented by the lock and atomic operation. +        unsafe impl Sync for $atomic_type {} + +        impl_default_no_fetch_ops!($atomic_type, $int_type); +        impl_default_bit_opts!($atomic_type, $int_type); +        impl $atomic_type { +            #[inline] +            pub(crate) const fn new(v: $int_type) -> Self { +                Self { v: UnsafeCell::new(v) } +            } + +            #[inline] +            pub(crate) fn is_lock_free() -> bool { +                Self::is_always_lock_free() +            } +            #[inline] +            pub(crate) const fn is_always_lock_free() -> bool { +                false +            } + +            #[inline] +            pub(crate) fn get_mut(&mut self) -> &mut $int_type { +                // SAFETY: the mutable reference guarantees unique ownership. +                // (UnsafeCell::get_mut requires Rust 1.50) +                unsafe { &mut *self.v.get() } +            } + +            #[inline] +            pub(crate) fn into_inner(self) -> $int_type { +                self.v.into_inner() +            } + +            #[inline] +            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] +            pub(crate) fn load(&self, order: Ordering) -> $int_type { +                crate::utils::assert_load_ordering(order); +                let lock = lock(self.v.get() as usize); + +                // Try doing an optimistic read first. 
+                if let Some(stamp) = lock.optimistic_read() { +                    let val = self.optimistic_read(); + +                    if lock.validate_read(stamp) { +                        return val; +                    } +                } + +                // Grab a regular write lock so that writers don't starve this load. +                let guard = lock.write(); +                let val = self.read(&guard); +                // The value hasn't been changed. Drop the guard without incrementing the stamp. +                guard.abort(); +                val +            } + +            #[inline] +            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] +            pub(crate) fn store(&self, val: $int_type, order: Ordering) { +                crate::utils::assert_store_ordering(order); +                let guard = lock(self.v.get() as usize).write(); +                self.write(val, &guard) +            } + +            #[inline] +            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(val, &guard); +                prev +            } + +            #[inline] +            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] +            pub(crate) fn compare_exchange( +                &self, +                current: $int_type, +                new: $int_type, +                success: Ordering, +                failure: Ordering, +            ) -> Result<$int_type, $int_type> { +                crate::utils::assert_compare_exchange_ordering(success, failure); +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                if prev == current { +                    self.write(new, &guard); +                    Ok(prev) +                } else { +                    // The value hasn't been changed. Drop the guard without incrementing the stamp. 
+                    guard.abort(); +                    Err(prev) +                } +            } + +            #[inline] +            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] +            pub(crate) fn compare_exchange_weak( +                &self, +                current: $int_type, +                new: $int_type, +                success: Ordering, +                failure: Ordering, +            ) -> Result<$int_type, $int_type> { +                self.compare_exchange(current, new, success, failure) +            } + +            #[inline] +            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(prev.wrapping_add(val), &guard); +                prev +            } + +            #[inline] +            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(prev.wrapping_sub(val), &guard); +                prev +            } + +            #[inline] +            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(prev & val, &guard); +                prev +            } + +            #[inline] +            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(!(prev & val), &guard); +                prev +            } + +            #[inline] +            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(prev | val, &guard); +                prev +            } + +            #[inline] +            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(prev ^ val, &guard); +                prev +            } + +            #[inline] +            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(core::cmp::max(prev, val), &guard); +                prev +            } + +            #[inline] +            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(core::cmp::min(prev, val), &guard); +                prev +            } + +            #[inline] +            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(!prev, &guard); +                prev +            } +            #[inline] +            pub(crate) fn not(&self, order: Ordering) { +        
        self.fetch_not(order); +            } + +            #[inline] +            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { +                let guard = lock(self.v.get() as usize).write(); +                let prev = self.read(&guard); +                self.write(prev.wrapping_neg(), &guard); +                prev +            } +            #[inline] +            pub(crate) fn neg(&self, order: Ordering) { +                self.fetch_neg(order); +            } + +            #[inline] +            pub(crate) const fn as_ptr(&self) -> *mut $int_type { +                self.v.get() +            } +        } +    }; +} + +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))] +#[cfg_attr( +    not(portable_atomic_no_cfg_target_has_atomic), +    cfg(any(test, not(target_has_atomic = "64"))) +)] +cfg_no_fast_atomic_64! { +    atomic!(AtomicI64, i64, 8); +    atomic!(AtomicU64, u64, 8); +} + +atomic!(AtomicI128, i128, 16); +atomic!(AtomicU128, u128, 16); + +#[cfg(test)] +mod tests { +    use super::*; + +    cfg_no_fast_atomic_64! { +        test_atomic_int!(i64); +        test_atomic_int!(u64); +    } +    test_atomic_int!(i128); +    test_atomic_int!(u128); + +    // load/store/swap implementation is not affected by signedness, so it is +    // enough to test only unsigned types. +    cfg_no_fast_atomic_64! { +        stress_test!(u64); +    } +    stress_test!(u128); +} diff --git a/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs b/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs new file mode 100644 index 0000000..895b60c --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Helper for outline-atomics. +// +// On architectures where DW atomics are not supported on older CPUs, we use +// fallback implementation when DW atomic instructions are not supported and +// outline-atomics is enabled. +// +// This module provides helpers to implement them. + +use core::sync::atomic::Ordering; + +#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))] +pub(crate) type Udw = u128; +#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))] +pub(crate) type AtomicUdw = super::super::fallback::AtomicU128; +#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))] +pub(crate) type AtomicIdw = super::super::fallback::AtomicI128; + +#[cfg(target_arch = "arm")] +pub(crate) type Udw = u64; +#[cfg(target_arch = "arm")] +pub(crate) type AtomicUdw = super::super::fallback::AtomicU64; +#[cfg(target_arch = "arm")] +pub(crate) type AtomicIdw = super::super::fallback::AtomicI64; + +// Asserts that the function is called in the correct context. +macro_rules! debug_assert_outline_atomics { +    () => { +        #[cfg(target_arch = "x86_64")] +        { +            debug_assert!(!super::detect::detect().has_cmpxchg16b()); +        } +        #[cfg(target_arch = "powerpc64")] +        { +            debug_assert!(!super::detect::detect().has_quadword_atomics()); +        } +        #[cfg(target_arch = "arm")] +        { +            debug_assert!(!super::has_kuser_cmpxchg64()); +        } +    }; +} + +#[cold] +pub(crate) unsafe fn atomic_load(src: *mut Udw, order: Ordering) -> Udw { +    debug_assert_outline_atomics!(); +    #[allow(clippy::cast_ptr_alignment)] +    // SAFETY: the caller must uphold the safety contract. +    unsafe { +        (*(src as *const AtomicUdw)).load(order) +    } +} +fn_alias! 
{ +    #[cold] +    pub(crate) unsafe fn(src: *mut Udw) -> Udw; +    // fallback's atomic load has at least acquire semantics. +    #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] +    atomic_load_non_seqcst = atomic_load(Ordering::Acquire); +    atomic_load_seqcst = atomic_load(Ordering::SeqCst); +} + +#[cold] +pub(crate) unsafe fn atomic_store(dst: *mut Udw, val: Udw, order: Ordering) { +    debug_assert_outline_atomics!(); +    #[allow(clippy::cast_ptr_alignment)] +    // SAFETY: the caller must uphold the safety contract. +    unsafe { +        (*(dst as *const AtomicUdw)).store(val, order); +    } +} +fn_alias! { +    #[cold] +    pub(crate) unsafe fn(dst: *mut Udw, val: Udw); +    // fallback's atomic store has at least release semantics. +    #[cfg(not(target_arch = "arm"))] +    atomic_store_non_seqcst = atomic_store(Ordering::Release); +    atomic_store_seqcst = atomic_store(Ordering::SeqCst); +} + +#[cold] +pub(crate) unsafe fn atomic_compare_exchange( +    dst: *mut Udw, +    old: Udw, +    new: Udw, +    success: Ordering, +    failure: Ordering, +) -> (Udw, bool) { +    debug_assert_outline_atomics!(); +    #[allow(clippy::cast_ptr_alignment)] +    // SAFETY: the caller must uphold the safety contract. +    unsafe { +        match (*(dst as *const AtomicUdw)).compare_exchange(old, new, success, failure) { +            Ok(v) => (v, true), +            Err(v) => (v, false), +        } +    } +} +fn_alias! { +    #[cold] +    pub(crate) unsafe fn(dst: *mut Udw, old: Udw, new: Udw) -> (Udw, bool); +    // fallback's atomic CAS has at least AcqRel semantics. +    #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] +    atomic_compare_exchange_non_seqcst +        = atomic_compare_exchange(Ordering::AcqRel, Ordering::Acquire); +    atomic_compare_exchange_seqcst +        = atomic_compare_exchange(Ordering::SeqCst, Ordering::SeqCst); +} + +macro_rules! atomic_rmw_3 { +    ( +        $name:ident($atomic_type:ident::$method_name:ident), +        $non_seqcst_alias:ident, $seqcst_alias:ident +    ) => { +        #[cold] +        pub(crate) unsafe fn $name(dst: *mut Udw, val: Udw, order: Ordering) -> Udw { +            debug_assert_outline_atomics!(); +            #[allow(clippy::cast_ptr_alignment)] +            // SAFETY: the caller must uphold the safety contract. +            unsafe { +                (*(dst as *const $atomic_type)).$method_name(val as _, order) as Udw +            } +        } +        fn_alias! { +            #[cold] +            pub(crate) unsafe fn(dst: *mut Udw, val: Udw) -> Udw; +            // fallback's atomic RMW has at least AcqRel semantics. +            #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] +            $non_seqcst_alias = $name(Ordering::AcqRel); +            $seqcst_alias = $name(Ordering::SeqCst); +        } +    }; +} +macro_rules! atomic_rmw_2 { +    ( +        $name:ident($atomic_type:ident::$method_name:ident), +        $non_seqcst_alias:ident, $seqcst_alias:ident +    ) => { +        #[cold] +        pub(crate) unsafe fn $name(dst: *mut Udw, order: Ordering) -> Udw { +            debug_assert_outline_atomics!(); +            #[allow(clippy::cast_ptr_alignment)] +            // SAFETY: the caller must uphold the safety contract. +            unsafe { +                (*(dst as *const $atomic_type)).$method_name(order) as Udw +            } +        } +        fn_alias! 
{ +            #[cold] +            pub(crate) unsafe fn(dst: *mut Udw) -> Udw; +            // fallback's atomic RMW has at least AcqRel semantics. +            #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] +            $non_seqcst_alias = $name(Ordering::AcqRel); +            $seqcst_alias = $name(Ordering::SeqCst); +        } +    }; +} + +atomic_rmw_3!(atomic_swap(AtomicUdw::swap), atomic_swap_non_seqcst, atomic_swap_seqcst); +atomic_rmw_3!(atomic_add(AtomicUdw::fetch_add), atomic_add_non_seqcst, atomic_add_seqcst); +atomic_rmw_3!(atomic_sub(AtomicUdw::fetch_sub), atomic_sub_non_seqcst, atomic_sub_seqcst); +atomic_rmw_3!(atomic_and(AtomicUdw::fetch_and), atomic_and_non_seqcst, atomic_and_seqcst); +atomic_rmw_3!(atomic_nand(AtomicUdw::fetch_nand), atomic_nand_non_seqcst, atomic_nand_seqcst); +atomic_rmw_3!(atomic_or(AtomicUdw::fetch_or), atomic_or_non_seqcst, atomic_or_seqcst); +atomic_rmw_3!(atomic_xor(AtomicUdw::fetch_xor), atomic_xor_non_seqcst, atomic_xor_seqcst); +atomic_rmw_3!(atomic_max(AtomicIdw::fetch_max), atomic_max_non_seqcst, atomic_max_seqcst); +atomic_rmw_3!(atomic_umax(AtomicUdw::fetch_max), atomic_umax_non_seqcst, atomic_umax_seqcst); +atomic_rmw_3!(atomic_min(AtomicIdw::fetch_min), atomic_min_non_seqcst, atomic_min_seqcst); +atomic_rmw_3!(atomic_umin(AtomicUdw::fetch_min), atomic_umin_non_seqcst, atomic_umin_seqcst); + +atomic_rmw_2!(atomic_not(AtomicUdw::fetch_not), atomic_not_non_seqcst, atomic_not_seqcst); +atomic_rmw_2!(atomic_neg(AtomicUdw::fetch_neg), atomic_neg_non_seqcst, atomic_neg_seqcst); diff --git a/vendor/portable-atomic/src/imp/fallback/seq_lock.rs b/vendor/portable-atomic/src/imp/fallback/seq_lock.rs new file mode 100644 index 0000000..fb6803f --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/seq_lock.rs @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock.rs. + +use core::{ +    mem::ManuallyDrop, +    sync::atomic::{self, Ordering}, +}; + +use super::utils::Backoff; + +// See mod.rs for details. +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))] +pub(super) use core::sync::atomic::AtomicU64 as AtomicStamp; +#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))] +pub(super) use core::sync::atomic::AtomicUsize as AtomicStamp; +#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))] +pub(super) type Stamp = usize; +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))] +pub(super) type Stamp = u64; + +// See mod.rs for details. +pub(super) type AtomicChunk = AtomicStamp; +pub(super) type Chunk = Stamp; + +/// A simple stamped lock. +pub(super) struct SeqLock { +    /// The current state of the lock. +    /// +    /// All bits except the least significant one hold the current stamp. When locked, the state +    /// equals 1 and doesn't contain a valid stamp. +    state: AtomicStamp, +} + +impl SeqLock { +    #[inline] +    pub(super) const fn new() -> Self { +        Self { state: AtomicStamp::new(0) } +    } + +    /// If not locked, returns the current stamp. +    /// +    /// This method should be called before optimistic reads. 
+    #[inline] +    pub(super) fn optimistic_read(&self) -> Option<Stamp> { +        let state = self.state.load(Ordering::Acquire); +        if state == 1 { +            None +        } else { +            Some(state) +        } +    } + +    /// Returns `true` if the current stamp is equal to `stamp`. +    /// +    /// This method should be called after optimistic reads to check whether they are valid. The +    /// argument `stamp` should correspond to the one returned by method `optimistic_read`. +    #[inline] +    pub(super) fn validate_read(&self, stamp: Stamp) -> bool { +        atomic::fence(Ordering::Acquire); +        self.state.load(Ordering::Relaxed) == stamp +    } + +    /// Grabs the lock for writing. +    #[inline] +    pub(super) fn write(&self) -> SeqLockWriteGuard<'_> { +        let mut backoff = Backoff::new(); +        loop { +            let previous = self.state.swap(1, Ordering::Acquire); + +            if previous != 1 { +                atomic::fence(Ordering::Release); + +                return SeqLockWriteGuard { lock: self, state: previous }; +            } + +            while self.state.load(Ordering::Relaxed) == 1 { +                backoff.snooze(); +            } +        } +    } +} + +/// An RAII guard that releases the lock and increments the stamp when dropped. +#[must_use] +pub(super) struct SeqLockWriteGuard<'a> { +    /// The parent lock. +    lock: &'a SeqLock, + +    /// The stamp before locking. +    state: Stamp, +} + +impl SeqLockWriteGuard<'_> { +    /// Releases the lock without incrementing the stamp. +    #[inline] +    pub(super) fn abort(self) { +        // We specifically don't want to call drop(), since that's +        // what increments the stamp. +        let this = ManuallyDrop::new(self); + +        // Restore the stamp. +        // +        // Release ordering for synchronizing with `optimistic_read`. +        this.lock.state.store(this.state, Ordering::Release); +    } +} + +impl Drop for SeqLockWriteGuard<'_> { +    #[inline] +    fn drop(&mut self) { +        // Release the lock and increment the stamp. +        // +        // Release ordering for synchronizing with `optimistic_read`. +        self.lock.state.store(self.state.wrapping_add(2), Ordering::Release); +    } +} + +#[cfg(test)] +mod tests { +    use super::SeqLock; + +    #[test] +    fn smoke() { +        let lock = SeqLock::new(); +        let before = lock.optimistic_read().unwrap(); +        assert!(lock.validate_read(before)); +        { +            let _guard = lock.write(); +        } +        assert!(!lock.validate_read(before)); +        let after = lock.optimistic_read().unwrap(); +        assert_ne!(before, after); +    } + +    #[test] +    fn test_abort() { +        let lock = SeqLock::new(); +        let before = lock.optimistic_read().unwrap(); +        { +            let guard = lock.write(); +            guard.abort(); +        } +        let after = lock.optimistic_read().unwrap(); +        assert_eq!(before, after, "aborted write does not update the stamp"); +    } +} diff --git a/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs b/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs new file mode 100644 index 0000000..e12996f --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock_wide.rs. 
+ +use core::{ +    mem::ManuallyDrop, +    sync::atomic::{self, AtomicUsize, Ordering}, +}; + +use super::utils::Backoff; + +// See mod.rs for details. +pub(super) type AtomicChunk = AtomicUsize; +pub(super) type Chunk = usize; + +/// A simple stamped lock. +/// +/// The state is represented as two `AtomicUsize`: `state_hi` for high bits and `state_lo` for low +/// bits. +pub(super) struct SeqLock { +    /// The high bits of the current state of the lock. +    state_hi: AtomicUsize, + +    /// The low bits of the current state of the lock. +    /// +    /// All bits except the least significant one hold the current stamp. When locked, the state_lo +    /// equals 1 and doesn't contain a valid stamp. +    state_lo: AtomicUsize, +} + +impl SeqLock { +    #[inline] +    pub(super) const fn new() -> Self { +        Self { state_hi: AtomicUsize::new(0), state_lo: AtomicUsize::new(0) } +    } + +    /// If not locked, returns the current stamp. +    /// +    /// This method should be called before optimistic reads. +    #[inline] +    pub(super) fn optimistic_read(&self) -> Option<(usize, usize)> { +        // The acquire loads from `state_hi` and `state_lo` synchronize with the release stores in +        // `SeqLockWriteGuard::drop` and `SeqLockWriteGuard::abort`. +        // +        // As a consequence, we can make sure that (1) all writes within the era of `state_hi - 1` +        // happens before now; and therefore, (2) if `state_lo` is even, all writes within the +        // critical section of (`state_hi`, `state_lo`) happens before now. +        let state_hi = self.state_hi.load(Ordering::Acquire); +        let state_lo = self.state_lo.load(Ordering::Acquire); +        if state_lo == 1 { +            None +        } else { +            Some((state_hi, state_lo)) +        } +    } + +    /// Returns `true` if the current stamp is equal to `stamp`. +    /// +    /// This method should be called after optimistic reads to check whether they are valid. The +    /// argument `stamp` should correspond to the one returned by method `optimistic_read`. +    #[inline] +    pub(super) fn validate_read(&self, stamp: (usize, usize)) -> bool { +        // Thanks to the fence, if we're noticing any modification to the data at the critical +        // section of `(stamp.0, stamp.1)`, then the critical section's write of 1 to state_lo should be +        // visible. +        atomic::fence(Ordering::Acquire); + +        // So if `state_lo` coincides with `stamp.1`, then either (1) we're noticing no modification +        // to the data after the critical section of `(stamp.0, stamp.1)`, or (2) `state_lo` wrapped +        // around. +        // +        // If (2) is the case, the acquire ordering ensures we see the new value of `state_hi`. +        let state_lo = self.state_lo.load(Ordering::Acquire); + +        // If (2) is the case and `state_hi` coincides with `stamp.0`, then `state_hi` also wrapped +        // around, which we give up to correctly validate the read. +        let state_hi = self.state_hi.load(Ordering::Relaxed); + +        // Except for the case that both `state_hi` and `state_lo` wrapped around, the following +        // condition implies that we're noticing no modification to the data after the critical +        // section of `(stamp.0, stamp.1)`. +        (state_hi, state_lo) == stamp +    } + +    /// Grabs the lock for writing. 
+    #[inline] +    pub(super) fn write(&self) -> SeqLockWriteGuard<'_> { +        let mut backoff = Backoff::new(); +        loop { +            let previous = self.state_lo.swap(1, Ordering::Acquire); + +            if previous != 1 { +                // To synchronize with the acquire fence in `validate_read` via any modification to +                // the data at the critical section of `(state_hi, previous)`. +                atomic::fence(Ordering::Release); + +                return SeqLockWriteGuard { lock: self, state_lo: previous }; +            } + +            while self.state_lo.load(Ordering::Relaxed) == 1 { +                backoff.snooze(); +            } +        } +    } +} + +/// An RAII guard that releases the lock and increments the stamp when dropped. +#[must_use] +pub(super) struct SeqLockWriteGuard<'a> { +    /// The parent lock. +    lock: &'a SeqLock, + +    /// The stamp before locking. +    state_lo: usize, +} + +impl SeqLockWriteGuard<'_> { +    /// Releases the lock without incrementing the stamp. +    #[inline] +    pub(super) fn abort(self) { +        // We specifically don't want to call drop(), since that's +        // what increments the stamp. +        let this = ManuallyDrop::new(self); + +        // Restore the stamp. +        // +        // Release ordering for synchronizing with `optimistic_read`. +        this.lock.state_lo.store(this.state_lo, Ordering::Release); +    } +} + +impl Drop for SeqLockWriteGuard<'_> { +    #[inline] +    fn drop(&mut self) { +        let state_lo = self.state_lo.wrapping_add(2); + +        // Increase the high bits if the low bits wrap around. +        // +        // Release ordering for synchronizing with `optimistic_read`. +        if state_lo == 0 { +            let state_hi = self.lock.state_hi.load(Ordering::Relaxed); +            self.lock.state_hi.store(state_hi.wrapping_add(1), Ordering::Release); +        } + +        // Release the lock and increment the stamp. +        // +        // Release ordering for synchronizing with `optimistic_read`. +        self.lock.state_lo.store(state_lo, Ordering::Release); +    } +} + +#[cfg(test)] +mod tests { +    use super::SeqLock; + +    #[test] +    fn smoke() { +        let lock = SeqLock::new(); +        let before = lock.optimistic_read().unwrap(); +        assert!(lock.validate_read(before)); +        { +            let _guard = lock.write(); +        } +        assert!(!lock.validate_read(before)); +        let after = lock.optimistic_read().unwrap(); +        assert_ne!(before, after); +    } + +    #[test] +    fn test_abort() { +        let lock = SeqLock::new(); +        let before = lock.optimistic_read().unwrap(); +        { +            let guard = lock.write(); +            guard.abort(); +        } +        let after = lock.optimistic_read().unwrap(); +        assert_eq!(before, after, "aborted write does not update the stamp"); +    } +} diff --git a/vendor/portable-atomic/src/imp/fallback/utils.rs b/vendor/portable-atomic/src/imp/fallback/utils.rs new file mode 100644 index 0000000..e8ed0ba --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/utils.rs @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use core::ops; + +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/9384f1eb2b356364e201ad38545e03c837d55f3a/crossbeam-utils/src/cache_padded.rs. +/// Pads and aligns a value to the length of a cache line. 
+// Starting from Intel's Sandy Bridge, spatial prefetcher is now pulling pairs of 64-byte cache +// lines at a time, so we have to align to 128 bytes rather than 64. +// +// Sources: +// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf +// - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107 +// +// ARM's big.LITTLE architecture has asymmetric cores and "big" cores have 128-byte cache line size. +// +// Sources: +// - https://www.mono-project.com/news/2016/09/12/arm64-icache/ +// +// powerpc64 has 128-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/powerpc/include/asm/cache.h#L26 +#[cfg_attr( +    any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64"), +    repr(align(128)) +)] +// arm, mips, mips64, sparc, and hexagon have 32-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12 +#[cfg_attr( +    any( +        target_arch = "arm", +        target_arch = "mips", +        target_arch = "mips32r6", +        target_arch = "mips64", +        target_arch = "mips64r6", +        target_arch = "sparc", +        target_arch = "hexagon", +    ), +    repr(align(32)) +)] +// m68k has 16-byte cache line size. +// +// Sources: +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/m68k/include/asm/cache.h#L9 +#[cfg_attr(target_arch = "m68k", repr(align(16)))] +// s390x has 256-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13 +#[cfg_attr(target_arch = "s390x", repr(align(256)))] +// x86, wasm, riscv, and sparc64 have 64-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/riscv/include/asm/cache.h#L10 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19 +// +// All others are assumed to have 64-byte cache line size. 
+#[cfg_attr( +    not(any( +        target_arch = "x86_64", +        target_arch = "aarch64", +        target_arch = "powerpc64", +        target_arch = "arm", +        target_arch = "mips", +        target_arch = "mips32r6", +        target_arch = "mips64", +        target_arch = "mips64r6", +        target_arch = "sparc", +        target_arch = "hexagon", +        target_arch = "m68k", +        target_arch = "s390x", +    )), +    repr(align(64)) +)] +pub(crate) struct CachePadded<T> { +    value: T, +} + +impl<T> CachePadded<T> { +    #[inline] +    pub(crate) const fn new(value: T) -> Self { +        Self { value } +    } +} + +impl<T> ops::Deref for CachePadded<T> { +    type Target = T; + +    #[inline] +    fn deref(&self) -> &Self::Target { +        &self.value +    } +} + +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/backoff.rs. +// Adjusted to reduce spinning. +/// Performs exponential backoff in spin loops. +pub(crate) struct Backoff { +    step: u32, +} + +// https://github.com/oneapi-src/oneTBB/blob/v2021.5.0/include/oneapi/tbb/detail/_utils.h#L46-L48 +const SPIN_LIMIT: u32 = 4; + +impl Backoff { +    #[inline] +    pub(crate) const fn new() -> Self { +        Self { step: 0 } +    } + +    #[inline] +    pub(crate) fn snooze(&mut self) { +        if self.step <= SPIN_LIMIT { +            for _ in 0..1 << self.step { +                #[allow(deprecated)] +                core::sync::atomic::spin_loop_hint(); +            } +            self.step += 1; +        } else { +            #[cfg(not(feature = "std"))] +            for _ in 0..1 << self.step { +                #[allow(deprecated)] +                core::sync::atomic::spin_loop_hint(); +            } + +            #[cfg(feature = "std")] +            std::thread::yield_now(); +        } +    } +} | 
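
Taken together, `mod.rs` stripes a fixed, prime-sized array of cache-padded seqlocks over atomic addresses, `seq_lock.rs`/`seq_lock_wide.rs` provide the stamped lock itself, and `utils.rs` supplies `CachePadded` and `Backoff`. The self-contained sketch below is a simplified illustration of the seqlock read protocol those files implement (std-only, not the vendored code): take a stamp, read optimistically, then validate that no writer committed in between.

```rust
// Simplified seqlock read protocol, mirroring SeqLock in seq_lock.rs:
// an even state is an unlocked stamp, 1 means a writer holds the lock.
use std::sync::atomic::{fence, AtomicUsize, Ordering};

struct SeqLock {
    state: AtomicUsize,
}

impl SeqLock {
    const fn new() -> Self {
        Self { state: AtomicUsize::new(0) }
    }

    /// Returns the current stamp unless a writer holds the lock.
    fn optimistic_read(&self) -> Option<usize> {
        let state = self.state.load(Ordering::Acquire);
        if state == 1 { None } else { Some(state) }
    }

    /// Returns true if no writer committed since `stamp` was observed,
    /// i.e. the optimistic read saw a consistent value.
    fn validate_read(&self, stamp: usize) -> bool {
        fence(Ordering::Acquire);
        self.state.load(Ordering::Relaxed) == stamp
    }
}

fn main() {
    let lock = SeqLock::new();
    if let Some(stamp) = lock.optimistic_read() {
        // ... read the protected data as relaxed atomic chunks here ...
        assert!(lock.validate_read(stamp));
    }
}
```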
