aboutsummaryrefslogtreecommitdiff
path: root/vendor/portable-atomic/src/imp/atomic128/aarch64.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/portable-atomic/src/imp/atomic128/aarch64.rs')
-rw-r--r--vendor/portable-atomic/src/imp/atomic128/aarch64.rs1708
1 files changed, 1708 insertions, 0 deletions
diff --git a/vendor/portable-atomic/src/imp/atomic128/aarch64.rs b/vendor/portable-atomic/src/imp/atomic128/aarch64.rs
new file mode 100644
index 0000000..32528a7
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/aarch64.rs
@@ -0,0 +1,1708 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on AArch64.
+//
+// There are a few ways to implement 128-bit atomic operations in AArch64.
+//
+// - LDXP/STXP loop (DW LL/SC)
+// - CASP (DWCAS) added as FEAT_LSE (mandatory from armv8.1-a)
+// - LDP/STP (DW load/store) if FEAT_LSE2 (optional from armv8.2-a, mandatory from armv8.4-a) is available
+// - LDIAPP/STILP (DW acquire-load/release-store) added as FEAT_LRCPC3 (optional from armv8.9-a/armv9.4-a) (if FEAT_LSE2 is also available)
+// - LDCLRP/LDSETP/SWPP (DW RMW) added as FEAT_LSE128 (optional from armv9.4-a)
+//
+// If outline-atomics is not enabled and FEAT_LSE is not available at
+// compile-time, we use LDXP/STXP loop.
+// If outline-atomics is enabled and FEAT_LSE is not available at
+// compile-time, we use CASP for CAS if FEAT_LSE is available
+// at run-time, otherwise, use LDXP/STXP loop.
+// If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW.
+// However, when portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP
+// loop for RMW (by default, it is set on Apple hardware; see build script for details).
+// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
+// If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap/{release,seqcst}-store.
+// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store.
+//
+// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
+//
+// Note that we do not separate LL and SC into separate functions, but handle
+// them within a single asm block. This is because it is theoretically possible
+// for the compiler to insert operations that might clear the reservation between
+// LL and SC. Considering the type of operations we are providing and the fact
+// that [progress64](https://github.com/ARM-software/progress64) uses such code,
+// this is probably not a problem for aarch64, but it seems that aarch64 doesn't
+// guarantee it and hexagon is the only architecture with hardware guarantees
+// that such code works. See also:
+//
+// - https://yarchive.net/comp/linux/cmpxchg_ll_sc_portability.html
+// - https://lists.llvm.org/pipermail/llvm-dev/2016-May/099490.html
+// - https://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html
+//
+// Also, even when using a CAS loop to implement atomic RMW, include the loop itself
+// in the asm block because it is more efficient for some codegen backends.
+// https://github.com/rust-lang/compiler-builtins/issues/339#issuecomment-1191260474
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - ARM Compiler armasm User Guide
+// https://developer.arm.com/documentation/dui0801/latest
+// - Arm A-profile A64 Instruction Set Architecture
+// https://developer.arm.com/documentation/ddi0602/latest
+// - Arm Architecture Reference Manual for A-profile architecture
+// https://developer.arm.com/documentation/ddi0487/latest
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - aarch64 https://godbolt.org/z/5Mz1E33vz
+// - aarch64 msvc https://godbolt.org/z/P53d1MsGY
+// - aarch64 (+lse) https://godbolt.org/z/qvaE8n79K
+// - aarch64 msvc (+lse) https://godbolt.org/z/dj4aYerfr
+// - aarch64 (+lse,+lse2) https://godbolt.org/z/1E15jjxah
+// - aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/YreM4n84o
+// - aarch64 (+lse2,+lse128) https://godbolt.org/z/Kfeqs54ox
+// - aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/n6zhjE77s
+
+include!("macros.rs");
+
+// On musl with static linking, it seems that getauxval is not always available.
+// See detect/auxv.rs for more.
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(
+ test,
+ not(all(
+ any(target_feature = "lse2", portable_atomic_target_feature = "lse2"),
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ )),
+))]
+#[cfg(any(
+ all(
+ target_os = "linux",
+ any(
+ target_env = "gnu",
+ all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
+ portable_atomic_outline_atomics,
+ ),
+ ),
+ target_os = "android",
+ target_os = "freebsd",
+))]
+#[path = "detect/auxv.rs"]
+mod detect;
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg_attr(
+ target_os = "netbsd",
+ cfg(any(
+ test,
+ not(all(
+ any(target_feature = "lse2", portable_atomic_target_feature = "lse2"),
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ )),
+ ))
+)]
+#[cfg_attr(
+ target_os = "openbsd",
+ cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))
+)]
+#[cfg(any(target_os = "netbsd", target_os = "openbsd"))]
+#[path = "detect/aarch64_aa64reg.rs"]
+mod detect;
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
+#[cfg(target_os = "fuchsia")]
+#[path = "detect/aarch64_fuchsia.rs"]
+mod detect;
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
+#[cfg(target_os = "windows")]
+#[path = "detect/aarch64_windows.rs"]
+mod detect;
+
+// test only
+#[cfg(test)]
+#[cfg(not(qemu))]
+#[cfg(not(valgrind))]
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))]
+#[path = "detect/aarch64_aa64reg.rs"]
+mod detect_aa64reg;
+#[cfg(test)]
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(target_os = "macos")]
+#[path = "detect/aarch64_macos.rs"]
+mod detect_macos;
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+use core::sync::atomic::Ordering;
+
+use crate::utils::{Pair, U128};
+
+#[cfg(any(
+ target_feature = "lse",
+ portable_atomic_target_feature = "lse",
+ not(portable_atomic_no_outline_atomics),
+))]
+macro_rules! debug_assert_lse { // Debug-only check that run-time detection reports FEAT_LSE.
+ () => {
+ #[cfg(all( // Only targets for which a run-time `detect` module is compiled in.
+ not(portable_atomic_no_outline_atomics),
+ any(
+ all(
+ target_os = "linux",
+ any(
+ target_env = "gnu",
+ all(
+ any(target_env = "musl", target_env = "ohos"),
+ not(target_feature = "crt-static"),
+ ),
+ portable_atomic_outline_atomics,
+ ),
+ ),
+ target_os = "android",
+ target_os = "freebsd",
+ target_os = "netbsd",
+ target_os = "openbsd",
+ target_os = "fuchsia",
+ target_os = "windows",
+ ),
+ ))]
+ #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] // Skipped when FEAT_LSE is enabled at compile time; the macro then expands to nothing.
+ {
+ debug_assert!(detect::detect().has_lse());
+ }
+ };
+}
+#[rustfmt::skip]
+#[cfg(any(
+ target_feature = "lse2",
+ portable_atomic_target_feature = "lse2",
+ not(portable_atomic_no_outline_atomics),
+))]
+macro_rules! debug_assert_lse2 { // Debug-only check that run-time detection reports FEAT_LSE2.
+ () => {
+ #[cfg(all(
+ not(portable_atomic_no_outline_atomics),
+ any(
+ all(
+ target_os = "linux",
+ any(
+ target_env = "gnu",
+ all(
+ any(target_env = "musl", target_env = "ohos"),
+ not(target_feature = "crt-static"),
+ ),
+ portable_atomic_outline_atomics,
+ ),
+ ),
+ target_os = "android",
+ target_os = "freebsd",
+ target_os = "netbsd",
+ // These targets don't support run-time detection of FEAT_LSE2, so no assertion is emitted for them.
+ // target_os = "openbsd",
+ // target_os = "fuchsia",
+ // target_os = "windows",
+ ),
+ ))]
+ #[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))] // Skipped when FEAT_LSE2 is enabled at compile time; the macro then expands to nothing.
+ {
+ debug_assert!(detect::detect().has_lse2());
+ }
+ };
+}
+
+// Refs: https://developer.arm.com/documentation/100067/0612/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en
+//
+// This is similar to #[target_feature(enable = "lse")], except that there are
+// no compiler guarantees regarding (un)inlining, and the scope is within an asm
+// block rather than a function. We use this directive to support outline-atomics
+// on pre-1.61 rustc (aarch64_target_feature stabilized in Rust 1.61).
+//
+// The .arch_extension directive is effective until the end of the assembly block and
+// is not propagated to subsequent code, so an end_lse macro is not needed.
+// https://godbolt.org/z/4oMEW8vWc
+// https://github.com/torvalds/linux/commit/e0d5896bd356cd577f9710a02d7a474cdf58426b
+// https://github.com/torvalds/linux/commit/dd1f6308b28edf0452dd5dc7877992903ec61e69
+// (It seems GCC effectively ignores this directive and always allows FEAT_LSE instructions: https://godbolt.org/z/W9W6rensG)
+//
+// The .arch directive has a similar effect, but we don't use it due to the following issue:
+// https://github.com/torvalds/linux/commit/dd1f6308b28edf0452dd5dc7877992903ec61e69
+//
+// This is also needed for compatibility with rustc_codegen_cranelift:
+// https://github.com/rust-lang/rustc_codegen_cranelift/issues/1400#issuecomment-1774599775
+//
+// Note: If FEAT_LSE is not available at compile-time, we must guarantee that
+// the function that uses it is not inlined into a function where it is not
+// clear whether FEAT_LSE is available. Otherwise, (even if we checked whether
+// FEAT_LSE is available at run-time) optimizations that reorder its
+// instructions across the if condition might introduce undefined behavior.
+// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
+// However, our code uses the ifunc helper macro that works with function pointers,
+// so we don't have to worry about this unless calling without the helper macro.
+#[cfg(any(
+ target_feature = "lse",
+ portable_atomic_target_feature = "lse",
+ not(portable_atomic_no_outline_atomics),
+))]
+macro_rules! start_lse { // Expands to the assembler directive placed first in asm blocks that use CASP.
+ () => {
+ ".arch_extension lse"
+ };
+}
+
+#[cfg(target_endian = "little")]
+macro_rules! select_le_or_be { // Little-endian targets: expands to the first (`$le`) expression.
+ ($le:expr, $be:expr) => {
+ $le
+ };
+}
+#[cfg(target_endian = "big")]
+macro_rules! select_le_or_be { // Big-endian targets: expands to the second (`$be`) expression.
+ ($le:expr, $be:expr) => {
+ $be
+ };
+}
+
+macro_rules! atomic_rmw { // Invokes `$op!(acquire_suffix, release_suffix, trailing_fence)` with string literals chosen from `$order`.
+ ($op:ident, $order:ident) => {
+ atomic_rmw!($op, $order, write = $order)
+ };
+ ($op:ident, $order:ident, write = $write:ident) => {
+ match $order {
+ Ordering::Relaxed => $op!("", "", ""),
+ Ordering::Acquire => $op!("a", "", ""),
+ Ordering::Release => $op!("", "l", ""),
+ Ordering::AcqRel => $op!("a", "l", ""),
+ // In MSVC environments, SeqCst stores/writes need a fence after the write.
+ // https://reviews.llvm.org/D141748
+ #[cfg(target_env = "msvc")]
+ Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"),
+ // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments (and on MSVC when the write ordering is not SeqCst).
+ Ordering::SeqCst => $op!("a", "l", ""),
+ _ => unreachable!("{:?}", $order),
+ }
+ };
+}
+
+// cfg guarantees that the CPU supports FEAT_LSE2, so the LDP-based load is used directly.
+#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))]
+use _atomic_load_ldp as atomic_load;
+#[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
+#[inline]
+unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { // 128-bit atomic load used when FEAT_LSE2 is not known at compile time.
+ #[inline]
+ unsafe fn atomic_load_no_lse2(src: *mut u128, order: Ordering) -> u128 { // Fallback without LDP: CASP if FEAT_LSE is enabled at compile time, otherwise an LDXP/STXP loop.
+ #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+ // SAFETY: the caller must uphold the safety contract.
+ // cfg guarantees that the CPU supports FEAT_LSE.
+ unsafe {
+ _atomic_load_casp(src, order)
+ }
+ #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ _atomic_load_ldxp_stxp(src, order)
+ }
+ }
+ #[cfg(not(all(
+ not(portable_atomic_no_outline_atomics),
+ any(
+ all(
+ target_os = "linux",
+ any(
+ target_env = "gnu",
+ all(
+ any(target_env = "musl", target_env = "ohos"),
+ not(target_feature = "crt-static"),
+ ),
+ portable_atomic_outline_atomics,
+ ),
+ ),
+ target_os = "android",
+ target_os = "freebsd",
+ target_os = "netbsd",
+ // These targets don't support run-time detection of FEAT_LSE2.
+ // target_os = "openbsd",
+ // target_os = "fuchsia",
+ // target_os = "windows",
+ ),
+ )))]
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ atomic_load_no_lse2(src, order)
+ }
+ #[cfg(all(
+ not(portable_atomic_no_outline_atomics),
+ any(
+ all(
+ target_os = "linux",
+ any(
+ target_env = "gnu",
+ all(
+ any(target_env = "musl", target_env = "ohos"),
+ not(target_feature = "crt-static"),
+ ),
+ portable_atomic_outline_atomics,
+ ),
+ ),
+ target_os = "android",
+ target_os = "freebsd",
+ target_os = "netbsd",
+ // These targets don't support run-time detection of FEAT_LSE2.
+ // target_os = "openbsd",
+ // target_os = "fuchsia",
+ // target_os = "windows",
+ ),
+ ))]
+ {
+ fn_alias! {
+ // inline(never) is just a hint and also not strictly necessary
+ // because we use the ifunc helper macro, but it is used for clarity.
+ #[inline(never)]
+ unsafe fn(src: *mut u128) -> u128;
+ atomic_load_lse2_relaxed = _atomic_load_ldp(Ordering::Relaxed);
+ atomic_load_lse2_acquire = _atomic_load_ldp(Ordering::Acquire);
+ atomic_load_lse2_seqcst = _atomic_load_ldp(Ordering::SeqCst);
+ }
+ fn_alias! {
+ unsafe fn(src: *mut u128) -> u128;
+ atomic_load_no_lse2_relaxed = atomic_load_no_lse2(Ordering::Relaxed);
+ atomic_load_no_lse2_acquire = atomic_load_no_lse2(Ordering::Acquire);
+ atomic_load_no_lse2_seqcst = atomic_load_no_lse2(Ordering::SeqCst);
+ }
+ // SAFETY: the caller must uphold the safety contract, and the LDP-based
+ // functions are selected only when run-time detection reports FEAT_LSE2.
+ unsafe {
+ match order {
+ Ordering::Relaxed => {
+ ifunc!(unsafe fn(src: *mut u128) -> u128 {
+ let cpuinfo = detect::detect();
+ if cpuinfo.has_lse2() {
+ atomic_load_lse2_relaxed
+ } else {
+ atomic_load_no_lse2_relaxed
+ }
+ })
+ }
+ Ordering::Acquire => {
+ ifunc!(unsafe fn(src: *mut u128) -> u128 {
+ let cpuinfo = detect::detect();
+ if cpuinfo.has_lse2() {
+ atomic_load_lse2_acquire
+ } else {
+ atomic_load_no_lse2_acquire
+ }
+ })
+ }
+ Ordering::SeqCst => {
+ ifunc!(unsafe fn(src: *mut u128) -> u128 {
+ let cpuinfo = detect::detect();
+ if cpuinfo.has_lse2() {
+ atomic_load_lse2_seqcst
+ } else {
+ atomic_load_no_lse2_seqcst
+ }
+ })
+ }
+ _ => unreachable!("{:?}", order),
+ }
+ }
+ }
+}
+// If the CPU supports FEAT_LSE2, LDP/LDIAPP is a single-copy atomic read;
+// otherwise it is two single-copy atomic reads.
+// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile
+#[cfg(any(
+ target_feature = "lse2",
+ portable_atomic_target_feature = "lse2",
+ not(portable_atomic_no_outline_atomics),
+))]
+#[inline]
+unsafe fn _atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 {
+ debug_assert!(src as usize % 16 == 0);
+ debug_assert_lse2!();
+
+ // SAFETY: the caller must guarantee that `src` is valid for reads,
+ // 16-byte aligned, and that there are no concurrent non-atomic operations.
+ //
+ // Refs:
+ // - LDP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDP--A64-
+ unsafe {
+ let (out_lo, out_hi);
+ macro_rules! atomic_load_relaxed { // LDP followed by the instruction string in `$acquire` (a fence, or "" for none); `$readonly` is an optional extra asm option.
+ ($acquire:tt $(, $readonly:tt)?) => {
+ asm!(
+ "ldp {out_lo}, {out_hi}, [{src}]",
+ $acquire,
+ src = in(reg) ptr_reg!(src),
+ out_hi = lateout(reg) out_hi,
+ out_lo = lateout(reg) out_lo,
+ options(nostack, preserves_flags $(, $readonly)?),
+ )
+ };
+ }
+ match order {
+ Ordering::Relaxed => atomic_load_relaxed!("", readonly),
+ #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
+ Ordering::Acquire => {
+ // SAFETY: cfg guarantees that the CPU supports FEAT_LRCPC3.
+ // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers-
+ asm!(
+ "ldiapp {out_lo}, {out_hi}, [{src}]",
+ src = in(reg) ptr_reg!(src),
+ out_hi = lateout(reg) out_hi,
+ out_lo = lateout(reg) out_lo,
+ options(nostack, preserves_flags),
+ );
+ }
+ #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
+ Ordering::Acquire => atomic_load_relaxed!("dmb ishld"),
+ Ordering::SeqCst => {
+ asm!(
+ // ldar (or dmb ishld) is required to prevent reordering with a preceding stlxp.
+ // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891 for details.
+ "ldar {tmp}, [{src}]",
+ "ldp {out_lo}, {out_hi}, [{src}]",
+ "dmb ishld",
+ src = in(reg) ptr_reg!(src),
+ out_hi = lateout(reg) out_hi,
+ out_lo = lateout(reg) out_lo,
+ tmp = out(reg) _,
+ options(nostack, preserves_flags),
+ );
+ }
+ _ => unreachable!("{:?}", order),
+ }
+ U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+ }
+}
+// Do not use _atomic_compare_exchange_casp because it needs an extra MOV to implement load.
+#[cfg(any(test, not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))))]
+#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+#[inline]
+unsafe fn _atomic_load_casp(src: *mut u128, order: Ordering) -> u128 {
+ debug_assert!(src as usize % 16 == 0);
+ debug_assert_lse!();
+
+ // SAFETY: the caller must uphold the safety contract.
+ // cfg guarantees that the CPU supports FEAT_LSE.
+ unsafe {
+ let (out_lo, out_hi);
+ macro_rules! atomic_load {
+ ($acquire:tt, $release:tt) => {
+ asm!(
+ start_lse!(),
+ concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"),
+ src = in(reg) ptr_reg!(src),
+ // must be allocated to even/odd register pair; x2/x3 hold both the compare value and the new value (zero), so a successful compare stores the value already in memory, and they receive the value read
+ inout("x2") 0_u64 => out_lo,
+ inout("x3") 0_u64 => out_hi,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ match order {
+ Ordering::Relaxed => atomic_load!("", ""),
+ Ordering::Acquire => atomic_load!("a", ""),
+ Ordering::SeqCst => atomic_load!("a", "l"),
+ _ => unreachable!("{:?}", order),
+ }
+ U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+ }
+}
+#[cfg(any(
+ test,
+ all(
+ not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")),
+ not(any(target_feature = "lse", portable_atomic_target_feature = "lse")),
+ ),
+))]
+#[inline]
+unsafe fn _atomic_load_ldxp_stxp(src: *mut u128, order: Ordering) -> u128 {
+ debug_assert!(src as usize % 16 == 0);
+
+ // SAFETY: the caller must uphold the safety contract.
+ // Note that the loop below writes the loaded pair back with ST(L)XP, so this load also performs a store to `src`.
+ unsafe {
+ let (mut out_lo, mut out_hi);
+ macro_rules! atomic_load {
+ ($acquire:tt, $release:tt) => {
+ asm!(
+ "2:",
+ concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{src}]"),
+ concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{src}]"),
+ // {r:w} is 0 if the store was successful, 1 if no store was performed; retry until it succeeds
+ "cbnz {r:w}, 2b",
+ src = in(reg) ptr_reg!(src),
+ out_lo = out(reg) out_lo,
+ out_hi = out(reg) out_hi,
+ r = out(reg) _,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ match order {
+ Ordering::Relaxed => atomic_load!("", ""),
+ Ordering::Acquire => atomic_load!("a", ""),
+ Ordering::SeqCst => atomic_load!("a", "l"),
+ _ => unreachable!("{:?}", order),
+ }
+ U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+ }
+}
+
+// cfg guarantees that the CPU supports FEAT_LSE2, so the STP-based store is used directly.
+#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))]
+use _atomic_store_stp as atomic_store;
+#[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { // 128-bit atomic store used when FEAT_LSE2 is not known at compile time.
+ #[inline]
+ unsafe fn atomic_store_no_lse2(dst: *mut u128, val: u128, order: Ordering) {
+ // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+ // we use the CASP-based swap and discard its result; otherwise we use an LDXP/STXP loop.
+ #[cfg(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+ ))]
+ // SAFETY: the caller must uphold the safety contract.
+ // cfg guarantees that the CPU supports FEAT_LSE.
+ unsafe {
+ _atomic_swap_casp(dst, val, order);
+ }
+ #[cfg(not(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+ )))]
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ _atomic_store_ldxp_stxp(dst, val, order);
+ }
+ }
+ #[cfg(not(all(
+ not(portable_atomic_no_outline_atomics),
+ any(
+ all(
+ target_os = "linux",
+ any(
+ target_env = "gnu",
+ all(
+ any(target_env = "musl", target_env = "ohos"),
+ not(target_feature = "crt-static"),
+ ),
+ portable_atomic_outline_atomics,
+ ),
+ ),
+ target_os = "android",
+ target_os = "freebsd",
+ target_os = "netbsd",
+ // These targets don't support run-time detection of FEAT_LSE2.
+ // target_os = "openbsd",
+ // target_os = "fuchsia",
+ // target_os = "windows",
+ ),
+ )))]
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ atomic_store_no_lse2(dst, val, order);
+ }
+ #[cfg(all(
+ not(portable_atomic_no_outline_atomics),
+ any(
+ all(
+ target_os = "linux",
+ any(
+ target_env = "gnu",
+ all(
+ any(target_env = "musl", target_env = "ohos"),
+ not(target_feature = "crt-static"),
+ ),
+ portable_atomic_outline_atomics,
+ ),
+ ),
+ target_os = "android",
+ target_os = "freebsd",
+ target_os = "netbsd",
+ // These targets don't support run-time detection of FEAT_LSE2.
+ // target_os = "openbsd",
+ // target_os = "fuchsia",
+ // target_os = "windows",
+ ),
+ ))]
+ {
+ fn_alias! {
+ // inline(never) is just a hint and also not strictly necessary
+ // because we use the ifunc helper macro, but it is used for clarity.
+ #[inline(never)]
+ unsafe fn(dst: *mut u128, val: u128);
+ atomic_store_lse2_relaxed = _atomic_store_stp(Ordering::Relaxed);
+ atomic_store_lse2_release = _atomic_store_stp(Ordering::Release);
+ atomic_store_lse2_seqcst = _atomic_store_stp(Ordering::SeqCst);
+ }
+ fn_alias! {
+ unsafe fn(dst: *mut u128, val: u128);
+ atomic_store_no_lse2_relaxed = atomic_store_no_lse2(Ordering::Relaxed);
+ atomic_store_no_lse2_release = atomic_store_no_lse2(Ordering::Release);
+ atomic_store_no_lse2_seqcst = atomic_store_no_lse2(Ordering::SeqCst);
+ }
+ // SAFETY: the caller must uphold the safety contract, and the STP-based
+ // functions are selected only when run-time detection reports FEAT_LSE2.
+ unsafe {
+ match order {
+ Ordering::Relaxed => {
+ ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+ let cpuinfo = detect::detect();
+ if cpuinfo.has_lse2() {
+ atomic_store_lse2_relaxed
+ } else {
+ atomic_store_no_lse2_relaxed
+ }
+ });
+ }
+ Ordering::Release => {
+ ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+ let cpuinfo = detect::detect();
+ if cpuinfo.has_lse2() {
+ atomic_store_lse2_release
+ } else {
+ atomic_store_no_lse2_release
+ }
+ });
+ }
+ Ordering::SeqCst => {
+ ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+ let cpuinfo = detect::detect();
+ if cpuinfo.has_lse2() {
+ atomic_store_lse2_seqcst
+ } else {
+ atomic_store_no_lse2_seqcst
+ }
+ });
+ }
+ _ => unreachable!("{:?}", order),
+ }
+ }
+ }
+}
+// If the CPU supports FEAT_LSE2, STP/STILP is a single-copy atomic write;
+// otherwise it is two single-copy atomic writes.
+// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile
+#[cfg(any(
+ target_feature = "lse2",
+ portable_atomic_target_feature = "lse2",
+ not(portable_atomic_no_outline_atomics),
+))]
+#[inline]
+unsafe fn _atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) {
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_lse2!();
+
+ // SAFETY: the caller must guarantee that `dst` is valid for writes,
+ // 16-byte aligned, and that there are no concurrent non-atomic operations.
+ //
+ // Refs:
+ // - STP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STP--A64-
+ unsafe {
+ #[rustfmt::skip]
+ macro_rules! atomic_store { // `$release` is emitted before the STP and `$acquire` after it; each is a fence instruction or "" for none.
+ ($acquire:tt, $release:tt) => {{
+ let val = U128 { whole: val };
+ asm!(
+ $release,
+ "stp {val_lo}, {val_hi}, [{dst}]",
+ $acquire,
+ dst = in(reg) ptr_reg!(dst),
+ val_lo = in(reg) val.pair.lo,
+ val_hi = in(reg) val.pair.hi,
+ options(nostack, preserves_flags),
+ );
+ }};
+ }
+ match order {
+ Ordering::Relaxed => atomic_store!("", ""),
+ #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
+ Ordering::Release => {
+ let val = U128 { whole: val };
+ // SAFETY: cfg guarantees that the CPU supports FEAT_LRCPC3.
+ // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers-
+ asm!(
+ "stilp {val_lo}, {val_hi}, [{dst}]",
+ dst = in(reg) ptr_reg!(dst),
+ val_lo = in(reg) val.pair.lo,
+ val_hi = in(reg) val.pair.hi,
+ options(nostack, preserves_flags),
+ );
+ }
+ #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
+ #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+ Ordering::Release => {
+ // Use swpp if stp requires fences.
+ // https://reviews.llvm.org/D143506
+ // SAFETY: cfg guarantees that the CPU supports FEAT_LSE128.
+ _atomic_swap_swpp(dst, val, order);
+ }
+ #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
+ #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+ Ordering::Release => atomic_store!("", "dmb ish"),
+ #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+ Ordering::SeqCst => {
+ // Use swpp if stp requires fences.
+ // https://reviews.llvm.org/D143506
+ // SAFETY: cfg guarantees that the CPU supports FEAT_LSE128.
+ _atomic_swap_swpp(dst, val, order);
+ }
+ #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+ Ordering::SeqCst => atomic_store!("dmb ish", "dmb ish"),
+ _ => unreachable!("{:?}", order),
+ }
+ }
+}
+// Do not use _atomic_swap_ldxp_stxp because it needs extra registers to implement store.
+#[cfg(any(
+ test,
+ not(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+ ))
+))]
+#[inline]
+unsafe fn _atomic_store_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) {
+ debug_assert!(dst as usize % 16 == 0);
+
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ let val = U128 { whole: val };
+ macro_rules! store { // The loaded pair is discarded (xzr and a scratch register); {tmp} is then reused for the store status.
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ "2:",
+ concat!("ld", $acquire, "xp xzr, {tmp}, [{dst}]"),
+ concat!("st", $release, "xp {tmp:w}, {val_lo}, {val_hi}, [{dst}]"),
+ // {tmp:w} is 0 if the store was successful, 1 if no store was performed; retry until it succeeds
+ "cbnz {tmp:w}, 2b",
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ val_lo = in(reg) val.pair.lo,
+ val_hi = in(reg) val.pair.hi,
+ tmp = out(reg) _,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(store, order);
+ }
+}
+
+#[inline]
+unsafe fn atomic_compare_exchange(
+ dst: *mut u128,
+ old: u128,
+ new: u128,
+ success: Ordering,
+ failure: Ordering,
+) -> Result<u128, u128> { // Returns Ok(previous value) when the previous value equals `old`, otherwise Err(previous value).
+ #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+ // SAFETY: the caller must uphold the safety contract.
+ // cfg guarantees that the CPU supports FEAT_LSE.
+ let prev = unsafe { _atomic_compare_exchange_casp(dst, old, new, success, failure) };
+ #[cfg(not(all(
+ not(portable_atomic_no_outline_atomics),
+ any(
+ all(
+ target_os = "linux",
+ any(
+ target_env = "gnu",
+ all(
+ any(target_env = "musl", target_env = "ohos"),
+ not(target_feature = "crt-static"),
+ ),
+ portable_atomic_outline_atomics,
+ ),
+ ),
+ target_os = "android",
+ target_os = "freebsd",
+ target_os = "netbsd",
+ target_os = "openbsd",
+ target_os = "fuchsia",
+ target_os = "windows",
+ ),
+ )))]
+ #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
+ // SAFETY: the caller must uphold the safety contract.
+ let prev = unsafe { _atomic_compare_exchange_ldxp_stxp(dst, old, new, success, failure) };
+ #[cfg(all(
+ not(portable_atomic_no_outline_atomics),
+ any(
+ all(
+ target_os = "linux",
+ any(
+ target_env = "gnu",
+ all(
+ any(target_env = "musl", target_env = "ohos"),
+ not(target_feature = "crt-static"),
+ ),
+ portable_atomic_outline_atomics,
+ ),
+ ),
+ target_os = "android",
+ target_os = "freebsd",
+ target_os = "netbsd",
+ target_os = "openbsd",
+ target_os = "fuchsia",
+ target_os = "windows",
+ ),
+ ))]
+ #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
+ let prev = {
+ fn_alias! {
+ // inline(never) is just a hint and also not strictly necessary
+ // because we use the ifunc helper macro, but it is used for clarity.
+ #[inline(never)]
+ unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128;
+ atomic_compare_exchange_casp_relaxed
+ = _atomic_compare_exchange_casp(Ordering::Relaxed, Ordering::Relaxed);
+ atomic_compare_exchange_casp_acquire
+ = _atomic_compare_exchange_casp(Ordering::Acquire, Ordering::Acquire);
+ atomic_compare_exchange_casp_release
+ = _atomic_compare_exchange_casp(Ordering::Release, Ordering::Relaxed);
+ atomic_compare_exchange_casp_acqrel
+ = _atomic_compare_exchange_casp(Ordering::AcqRel, Ordering::Acquire);
+ // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments, so the SeqCst alias is only defined for MSVC.
+ #[cfg(target_env = "msvc")]
+ atomic_compare_exchange_casp_seqcst
+ = _atomic_compare_exchange_casp(Ordering::SeqCst, Ordering::SeqCst);
+ }
+ fn_alias! {
+ unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128;
+ atomic_compare_exchange_ldxp_stxp_relaxed
+ = _atomic_compare_exchange_ldxp_stxp(Ordering::Relaxed, Ordering::Relaxed);
+ atomic_compare_exchange_ldxp_stxp_acquire
+ = _atomic_compare_exchange_ldxp_stxp(Ordering::Acquire, Ordering::Acquire);
+ atomic_compare_exchange_ldxp_stxp_release
+ = _atomic_compare_exchange_ldxp_stxp(Ordering::Release, Ordering::Relaxed);
+ atomic_compare_exchange_ldxp_stxp_acqrel
+ = _atomic_compare_exchange_ldxp_stxp(Ordering::AcqRel, Ordering::Acquire);
+ // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments, so the SeqCst alias is only defined for MSVC.
+ #[cfg(target_env = "msvc")]
+ atomic_compare_exchange_ldxp_stxp_seqcst
+ = _atomic_compare_exchange_ldxp_stxp(Ordering::SeqCst, Ordering::SeqCst);
+ }
+ // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+ // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+ // and the CASP-based functions are selected only when run-time detection reports FEAT_LSE.
+ unsafe {
+ let success = crate::utils::upgrade_success_ordering(success, failure);
+ match success {
+ Ordering::Relaxed => {
+ ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+ if detect::detect().has_lse() {
+ atomic_compare_exchange_casp_relaxed
+ } else {
+ atomic_compare_exchange_ldxp_stxp_relaxed
+ }
+ })
+ }
+ Ordering::Acquire => {
+ ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+ if detect::detect().has_lse() {
+ atomic_compare_exchange_casp_acquire
+ } else {
+ atomic_compare_exchange_ldxp_stxp_acquire
+ }
+ })
+ }
+ Ordering::Release => {
+ ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+ if detect::detect().has_lse() {
+ atomic_compare_exchange_casp_release
+ } else {
+ atomic_compare_exchange_ldxp_stxp_release
+ }
+ })
+ }
+ // AcqRel and SeqCst RMWs are equivalent in both implementations in non-MSVC environments.
+ #[cfg(not(target_env = "msvc"))]
+ Ordering::AcqRel | Ordering::SeqCst => {
+ ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+ if detect::detect().has_lse() {
+ atomic_compare_exchange_casp_acqrel
+ } else {
+ atomic_compare_exchange_ldxp_stxp_acqrel
+ }
+ })
+ }
+ #[cfg(target_env = "msvc")]
+ Ordering::AcqRel => {
+ ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+ if detect::detect().has_lse() {
+ atomic_compare_exchange_casp_acqrel
+ } else {
+ atomic_compare_exchange_ldxp_stxp_acqrel
+ }
+ })
+ }
+ #[cfg(target_env = "msvc")]
+ Ordering::SeqCst => {
+ ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+ if detect::detect().has_lse() {
+ atomic_compare_exchange_casp_seqcst
+ } else {
+ atomic_compare_exchange_ldxp_stxp_seqcst
+ }
+ })
+ }
+ _ => unreachable!("{:?}", success),
+ }
+ }
+ };
+ if prev == old {
+ Ok(prev)
+ } else {
+ Err(prev)
+ }
+}
+#[cfg(any(
+ target_feature = "lse",
+ portable_atomic_target_feature = "lse",
+ not(portable_atomic_no_outline_atomics),
+))]
+#[inline]
+unsafe fn _atomic_compare_exchange_casp(
+ dst: *mut u128,
+ old: u128,
+ new: u128,
+ success: Ordering,
+ failure: Ordering,
+) -> u128 { // Returns the value CASP leaves in x6/x7 (the previous value); the caller compares it against `old`.
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_lse!();
+ let order = crate::utils::upgrade_success_ordering(success, failure);
+
+ // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+ // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+ // and that the CPU supports FEAT_LSE.
+ //
+ // Refs:
+ // - https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL--A64-
+ // - https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-Swap-Pair-of-words-or-doublewords-in-memory-
+ unsafe {
+ let old = U128 { whole: old };
+ let new = U128 { whole: new };
+ let (prev_lo, prev_hi);
+ macro_rules! cmpxchg {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ start_lse!(),
+ concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"),
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ // must be allocated to even/odd register pair (x6/x7: expected value in, previous value out)
+ inout("x6") old.pair.lo => prev_lo,
+ inout("x7") old.pair.hi => prev_hi,
+ // must be allocated to even/odd register pair (x4/x5: new value)
+ in("x4") new.pair.lo,
+ in("x5") new.pair.hi,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(cmpxchg, order, write = success);
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+ }
+}
+#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
+#[inline]
+unsafe fn _atomic_compare_exchange_ldxp_stxp( // 128-bit compare-and-swap as an LDXP/STXP (LL/SC) retry loop; returns the value observed at `dst`.
+ dst: *mut u128,
+ old: u128,
+ new: u128,
+ success: Ordering,
+ failure: Ordering,
+) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ let order = crate::utils::upgrade_success_ordering(success, failure);
+ // `order` is the single ordering handed to `atomic_rmw!` below; how `success` and `failure` are combined is decided by `upgrade_success_ordering` (defined elsewhere).
+ // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+ // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+ //
+ // Refs:
+ // - LDXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDXP--A64-
+ // - LDAXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDAXP--A64-
+ // - STXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STXP--A64-
+ // - STLXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STLXP--A64-
+ //
+ // Note: Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic
+ // operation (even load/store), a corresponding Store-Exclusive pair must succeed.
+ // See Arm Architecture Reference Manual for A-profile architecture
+ // Section B2.2.1 "Requirements for single-copy atomicity", and
+ // Section B2.9 "Synchronization and semaphores" for more.
+ unsafe {
+ let old = U128 { whole: old }; // viewed as lo/hi halves through `.pair` below
+ let new = U128 { whole: new };
+ let (mut prev_lo, mut prev_hi);
+ macro_rules! cmpxchg {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ "2:", // retry point
+ concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // exclusive load of both halves
+ "cmp {prev_lo}, {old_lo}",
+ "cset {r:w}, ne", // r = 1 if the low halves differ, else 0
+ "cmp {prev_hi}, {old_hi}",
+ "cinc {r:w}, {r:w}, ne", // r += 1 if the high halves differ
+ "cbz {r:w}, 3f", // r == 0: memory equals `old`, go store `new`
+ concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{dst}]"), // mismatch: write back the value just read so the exclusive pair completes (see the atomicity note above)
+ // 0 if the store was successful, 1 if no store was performed
+ "cbnz {r:w}, 2b",
+ "b 4f", // write-back succeeded: leave with the mismatching value in prev_lo/prev_hi
+ "3:",
+ concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"), // match: try to store `new`
+ // 0 if the store was successful, 1 if no store was performed
+ "cbnz {r:w}, 2b",
+ "4:",
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ old_lo = in(reg) old.pair.lo,
+ old_hi = in(reg) old.pair.hi,
+ new_lo = in(reg) new.pair.lo,
+ new_hi = in(reg) new.pair.hi,
+ prev_lo = out(reg) prev_lo,
+ prev_hi = out(reg) prev_hi,
+ r = out(reg) _, // scratch: mismatch counter, then the store-exclusive status
+ // Do not use `preserves_flags` because CMP modifies the condition flags.
+ options(nostack),
+ )
+ };
+ }
+ atomic_rmw!(cmpxchg, order, write = success); // expands `cmpxchg!` with the suffixes/fence for `order` (`atomic_rmw!` is defined elsewhere; the role of `write = success` is not visible here)
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // reassemble the observed value; equals `old` when the swap happened
+ }
+}
+
+// casp is always strong, and ldxp requires a corresponding successful stxp for
+// its atomicity (see code comment in _atomic_compare_exchange_ldxp_stxp).
+// (i.e., AArch64 doesn't have a 128-bit weak CAS, so the weak variant is an alias of the strong one)
+use self::atomic_compare_exchange as atomic_compare_exchange_weak;
+
+// If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+// we use CAS-based atomic RMW.
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+#[cfg(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+))]
+use _atomic_swap_casp as atomic_swap;
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+#[cfg(not(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+)))]
+use _atomic_swap_ldxp_stxp as atomic_swap;
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+use _atomic_swap_swpp as atomic_swap;
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+#[inline]
+unsafe fn _atomic_swap_swpp(dst: *mut u128, val: u128, order: Ordering) -> u128 { // 128-bit swap with a single SWPP instruction (FEAT_LSE128); returns the previous value.
+ debug_assert!(dst as usize % 16 == 0);
+ // Only compiled when FEAT_LSE128 is enabled at compile time (see the cfg above), so no run-time check is made here.
+ // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+ // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+ // and the CPU supports FEAT_LSE128.
+ //
+ // Refs:
+ // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/SWPP--SWPPA--SWPPAL--SWPPL--Swap-quadword-in-memory-?lang=en
+ unsafe {
+ let val = U128 { whole: val }; // viewed as lo/hi halves through `.pair` below
+ let (prev_lo, prev_hi);
+ macro_rules! swap {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // the same registers carry `val` in and the previous memory value out
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ val_lo = inout(reg) val.pair.lo => prev_lo,
+ val_hi = inout(reg) val.pair.hi => prev_hi,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(swap, order); // expands `swap!` with the suffixes/fence for `order` (`atomic_rmw!` is defined elsewhere)
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+ }
+}
+// 128-bit swap as a CASP retry loop; returns the previous value. Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap.
+#[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
+#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+#[inline]
+unsafe fn _atomic_swap_casp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_lse!();
+ // Loop shape: read a guess, try CASP(guess -> val), and retry with the value CASP reported until the guess matched.
+ // SAFETY: the caller must uphold the safety contract.
+ // cfg guarantee that the CPU supports FEAT_LSE.
+ unsafe {
+ let val = U128 { whole: val }; // viewed as lo/hi halves through `.pair` below
+ let (mut prev_lo, mut prev_hi);
+ macro_rules! swap {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ start_lse!(),
+ // If FEAT_LSE2 is not supported, this works like byte-wise atomic.
+ // This is not single-copy atomic reads, but this is ok because subsequent
+ // CAS will check for consistency.
+ "ldp x4, x5, [{dst}]", // initial guess of the current value
+ "2:", // retry point
+ // casp writes the current value to the first register pair,
+ // so copy the `out`'s value for later comparison.
+ "mov {tmp_lo}, x4",
+ "mov {tmp_hi}, x5",
+ concat!("casp", $acquire, $release, " x4, x5, x2, x3, [{dst}]"), // expected value in x4/x5, replacement (`val`) in x2/x3
+ "cmp {tmp_hi}, x5",
+ "ccmp {tmp_lo}, x4, #0, eq", // compare the low halves only if the high halves matched
+ "b.ne 2b", // observed value differs from the guess: retry using the observed value
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ tmp_lo = out(reg) _,
+ tmp_hi = out(reg) _,
+ // must be allocated to even/odd register pair; on exit x4/x5 hold the value that was replaced
+ out("x4") prev_lo,
+ out("x5") prev_hi,
+ // must be allocated to even/odd register pair
+ in("x2") val.pair.lo,
+ in("x3") val.pair.hi,
+ // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
+ options(nostack),
+ )
+ };
+ }
+ atomic_rmw!(swap, order); // expands `swap!` with the suffixes/fence for `order` (`atomic_rmw!` is defined elsewhere)
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+ }
+}
+// 128-bit swap as an LDXP/STXP retry loop; returns the previous value. Do not use atomic_rmw_ll_sc_3 because it needs extra MOV to implement swap.
+#[cfg(any(
+ test,
+ not(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+ ))
+))]
+#[inline]
+unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ // Loop shape: exclusive-load the old value, exclusive-store `val`, and repeat until the store succeeds.
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ let val = U128 { whole: val }; // viewed as lo/hi halves through `.pair` below
+ let (mut prev_lo, mut prev_hi);
+ macro_rules! swap {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ "2:", // retry point
+ concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // exclusive load of the old value
+ concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"), // exclusive store of `val`; status goes to r
+ // 0 if the store was successful, 1 if no store was performed
+ "cbnz {r:w}, 2b",
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ val_lo = in(reg) val.pair.lo,
+ val_hi = in(reg) val.pair.hi,
+ prev_lo = out(reg) prev_lo,
+ prev_hi = out(reg) prev_hi,
+ r = out(reg) _, // scratch: store-exclusive status
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(swap, order); // expands `swap!` with the suffixes/fence for `order` (`atomic_rmw!` is defined elsewhere)
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+ }
+}
+
+/// Atomic RMW by LL/SC loop (3 arguments)
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+/// Expands to a function `$name` with that signature, returning the value loaded before the update, plus a cfg-gated alias `$reexport_name`. An optional `(preserves_flags)` after the alias name is forwarded to the `asm!` options.
+/// `$op` can use the following registers:
+/// - val_lo/val_hi pair: val argument (read-only for `$op`)
+/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`)
+/// - new_lo/new_hi pair: new value that will be stored by sc
+macro_rules! atomic_rmw_ll_sc_3 {
+ ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => {
+ // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+ // we use CAS-based atomic RMW generated by atomic_rmw_cas_3! macro instead.
+ #[cfg(not(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+ )))]
+ use $name as $reexport_name;
+ #[cfg(any(
+ test, // the function itself is also built for tests, even when the alias above is not
+ not(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+ ))
+ ))]
+ #[inline]
+ unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ let val = U128 { whole: val }; // viewed as lo/hi halves through `.pair` below
+ let (mut prev_lo, mut prev_hi);
+ macro_rules! op {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ "2:", // retry point
+ concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // exclusive load of the old value
+ $($op)* // caller-supplied instructions that fill new_lo/new_hi
+ concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"), // exclusive store of the new value; status goes to r
+ // 0 if the store was successful, 1 if no store was performed
+ "cbnz {r:w}, 2b",
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ val_lo = in(reg) val.pair.lo,
+ val_hi = in(reg) val.pair.hi,
+ prev_lo = out(reg) prev_lo,
+ prev_hi = out(reg) prev_hi,
+ new_lo = out(reg) _, // scratch written by `$op`
+ new_hi = out(reg) _,
+ r = out(reg) _, // scratch: store-exclusive status
+ options(nostack $(, $preserves_flags)?),
+ )
+ };
+ }
+ atomic_rmw!(op, order); // expands `op!` with the suffixes/fence for `order` (`atomic_rmw!` is defined elsewhere)
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+ }
+ }
+ };
+}
+/// Atomic RMW by CAS loop (3 arguments)
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+/// Expands to a function `$name` with that signature, returning the value that was replaced, plus a cfg-gated alias `$reexport_name`.
+/// `$op` can use the following registers:
+/// - val_lo/val_hi pair: val argument (read-only for `$op`)
+/// - x6/x7 pair: previous value loaded (read-only for `$op`)
+/// - x4/x5 pair: new value that will be stored
+macro_rules! atomic_rmw_cas_3 {
+ ($name:ident as $reexport_name:ident, $($op:tt)*) => {
+ // If FEAT_LSE is not available at compile-time or portable_atomic_ll_sc_rmw cfg is set,
+ // we use LL/SC-based atomic RMW generated by atomic_rmw_ll_sc_3! macro instead.
+ #[cfg(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+ ))]
+ use $name as $reexport_name;
+ #[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
+ #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+ #[inline]
+ unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_lse!();
+ // SAFETY: the caller must uphold the safety contract.
+ // cfg guarantee that the CPU supports FEAT_LSE.
+ unsafe {
+ let val = U128 { whole: val }; // viewed as lo/hi halves through `.pair` below
+ let (mut prev_lo, mut prev_hi);
+ macro_rules! op {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ start_lse!(),
+ // If FEAT_LSE2 is not supported, this works like byte-wise atomic.
+ // This is not single-copy atomic reads, but this is ok because subsequent
+ // CAS will check for consistency.
+ "ldp x6, x7, [{dst}]", // initial guess of the current value
+ "2:", // retry point
+ // casp writes the current value to the first register pair,
+ // so copy the `out`'s value for later comparison.
+ "mov {tmp_lo}, x6",
+ "mov {tmp_hi}, x7",
+ $($op)* // caller-supplied instructions that fill x4/x5 from x6/x7 and val_lo/val_hi
+ concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"), // expected value in x6/x7, replacement in x4/x5
+ "cmp {tmp_hi}, x7",
+ "ccmp {tmp_lo}, x6, #0, eq", // compare the low halves only if the high halves matched
+ "b.ne 2b", // observed value differs from the guess: recompute from the observed value
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ val_lo = in(reg) val.pair.lo,
+ val_hi = in(reg) val.pair.hi,
+ tmp_lo = out(reg) _,
+ tmp_hi = out(reg) _,
+ // must be allocated to even/odd register pair; on exit x6/x7 hold the value that was replaced
+ out("x6") prev_lo,
+ out("x7") prev_hi,
+ // must be allocated to even/odd register pair; x4/x5 are scratch for the new value
+ out("x4") _,
+ out("x5") _,
+ // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
+ options(nostack),
+ )
+ };
+ }
+ atomic_rmw!(op, order); // expands `op!` with the suffixes/fence for `order` (`atomic_rmw!` is defined elsewhere)
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+ }
+ }
+ };
+}
+
+/// Atomic RMW by LL/SC loop (2 arguments)
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+/// Expands to a function `$name` with that signature (no `val` operand), returning the value loaded before the update, plus a cfg-gated alias `$reexport_name`. An optional `(preserves_flags)` after the alias name is forwarded to the `asm!` options.
+/// `$op` can use the following registers:
+/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`)
+/// - new_lo/new_hi pair: new value that will be stored by sc
+macro_rules! atomic_rmw_ll_sc_2 {
+ ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => {
+ // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+ // we use CAS-based atomic RMW generated by atomic_rmw_cas_2! macro instead.
+ #[cfg(not(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+ )))]
+ use $name as $reexport_name;
+ #[cfg(any(
+ test, // the function itself is also built for tests, even when the alias above is not
+ not(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+ ))
+ ))]
+ #[inline]
+ unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ let (mut prev_lo, mut prev_hi);
+ macro_rules! op {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ "2:", // retry point
+ concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // exclusive load of the old value
+ $($op)* // caller-supplied instructions that fill new_lo/new_hi from prev_lo/prev_hi
+ concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"), // exclusive store of the new value; status goes to r
+ // 0 if the store was successful, 1 if no store was performed
+ "cbnz {r:w}, 2b",
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ prev_lo = out(reg) prev_lo,
+ prev_hi = out(reg) prev_hi,
+ new_lo = out(reg) _, // scratch written by `$op`
+ new_hi = out(reg) _,
+ r = out(reg) _, // scratch: store-exclusive status
+ options(nostack $(, $preserves_flags)?),
+ )
+ };
+ }
+ atomic_rmw!(op, order); // expands `op!` with the suffixes/fence for `order` (`atomic_rmw!` is defined elsewhere)
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+ }
+ }
+ };
+}
+/// Atomic RMW by CAS loop (2 arguments)
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+/// Expands to a function `$name` with that signature (no `val` operand), returning the value that was replaced, plus a cfg-gated alias `$reexport_name`.
+/// `$op` can use the following registers:
+/// - x6/x7 pair: previous value loaded (read-only for `$op`)
+/// - x4/x5 pair: new value that will be stored
+macro_rules! atomic_rmw_cas_2 {
+ ($name:ident as $reexport_name:ident, $($op:tt)*) => {
+ // If FEAT_LSE is not available at compile-time or portable_atomic_ll_sc_rmw cfg is set,
+ // we use LL/SC-based atomic RMW generated by atomic_rmw_ll_sc_2! macro instead.
+ #[cfg(all(
+ any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+ not(portable_atomic_ll_sc_rmw),
+ ))]
+ use $name as $reexport_name;
+ #[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
+ #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+ #[inline]
+ unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_lse!();
+ // SAFETY: the caller must uphold the safety contract.
+ // cfg guarantee that the CPU supports FEAT_LSE.
+ unsafe {
+ let (mut prev_lo, mut prev_hi);
+ macro_rules! op {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ start_lse!(),
+ // If FEAT_LSE2 is not supported, this works like byte-wise atomic.
+ // This is not single-copy atomic reads, but this is ok because subsequent
+ // CAS will check for consistency.
+ "ldp x6, x7, [{dst}]", // initial guess of the current value
+ "2:", // retry point
+ // casp writes the current value to the first register pair,
+ // so copy the `out`'s value for later comparison.
+ "mov {tmp_lo}, x6",
+ "mov {tmp_hi}, x7",
+ $($op)* // caller-supplied instructions that fill x4/x5 from x6/x7
+ concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"), // expected value in x6/x7, replacement in x4/x5
+ "cmp {tmp_hi}, x7",
+ "ccmp {tmp_lo}, x6, #0, eq", // compare the low halves only if the high halves matched
+ "b.ne 2b", // observed value differs from the guess: recompute from the observed value
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ tmp_lo = out(reg) _,
+ tmp_hi = out(reg) _,
+ // must be allocated to even/odd register pair; on exit x6/x7 hold the value that was replaced
+ out("x6") prev_lo,
+ out("x7") prev_hi,
+ // must be allocated to even/odd register pair; x4/x5 are scratch for the new value
+ out("x4") _,
+ out("x5") _,
+ // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
+ options(nostack),
+ )
+ };
+ }
+ atomic_rmw!(op, order); // expands `op!` with the suffixes/fence for `order` (`atomic_rmw!` is defined elsewhere)
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+ }
+ }
+ };
+}
+
+// 128-bit wrapping add (new = prev + val), in LL/SC and CASP flavours. Do not use `preserves_flags` because ADDS modifies the condition flags.
+atomic_rmw_ll_sc_3! {
+ _atomic_add_ldxp_stxp as atomic_add,
+ select_le_or_be!("adds {new_lo}, {prev_lo}, {val_lo}", "adds {new_hi}, {prev_hi}, {val_hi}"), // first add sets the carry; `select_le_or_be!` (defined elsewhere) presumably picks the string matching the target endianness
+ select_le_or_be!("adc {new_hi}, {prev_hi}, {val_hi}", "adc {new_lo}, {prev_lo}, {val_lo}"), // second add consumes the carry
+}
+atomic_rmw_cas_3! {
+ _atomic_add_casp as atomic_add,
+ select_le_or_be!("adds x4, x6, {val_lo}", "adds x5, x7, {val_hi}"), // same computation with prev in x6/x7 and new in x4/x5
+ select_le_or_be!("adc x5, x7, {val_hi}", "adc x4, x6, {val_lo}"),
+}
+
+// 128-bit wrapping subtract (new = prev - val), in LL/SC and CASP flavours. Do not use `preserves_flags` because SUBS modifies the condition flags.
+atomic_rmw_ll_sc_3! {
+ _atomic_sub_ldxp_stxp as atomic_sub,
+ select_le_or_be!("subs {new_lo}, {prev_lo}, {val_lo}", "subs {new_hi}, {prev_hi}, {val_hi}"), // first subtract sets the borrow; `select_le_or_be!` (defined elsewhere) presumably picks the string matching the target endianness
+ select_le_or_be!("sbc {new_hi}, {prev_hi}, {val_hi}", "sbc {new_lo}, {prev_lo}, {val_lo}"), // second subtract consumes the borrow
+}
+atomic_rmw_cas_3! {
+ _atomic_sub_casp as atomic_sub,
+ select_le_or_be!("subs x4, x6, {val_lo}", "subs x5, x7, {val_hi}"), // same computation with prev in x6/x7 and new in x4/x5
+ select_le_or_be!("sbc x5, x7, {val_hi}", "sbc x4, x6, {val_lo}"),
+}
+
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] // without FEAT_LSE128: bitwise AND through the generic LL/SC or CASP loop
+atomic_rmw_ll_sc_3! {
+ _atomic_and_ldxp_stxp as atomic_and (preserves_flags),
+ "and {new_lo}, {prev_lo}, {val_lo}",
+ "and {new_hi}, {prev_hi}, {val_hi}",
+}
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+atomic_rmw_cas_3! {
+ _atomic_and_casp as atomic_and,
+ "and x4, x6, {val_lo}",
+ "and x5, x7, {val_hi}",
+}
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+#[inline]
+unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 { // with FEAT_LSE128: bitwise AND with a single LDCLRP instruction; returns the previous value.
+ debug_assert!(dst as usize % 16 == 0);
+ // Only compiled when FEAT_LSE128 is enabled at compile time (see the cfg above), so no run-time check is made here.
+ // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+ // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+ // and the CPU supports FEAT_LSE128.
+ //
+ // Refs:
+ // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDCLRP--LDCLRPA--LDCLRPAL--LDCLRPL--Atomic-bit-clear-on-quadword-in-memory-?lang=en
+ unsafe {
+ let val = U128 { whole: !val }; // LDCLRP is a bit-clear, so pass the complement: clearing the bits of `!val` equals AND with `val`
+ let (prev_lo, prev_hi);
+ macro_rules! and {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ concat!("ldclrp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // the same registers carry the mask in and the previous memory value out
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ val_lo = inout(reg) val.pair.lo => prev_lo,
+ val_hi = inout(reg) val.pair.hi => prev_hi,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(and, order); // expands `and!` with the suffixes/fence for `order` (`atomic_rmw!` is defined elsewhere)
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+ }
+}
+
+atomic_rmw_ll_sc_3! { // bitwise NAND: new = !(prev & val), computed per 64-bit half
+ _atomic_nand_ldxp_stxp as atomic_nand (preserves_flags),
+ "and {new_lo}, {prev_lo}, {val_lo}",
+ "mvn {new_lo}, {new_lo}", // invert the AND result in place
+ "and {new_hi}, {prev_hi}, {val_hi}",
+ "mvn {new_hi}, {new_hi}",
+}
+atomic_rmw_cas_3! { // same computation with prev in x6/x7 and new in x4/x5
+ _atomic_nand_casp as atomic_nand,
+ "and x4, x6, {val_lo}",
+ "mvn x4, x4",
+ "and x5, x7, {val_hi}",
+ "mvn x5, x5",
+}
+
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] // without FEAT_LSE128: bitwise OR through the generic LL/SC or CASP loop
+atomic_rmw_ll_sc_3! {
+ _atomic_or_ldxp_stxp as atomic_or (preserves_flags),
+ "orr {new_lo}, {prev_lo}, {val_lo}",
+ "orr {new_hi}, {prev_hi}, {val_hi}",
+}
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+atomic_rmw_cas_3! {
+ _atomic_or_casp as atomic_or,
+ "orr x4, x6, {val_lo}",
+ "orr x5, x7, {val_hi}",
+}
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+#[inline]
+unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 { // with FEAT_LSE128: bitwise OR with a single LDSETP instruction; returns the previous value.
+ debug_assert!(dst as usize % 16 == 0);
+ // Only compiled when FEAT_LSE128 is enabled at compile time (see the cfg above), so no run-time check is made here.
+ // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+ // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+ // and the CPU supports FEAT_LSE128.
+ //
+ // Refs:
+ // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDSETP--LDSETPA--LDSETPAL--LDSETPL--Atomic-bit-set-on-quadword-in-memory-?lang=en
+ unsafe {
+ let val = U128 { whole: val }; // LDSETP is a bit-set, so `val` is passed unchanged
+ let (prev_lo, prev_hi);
+ macro_rules! or {
+ ($acquire:tt, $release:tt, $fence:tt) => {
+ asm!(
+ concat!("ldsetp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // the same registers carry the mask in and the previous memory value out
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ val_lo = inout(reg) val.pair.lo => prev_lo,
+ val_hi = inout(reg) val.pair.hi => prev_hi,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(or, order); // expands `or!` with the suffixes/fence for `order` (`atomic_rmw!` is defined elsewhere)
+ U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+ }
+}
+
+atomic_rmw_ll_sc_3! { // bitwise XOR: new = prev ^ val, computed per 64-bit half
+ _atomic_xor_ldxp_stxp as atomic_xor (preserves_flags),
+ "eor {new_lo}, {prev_lo}, {val_lo}",
+ "eor {new_hi}, {prev_hi}, {val_hi}",
+}
+atomic_rmw_cas_3! { // same computation with prev in x6/x7 and new in x4/x5
+ _atomic_xor_casp as atomic_xor,
+ "eor x4, x6, {val_lo}",
+ "eor x5, x7, {val_hi}",
+}
+
+atomic_rmw_ll_sc_2! { // bitwise NOT: new = !prev, computed per 64-bit half (no `val` operand, hence the 2-argument macro)
+ _atomic_not_ldxp_stxp as atomic_not (preserves_flags),
+ "mvn {new_lo}, {prev_lo}",
+ "mvn {new_hi}, {prev_hi}",
+}
+atomic_rmw_cas_2! { // same computation with prev in x6/x7 and new in x4/x5
+ _atomic_not_casp as atomic_not,
+ "mvn x4, x6",
+ "mvn x5, x7",
+}
+
+// 128-bit wrapping negate (new = 0 - prev), in LL/SC and CASP flavours. Do not use `preserves_flags` because NEGS modifies the condition flags.
+atomic_rmw_ll_sc_2! {
+ _atomic_neg_ldxp_stxp as atomic_neg,
+ select_le_or_be!("negs {new_lo}, {prev_lo}", "negs {new_hi}, {prev_hi}"), // first negate sets the borrow; `select_le_or_be!` (defined elsewhere) presumably picks the string matching the target endianness
+ select_le_or_be!("ngc {new_hi}, {prev_hi}", "ngc {new_lo}, {prev_lo}"), // second negate consumes the borrow
+}
+atomic_rmw_cas_2! {
+ _atomic_neg_casp as atomic_neg,
+ select_le_or_be!("negs x4, x6", "negs x5, x7"), // same computation with prev in x6/x7 and new in x4/x5
+ select_le_or_be!("ngc x5, x7", "ngc x4, x6"),
+}
+
+// Signed 128-bit maximum (new = max(prev, val)). Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+ _atomic_max_ldxp_stxp as atomic_max,
+ select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), // CMP + SBCS together set the flags for the 128-bit comparison `val - prev`
+ select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+ "csel {new_hi}, {prev_hi}, {val_hi}, lt", // select hi 64-bit: keep prev when val < prev (signed), else take val
+ "csel {new_lo}, {prev_lo}, {val_lo}, lt", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+ _atomic_max_casp as atomic_max,
+ select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), // same computation with prev in x6/x7 and new in x4/x5
+ select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+ "csel x5, x7, {val_hi}, lt", // select hi 64-bit
+ "csel x4, x6, {val_lo}, lt", // select lo 64-bit
+}
+
+// Unsigned 128-bit maximum (new = max(prev, val)). Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+ _atomic_umax_ldxp_stxp as atomic_umax,
+ select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), // CMP + SBCS together set the flags for the 128-bit comparison `val - prev`
+ select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+ "csel {new_hi}, {prev_hi}, {val_hi}, lo", // select hi 64-bit: keep prev when val < prev (unsigned), else take val
+ "csel {new_lo}, {prev_lo}, {val_lo}, lo", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+ _atomic_umax_casp as atomic_umax,
+ select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), // same computation with prev in x6/x7 and new in x4/x5
+ select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+ "csel x5, x7, {val_hi}, lo", // select hi 64-bit
+ "csel x4, x6, {val_lo}, lo", // select lo 64-bit
+}
+
+// Signed 128-bit minimum (new = min(prev, val)). Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+ _atomic_min_ldxp_stxp as atomic_min,
+ select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), // CMP + SBCS together set the flags for the 128-bit comparison `val - prev`
+ select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+ "csel {new_hi}, {prev_hi}, {val_hi}, ge", // select hi 64-bit: keep prev when val >= prev (signed), else take val
+ "csel {new_lo}, {prev_lo}, {val_lo}, ge", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+ _atomic_min_casp as atomic_min,
+ select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), // same computation with prev in x6/x7 and new in x4/x5
+ select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+ "csel x5, x7, {val_hi}, ge", // select hi 64-bit
+ "csel x4, x6, {val_lo}, ge", // select lo 64-bit
+}
+
+// Unsigned 128-bit minimum (new = min(prev, val)). Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+ _atomic_umin_ldxp_stxp as atomic_umin,
+ select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), // CMP + SBCS together set the flags for the 128-bit comparison `val - prev`
+ select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+ "csel {new_hi}, {prev_hi}, {val_hi}, hs", // select hi 64-bit: keep prev when val >= prev (unsigned), else take val
+ "csel {new_lo}, {prev_lo}, {val_lo}, hs", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+ _atomic_umin_casp as atomic_umin,
+ select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), // same computation with prev in x6/x7 and new in x4/x5
+ select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+ "csel x5, x7, {val_hi}, hs", // select hi 64-bit
+ "csel x4, x6, {val_lo}, hs", // select lo 64-bit
+}
+
+#[inline]
+const fn is_lock_free() -> bool { // lock-freedom query; on this backend it simply reports the compile-time constant below
+ IS_ALWAYS_LOCK_FREE
+}
+const IS_ALWAYS_LOCK_FREE: bool = true; // unconditionally true here; no lock-based fallback is visible in this part of the file
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min); // signed type is wired to the signed max/min helpers (`atomic128!` is defined elsewhere)
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); // unsigned type is wired to the unsigned max/min helpers
+
+#[cfg(test)]
+mod tests { // tests for this backend; `test_atomic_int!` and `stress_test!` are macros defined elsewhere in the crate
+ use super::*;
+ // Functional tests are instantiated for both the signed and the unsigned 128-bit type.
+ test_atomic_int!(i128);
+ test_atomic_int!(u128);
+
+ // load/store/swap implementation is not affected by signedness, so it is
+ // enough to test only unsigned types.
+ stress_test!(u128);
+}