// SPDX-License-Identifier: Apache-2.0 OR MIT

// Atomic{I,U}128 implementation on PowerPC64.
//
// powerpc64 on pwr8+ supports 128-bit atomics:
// https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128.ll
//
// powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/PowerPC/PPC.td#L663
// See also https://github.com/rust-lang/rust/issues/59932
//
// Note that we do not separate LL and SC into two functions, but handle
// them within a single asm block. This is because it is theoretically possible
// for the compiler to insert operations that might clear the reservation between
// LL and SC. See aarch64.rs for details.
//
// Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use
// this module and use intrinsics.rs instead.
//
// Refs:
// - Power ISA https://openpowerfoundation.org/specifications/isa
// - AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference
// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
//
// Generated asm:
// - powerpc64 (pwr8) https://godbolt.org/z/nG5dGa38a
// - powerpc64le https://godbolt.org/z/6c99s75e4

include!("macros.rs");

#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
#[path = "../fallback/outline_atomics.rs"]
mod fallback;

// On musl with static linking, it seems that getauxval is not always available.
// See detect/auxv.rs for more.
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(any(test, portable_atomic_outline_atomics))] // TODO(powerpc64): currently disabled by default
#[cfg(any(
    test,
    not(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    )),
))]
#[cfg(any(
    all(
        target_os = "linux",
        any(
            target_env = "gnu",
            all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
            portable_atomic_outline_atomics,
        ),
    ),
    target_os = "android",
    target_os = "freebsd",
))]
#[path = "detect/auxv.rs"]
mod detect;

use core::{arch::asm, sync::atomic::Ordering};

use crate::utils::{Pair, U128};

macro_rules! debug_assert_pwr8 {
    () => {
        #[cfg(not(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        )))]
        {
            debug_assert!(detect::detect().has_quadword_atomics());
        }
    };
}

// Refs: https://www.ibm.com/docs/en/aix/7.3?topic=ops-machine-pseudo-op
//
// This is similar to #[target_feature(enable = "quadword-atomics")], except that there are
// no compiler guarantees regarding (un)inlining, and the scope is within an asm
// block rather than a function. We use this directive because #[target_feature(enable = "quadword-atomics")]
// is not supported as of Rust 1.70-nightly.
//
// start_pwr8 and end_pwr8 must be used in pairs.
//
// Note: If power8 instructions are not available at compile-time, we must guarantee that
// the function that uses them is not inlined into a function where it is not
// clear whether power8 instructions are available. Otherwise, (even if we checked whether
// power8 instructions are available at run-time) optimizations that reorder these
// instructions across the if condition might introduce undefined behavior.
// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
// However, our code uses the ifunc helper macro that works with function pointers,
// so we don't have to worry about this unless calling it without the helper macro.
macro_rules! start_pwr8 {
    () => {
        ".machine push\n.machine power8"
    };
}
macro_rules! end_pwr8 {
    () => {
        ".machine pop"
    };
}

macro_rules! atomic_rmw {
    ($op:ident, $order:ident) => {
        match $order {
            Ordering::Relaxed => $op!("", ""),
            Ordering::Acquire => $op!("lwsync", ""),
            Ordering::Release => $op!("", "lwsync"),
            Ordering::AcqRel => $op!("lwsync", "lwsync"),
            Ordering::SeqCst => $op!("lwsync", "sync"),
            _ => unreachable!("{:?}", $order),
        }
    };
}

// Extracts and checks the EQ bit of cr0.
#[inline]
fn extract_cr0(r: u64) -> bool {
    r & 0x20000000 != 0
}

#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
use atomic_load_pwr8 as atomic_load;
#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
#[inline]
unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
    fn_alias! {
        // inline(never) is just a hint and also not strictly necessary
        // because we use ifunc helper macro, but used for clarity.
        #[inline(never)]
        unsafe fn(src: *mut u128) -> u128;
        atomic_load_pwr8_relaxed = atomic_load_pwr8(Ordering::Relaxed);
        atomic_load_pwr8_acquire = atomic_load_pwr8(Ordering::Acquire);
        atomic_load_pwr8_seqcst = atomic_load_pwr8(Ordering::SeqCst);
    }
    // SAFETY: the caller must uphold the safety contract.
    // we only call atomic_load_pwr8 if quadword-atomics is available.
    unsafe {
        match order {
            Ordering::Relaxed => {
                ifunc!(unsafe fn(src: *mut u128) -> u128 {
                    if detect::detect().has_quadword_atomics() {
                        atomic_load_pwr8_relaxed
                    } else {
                        fallback::atomic_load_non_seqcst
                    }
                })
            }
            Ordering::Acquire => {
                ifunc!(unsafe fn(src: *mut u128) -> u128 {
                    if detect::detect().has_quadword_atomics() {
                        atomic_load_pwr8_acquire
                    } else {
                        fallback::atomic_load_non_seqcst
                    }
                })
            }
            Ordering::SeqCst => {
                ifunc!(unsafe fn(src: *mut u128) -> u128 {
                    if detect::detect().has_quadword_atomics() {
                        atomic_load_pwr8_seqcst
                    } else {
                        fallback::atomic_load_seqcst
                    }
                })
            }
            _ => unreachable!("{:?}", order),
        }
    }
}

#[inline]
unsafe fn atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128 {
    debug_assert!(src as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
    unsafe {
        let (out_hi, out_lo);
        macro_rules! atomic_load_acquire {
            ($release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "lq %r4, 0({src})",
                    // Lightweight acquire sync
                    // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62
                    "cmpd %cr7, %r4, %r4",
                    "bne- %cr7, 2f",
                    "2:",
                    "isync",
                    end_pwr8!(),
                    src = in(reg_nonzero) ptr_reg!(src),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    out("r4") out_hi,
                    out("r5") out_lo,
                    out("cr7") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        match order {
            Ordering::Relaxed => {
                asm!(
                    start_pwr8!(),
                    "lq %r4, 0({src})",
                    end_pwr8!(),
                    src = in(reg_nonzero) ptr_reg!(src),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
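                    // A plain lq suffices for the Relaxed case: per the Power ISA references above,
                    // an aligned 16-byte lq is atomic on pwr8+, so no additional sync is needed here.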
                    out("r4") out_hi,
                    out("r5") out_lo,
                    options(nostack, preserves_flags, readonly),
                );
            }
            Ordering::Acquire => atomic_load_acquire!(""),
            Ordering::SeqCst => atomic_load_acquire!("sync"),
            _ => unreachable!("{:?}", order),
        }
        U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
    }
}

#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
use atomic_store_pwr8 as atomic_store;
#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
    fn_alias! {
        // inline(never) is just a hint and also not strictly necessary
        // because we use ifunc helper macro, but used for clarity.
        #[inline(never)]
        unsafe fn(dst: *mut u128, val: u128);
        atomic_store_pwr8_relaxed = atomic_store_pwr8(Ordering::Relaxed);
        atomic_store_pwr8_release = atomic_store_pwr8(Ordering::Release);
        atomic_store_pwr8_seqcst = atomic_store_pwr8(Ordering::SeqCst);
    }
    // SAFETY: the caller must uphold the safety contract.
    // we only call atomic_store_pwr8 if quadword-atomics is available.
    unsafe {
        match order {
            Ordering::Relaxed => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    if detect::detect().has_quadword_atomics() {
                        atomic_store_pwr8_relaxed
                    } else {
                        fallback::atomic_store_non_seqcst
                    }
                });
            }
            Ordering::Release => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    if detect::detect().has_quadword_atomics() {
                        atomic_store_pwr8_release
                    } else {
                        fallback::atomic_store_non_seqcst
                    }
                });
            }
            Ordering::SeqCst => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    if detect::detect().has_quadword_atomics() {
                        atomic_store_pwr8_seqcst
                    } else {
                        fallback::atomic_store_seqcst
                    }
                });
            }
            _ => unreachable!("{:?}", order),
        }
    }
}

#[inline]
unsafe fn atomic_store_pwr8(dst: *mut u128, val: u128, order: Ordering) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
    unsafe {
        let val = U128 { whole: val };
        macro_rules! atomic_store {
            ($release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "stq %r4, 0({dst})",
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    in("r4") val.pair.hi,
                    in("r5") val.pair.lo,
                    options(nostack, preserves_flags),
                )
            };
        }
        match order {
            Ordering::Relaxed => atomic_store!(""),
            Ordering::Release => atomic_store!("lwsync"),
            Ordering::SeqCst => atomic_store!("sync"),
            _ => unreachable!("{:?}", order),
        }
    }
}

#[inline]
unsafe fn atomic_compare_exchange(
    dst: *mut u128,
    old: u128,
    new: u128,
    success: Ordering,
    failure: Ordering,
) -> Result<u128, u128> {
    let success = crate::utils::upgrade_success_ordering(success, failure);

    #[cfg(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    ))]
    // SAFETY: the caller must uphold the safety contract.
    // cfg guarantees that quadword atomics instructions are available at compile-time.
    let (prev, ok) = unsafe { atomic_compare_exchange_pwr8(dst, old, new, success) };
    #[cfg(not(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    )))]
    // SAFETY: the caller must uphold the safety contract.
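    // atomic_compare_exchange_ifunc is generated by atomic_rmw_with_ifunc! below; at run-time it
    // dispatches to atomic_compare_exchange_pwr8 or to the fallback, depending on whether
    // quadword atomics are available.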
    let (prev, ok) = unsafe { atomic_compare_exchange_ifunc(dst, old, new, success) };
    if ok {
        Ok(prev)
    } else {
        Err(prev)
    }
}

#[inline]
unsafe fn atomic_compare_exchange_pwr8(
    dst: *mut u128,
    old: u128,
    new: u128,
    order: Ordering,
) -> (u128, bool) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
    unsafe {
        let old = U128 { whole: old };
        let new = U128 { whole: new };
        let (mut prev_hi, mut prev_lo);
        let mut r;
        macro_rules! cmpxchg {
            ($acquire:tt, $release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "2:",
                    "lqarx %r8, 0, {dst}",
                    "xor {tmp_lo}, %r9, {old_lo}",
                    "xor {tmp_hi}, %r8, {old_hi}",
                    "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
                    "bne %cr0, 3f", // jump if compare failed
                    "stqcx. %r6, 0, {dst}",
                    "bne %cr0, 2b", // continue loop if store failed
                    "3:",
                    // if compare failed, the EQ bit is cleared; if stqcx succeeds, the EQ bit is set.
                    // mfcr copies the whole condition register to a GPR; extract_cr0 then tests cr0's EQ bit.
                    "mfcr {tmp_lo}",
                    $acquire,
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    old_hi = in(reg) old.pair.hi,
                    old_lo = in(reg) old.pair.lo,
                    tmp_hi = out(reg) _,
                    tmp_lo = out(reg) r,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    in("r6") new.pair.hi,
                    in("r7") new.pair.lo,
                    out("r8") prev_hi,
                    out("r9") prev_lo,
                    out("cr0") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(cmpxchg, order);
        (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
    }
}

// Always use strong CAS for outline-atomics.
#[cfg(not(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
)))]
use atomic_compare_exchange as atomic_compare_exchange_weak;
#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
#[inline]
unsafe fn atomic_compare_exchange_weak(
    dst: *mut u128,
    old: u128,
    new: u128,
    success: Ordering,
    failure: Ordering,
) -> Result<u128, u128> {
    let success = crate::utils::upgrade_success_ordering(success, failure);

    // SAFETY: the caller must uphold the safety contract.
    // cfg guarantees that quadword atomics instructions are available at compile-time.
    let (prev, ok) = unsafe { atomic_compare_exchange_weak_pwr8(dst, old, new, success) };
    if ok {
        Ok(prev)
    } else {
        Err(prev)
    }
}

#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
#[inline]
unsafe fn atomic_compare_exchange_weak_pwr8(
    dst: *mut u128,
    old: u128,
    new: u128,
    order: Ordering,
) -> (u128, bool) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
    unsafe {
        let old = U128 { whole: old };
        let new = U128 { whole: new };
        let (mut prev_hi, mut prev_lo);
        let mut r;
        macro_rules! cmpxchg_weak {
            ($acquire:tt, $release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "lqarx %r8, 0, {dst}",
                    "xor {tmp_lo}, %r9, {old_lo}",
                    "xor {tmp_hi}, %r8, {old_hi}",
                    "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
                    "bne %cr0, 3f", // jump if compare failed
                    "stqcx. %r6, 0, {dst}",
                    "3:",
                    // if compare or stqcx failed, the EQ bit is cleared; if stqcx succeeds, the EQ bit is set.
                    "mfcr {tmp_lo}",
                    $acquire,
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    old_hi = in(reg) old.pair.hi,
                    old_lo = in(reg) old.pair.lo,
                    tmp_hi = out(reg) _,
                    tmp_lo = out(reg) r,
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    in("r6") new.pair.hi,
                    in("r7") new.pair.lo,
                    out("r8") prev_hi,
                    out("r9") prev_lo,
                    out("cr0") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(cmpxchg_weak, order);
        (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
    }
}

#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
use atomic_swap_pwr8 as atomic_swap;
// Do not use atomic_rmw_ll_sc_3 because it needs an extra MR to implement swap.
#[inline]
unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_pwr8!();

    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_hi, mut prev_lo);
        macro_rules! swap {
            ($acquire:tt, $release:tt) => {
                asm!(
                    start_pwr8!(),
                    $release,
                    "2:",
                    "lqarx %r6, 0, {dst}",
                    "stqcx. %r8, 0, {dst}",
                    "bne %cr0, 2b",
                    $acquire,
                    end_pwr8!(),
                    dst = in(reg_nonzero) ptr_reg!(dst),
                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                    out("r6") prev_hi,
                    out("r7") prev_lo,
                    in("r8") val.pair.hi,
                    in("r9") val.pair.lo,
                    out("cr0") _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(swap, order);
        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
    }
}

/// Atomic RMW by LL/SC loop (3 arguments)
/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
///
/// $op can use the following registers:
/// - val_hi/val_lo pair: val argument (read-only for `$op`)
/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
/// - r8/r9 pair: new value that will be stored by sc
macro_rules! atomic_rmw_ll_sc_3 {
    ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => {
        #[cfg(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        ))]
        use $name as $reexport_name;
        #[inline]
        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            debug_assert_pwr8!();

            // SAFETY: the caller must uphold the safety contract.
            unsafe {
                let val = U128 { whole: val };
                let (mut prev_hi, mut prev_lo);
                macro_rules! op {
                    ($acquire:tt, $release:tt) => {
                        asm!(
                            start_pwr8!(),
                            $release,
                            "2:",
                            "lqarx %r6, 0, {dst}",
                            $($op)*
                            "stqcx. %r8, 0, {dst}",
                            "bne %cr0, 2b",
                            $acquire,
                            end_pwr8!(),
                            dst = in(reg_nonzero) ptr_reg!(dst),
                            val_hi = in(reg) val.pair.hi,
                            val_lo = in(reg) val.pair.lo,
                            $($reg)*
                            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                            // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                            out("r6") prev_hi,
                            out("r7") prev_lo,
                            out("r8") _, // new (hi)
                            out("r9") _, // new (lo)
                            out("cr0") _,
                            options(nostack, preserves_flags),
                        )
                    };
                }
                atomic_rmw!(op, order);
                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
            }
        }
    };
}

/// Atomic RMW by LL/SC loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
///
/// $op can use the following registers:
/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
/// - r8/r9 pair: new value that will be stored by sc
macro_rules! atomic_rmw_ll_sc_2 {
    ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => {
        #[cfg(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        ))]
        use $name as $reexport_name;
        #[inline]
        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            debug_assert_pwr8!();

            // SAFETY: the caller must uphold the safety contract.
            unsafe {
                let (mut prev_hi, mut prev_lo);
                macro_rules! op {
                    ($acquire:tt, $release:tt) => {
                        asm!(
                            start_pwr8!(),
                            $release,
                            "2:",
                            "lqarx %r6, 0, {dst}",
                            $($op)*
                            "stqcx. %r8, 0, {dst}",
                            "bne %cr0, 2b",
                            $acquire,
                            end_pwr8!(),
                            dst = in(reg_nonzero) ptr_reg!(dst),
                            $($reg)*
                            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
                            // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
                            out("r6") prev_hi,
                            out("r7") prev_lo,
                            out("r8") _, // new (hi)
                            out("r9") _, // new (lo)
                            out("cr0") _,
                            options(nostack, preserves_flags),
                        )
                    };
                }
                atomic_rmw!(op, order);
                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
            }
        }
    };
}

atomic_rmw_ll_sc_3! {
    atomic_add_pwr8 as atomic_add, [out("xer") _,],
    "addc %r9, {val_lo}, %r7",
    "adde %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_sub_pwr8 as atomic_sub, [out("xer") _,],
    "subc %r9, %r7, {val_lo}",
    "subfe %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_and_pwr8 as atomic_and, [],
    "and %r9, {val_lo}, %r7",
    "and %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_nand_pwr8 as atomic_nand, [],
    "nand %r9, {val_lo}, %r7",
    "nand %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_or_pwr8 as atomic_or, [],
    "or %r9, {val_lo}, %r7",
    "or %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_xor_pwr8 as atomic_xor, [],
    "xor %r9, {val_lo}, %r7",
    "xor %r8, {val_hi}, %r6",
}
atomic_rmw_ll_sc_3! {
    atomic_max_pwr8 as atomic_max, [out("cr1") _,],
    "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
    "iselgt %r9, %r7, {val_lo}",  // select lo 64-bit based on GT bit in cr0
    "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
    "isel %r8, %r7, {val_lo}, 5", // select lo 64-bit based on GT bit in cr1
    "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
    "iseleq %r9, %r9, %r8",       // select lo 64-bit based on EQ bit in cr0
    "isel %r8, %r6, {val_hi}, 5", // select hi 64-bit based on GT bit in cr1
}
atomic_rmw_ll_sc_3! {
    atomic_umax_pwr8 as atomic_umax, [],
    "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
    "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
    "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
    "iselgt %r8, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
    "iseleq %r9, %r9, %r8",      // select lo 64-bit based on EQ bit in cr0
    "iselgt %r8, %r6, {val_hi}", // select hi 64-bit based on GT bit in cr0
}
atomic_rmw_ll_sc_3! {
    atomic_min_pwr8 as atomic_min, [out("cr1") _,],
    "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
    "isellt %r9, %r7, {val_lo}",  // select lo 64-bit based on LT bit in cr0
    "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
    "isel %r8, %r7, {val_lo}, 4", // select lo 64-bit based on LT bit in cr1
    "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
    "iseleq %r9, %r9, %r8",       // select lo 64-bit based on EQ bit in cr0
    "isel %r8, %r6, {val_hi}, 4", // select hi 64-bit based on LT bit in cr1
}
atomic_rmw_ll_sc_3! {
    atomic_umin_pwr8 as atomic_umin, [],
    "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
    "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
    "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
    "isellt %r8, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
    "iseleq %r9, %r9, %r8",      // select lo 64-bit based on EQ bit in cr0
    "isellt %r8, %r6, {val_hi}", // select hi 64-bit based on LT bit in cr0
}

#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
))]
use atomic_not_pwr8 as atomic_not;
#[inline]
unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 {
    // SAFETY: the caller must uphold the safety contract.
    unsafe { atomic_xor_pwr8(dst, !0, order) }
}

#[cfg(portable_atomic_llvm_16)]
atomic_rmw_ll_sc_2! {
    atomic_neg_pwr8 as atomic_neg, [out("xer") _,],
    "subfic %r9, %r7, 0",
    "subfze %r8, %r6",
}
// LLVM 15 miscompiles subfic.
#[cfg(not(portable_atomic_llvm_16))]
atomic_rmw_ll_sc_2! {
    atomic_neg_pwr8 as atomic_neg, [zero = in(reg) 0_u64, out("xer") _,],
    "subc %r9, {zero}, %r7",
    "subfze %r8, %r6",
}

macro_rules! atomic_rmw_with_ifunc {
    (
        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
        pwr8 = $pwr8_fn:ident;
        non_seqcst_fallback = $non_seqcst_fallback_fn:ident;
        seqcst_fallback = $seqcst_fallback_fn:ident;
    ) => {
        #[cfg(not(any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
        )))]
        #[inline]
        unsafe fn $name($($arg)*, order: Ordering) $(-> $ret_ty)? {
            fn_alias! {
                // inline(never) is just a hint and also not strictly necessary
                // because we use ifunc helper macro, but used for clarity.
                #[inline(never)]
                unsafe fn($($arg)*) $(-> $ret_ty)?;
                pwr8_relaxed_fn = $pwr8_fn(Ordering::Relaxed);
                pwr8_acquire_fn = $pwr8_fn(Ordering::Acquire);
                pwr8_release_fn = $pwr8_fn(Ordering::Release);
                pwr8_acqrel_fn = $pwr8_fn(Ordering::AcqRel);
                pwr8_seqcst_fn = $pwr8_fn(Ordering::SeqCst);
            }
            // SAFETY: the caller must uphold the safety contract.
            // we only call pwr8_fn if quadword-atomics is available.
            unsafe {
                match order {
                    Ordering::Relaxed => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_relaxed_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::Acquire => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_acquire_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::Release => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_release_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::AcqRel => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_acqrel_fn
                            } else {
                                fallback::$non_seqcst_fallback_fn
                            }
                        })
                    }
                    Ordering::SeqCst => {
                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                            if detect::detect().has_quadword_atomics() {
                                pwr8_seqcst_fn
                            } else {
                                fallback::$seqcst_fallback_fn
                            }
                        })
                    }
                    _ => unreachable!("{:?}", order),
                }
            }
        }
    };
}

atomic_rmw_with_ifunc! {
    unsafe fn atomic_compare_exchange_ifunc(dst: *mut u128, old: u128, new: u128) -> (u128, bool);
    pwr8 = atomic_compare_exchange_pwr8;
    non_seqcst_fallback = atomic_compare_exchange_non_seqcst;
    seqcst_fallback = atomic_compare_exchange_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_swap_pwr8;
    non_seqcst_fallback = atomic_swap_non_seqcst;
    seqcst_fallback = atomic_swap_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_add_pwr8;
    non_seqcst_fallback = atomic_add_non_seqcst;
    seqcst_fallback = atomic_add_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_sub_pwr8;
    non_seqcst_fallback = atomic_sub_non_seqcst;
    seqcst_fallback = atomic_sub_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_and_pwr8;
    non_seqcst_fallback = atomic_and_non_seqcst;
    seqcst_fallback = atomic_and_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_nand_pwr8;
    non_seqcst_fallback = atomic_nand_non_seqcst;
    seqcst_fallback = atomic_nand_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_or_pwr8;
    non_seqcst_fallback = atomic_or_non_seqcst;
    seqcst_fallback = atomic_or_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_xor_pwr8;
    non_seqcst_fallback = atomic_xor_non_seqcst;
    seqcst_fallback = atomic_xor_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_max_pwr8;
    non_seqcst_fallback = atomic_max_non_seqcst;
    seqcst_fallback = atomic_max_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_umax_pwr8;
    non_seqcst_fallback = atomic_umax_non_seqcst;
    seqcst_fallback = atomic_umax_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_min_pwr8;
    non_seqcst_fallback = atomic_min_non_seqcst;
    seqcst_fallback = atomic_min_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128;
    pwr8 = atomic_umin_pwr8;
    non_seqcst_fallback = atomic_umin_non_seqcst;
    seqcst_fallback = atomic_umin_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_not(dst: *mut u128) -> u128;
    pwr8 = atomic_not_pwr8;
    non_seqcst_fallback = atomic_not_non_seqcst;
    seqcst_fallback = atomic_not_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_neg(dst: *mut u128) -> u128;
    pwr8 = atomic_neg_pwr8;
    non_seqcst_fallback = atomic_neg_non_seqcst;
    seqcst_fallback = atomic_neg_seqcst;
}

#[inline]
fn is_lock_free() -> bool {
    #[cfg(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    ))]
    {
        // lqarx and stqcx. instructions are statically available.
        true
    }
    #[cfg(not(any(
        target_feature = "quadword-atomics",
        portable_atomic_target_feature = "quadword-atomics",
    )))]
    {
        detect::detect().has_quadword_atomics()
    }
}
const IS_ALWAYS_LOCK_FREE: bool = cfg!(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
));

atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);

#[cfg(test)]
mod tests {
    use super::*;

    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    stress_test!(u128);
}