summaryrefslogtreecommitdiff
path: root/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/portable-atomic/src/imp/atomic128/powerpc64.rs')
-rw-r--r--vendor/portable-atomic/src/imp/atomic128/powerpc64.rs947
1 files changed, 947 insertions, 0 deletions
diff --git a/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs b/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs
new file mode 100644
index 0000000..5edc147
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs
@@ -0,0 +1,947 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on PowerPC64.
+//
+// powerpc64 on pwr8+ supports 128-bit atomics:
+// https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+//
+// powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/PowerPC/PPC.td#L663
+// See also https://github.com/rust-lang/rust/issues/59932
+//
+// Note that we do not separate LL and SC into separate functions, but handle
+// them within a single asm block. This is because it is theoretically possible
+// for the compiler to insert operations that might clear the reservation between
+// LL and SC. See aarch64.rs for details.
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - Power ISA https://openpowerfoundation.org/specifications/isa
+// - AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - powerpc64 (pwr8) https://godbolt.org/z/nG5dGa38a
+// - powerpc64le https://godbolt.org/z/6c99s75e4
+
+include!("macros.rs");
+
+#[cfg(not(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+)))]
+#[path = "../fallback/outline_atomics.rs"]
+mod fallback;
+
+// On musl with static linking, it seems that getauxval is not always available.
+// See detect/auxv.rs for more.
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(test, portable_atomic_outline_atomics))] // TODO(powerpc64): currently disabled by default
+#[cfg(any(
+ test,
+ not(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+ )),
+))]
+#[cfg(any(
+ all(
+ target_os = "linux",
+ any(
+ target_env = "gnu",
+ all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
+ portable_atomic_outline_atomics,
+ ),
+ ),
+ target_os = "android",
+ target_os = "freebsd",
+))]
+#[path = "detect/auxv.rs"]
+mod detect;
+
+use core::{arch::asm, sync::atomic::Ordering};
+
+use crate::utils::{Pair, U128};
+
+macro_rules! debug_assert_pwr8 { // sanity check used at the top of every *_pwr8 function
+ () => {
+ #[cfg(not(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+ )))] // only emitted when quadword-atomics is not enabled at compile time
+ {
+ debug_assert!(detect::detect().has_quadword_atomics()); // run-time detection must agree
+ }
+ };
+}
+
+// Refs: https://www.ibm.com/docs/en/aix/7.3?topic=ops-machine-pseudo-op
+//
+// This is similar to #[target_feature(enable = "quadword-atomics")], except that there are
+// no compiler guarantees regarding (un)inlining, and the scope is within an asm
+// block rather than a function. We use this directive because #[target_feature(enable = "quadword-atomics")]
+// is not supported as of Rust 1.70-nightly.
+//
+// start_pwr8 and end_pwr8 must always be used as a pair within the same asm block.
+//
+// Note: If power8 instructions are not available at compile-time, we must guarantee that
+// the function that uses them is not inlined into a function where it is not
+// clear whether power8 instructions are available. Otherwise, (even if we checked whether
+// power8 instructions are available at run-time) optimizations that reorder its
+// instructions across the if condition might introduce undefined behavior.
+// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
+// However, our code uses the ifunc helper macro that works with function pointers,
+// so we don't have to worry about this unless calling without the helper macro.
+macro_rules! start_pwr8 {
+ () => {
+ ".machine push\n.machine power8" // save the current machine setting, then select power8
+ };
+}
+macro_rules! end_pwr8 {
+ () => {
+ ".machine pop" // restore the machine setting saved by start_pwr8
+ };
+}
+
+macro_rules! atomic_rmw { // maps a run-time Ordering to the barrier strings passed to $op
+ ($op:ident, $order:ident) => {
+ match $order { // $op!(<acquire: emitted after the operation>, <release: emitted before it>)
+ Ordering::Relaxed => $op!("", ""),
+ Ordering::Acquire => $op!("lwsync", ""),
+ Ordering::Release => $op!("", "lwsync"),
+ Ordering::AcqRel => $op!("lwsync", "lwsync"),
+ Ordering::SeqCst => $op!("lwsync", "sync"),
+ _ => unreachable!("{:?}", $order),
+ }
+ };
+}
+
+// Returns true if the EQ bit of cr0 (mask 0x20000000) is set in `r`, the value read by `mfcr`.
+#[inline]
+fn extract_cr0(r: u64) -> bool {
+ r & 0x20000000 != 0
+}
+
+#[cfg(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_load_pwr8 as atomic_load;
+#[cfg(not(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+)))]
+#[inline]
+unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
+ fn_alias! {
+ // inline(never) is just a hint and also not strictly necessary
+ // because we use the ifunc helper macro, but it is used for clarity.
+ #[inline(never)]
+ unsafe fn(src: *mut u128) -> u128;
+ atomic_load_pwr8_relaxed = atomic_load_pwr8(Ordering::Relaxed);
+ atomic_load_pwr8_acquire = atomic_load_pwr8(Ordering::Acquire);
+ atomic_load_pwr8_seqcst = atomic_load_pwr8(Ordering::SeqCst);
+ }
+ // SAFETY: the caller must uphold the safety contract.
+ // We only call atomic_load_pwr8 if quadword-atomics is available.
+ unsafe {
+ match order {
+ Ordering::Relaxed => {
+ ifunc!(unsafe fn(src: *mut u128) -> u128 {
+ if detect::detect().has_quadword_atomics() {
+ atomic_load_pwr8_relaxed
+ } else {
+ fallback::atomic_load_non_seqcst
+ }
+ })
+ }
+ Ordering::Acquire => {
+ ifunc!(unsafe fn(src: *mut u128) -> u128 {
+ if detect::detect().has_quadword_atomics() {
+ atomic_load_pwr8_acquire
+ } else {
+ fallback::atomic_load_non_seqcst
+ }
+ })
+ }
+ Ordering::SeqCst => {
+ ifunc!(unsafe fn(src: *mut u128) -> u128 {
+ if detect::detect().has_quadword_atomics() {
+ atomic_load_pwr8_seqcst
+ } else {
+ fallback::atomic_load_seqcst
+ }
+ })
+ }
+ _ => unreachable!("{:?}", order), // any other ordering (e.g. Release, AcqRel) panics here
+ }
+ }
+}
+#[inline]
+unsafe fn atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128 {
+ debug_assert!(src as usize % 16 == 0);
+ debug_assert_pwr8!();
+
+ // SAFETY: the caller must uphold the safety contract.
+ //
+ // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
+ unsafe {
+ let (out_hi, out_lo);
+ macro_rules! atomic_load_acquire {
+ ($release:tt) => { // $release: barrier instruction (or "") emitted before the load
+ asm!(
+ start_pwr8!(),
+ $release,
+ "lq %r4, 0({src})",
+ // Lightweight acquire sync
+ // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62
+ "cmpd %cr7, %r4, %r4",
+ "bne- %cr7, 2f",
+ "2:",
+ "isync",
+ end_pwr8!(),
+ src = in(reg_nonzero) ptr_reg!(src),
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+ out("r4") out_hi,
+ out("r5") out_lo,
+ out("cr7") _,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ match order {
+ Ordering::Relaxed => {
+ asm!(
+ start_pwr8!(),
+ "lq %r4, 0({src})",
+ end_pwr8!(),
+ src = in(reg_nonzero) ptr_reg!(src),
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+ out("r4") out_hi,
+ out("r5") out_lo,
+ options(nostack, preserves_flags, readonly),
+ );
+ }
+ Ordering::Acquire => atomic_load_acquire!(""),
+ Ordering::SeqCst => atomic_load_acquire!("sync"), // sync before the load, then the acquire sequence
+ _ => unreachable!("{:?}", order),
+ }
+ U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
+ }
+}
+
+#[cfg(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_store_pwr8 as atomic_store;
+#[cfg(not(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+)))]
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+ fn_alias! {
+ // inline(never) is just a hint and also not strictly necessary
+ // because we use the ifunc helper macro, but it is used for clarity.
+ #[inline(never)]
+ unsafe fn(dst: *mut u128, val: u128);
+ atomic_store_pwr8_relaxed = atomic_store_pwr8(Ordering::Relaxed);
+ atomic_store_pwr8_release = atomic_store_pwr8(Ordering::Release);
+ atomic_store_pwr8_seqcst = atomic_store_pwr8(Ordering::SeqCst);
+ }
+ // SAFETY: the caller must uphold the safety contract.
+ // We only call atomic_store_pwr8 if quadword-atomics is available.
+ unsafe {
+ match order {
+ Ordering::Relaxed => {
+ ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+ if detect::detect().has_quadword_atomics() {
+ atomic_store_pwr8_relaxed
+ } else {
+ fallback::atomic_store_non_seqcst
+ }
+ });
+ }
+ Ordering::Release => {
+ ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+ if detect::detect().has_quadword_atomics() {
+ atomic_store_pwr8_release
+ } else {
+ fallback::atomic_store_non_seqcst
+ }
+ });
+ }
+ Ordering::SeqCst => {
+ ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+ if detect::detect().has_quadword_atomics() {
+ atomic_store_pwr8_seqcst
+ } else {
+ fallback::atomic_store_seqcst
+ }
+ });
+ }
+ _ => unreachable!("{:?}", order), // any other ordering (e.g. Acquire, AcqRel) panics here
+ }
+ }
+}
+#[inline]
+unsafe fn atomic_store_pwr8(dst: *mut u128, val: u128, order: Ordering) {
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_pwr8!();
+
+ // SAFETY: the caller must uphold the safety contract.
+ //
+ // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
+ unsafe {
+ let val = U128 { whole: val };
+ macro_rules! atomic_store {
+ ($release:tt) => { // $release: barrier instruction (or "") emitted before the store
+ asm!(
+ start_pwr8!(),
+ $release,
+ "stq %r4, 0({dst})",
+ end_pwr8!(),
+ dst = in(reg_nonzero) ptr_reg!(dst),
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+ in("r4") val.pair.hi,
+ in("r5") val.pair.lo,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ match order {
+ Ordering::Relaxed => atomic_store!(""),
+ Ordering::Release => atomic_store!("lwsync"),
+ Ordering::SeqCst => atomic_store!("sync"),
+ _ => unreachable!("{:?}", order),
+ }
+ }
+}
+
+#[inline]
+unsafe fn atomic_compare_exchange(
+ dst: *mut u128,
+ old: u128,
+ new: u128,
+ success: Ordering,
+ failure: Ordering,
+) -> Result<u128, u128> {
+ let success = crate::utils::upgrade_success_ordering(success, failure); // NOTE(review): presumably makes `success` at least as strong as `failure` — confirm in utils
+
+ #[cfg(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+ ))]
+ // SAFETY: the caller must uphold the safety contract.
+ // cfg guarantees that quadword atomics instructions are available at compile-time.
+ let (prev, ok) = unsafe { atomic_compare_exchange_pwr8(dst, old, new, success) };
+ #[cfg(not(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+ )))]
+ // SAFETY: the caller must uphold the safety contract.
+ let (prev, ok) = unsafe { atomic_compare_exchange_ifunc(dst, old, new, success) };
+ if ok { // translate the (previous value, success flag) pair into a Result
+ Ok(prev)
+ } else {
+ Err(prev)
+ }
+}
+#[inline]
+unsafe fn atomic_compare_exchange_pwr8(
+ dst: *mut u128,
+ old: u128,
+ new: u128,
+ order: Ordering,
+) -> (u128, bool) {
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_pwr8!();
+
+ // SAFETY: the caller must uphold the safety contract.
+ //
+ // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
+ unsafe {
+ let old = U128 { whole: old };
+ let new = U128 { whole: new };
+ let (mut prev_hi, mut prev_lo);
+ let mut r;
+ macro_rules! cmpxchg {
+ ($acquire:tt, $release:tt) => {
+ asm!(
+ start_pwr8!(),
+ $release,
+ "2:", // retry target when stqcx. fails
+ "lqarx %r8, 0, {dst}",
+ "xor {tmp_lo}, %r9, {old_lo}",
+ "xor {tmp_hi}, %r8, {old_hi}",
+ "or. {tmp_lo}, {tmp_lo}, {tmp_hi}", // zero (EQ set in cr0) only if both halves match `old`
+ "bne %cr0, 3f", // jump if compare failed
+ "stqcx. %r6, 0, {dst}",
+ "bne %cr0, 2b", // continue loop if store failed
+ "3:",
+ // If the compare failed the EQ bit is cleared; if stqcx. succeeded the EQ bit is set.
+ "mfcr {tmp_lo}",
+ $acquire,
+ end_pwr8!(),
+ dst = in(reg_nonzero) ptr_reg!(dst),
+ old_hi = in(reg) old.pair.hi,
+ old_lo = in(reg) old.pair.lo,
+ tmp_hi = out(reg) _,
+ tmp_lo = out(reg) r,
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+ in("r6") new.pair.hi,
+ in("r7") new.pair.lo,
+ out("r8") prev_hi,
+ out("r9") prev_lo,
+ out("cr0") _,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(cmpxchg, order);
+ (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
+ }
+}
+
+// Always use strong CAS for outline-atomics.
+#[cfg(not(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+)))]
+use atomic_compare_exchange as atomic_compare_exchange_weak;
+#[cfg(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+))]
+#[inline]
+unsafe fn atomic_compare_exchange_weak(
+ dst: *mut u128,
+ old: u128,
+ new: u128,
+ success: Ordering,
+ failure: Ordering,
+) -> Result<u128, u128> {
+ let success = crate::utils::upgrade_success_ordering(success, failure); // NOTE(review): presumably makes `success` at least as strong as `failure` — confirm in utils
+
+ // SAFETY: the caller must uphold the safety contract.
+ // cfg guarantees that quadword atomics instructions are available at compile-time.
+ let (prev, ok) = unsafe { atomic_compare_exchange_weak_pwr8(dst, old, new, success) };
+ if ok { // translate the (previous value, success flag) pair into a Result
+ Ok(prev)
+ } else {
+ Err(prev)
+ }
+}
+#[cfg(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+))]
+#[inline]
+unsafe fn atomic_compare_exchange_weak_pwr8(
+ dst: *mut u128,
+ old: u128,
+ new: u128,
+ order: Ordering,
+) -> (u128, bool) {
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_pwr8!();
+
+ // SAFETY: the caller must uphold the safety contract.
+ //
+ // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
+ unsafe {
+ let old = U128 { whole: old };
+ let new = U128 { whole: new };
+ let (mut prev_hi, mut prev_lo);
+ let mut r;
+ macro_rules! cmpxchg_weak {
+ ($acquire:tt, $release:tt) => {
+ asm!(
+ start_pwr8!(),
+ $release,
+ "lqarx %r8, 0, {dst}",
+ "xor {tmp_lo}, %r9, {old_lo}",
+ "xor {tmp_hi}, %r8, {old_hi}",
+ "or. {tmp_lo}, {tmp_lo}, {tmp_hi}", // zero (EQ set in cr0) only if both halves match `old`
+ "bne %cr0, 3f", // jump if compare failed
+ "stqcx. %r6, 0, {dst}", // single attempt: no retry loop in the weak variant
+ "3:",
+ // If the compare or stqcx. failed the EQ bit is cleared; if stqcx. succeeded the EQ bit is set.
+ "mfcr {tmp_lo}",
+ $acquire,
+ end_pwr8!(),
+ dst = in(reg_nonzero) ptr_reg!(dst),
+ old_hi = in(reg) old.pair.hi,
+ old_lo = in(reg) old.pair.lo,
+ tmp_hi = out(reg) _,
+ tmp_lo = out(reg) r,
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+ in("r6") new.pair.hi,
+ in("r7") new.pair.lo,
+ out("r8") prev_hi,
+ out("r9") prev_lo,
+ out("cr0") _,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(cmpxchg_weak, order);
+ (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
+ }
+}
+
+#[cfg(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_swap_pwr8 as atomic_swap;
+// Do not use atomic_rmw_ll_sc_3 because it would need an extra MR (register move) to implement swap.
+#[inline]
+unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_pwr8!();
+
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ let val = U128 { whole: val };
+ let (mut prev_hi, mut prev_lo);
+ macro_rules! swap {
+ ($acquire:tt, $release:tt) => {
+ asm!(
+ start_pwr8!(),
+ $release,
+ "2:", // retry target when stqcx. fails
+ "lqarx %r6, 0, {dst}",
+ "stqcx. %r8, 0, {dst}",
+ "bne %cr0, 2b",
+ $acquire,
+ end_pwr8!(),
+ dst = in(reg_nonzero) ptr_reg!(dst),
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+ out("r6") prev_hi,
+ out("r7") prev_lo,
+ in("r8") val.pair.hi,
+ in("r9") val.pair.lo,
+ out("cr0") _,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(swap, order);
+ U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+ }
+}
+
+/// Defines an atomic RMW function implemented as an LL/SC (lqarx/stqcx.) loop (3 arguments):
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+///
+/// `$op` is the instruction sequence placed between the LL and the SC; it can use these registers:
+/// - val_hi/val_lo pair: val argument (read-only for `$op`)
+/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`); also the function's return value
+/// - r8/r9 pair: new value that will be stored by sc (`$op` must write it)
+macro_rules! atomic_rmw_ll_sc_3 {
+ ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => {
+ #[cfg(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+ ))]
+ use $name as $reexport_name;
+ #[inline]
+ unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_pwr8!();
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ let val = U128 { whole: val };
+ let (mut prev_hi, mut prev_lo);
+ macro_rules! op {
+ ($acquire:tt, $release:tt) => {
+ asm!(
+ start_pwr8!(),
+ $release,
+ "2:",
+ "lqarx %r6, 0, {dst}",
+ $($op)*
+ "stqcx. %r8, 0, {dst}",
+ "bne %cr0, 2b",
+ $acquire,
+ end_pwr8!(),
+ dst = in(reg_nonzero) ptr_reg!(dst),
+ val_hi = in(reg) val.pair.hi,
+ val_lo = in(reg) val.pair.lo,
+ $($reg)*
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+ out("r6") prev_hi,
+ out("r7") prev_lo,
+ out("r8") _, // new (hi)
+ out("r9") _, // new (lo)
+ out("cr0") _,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(op, order);
+ U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+ }
+ }
+ };
+}
+/// Defines an atomic RMW function implemented as an LL/SC (lqarx/stqcx.) loop (2 arguments):
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+///
+/// `$op` is the instruction sequence placed between the LL and the SC; it can use these registers:
+/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`); also the function's return value
+/// - r8/r9 pair: new value that will be stored by sc (`$op` must write it)
+macro_rules! atomic_rmw_ll_sc_2 {
+ ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => {
+ #[cfg(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+ ))]
+ use $name as $reexport_name;
+ #[inline]
+ unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ debug_assert_pwr8!();
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ let (mut prev_hi, mut prev_lo);
+ macro_rules! op {
+ ($acquire:tt, $release:tt) => {
+ asm!(
+ start_pwr8!(),
+ $release,
+ "2:",
+ "lqarx %r6, 0, {dst}",
+ $($op)*
+ "stqcx. %r8, 0, {dst}",
+ "bne %cr0, 2b",
+ $acquire,
+ end_pwr8!(),
+ dst = in(reg_nonzero) ptr_reg!(dst),
+ $($reg)*
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+ out("r6") prev_hi,
+ out("r7") prev_lo,
+ out("r8") _, // new (hi)
+ out("r9") _, // new (lo)
+ out("cr0") _,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ atomic_rmw!(op, order);
+ U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+ }
+ }
+ };
+}
+
+atomic_rmw_ll_sc_3! {
+ atomic_add_pwr8 as atomic_add, [out("xer") _,], // xer is clobbered because addc/adde use the carry bit
+ "addc %r9, {val_lo}, %r7", // new lo = val lo + prev lo, recording the carry
+ "adde %r8, {val_hi}, %r6", // new hi = val hi + prev hi + carry
+}
+atomic_rmw_ll_sc_3! {
+ atomic_sub_pwr8 as atomic_sub, [out("xer") _,], // xer is clobbered because subc/subfe use the carry bit
+ "subc %r9, %r7, {val_lo}", // new lo = prev lo - val lo, recording the carry (borrow)
+ "subfe %r8, {val_hi}, %r6", // new hi = prev hi - val hi, taking the borrow into account
+}
+atomic_rmw_ll_sc_3! {
+ atomic_and_pwr8 as atomic_and, [], // bitwise ops work on each 64-bit half independently
+ "and %r9, {val_lo}, %r7",
+ "and %r8, {val_hi}, %r6",
+}
+atomic_rmw_ll_sc_3! {
+ atomic_nand_pwr8 as atomic_nand, [], // new = !(val & prev), half by half
+ "nand %r9, {val_lo}, %r7",
+ "nand %r8, {val_hi}, %r6",
+}
+atomic_rmw_ll_sc_3! {
+ atomic_or_pwr8 as atomic_or, [], // new = val | prev, half by half
+ "or %r9, {val_lo}, %r7",
+ "or %r8, {val_hi}, %r6",
+}
+atomic_rmw_ll_sc_3! {
+ atomic_xor_pwr8 as atomic_xor, [], // new = val ^ prev, half by half
+ "xor %r9, {val_lo}, %r7",
+ "xor %r8, {val_hi}, %r6",
+}
+atomic_rmw_ll_sc_3! {
+ atomic_max_pwr8 as atomic_max, [out("cr1") _,], // signed max: hi halves compare signed, lo halves unsigned
+ "cmpld %r7, {val_lo}", // (unsigned) compare lo 64-bit, store result to cr0
+ "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
+ "cmpd %cr1, %r6, {val_hi}", // (signed) compare hi 64-bit, store result to cr1
+ "isel %r8, %r7, {val_lo}, 5", // select lo 64-bit based on GT bit in cr1
+ "cmpld %r6, {val_hi}", // (unsigned) compare hi 64-bit, store result to cr0
+ "iseleq %r9, %r9, %r8", // if hi halves are equal keep the lo picked by the lo compare, else the lo picked by the hi compare
+ "isel %r8, %r6, {val_hi}, 5", // select hi 64-bit based on GT bit in cr1
+}
+atomic_rmw_ll_sc_3! {
+ atomic_umax_pwr8 as atomic_umax, [], // unsigned max: both halves compare unsigned
+ "cmpld %r7, {val_lo}", // compare lo 64-bit, store result to cr0
+ "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
+ "cmpld %r6, {val_hi}", // compare hi 64-bit, store result to cr0
+ "iselgt %r8, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
+ "iseleq %r9, %r9, %r8", // if hi halves are equal keep the lo picked by the lo compare, else the lo picked by the hi compare
+ "iselgt %r8, %r6, {val_hi}", // select hi 64-bit based on GT bit in cr0
+}
+atomic_rmw_ll_sc_3! {
+ atomic_min_pwr8 as atomic_min, [out("cr1") _,], // signed min: hi halves compare signed, lo halves unsigned
+ "cmpld %r7, {val_lo}", // (unsigned) compare lo 64-bit, store result to cr0
+ "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
+ "cmpd %cr1, %r6, {val_hi}", // (signed) compare hi 64-bit, store result to cr1
+ "isel %r8, %r7, {val_lo}, 4", // select lo 64-bit based on LT bit in cr1
+ "cmpld %r6, {val_hi}", // (unsigned) compare hi 64-bit, store result to cr0
+ "iseleq %r9, %r9, %r8", // if hi halves are equal keep the lo picked by the lo compare, else the lo picked by the hi compare
+ "isel %r8, %r6, {val_hi}, 4", // select hi 64-bit based on LT bit in cr1
+}
+atomic_rmw_ll_sc_3! {
+ atomic_umin_pwr8 as atomic_umin, [], // unsigned min: both halves compare unsigned
+ "cmpld %r7, {val_lo}", // compare lo 64-bit, store result to cr0
+ "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
+ "cmpld %r6, {val_hi}", // compare hi 64-bit, store result to cr0
+ "isellt %r8, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
+ "iseleq %r9, %r9, %r8", // if hi halves are equal keep the lo picked by the lo compare, else the lo picked by the hi compare
+ "isellt %r8, %r6, {val_hi}", // select hi 64-bit based on LT bit in cr0
+}
+
+#[cfg(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_not_pwr8 as atomic_not;
+#[inline]
+unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 {
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe { atomic_xor_pwr8(dst, !0, order) } // bitwise NOT is XOR with all ones
+}
+
+#[cfg(portable_atomic_llvm_16)]
+atomic_rmw_ll_sc_2! {
+ atomic_neg_pwr8 as atomic_neg, [out("xer") _,],
+ "subfic %r9, %r7, 0", // new lo = 0 - prev lo, recording the carry
+ "subfze %r8, %r6", // new hi = !prev hi + carry
+}
+// LLVM 15 miscompiles subfic, so subtract from a register holding zero instead.
+#[cfg(not(portable_atomic_llvm_16))]
+atomic_rmw_ll_sc_2! {
+ atomic_neg_pwr8 as atomic_neg, [zero = in(reg) 0_u64, out("xer") _,],
+ "subc %r9, {zero}, %r7", // new lo = 0 - prev lo, recording the carry
+ "subfze %r8, %r6", // new hi = !prev hi + carry
+}
+
+macro_rules! atomic_rmw_with_ifunc { // defines $name, which dispatches to $pwr8_fn or a fallback based on run-time detection
+ (
+ unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
+ pwr8 = $pwr8_fn:ident;
+ non_seqcst_fallback = $non_seqcst_fallback_fn:ident;
+ seqcst_fallback = $seqcst_fallback_fn:ident;
+ ) => {
+ #[cfg(not(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+ )))]
+ #[inline]
+ unsafe fn $name($($arg)*, order: Ordering) $(-> $ret_ty)? {
+ fn_alias! {
+ // inline(never) is just a hint and also not strictly necessary
+ // because we use the ifunc helper macro, but it is used for clarity.
+ #[inline(never)]
+ unsafe fn($($arg)*) $(-> $ret_ty)?;
+ pwr8_relaxed_fn = $pwr8_fn(Ordering::Relaxed);
+ pwr8_acquire_fn = $pwr8_fn(Ordering::Acquire);
+ pwr8_release_fn = $pwr8_fn(Ordering::Release);
+ pwr8_acqrel_fn = $pwr8_fn(Ordering::AcqRel);
+ pwr8_seqcst_fn = $pwr8_fn(Ordering::SeqCst);
+ }
+ // SAFETY: the caller must uphold the safety contract.
+ // We only call pwr8_fn if quadword-atomics is available.
+ unsafe {
+ match order {
+ Ordering::Relaxed => {
+ ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+ if detect::detect().has_quadword_atomics() {
+ pwr8_relaxed_fn
+ } else {
+ fallback::$non_seqcst_fallback_fn
+ }
+ })
+ }
+ Ordering::Acquire => {
+ ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+ if detect::detect().has_quadword_atomics() {
+ pwr8_acquire_fn
+ } else {
+ fallback::$non_seqcst_fallback_fn
+ }
+ })
+ }
+ Ordering::Release => {
+ ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+ if detect::detect().has_quadword_atomics() {
+ pwr8_release_fn
+ } else {
+ fallback::$non_seqcst_fallback_fn
+ }
+ })
+ }
+ Ordering::AcqRel => {
+ ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+ if detect::detect().has_quadword_atomics() {
+ pwr8_acqrel_fn
+ } else {
+ fallback::$non_seqcst_fallback_fn
+ }
+ })
+ }
+ Ordering::SeqCst => {
+ ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+ if detect::detect().has_quadword_atomics() {
+ pwr8_seqcst_fn
+ } else {
+ fallback::$seqcst_fallback_fn
+ }
+ })
+ }
+ _ => unreachable!("{:?}", order),
+ }
+ }
+ }
+ };
+}
+
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_compare_exchange_ifunc(dst: *mut u128, old: u128, new: u128) -> (u128, bool);
+ pwr8 = atomic_compare_exchange_pwr8;
+ non_seqcst_fallback = atomic_compare_exchange_non_seqcst;
+ seqcst_fallback = atomic_compare_exchange_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_swap_pwr8;
+ non_seqcst_fallback = atomic_swap_non_seqcst;
+ seqcst_fallback = atomic_swap_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_add_pwr8;
+ non_seqcst_fallback = atomic_add_non_seqcst;
+ seqcst_fallback = atomic_add_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_sub_pwr8;
+ non_seqcst_fallback = atomic_sub_non_seqcst;
+ seqcst_fallback = atomic_sub_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_and_pwr8;
+ non_seqcst_fallback = atomic_and_non_seqcst;
+ seqcst_fallback = atomic_and_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_nand_pwr8;
+ non_seqcst_fallback = atomic_nand_non_seqcst;
+ seqcst_fallback = atomic_nand_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_or_pwr8;
+ non_seqcst_fallback = atomic_or_non_seqcst;
+ seqcst_fallback = atomic_or_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_xor_pwr8;
+ non_seqcst_fallback = atomic_xor_non_seqcst;
+ seqcst_fallback = atomic_xor_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_max_pwr8;
+ non_seqcst_fallback = atomic_max_non_seqcst;
+ seqcst_fallback = atomic_max_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_umax_pwr8;
+ non_seqcst_fallback = atomic_umax_non_seqcst;
+ seqcst_fallback = atomic_umax_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_min_pwr8;
+ non_seqcst_fallback = atomic_min_non_seqcst;
+ seqcst_fallback = atomic_min_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128;
+ pwr8 = atomic_umin_pwr8;
+ non_seqcst_fallback = atomic_umin_non_seqcst;
+ seqcst_fallback = atomic_umin_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_not(dst: *mut u128) -> u128;
+ pwr8 = atomic_not_pwr8;
+ non_seqcst_fallback = atomic_not_non_seqcst;
+ seqcst_fallback = atomic_not_seqcst;
+}
+atomic_rmw_with_ifunc! {
+ unsafe fn atomic_neg(dst: *mut u128) -> u128;
+ pwr8 = atomic_neg_pwr8;
+ non_seqcst_fallback = atomic_neg_non_seqcst;
+ seqcst_fallback = atomic_neg_seqcst;
+}
+
+#[inline]
+fn is_lock_free() -> bool {
+ #[cfg(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+ ))]
+ {
+ // The lqarx and stqcx. instructions are statically available.
+ true
+ }
+ #[cfg(not(any(
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+ )))]
+ {
+ detect::detect().has_quadword_atomics() // otherwise the answer comes from run-time detection
+ }
+}
+const IS_ALWAYS_LOCK_FREE: bool = cfg!(any( // true only when quadword-atomics is enabled at compile time
+ target_feature = "quadword-atomics",
+ portable_atomic_target_feature = "quadword-atomics",
+));
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min);
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ test_atomic_int!(i128);
+ test_atomic_int!(u128);
+
+ // The load/store/swap implementation is not affected by signedness, so
+ // stress-testing only the unsigned type is sufficient.
+ stress_test!(u128);
+}