diff options
Diffstat (limited to 'vendor/portable-atomic/src/imp/atomic128/s390x.rs')
-rw-r--r-- | vendor/portable-atomic/src/imp/atomic128/s390x.rs | 461 |
1 files changed, 461 insertions, 0 deletions
diff --git a/vendor/portable-atomic/src/imp/atomic128/s390x.rs b/vendor/portable-atomic/src/imp/atomic128/s390x.rs new file mode 100644 index 0000000..37c2063 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/s390x.rs @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Atomic{I,U}128 implementation on s390x. +// +// s390x supports 128-bit atomic load/store/cmpxchg: +// https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc +// +// LLVM's minimal supported architecture level is z10: +// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/SystemZ/SystemZProcessors.td) +// This does not appear to have changed since the current s390x backend was added in LLVM 3.3: +// https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db +// +// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +// this module and use intrinsics.rs instead. +// +// Refs: +// - z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf +// - z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary +// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit +// +// Generated asm: +// - s390x https://godbolt.org/z/b11znnEh4 +// - s390x (z196) https://godbolt.org/z/s5n9PGcv6 +// - s390x (z15) https://godbolt.org/z/Wf49h7bPf + +include!("macros.rs"); + +use core::{arch::asm, sync::atomic::Ordering}; + +use crate::utils::{Pair, U128}; + +// Use distinct operands on z196 or later, otherwise split to lgr and $op. +#[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))] +macro_rules! distinct_op { + ($op:tt, $a0:tt, $a1:tt, $a2:tt) => { + concat!($op, "k ", $a0, ", ", $a1, ", ", $a2) + }; +} +#[cfg(not(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops")))] +macro_rules! distinct_op { + ($op:tt, $a0:tt, $a1:tt, $a2:tt) => { + concat!("lgr ", $a0, ", ", $a1, "\n", $op, " ", $a0, ", ", $a2) + }; +} + +// Use selgr$cond on z15 or later, otherwise split to locgr$cond and $op. +#[cfg(any( + target_feature = "miscellaneous-extensions-3", + portable_atomic_target_feature = "miscellaneous-extensions-3", +))] +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +macro_rules! select_op { + ($cond:tt, $a0:tt, $a1:tt, $a2:tt) => { + concat!("selgr", $cond, " ", $a0, ", ", $a1, ", ", $a2) + }; +} +#[cfg(not(any( + target_feature = "miscellaneous-extensions-3", + portable_atomic_target_feature = "miscellaneous-extensions-3", +)))] +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +macro_rules! select_op { + ($cond:tt, $a0:tt, $a1:tt, $a2:tt) => { + concat!("lgr ", $a0, ", ", $a2, "\n", "locgr", $cond, " ", $a0, ", ", $a1) + }; +} + +#[inline] +unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { + debug_assert!(src as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic load is always SeqCst. + let (out_hi, out_lo); + asm!( + "lpq %r0, 0({src})", + src = in(reg) ptr_reg!(src), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") out_hi, + out("r1") out_lo, + options(nostack, preserves_flags), + ); + U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole + } +} + +#[inline] +unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let val = U128 { whole: val }; + macro_rules! atomic_store { + ($fence:tt) => { + asm!( + "stpq %r0, 0({dst})", + $fence, + dst = in(reg) ptr_reg!(dst), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + in("r0") val.pair.hi, + in("r1") val.pair.lo, + options(nostack, preserves_flags), + ) + }; + } + match order { + // Relaxed and Release stores are equivalent. + Ordering::Relaxed | Ordering::Release => atomic_store!(""), + // bcr 14,0 (fast-BCR-serialization) requires z196 or later. + #[cfg(any( + target_feature = "fast-serialization", + portable_atomic_target_feature = "fast-serialization", + ))] + Ordering::SeqCst => atomic_store!("bcr 14, 0"), + #[cfg(not(any( + target_feature = "fast-serialization", + portable_atomic_target_feature = "fast-serialization", + )))] + Ordering::SeqCst => atomic_store!("bcr 15, 0"), + _ => unreachable!("{:?}", order), + } + } +} + +#[inline] +unsafe fn atomic_compare_exchange( + dst: *mut u128, + old: u128, + new: u128, + _success: Ordering, + _failure: Ordering, +) -> Result<u128, u128> { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + let prev = unsafe { + // atomic CAS is always SeqCst. + let old = U128 { whole: old }; + let new = U128 { whole: new }; + let (prev_hi, prev_lo); + asm!( + "cdsg %r0, %r12, 0({dst})", + dst = in(reg) ptr_reg!(dst), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + inout("r0") old.pair.hi => prev_hi, + inout("r1") old.pair.lo => prev_lo, + in("r12") new.pair.hi, + in("r13") new.pair.lo, + // Do not use `preserves_flags` because CDSG modifies the condition code. + options(nostack), + ); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + }; + if prev == old { + Ok(prev) + } else { + Err(prev) + } +} + +// cdsg is always strong. +use atomic_compare_exchange as atomic_compare_exchange_weak; + +#[cfg(not(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +)))] +#[inline(always)] +unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128 +where + F: FnMut(u128) -> u128, +{ + // SAFETY: the caller must uphold the safety contract. + unsafe { + // This is a private function and all instances of `f` only operate on the value + // loaded, so there is no need to synchronize the first load/failed CAS. + let mut prev = atomic_load(dst, Ordering::Relaxed); + loop { + let next = f(prev); + match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) { + Ok(x) => return x, + Err(x) => prev = x, + } + } + } +} + +#[inline] +unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + // + // We could use atomic_update here, but using an inline assembly allows omitting + // the comparison of results and the storing/comparing of condition flags. + // + // Do not use atomic_rmw_cas_3 because it needs extra LGR to implement swap. + unsafe { + // atomic swap is always SeqCst. + let val = U128 { whole: val }; + let (mut prev_hi, mut prev_lo); + asm!( + "lpq %r0, 0({dst})", + "2:", + "cdsg %r0, %r12, 0({dst})", + "jl 2b", + dst = in(reg) ptr_reg!(dst), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") prev_hi, + out("r1") prev_lo, + in("r12") val.pair.hi, + in("r13") val.pair.lo, + // Do not use `preserves_flags` because CDSG modifies the condition code. + options(nostack), + ); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + } +} + +/// Atomic RMW by CAS loop (3 arguments) +/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - val_hi/val_lo pair: val argument (read-only for `$op`) +/// - r0/r1 pair: previous value loaded (read-only for `$op`) +/// - r12/r13 pair: new value that will be stored +// We could use atomic_update here, but using an inline assembly allows omitting +// the comparison of results and the storing/comparing of condition flags. +macro_rules! atomic_rmw_cas_3 { + ($name:ident, [$($reg:tt)*], $($op:tt)*) => { + #[inline] + unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic RMW is always SeqCst. + let val = U128 { whole: val }; + let (mut prev_hi, mut prev_lo); + asm!( + "lpq %r0, 0({dst})", + "2:", + $($op)* + "cdsg %r0, %r12, 0({dst})", + "jl 2b", + dst = in(reg) ptr_reg!(dst), + val_hi = in(reg) val.pair.hi, + val_lo = in(reg) val.pair.lo, + $($reg)* + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") prev_hi, + out("r1") prev_lo, + out("r12") _, + out("r13") _, + // Do not use `preserves_flags` because CDSG modifies the condition code. + options(nostack), + ); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + } + } + }; +} +/// Atomic RMW by CAS loop (2 arguments) +/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - r0/r1 pair: previous value loaded (read-only for `$op`) +/// - r12/r13 pair: new value that will be stored +// We could use atomic_update here, but using an inline assembly allows omitting +// the comparison of results and the storing/comparing of condition flags. +macro_rules! atomic_rmw_cas_2 { + ($name:ident, $($op:tt)*) => { + #[inline] + unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic RMW is always SeqCst. + let (mut prev_hi, mut prev_lo); + asm!( + "lpq %r0, 0({dst})", + "2:", + $($op)* + "cdsg %r0, %r12, 0({dst})", + "jl 2b", + dst = in(reg) ptr_reg!(dst), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") prev_hi, + out("r1") prev_lo, + out("r12") _, + out("r13") _, + // Do not use `preserves_flags` because CDSG modifies the condition code. + options(nostack), + ); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + } + } + }; +} + +atomic_rmw_cas_3! { + atomic_add, [], + distinct_op!("algr", "%r13", "%r1", "{val_lo}"), + "lgr %r12, %r0", + "alcgr %r12, {val_hi}", +} +atomic_rmw_cas_3! { + atomic_sub, [], + distinct_op!("slgr", "%r13", "%r1", "{val_lo}"), + "lgr %r12, %r0", + "slbgr %r12, {val_hi}", +} +atomic_rmw_cas_3! { + atomic_and, [], + distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), + distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), +} + +// Use nngrk on z15 or later. +#[cfg(any( + target_feature = "miscellaneous-extensions-3", + portable_atomic_target_feature = "miscellaneous-extensions-3", +))] +atomic_rmw_cas_3! { + atomic_nand, [], + "nngrk %r13, %r1, {val_lo}", + "nngrk %r12, %r0, {val_hi}", +} +#[cfg(not(any( + target_feature = "miscellaneous-extensions-3", + portable_atomic_target_feature = "miscellaneous-extensions-3", +)))] +atomic_rmw_cas_3! { + atomic_nand, [], + distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), + "xihf %r13, 4294967295", + "xilf %r13, 4294967295", + distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), + "xihf %r12, 4294967295", + "xilf %r12, 4294967295", +} + +atomic_rmw_cas_3! { + atomic_or, [], + distinct_op!("ogr", "%r13", "%r1", "{val_lo}"), + distinct_op!("ogr", "%r12", "%r0", "{val_hi}"), +} +atomic_rmw_cas_3! { + atomic_xor, [], + distinct_op!("xgr", "%r13", "%r1", "{val_lo}"), + distinct_op!("xgr", "%r12", "%r0", "{val_hi}"), +} + +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +atomic_rmw_cas_3! { + atomic_max, [], + "clgr %r1, {val_lo}", + select_op!("h", "%r12", "%r1", "{val_lo}"), + "cgr %r0, {val_hi}", + select_op!("h", "%r13", "%r1", "{val_lo}"), + "locgre %r13, %r12", + select_op!("h", "%r12", "%r0", "{val_hi}"), +} +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +atomic_rmw_cas_3! { + atomic_umax, [tmp = out(reg) _,], + "clgr %r1, {val_lo}", + select_op!("h", "{tmp}", "%r1", "{val_lo}"), + "clgr %r0, {val_hi}", + select_op!("h", "%r12", "%r0", "{val_hi}"), + select_op!("h", "%r13", "%r1", "{val_lo}"), + "cgr %r0, {val_hi}", + "locgre %r13, {tmp}", +} +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +atomic_rmw_cas_3! { + atomic_min, [], + "clgr %r1, {val_lo}", + select_op!("l", "%r12", "%r1", "{val_lo}"), + "cgr %r0, {val_hi}", + select_op!("l", "%r13", "%r1", "{val_lo}"), + "locgre %r13, %r12", + select_op!("l", "%r12", "%r0", "{val_hi}"), +} +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +atomic_rmw_cas_3! { + atomic_umin, [tmp = out(reg) _,], + "clgr %r1, {val_lo}", + select_op!("l", "{tmp}", "%r1", "{val_lo}"), + "clgr %r0, {val_hi}", + select_op!("l", "%r12", "%r0", "{val_hi}"), + select_op!("l", "%r13", "%r1", "{val_lo}"), + "cgr %r0, {val_hi}", + "locgre %r13, {tmp}", +} +// We use atomic_update for atomic min/max on pre-z196 because +// z10 doesn't seem to have a good way to implement 128-bit min/max. +// loc{,g}r requires z196 or later. +// https://godbolt.org/z/j8KG9q5oq +#[cfg(not(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +)))] +atomic_rmw_by_atomic_update!(cmp); + +atomic_rmw_cas_2! { + atomic_not, + "lgr %r13, %r1", + "xihf %r13, 4294967295", + "xilf %r13, 4294967295", + "lgr %r12, %r0", + "xihf %r12, 4294967295", + "xilf %r12, 4294967295", +} +atomic_rmw_cas_2! { + atomic_neg, + "lghi %r13, 0", + "slgr %r13, %r1", + "lghi %r12, 0", + "slbgr %r12, %r0", +} + +#[inline] +const fn is_lock_free() -> bool { + IS_ALWAYS_LOCK_FREE +} +const IS_ALWAYS_LOCK_FREE: bool = true; + +atomic128!(AtomicI128, i128, atomic_max, atomic_min); +atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); + +#[cfg(test)] +mod tests { + use super::*; + + test_atomic_int!(i128); + test_atomic_int!(u128); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. + stress_test!(u128); +} |