summaryrefslogtreecommitdiff
path: root/vendor/portable-atomic/src/imp/atomic128/s390x.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/portable-atomic/src/imp/atomic128/s390x.rs')
-rw-r--r--vendor/portable-atomic/src/imp/atomic128/s390x.rs461
1 files changed, 461 insertions, 0 deletions
diff --git a/vendor/portable-atomic/src/imp/atomic128/s390x.rs b/vendor/portable-atomic/src/imp/atomic128/s390x.rs
new file mode 100644
index 0000000..37c2063
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/s390x.rs
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on s390x.
+//
+// s390x supports 128-bit atomic load/store/cmpxchg:
+// https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc
+//
+// LLVM's minimal supported architecture level is z10:
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/SystemZ/SystemZProcessors.td)
+// This does not appear to have changed since the current s390x backend was added in LLVM 3.3:
+// https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf
+// - z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - s390x https://godbolt.org/z/b11znnEh4
+// - s390x (z196) https://godbolt.org/z/s5n9PGcv6
+// - s390x (z15) https://godbolt.org/z/Wf49h7bPf
+
+include!("macros.rs");
+
+use core::{arch::asm, sync::atomic::Ordering};
+
+use crate::utils::{Pair, U128};
+
+// Use distinct operands on z196 or later, otherwise split to lgr and $op.
+#[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))]
+macro_rules! distinct_op {
+ ($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
+ concat!($op, "k ", $a0, ", ", $a1, ", ", $a2)
+ };
+}
+#[cfg(not(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops")))]
+macro_rules! distinct_op {
+ ($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
+ concat!("lgr ", $a0, ", ", $a1, "\n", $op, " ", $a0, ", ", $a2)
+ };
+}
+
+// Use selgr$cond on z15 or later, otherwise split to locgr$cond and $op.
+#[cfg(any(
+ target_feature = "miscellaneous-extensions-3",
+ portable_atomic_target_feature = "miscellaneous-extensions-3",
+))]
+#[cfg(any(
+ target_feature = "load-store-on-cond",
+ portable_atomic_target_feature = "load-store-on-cond",
+))]
+macro_rules! select_op {
+ ($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
+ concat!("selgr", $cond, " ", $a0, ", ", $a1, ", ", $a2)
+ };
+}
+#[cfg(not(any(
+ target_feature = "miscellaneous-extensions-3",
+ portable_atomic_target_feature = "miscellaneous-extensions-3",
+)))]
+#[cfg(any(
+ target_feature = "load-store-on-cond",
+ portable_atomic_target_feature = "load-store-on-cond",
+))]
+macro_rules! select_op {
+ ($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
+ concat!("lgr ", $a0, ", ", $a2, "\n", "locgr", $cond, " ", $a0, ", ", $a1)
+ };
+}
+
+#[inline]
+unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 {
+ debug_assert!(src as usize % 16 == 0);
+
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ // atomic load is always SeqCst.
+ let (out_hi, out_lo);
+ asm!(
+ "lpq %r0, 0({src})",
+ src = in(reg) ptr_reg!(src),
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ out("r0") out_hi,
+ out("r1") out_lo,
+ options(nostack, preserves_flags),
+ );
+ U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
+ }
+}
+
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+ debug_assert!(dst as usize % 16 == 0);
+
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ let val = U128 { whole: val };
+ macro_rules! atomic_store {
+ ($fence:tt) => {
+ asm!(
+ "stpq %r0, 0({dst})",
+ $fence,
+ dst = in(reg) ptr_reg!(dst),
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ in("r0") val.pair.hi,
+ in("r1") val.pair.lo,
+ options(nostack, preserves_flags),
+ )
+ };
+ }
+ match order {
+ // Relaxed and Release stores are equivalent.
+ Ordering::Relaxed | Ordering::Release => atomic_store!(""),
+ // bcr 14,0 (fast-BCR-serialization) requires z196 or later.
+ #[cfg(any(
+ target_feature = "fast-serialization",
+ portable_atomic_target_feature = "fast-serialization",
+ ))]
+ Ordering::SeqCst => atomic_store!("bcr 14, 0"),
+ #[cfg(not(any(
+ target_feature = "fast-serialization",
+ portable_atomic_target_feature = "fast-serialization",
+ )))]
+ Ordering::SeqCst => atomic_store!("bcr 15, 0"),
+ _ => unreachable!("{:?}", order),
+ }
+ }
+}
+
+#[inline]
+unsafe fn atomic_compare_exchange(
+ dst: *mut u128,
+ old: u128,
+ new: u128,
+ _success: Ordering,
+ _failure: Ordering,
+) -> Result<u128, u128> {
+ debug_assert!(dst as usize % 16 == 0);
+
+ // SAFETY: the caller must uphold the safety contract.
+ let prev = unsafe {
+ // atomic CAS is always SeqCst.
+ let old = U128 { whole: old };
+ let new = U128 { whole: new };
+ let (prev_hi, prev_lo);
+ asm!(
+ "cdsg %r0, %r12, 0({dst})",
+ dst = in(reg) ptr_reg!(dst),
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ inout("r0") old.pair.hi => prev_hi,
+ inout("r1") old.pair.lo => prev_lo,
+ in("r12") new.pair.hi,
+ in("r13") new.pair.lo,
+ // Do not use `preserves_flags` because CDSG modifies the condition code.
+ options(nostack),
+ );
+ U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+ };
+ if prev == old {
+ Ok(prev)
+ } else {
+ Err(prev)
+ }
+}
+
+// cdsg is always strong.
+use atomic_compare_exchange as atomic_compare_exchange_weak;
+
+#[cfg(not(any(
+ target_feature = "load-store-on-cond",
+ portable_atomic_target_feature = "load-store-on-cond",
+)))]
+#[inline(always)]
+unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128
+where
+ F: FnMut(u128) -> u128,
+{
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ // This is a private function and all instances of `f` only operate on the value
+ // loaded, so there is no need to synchronize the first load/failed CAS.
+ let mut prev = atomic_load(dst, Ordering::Relaxed);
+ loop {
+ let next = f(prev);
+ match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) {
+ Ok(x) => return x,
+ Err(x) => prev = x,
+ }
+ }
+ }
+}
+
+#[inline]
+unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+
+ // SAFETY: the caller must uphold the safety contract.
+ //
+ // We could use atomic_update here, but using an inline assembly allows omitting
+ // the comparison of results and the storing/comparing of condition flags.
+ //
+ // Do not use atomic_rmw_cas_3 because it needs extra LGR to implement swap.
+ unsafe {
+ // atomic swap is always SeqCst.
+ let val = U128 { whole: val };
+ let (mut prev_hi, mut prev_lo);
+ asm!(
+ "lpq %r0, 0({dst})",
+ "2:",
+ "cdsg %r0, %r12, 0({dst})",
+ "jl 2b",
+ dst = in(reg) ptr_reg!(dst),
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ out("r0") prev_hi,
+ out("r1") prev_lo,
+ in("r12") val.pair.hi,
+ in("r13") val.pair.lo,
+ // Do not use `preserves_flags` because CDSG modifies the condition code.
+ options(nostack),
+ );
+ U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+ }
+}
+
+/// Atomic RMW by CAS loop (3 arguments)
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+///
+/// `$op` can use the following registers:
+/// - val_hi/val_lo pair: val argument (read-only for `$op`)
+/// - r0/r1 pair: previous value loaded (read-only for `$op`)
+/// - r12/r13 pair: new value that will be stored
+// We could use atomic_update here, but using an inline assembly allows omitting
+// the comparison of results and the storing/comparing of condition flags.
+macro_rules! atomic_rmw_cas_3 {
+ ($name:ident, [$($reg:tt)*], $($op:tt)*) => {
+ #[inline]
+ unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ // atomic RMW is always SeqCst.
+ let val = U128 { whole: val };
+ let (mut prev_hi, mut prev_lo);
+ asm!(
+ "lpq %r0, 0({dst})",
+ "2:",
+ $($op)*
+ "cdsg %r0, %r12, 0({dst})",
+ "jl 2b",
+ dst = in(reg) ptr_reg!(dst),
+ val_hi = in(reg) val.pair.hi,
+ val_lo = in(reg) val.pair.lo,
+ $($reg)*
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ out("r0") prev_hi,
+ out("r1") prev_lo,
+ out("r12") _,
+ out("r13") _,
+ // Do not use `preserves_flags` because CDSG modifies the condition code.
+ options(nostack),
+ );
+ U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+ }
+ }
+ };
+}
+/// Atomic RMW by CAS loop (2 arguments)
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+///
+/// `$op` can use the following registers:
+/// - r0/r1 pair: previous value loaded (read-only for `$op`)
+/// - r12/r13 pair: new value that will be stored
+// We could use atomic_update here, but using an inline assembly allows omitting
+// the comparison of results and the storing/comparing of condition flags.
+macro_rules! atomic_rmw_cas_2 {
+ ($name:ident, $($op:tt)*) => {
+ #[inline]
+ unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 {
+ debug_assert!(dst as usize % 16 == 0);
+ // SAFETY: the caller must uphold the safety contract.
+ unsafe {
+ // atomic RMW is always SeqCst.
+ let (mut prev_hi, mut prev_lo);
+ asm!(
+ "lpq %r0, 0({dst})",
+ "2:",
+ $($op)*
+ "cdsg %r0, %r12, 0({dst})",
+ "jl 2b",
+ dst = in(reg) ptr_reg!(dst),
+ // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+ out("r0") prev_hi,
+ out("r1") prev_lo,
+ out("r12") _,
+ out("r13") _,
+ // Do not use `preserves_flags` because CDSG modifies the condition code.
+ options(nostack),
+ );
+ U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+ }
+ }
+ };
+}
+
+atomic_rmw_cas_3! {
+ atomic_add, [],
+ distinct_op!("algr", "%r13", "%r1", "{val_lo}"),
+ "lgr %r12, %r0",
+ "alcgr %r12, {val_hi}",
+}
+atomic_rmw_cas_3! {
+ atomic_sub, [],
+ distinct_op!("slgr", "%r13", "%r1", "{val_lo}"),
+ "lgr %r12, %r0",
+ "slbgr %r12, {val_hi}",
+}
+atomic_rmw_cas_3! {
+ atomic_and, [],
+ distinct_op!("ngr", "%r13", "%r1", "{val_lo}"),
+ distinct_op!("ngr", "%r12", "%r0", "{val_hi}"),
+}
+
+// Use nngrk on z15 or later.
+#[cfg(any(
+ target_feature = "miscellaneous-extensions-3",
+ portable_atomic_target_feature = "miscellaneous-extensions-3",
+))]
+atomic_rmw_cas_3! {
+ atomic_nand, [],
+ "nngrk %r13, %r1, {val_lo}",
+ "nngrk %r12, %r0, {val_hi}",
+}
+#[cfg(not(any(
+ target_feature = "miscellaneous-extensions-3",
+ portable_atomic_target_feature = "miscellaneous-extensions-3",
+)))]
+atomic_rmw_cas_3! {
+ atomic_nand, [],
+ distinct_op!("ngr", "%r13", "%r1", "{val_lo}"),
+ "xihf %r13, 4294967295",
+ "xilf %r13, 4294967295",
+ distinct_op!("ngr", "%r12", "%r0", "{val_hi}"),
+ "xihf %r12, 4294967295",
+ "xilf %r12, 4294967295",
+}
+
+atomic_rmw_cas_3! {
+ atomic_or, [],
+ distinct_op!("ogr", "%r13", "%r1", "{val_lo}"),
+ distinct_op!("ogr", "%r12", "%r0", "{val_hi}"),
+}
+atomic_rmw_cas_3! {
+ atomic_xor, [],
+ distinct_op!("xgr", "%r13", "%r1", "{val_lo}"),
+ distinct_op!("xgr", "%r12", "%r0", "{val_hi}"),
+}
+
+#[cfg(any(
+ target_feature = "load-store-on-cond",
+ portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+ atomic_max, [],
+ "clgr %r1, {val_lo}",
+ select_op!("h", "%r12", "%r1", "{val_lo}"),
+ "cgr %r0, {val_hi}",
+ select_op!("h", "%r13", "%r1", "{val_lo}"),
+ "locgre %r13, %r12",
+ select_op!("h", "%r12", "%r0", "{val_hi}"),
+}
+#[cfg(any(
+ target_feature = "load-store-on-cond",
+ portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+ atomic_umax, [tmp = out(reg) _,],
+ "clgr %r1, {val_lo}",
+ select_op!("h", "{tmp}", "%r1", "{val_lo}"),
+ "clgr %r0, {val_hi}",
+ select_op!("h", "%r12", "%r0", "{val_hi}"),
+ select_op!("h", "%r13", "%r1", "{val_lo}"),
+ "cgr %r0, {val_hi}",
+ "locgre %r13, {tmp}",
+}
+#[cfg(any(
+ target_feature = "load-store-on-cond",
+ portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+ atomic_min, [],
+ "clgr %r1, {val_lo}",
+ select_op!("l", "%r12", "%r1", "{val_lo}"),
+ "cgr %r0, {val_hi}",
+ select_op!("l", "%r13", "%r1", "{val_lo}"),
+ "locgre %r13, %r12",
+ select_op!("l", "%r12", "%r0", "{val_hi}"),
+}
+#[cfg(any(
+ target_feature = "load-store-on-cond",
+ portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+ atomic_umin, [tmp = out(reg) _,],
+ "clgr %r1, {val_lo}",
+ select_op!("l", "{tmp}", "%r1", "{val_lo}"),
+ "clgr %r0, {val_hi}",
+ select_op!("l", "%r12", "%r0", "{val_hi}"),
+ select_op!("l", "%r13", "%r1", "{val_lo}"),
+ "cgr %r0, {val_hi}",
+ "locgre %r13, {tmp}",
+}
+// We use atomic_update for atomic min/max on pre-z196 because
+// z10 doesn't seem to have a good way to implement 128-bit min/max.
+// loc{,g}r requires z196 or later.
+// https://godbolt.org/z/j8KG9q5oq
+#[cfg(not(any(
+ target_feature = "load-store-on-cond",
+ portable_atomic_target_feature = "load-store-on-cond",
+)))]
+atomic_rmw_by_atomic_update!(cmp);
+
+atomic_rmw_cas_2! {
+ atomic_not,
+ "lgr %r13, %r1",
+ "xihf %r13, 4294967295",
+ "xilf %r13, 4294967295",
+ "lgr %r12, %r0",
+ "xihf %r12, 4294967295",
+ "xilf %r12, 4294967295",
+}
+atomic_rmw_cas_2! {
+ atomic_neg,
+ "lghi %r13, 0",
+ "slgr %r13, %r1",
+ "lghi %r12, 0",
+ "slbgr %r12, %r0",
+}
+
+#[inline]
+const fn is_lock_free() -> bool {
+ IS_ALWAYS_LOCK_FREE
+}
+const IS_ALWAYS_LOCK_FREE: bool = true;
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min);
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ test_atomic_int!(i128);
+ test_atomic_int!(u128);
+
+ // load/store/swap implementation is not affected by signedness, so it is
+ // enough to test only unsigned types.
+ stress_test!(u128);
+}