Diffstat (limited to 'vendor/portable-atomic/src/imp/atomic128/x86_64.rs')
-rw-r--r-- | vendor/portable-atomic/src/imp/atomic128/x86_64.rs | 854 |
1 file changed, 854 insertions, 0 deletions
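For orientation: the vendored file added below implements portable-atomic's 128-bit atomics (AtomicI128/AtomicU128) on x86_64, using lock cmpxchg16b for compare-and-swap and read-modify-write operations and, on CPUs with AVX, vmovdqa for plain loads and stores. A minimal sketch of the public API this backend serves, assuming the portable-atomic crate as a dependency (the values are illustrative):

use portable_atomic::{AtomicU128, Ordering};

fn main() {
    let a = AtomicU128::new(1u128 << 100);
    // Store: vmovdqa when AVX is detected, otherwise a cmpxchg16b-based swap.
    a.store(42, Ordering::Release);
    // RMW: a lock cmpxchg16b retry loop (see the atomic_rmw_cas_* macros below).
    let prev = a.fetch_add(1, Ordering::SeqCst);
    assert_eq!(prev, 42);
    // CAS maps directly onto the cmpxchg16b() helper below.
    assert_eq!(a.compare_exchange(43, 7, Ordering::AcqRel, Ordering::Acquire), Ok(43));
}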
diff --git a/vendor/portable-atomic/src/imp/atomic128/x86_64.rs b/vendor/portable-atomic/src/imp/atomic128/x86_64.rs new file mode 100644 index 0000000..3b9d141 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/x86_64.rs @@ -0,0 +1,854 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Atomic{I,U}128 implementation on x86_64 using CMPXCHG16B (DWCAS). +// +// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +// this module and use intrinsics.rs instead. +// +// Refs: +// - x86 and amd64 instruction reference https://www.felixcloutier.com/x86 +// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit +// +// Generated asm: +// - x86_64 (+cmpxchg16b) https://godbolt.org/z/55n54WeKr + +include!("macros.rs"); + +#[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))] +#[path = "../fallback/outline_atomics.rs"] +mod fallback; + +#[cfg(not(portable_atomic_no_outline_atomics))] +#[cfg(not(target_env = "sgx"))] +#[path = "detect/x86_64.rs"] +mod detect; + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; +use core::sync::atomic::Ordering; + +use crate::utils::{Pair, U128}; + +// Asserts that the function is called in the correct context. +macro_rules! debug_assert_cmpxchg16b { + () => { + #[cfg(not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )))] + { + debug_assert!(detect::detect().has_cmpxchg16b()); + } + }; +} +#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(target_feature = "sse")] +macro_rules! debug_assert_vmovdqa_atomic { + () => {{ + debug_assert_cmpxchg16b!(); + debug_assert!(detect::detect().has_vmovdqa_atomic()); + }}; +} + +#[allow(unused_macros)] +#[cfg(target_pointer_width = "32")] +macro_rules! ptr_modifier { + () => { + ":e" + }; +} +#[allow(unused_macros)] +#[cfg(target_pointer_width = "64")] +macro_rules! ptr_modifier { + () => { + "" + }; +} + +#[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") +)] +#[inline] +unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) { + debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned (required by CMPXCHG16B), that there are no + // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B. + // + // If the value at `dst` (destination operand) and rdx:rax are equal, the + // 128-bit value in rcx:rbx is stored in the `dst`, otherwise the value at + // `dst` is loaded to rdx:rax. + // + // The ZF flag is set if the value at `dst` and rdx:rax are equal, + // otherwise it is cleared. Other flags are unaffected. + // + // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + unsafe { + // cmpxchg16b is always SeqCst. + let r: u8; + let old = U128 { whole: old }; + let new = U128 { whole: new }; + let (prev_lo, prev_hi); + macro_rules! cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "xchg {rbx_tmp}, rbx", + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "sete r8b", + "mov rbx, {rbx_tmp}", // restore rbx + rbx_tmp = inout(reg) new.pair.lo => _, + in("rcx") new.pair.hi, + inout("rax") old.pair.lo => prev_lo, + inout("rdx") old.pair.hi => prev_hi, + in($rdi) dst, + out("r8b") r, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. 
+ options(nostack), + ) + }; + } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + (U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole, r != 0) + } +} + +// VMOVDQA is atomic on Intel and AMD CPUs with AVX. +// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details. +// +// Refs: https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 +// +// Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. +// https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html +#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(target_feature = "sse")] +#[target_feature(enable = "avx")] +#[inline] +unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 { + debug_assert!(src as usize % 16 == 0); + debug_assert_vmovdqa_atomic!(); + + // SAFETY: the caller must uphold the safety contract. + // + // atomic load by vmovdqa is always SeqCst. + unsafe { + let out: core::arch::x86_64::__m128; + asm!( + concat!("vmovdqa {out}, xmmword ptr [{src", ptr_modifier!(), "}]"), + src = in(reg) src, + out = out(xmm_reg) out, + options(nostack, preserves_flags), + ); + core::mem::transmute(out) + } +} +#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(target_feature = "sse")] +#[target_feature(enable = "avx")] +#[inline] +unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) { + debug_assert!(dst as usize % 16 == 0); + debug_assert_vmovdqa_atomic!(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let val: core::arch::x86_64::__m128 = core::mem::transmute(val); + match order { + // Relaxed and Release stores are equivalent. + Ordering::Relaxed | Ordering::Release => { + asm!( + concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"), + dst = in(reg) dst, + val = in(xmm_reg) val, + options(nostack, preserves_flags), + ); + } + Ordering::SeqCst => { + asm!( + concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"), + "mfence", + dst = in(reg) dst, + val = in(xmm_reg) val, + options(nostack, preserves_flags), + ); + } + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), +)))] +macro_rules! load_store_detect { + ( + vmovdqa = $vmovdqa:ident + cmpxchg16b = $cmpxchg16b:ident + fallback = $fallback:ident + ) => {{ + let cpuid = detect::detect(); + #[cfg(not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )))] + { + // Check CMPXCHG16B first to prevent mixing atomic and non-atomic access. + if cpuid.has_cmpxchg16b() { + // We do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. + #[cfg(target_feature = "sse")] + { + if cpuid.has_vmovdqa_atomic() { + $vmovdqa + } else { + $cmpxchg16b + } + } + #[cfg(not(target_feature = "sse"))] + { + $cmpxchg16b + } + } else { + fallback::$fallback + } + } + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + { + if cpuid.has_vmovdqa_atomic() { + $vmovdqa + } else { + $cmpxchg16b + } + } + }}; +} + +#[inline] +unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { + // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. 
+ // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html + // SGX doesn't support CPUID. + #[cfg(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + ))] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantees that CMPXCHG16B is available at compile-time. + unsafe { + // cmpxchg16b is always SeqCst. + atomic_load_cmpxchg16b(src) + } + #[cfg(not(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + )))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + ifunc!(unsafe fn(src: *mut u128) -> u128 { + load_store_detect! { + vmovdqa = atomic_load_vmovdqa + cmpxchg16b = atomic_load_cmpxchg16b + // Use SeqCst because cmpxchg16b and atomic load by vmovdqa is always SeqCst. + fallback = atomic_load_seqcst + } + }) + } +} +#[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") +)] +#[inline] +unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 { + debug_assert!(src as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + + // SAFETY: the caller must guarantee that `src` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports CMPXCHG16B. + // + // See cmpxchg16b function for more. + // + // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows + // omitting the storing of condition flags and avoid use of xchg to handle rbx. + unsafe { + // cmpxchg16b is always SeqCst. + let (out_lo, out_hi); + macro_rules! cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "mov {rbx_tmp}, rbx", + "xor rbx, rbx", // zeroed rbx + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "mov rbx, {rbx_tmp}", // restore rbx + // set old/new args of cmpxchg16b to 0 (rbx is zeroed after saved to rbx_tmp, to avoid xchg) + rbx_tmp = out(reg) _, + in("rcx") 0_u64, + inout("rax") 0_u64 => out_lo, + inout("rdx") 0_u64 => out_hi, + in($rdi) src, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. + options(nostack), + ) + }; + } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole + } +} + +#[inline] +unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { + // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. + // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html + // SGX doesn't support CPUID. + #[cfg(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + ))] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantees that CMPXCHG16B is available at compile-time. + unsafe { + // cmpxchg16b is always SeqCst. 
+ let _ = order; + atomic_store_cmpxchg16b(dst, val); + } + #[cfg(not(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + )))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + #[cfg(target_feature = "sse")] + fn_alias! { + #[target_feature(enable = "avx")] + unsafe fn(dst: *mut u128, val: u128); + // atomic store by vmovdqa has at least release semantics. + atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release); + atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst); + } + match order { + // Relaxed and Release stores are equivalent in all implementations + // that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback). + // core::arch's cmpxchg16b will never called here. + Ordering::Relaxed | Ordering::Release => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + load_store_detect! { + vmovdqa = atomic_store_vmovdqa_non_seqcst + cmpxchg16b = atomic_store_cmpxchg16b + fallback = atomic_store_non_seqcst + } + }); + } + Ordering::SeqCst => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + load_store_detect! { + vmovdqa = atomic_store_vmovdqa_seqcst + cmpxchg16b = atomic_store_cmpxchg16b + fallback = atomic_store_seqcst + } + }); + } + _ => unreachable!("{:?}", order), + } + } +} +#[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") +)] +unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) { + // SAFETY: the caller must uphold the safety contract. + unsafe { + // cmpxchg16b is always SeqCst. + atomic_swap_cmpxchg16b(dst, val, Ordering::SeqCst); + } +} + +#[inline] +unsafe fn atomic_compare_exchange( + dst: *mut u128, + old: u128, + new: u128, + _success: Ordering, + _failure: Ordering, +) -> Result<u128, u128> { + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, that there are no concurrent non-atomic operations, + // and cfg guarantees that CMPXCHG16B is available at compile-time. + let (prev, ok) = unsafe { cmpxchg16b(dst, old, new) }; + #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))] + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses. + let (prev, ok) = unsafe { + ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) { + if detect::detect().has_cmpxchg16b() { + cmpxchg16b + } else { + // Use SeqCst because cmpxchg16b is always SeqCst. + fallback::atomic_compare_exchange_seqcst + } + }) + }; + if ok { + Ok(prev) + } else { + Err(prev) + } +} + +// cmpxchg16b is always strong. 
+use atomic_compare_exchange as atomic_compare_exchange_weak; + +#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] +use atomic_swap_cmpxchg16b as atomic_swap; +#[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") +)] +#[inline] +unsafe fn atomic_swap_cmpxchg16b(dst: *mut u128, val: u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports CMPXCHG16B. + // + // See cmpxchg16b function for more. + // + // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows + // omitting the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. + // + // Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap. + unsafe { + // cmpxchg16b is always SeqCst. + let val = U128 { whole: val }; + let (mut prev_lo, mut prev_hi); + macro_rules! cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "xchg {rbx_tmp}, rbx", + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + // + // This is based on the code generated for the first load in DW RMWs by LLVM. + // + // Note that the C++20 memory model does not allow mixed-sized atomic access, + // so we must use inline assembly to implement this. + // (i.e., byte-wise atomic based on the standard library's atomic types + // cannot be used here). + concat!("mov rax, qword ptr [", $rdi, "]"), + concat!("mov rdx, qword ptr [", $rdi, " + 8]"), + "2:", + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "jne 2b", + "mov rbx, {rbx_tmp}", // restore rbx + rbx_tmp = inout(reg) val.pair.lo => _, + in("rcx") val.pair.hi, + out("rax") prev_lo, + out("rdx") prev_hi, + in($rdi) dst, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. + options(nostack), + ) + }; + } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} + +/// Atomic RMW by CAS loop (3 arguments) +/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - rsi/r8 pair: val argument (read-only for `$op`) +/// - rax/rdx pair: previous value loaded (read-only for `$op`) +/// - rbx/rcx pair: new value that will be stored +// We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows +// omitting the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. +macro_rules! 
atomic_rmw_cas_3 { + ($name:ident as $reexport_name:ident, $($op:tt)*) => { + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + use $name as $reexport_name; + #[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") + )] + #[inline] + unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports CMPXCHG16B. + // + // See cmpxchg16b function for more. + unsafe { + // cmpxchg16b is always SeqCst. + let val = U128 { whole: val }; + let (mut prev_lo, mut prev_hi); + macro_rules! cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "mov {rbx_tmp}, rbx", + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + // + // This is based on the code generated for the first load in DW RMWs by LLVM. + // + // Note that the C++20 memory model does not allow mixed-sized atomic access, + // so we must use inline assembly to implement this. + // (i.e., byte-wise atomic based on the standard library's atomic types + // cannot be used here). + concat!("mov rax, qword ptr [", $rdi, "]"), + concat!("mov rdx, qword ptr [", $rdi, " + 8]"), + "2:", + $($op)* + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "jne 2b", + "mov rbx, {rbx_tmp}", // restore rbx + rbx_tmp = out(reg) _, + out("rcx") _, + out("rax") prev_lo, + out("rdx") prev_hi, + in($rdi) dst, + in("rsi") val.pair.lo, + in("r8") val.pair.hi, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. + options(nostack), + ) + }; + } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } + } + }; +} +/// Atomic RMW by CAS loop (2 arguments) +/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - rax/rdx pair: previous value loaded (read-only for `$op`) +/// - rbx/rcx pair: new value that will be stored +// We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows +// omitting the storing of condition flags and avoid use of xchg to handle rbx. +macro_rules! atomic_rmw_cas_2 { + ($name:ident as $reexport_name:ident, $($op:tt)*) => { + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + use $name as $reexport_name; + #[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") + )] + #[inline] + unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports CMPXCHG16B. + // + // See cmpxchg16b function for more. + unsafe { + // cmpxchg16b is always SeqCst. + let (mut prev_lo, mut prev_hi); + macro_rules! 
cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "mov {rbx_tmp}, rbx", + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + // + // This is based on the code generated for the first load in DW RMWs by LLVM. + // + // Note that the C++20 memory model does not allow mixed-sized atomic access, + // so we must use inline assembly to implement this. + // (i.e., byte-wise atomic based on the standard library's atomic types + // cannot be used here). + concat!("mov rax, qword ptr [", $rdi, "]"), + concat!("mov rdx, qword ptr [", $rdi, " + 8]"), + "2:", + $($op)* + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "jne 2b", + "mov rbx, {rbx_tmp}", // restore rbx + rbx_tmp = out(reg) _, + out("rcx") _, + out("rax") prev_lo, + out("rdx") prev_hi, + in($rdi) dst, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. + options(nostack), + ) + }; + } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } + } + }; +} + +atomic_rmw_cas_3! { + atomic_add_cmpxchg16b as atomic_add, + "mov rbx, rax", + "add rbx, rsi", + "mov rcx, rdx", + "adc rcx, r8", +} +atomic_rmw_cas_3! { + atomic_sub_cmpxchg16b as atomic_sub, + "mov rbx, rax", + "sub rbx, rsi", + "mov rcx, rdx", + "sbb rcx, r8", +} +atomic_rmw_cas_3! { + atomic_and_cmpxchg16b as atomic_and, + "mov rbx, rax", + "and rbx, rsi", + "mov rcx, rdx", + "and rcx, r8", +} +atomic_rmw_cas_3! { + atomic_nand_cmpxchg16b as atomic_nand, + "mov rbx, rax", + "and rbx, rsi", + "not rbx", + "mov rcx, rdx", + "and rcx, r8", + "not rcx", +} +atomic_rmw_cas_3! { + atomic_or_cmpxchg16b as atomic_or, + "mov rbx, rax", + "or rbx, rsi", + "mov rcx, rdx", + "or rcx, r8", +} +atomic_rmw_cas_3! { + atomic_xor_cmpxchg16b as atomic_xor, + "mov rbx, rax", + "xor rbx, rsi", + "mov rcx, rdx", + "xor rcx, r8", +} + +atomic_rmw_cas_2! { + atomic_not_cmpxchg16b as atomic_not, + "mov rbx, rax", + "not rbx", + "mov rcx, rdx", + "not rcx", +} +atomic_rmw_cas_2! { + atomic_neg_cmpxchg16b as atomic_neg, + "mov rbx, rax", + "neg rbx", + "mov rcx, 0", + "sbb rcx, rdx", +} + +atomic_rmw_cas_3! { + atomic_max_cmpxchg16b as atomic_max, + "cmp rsi, rax", + "mov rcx, r8", + "sbb rcx, rdx", + "mov rcx, r8", + "cmovl rcx, rdx", + "mov rbx, rsi", + "cmovl rbx, rax", +} +atomic_rmw_cas_3! { + atomic_umax_cmpxchg16b as atomic_umax, + "cmp rsi, rax", + "mov rcx, r8", + "sbb rcx, rdx", + "mov rcx, r8", + "cmovb rcx, rdx", + "mov rbx, rsi", + "cmovb rbx, rax", +} +atomic_rmw_cas_3! { + atomic_min_cmpxchg16b as atomic_min, + "cmp rsi, rax", + "mov rcx, r8", + "sbb rcx, rdx", + "mov rcx, r8", + "cmovge rcx, rdx", + "mov rbx, rsi", + "cmovge rbx, rax", +} +atomic_rmw_cas_3! { + atomic_umin_cmpxchg16b as atomic_umin, + "cmp rsi, rax", + "mov rcx, r8", + "sbb rcx, rdx", + "mov rcx, r8", + "cmovae rcx, rdx", + "mov rbx, rsi", + "cmovae rbx, rax", +} + +macro_rules! atomic_rmw_with_ifunc { + ( + unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?; + cmpxchg16b = $cmpxchg16b_fn:ident; + fallback = $seqcst_fallback_fn:ident; + ) => { + #[cfg(not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )))] + #[inline] + unsafe fn $name($($arg)*, _order: Ordering) $(-> $ret_ty)? { + fn_alias! 
{ + #[cfg_attr( + not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )), + target_feature(enable = "cmpxchg16b") + )] + unsafe fn($($arg)*) $(-> $ret_ty)?; + // cmpxchg16b is always SeqCst. + cmpxchg16b_seqcst_fn = $cmpxchg16b_fn(Ordering::SeqCst); + } + // SAFETY: the caller must uphold the safety contract. + // we only calls cmpxchg16b_fn if cmpxchg16b is available. + unsafe { + ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? { + if detect::detect().has_cmpxchg16b() { + cmpxchg16b_seqcst_fn + } else { + // Use SeqCst because cmpxchg16b is always SeqCst. + fallback::$seqcst_fallback_fn + } + }) + } + } + }; +} + +atomic_rmw_with_ifunc! { + unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_swap_cmpxchg16b; + fallback = atomic_swap_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_add_cmpxchg16b; + fallback = atomic_add_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_sub_cmpxchg16b; + fallback = atomic_sub_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_and_cmpxchg16b; + fallback = atomic_and_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_nand_cmpxchg16b; + fallback = atomic_nand_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_or_cmpxchg16b; + fallback = atomic_or_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_xor_cmpxchg16b; + fallback = atomic_xor_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_max_cmpxchg16b; + fallback = atomic_max_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_umax_cmpxchg16b; + fallback = atomic_umax_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_min_cmpxchg16b; + fallback = atomic_min_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_umin_cmpxchg16b; + fallback = atomic_umin_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_not(dst: *mut u128) -> u128; + cmpxchg16b = atomic_not_cmpxchg16b; + fallback = atomic_not_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_neg(dst: *mut u128) -> u128; + cmpxchg16b = atomic_neg_cmpxchg16b; + fallback = atomic_neg_seqcst; +} + +#[inline] +fn is_lock_free() -> bool { + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + { + // CMPXCHG16B is available at compile-time. 
+ true + } + #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))] + { + detect::detect().has_cmpxchg16b() + } +} +const IS_ALWAYS_LOCK_FREE: bool = + cfg!(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")); + +atomic128!(AtomicI128, i128, atomic_max, atomic_min); +atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); + +#[allow(clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)] +#[cfg(test)] +mod tests { + use super::*; + + test_atomic_int!(i128); + test_atomic_int!(u128); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. + stress_test!(u128); +} |
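The assembly paths above shuttle 128-bit values through the rdx:rax and rcx:rbx register pairs, which is what the crate::utils::{Pair, U128} import is for. A rough, hypothetical stand-in for that helper on a little-endian target (the crate's real definition lives in its utils module and also covers big-endian layouts used by other backends):

// Hypothetical stand-in for crate::utils::{Pair, U128}; little-endian layout only.
#[derive(Clone, Copy)]
#[repr(C)]
struct Pair {
    lo: u64,
    hi: u64,
}

#[derive(Clone, Copy)]
#[repr(C)]
union U128 {
    whole: u128,
    pair: Pair,
}

fn main() {
    let u = U128 { whole: (5u128 << 64) | 7 };
    // SAFETY: both union views are plain integers of the same size.
    let (lo, hi) = unsafe { (u.pair.lo, u.pair.hi) };
    assert_eq!((lo, hi), (7, 5)); // low half travels in rax/rbx, high half in rdx/rcx
}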
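atomic_load_cmpxchg16b above issues CMPXCHG16B with both the expected and the replacement value zeroed: if the location holds zero, the instruction rewrites that same zero; otherwise it fails and leaves the current contents in rdx:rax. Either way the caller gets a consistent 16-byte snapshot without a retry loop. The same trick phrased against the safe public API (illustrative only, and the helper name is mine; the real code stays in inline asm to avoid the xchg needed to handle rbx):

use portable_atomic::{AtomicU128, Ordering};

// A 128-bit load expressed as a compare-exchange, mirroring atomic_load_cmpxchg16b.
fn load_via_cas(a: &AtomicU128) -> u128 {
    match a.compare_exchange(0, 0, Ordering::SeqCst, Ordering::SeqCst) {
        Ok(prev) => prev,        // the value was 0 and was "replaced" with 0
        Err(current) => current, // the comparison failed; current holds the loaded value
    }
}

fn main() {
    let a = AtomicU128::new(u128::MAX);
    assert_eq!(load_via_cas(&a), u128::MAX);
}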
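Each atomic_rmw_cas_3!/atomic_rmw_cas_2! expansion follows one shape: read the current 16 bytes with two plain MOVs (acceptable because the subsequent CAS validates them), compute the new value into rcx:rbx, and retry lock cmpxchg16b until the compare succeeds. A sketch of that loop in safe Rust against the public API, using add as the example; this is not the module's code path, which stays in inline asm to control rbx handling and flag clobbers, and the helper name is illustrative:

use portable_atomic::{AtomicU128, Ordering};

// Illustrative CAS-loop fetch_add, mirroring what atomic_add_cmpxchg16b's asm loop does.
fn fetch_add_u128(a: &AtomicU128, val: u128) -> u128 {
    let mut prev = a.load(Ordering::Relaxed); // provisional read; the CAS below validates it
    loop {
        let next = prev.wrapping_add(val);
        match a.compare_exchange_weak(prev, next, Ordering::SeqCst, Ordering::SeqCst) {
            Ok(p) => return p,  // cmpxchg16b succeeded; p is the previous value
            Err(p) => prev = p, // another thread won the race; retry with the fresh value
        }
    }
}

fn main() {
    let a = AtomicU128::new(40);
    assert_eq!(fetch_add_u128(&a, 2), 40);
    assert_eq!(a.load(Ordering::SeqCst), 42);
}

Because CMPXCHG16B never fails spuriously, compare_exchange_weak and compare_exchange behave identically on this backend, which is why the module simply aliases the weak variant to the strong one.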
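When the cmpxchg16b target feature is not enabled at compile time, every entry point goes through ifunc! plus detect::detect(), and falls back to the crate's fallback *_seqcst implementations on CPUs without CMPXCHG16B, which is also why is_lock_free() reports the result of the CPUID check rather than a constant. A sketch of that compile-time/run-time split, mirroring is_lock_free() but using std's detection macro instead of the crate's own no_std CPUID probe in detect/x86_64.rs (the helper name is hypothetical):

// Hypothetical helper; the crate cannot rely on std, so it ships its own CPUID probe.
fn cmpxchg16b_available() -> bool {
    #[cfg(target_feature = "cmpxchg16b")]
    {
        true // statically guaranteed, e.g. built with -C target-feature=+cmpxchg16b
    }
    #[cfg(not(target_feature = "cmpxchg16b"))]
    {
        std::is_x86_feature_detected!("cmpxchg16b")
    }
}

fn main() {
    println!("cmpxchg16b available: {}", cmpxchg16b_available());
}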