// SPDX-License-Identifier: Apache-2.0 OR MIT

// Atomic{I,U}128 implementation on x86_64 using CMPXCHG16B (DWCAS).
//
// Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use
// this module and use intrinsics.rs instead.
//
// Refs:
// - x86 and amd64 instruction reference https://www.felixcloutier.com/x86
// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
//
// Generated asm:
// - x86_64 (+cmpxchg16b) https://godbolt.org/z/55n54WeKr

include!("macros.rs");

#[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
#[path = "../fallback/outline_atomics.rs"]
mod fallback;

#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(not(target_env = "sgx"))]
#[path = "detect/x86_64.rs"]
mod detect;

#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::sync::atomic::Ordering;

use crate::utils::{Pair, U128};

// Asserts that the function is called in the correct context.
macro_rules! debug_assert_cmpxchg16b {
    () => {
        #[cfg(not(any(
            target_feature = "cmpxchg16b",
            portable_atomic_target_feature = "cmpxchg16b",
        )))]
        {
            debug_assert!(detect::detect().has_cmpxchg16b());
        }
    };
}
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(target_feature = "sse")]
macro_rules! debug_assert_vmovdqa_atomic {
    () => {{
        debug_assert_cmpxchg16b!();
        debug_assert!(detect::detect().has_vmovdqa_atomic());
    }};
}

#[allow(unused_macros)]
#[cfg(target_pointer_width = "32")]
macro_rules! ptr_modifier {
    () => {
        ":e"
    };
}
#[allow(unused_macros)]
#[cfg(target_pointer_width = "64")]
macro_rules! ptr_modifier {
    () => {
        ""
    };
}

#[cfg_attr(
    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
    target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_cmpxchg16b!();

    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
    // reads, 16-byte aligned (required by CMPXCHG16B), that there are no
    // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B.
    //
    // If the value at `dst` (destination operand) and rdx:rax are equal, the
    // 128-bit value in rcx:rbx is stored in `dst`; otherwise the value at
    // `dst` is loaded into rdx:rax.
    //
    // The ZF flag is set if the value at `dst` and rdx:rax are equal,
    // otherwise it is cleared. Other flags are unaffected.
    //
    // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
    unsafe {
        // cmpxchg16b is always SeqCst.
        let r: u8;
        let old = U128 { whole: old };
        let new = U128 { whole: new };
        let (prev_lo, prev_hi);
        macro_rules! cmpxchg16b {
            ($rdi:tt) => {
                asm!(
                    // rbx is reserved by LLVM
                    "xchg {rbx_tmp}, rbx",
                    concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
                    "sete r8b",
                    "mov rbx, {rbx_tmp}", // restore rbx
                    rbx_tmp = inout(reg) new.pair.lo => _,
                    in("rcx") new.pair.hi,
                    inout("rax") old.pair.lo => prev_lo,
                    inout("rdx") old.pair.hi => prev_hi,
                    in($rdi) dst,
                    out("r8b") r,
                    // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
                    options(nostack),
                )
            };
        }
        #[cfg(target_pointer_width = "32")]
        cmpxchg16b!("edi");
        #[cfg(target_pointer_width = "64")]
        cmpxchg16b!("rdi");
        (U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole, r != 0)
    }
}
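
// For illustration, the operation performed by the asm above corresponds to
// the following (hypothetical, non-atomic) sketch; CMPXCHG16B performs all of
// this as a single atomic step:
//
//     unsafe fn cmpxchg16b_semantics(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
//         let prev = *dst;
//         if prev == old {
//             *dst = new;
//         }
//         (prev, prev == old)
//     }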

// VMOVDQA is atomic on Intel and AMD CPUs with AVX.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details.
//
// Refs: https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
//
// Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
// https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(target_feature = "sse")]
#[target_feature(enable = "avx")]
#[inline]
unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 {
    debug_assert!(src as usize % 16 == 0);
    debug_assert_vmovdqa_atomic!();

    // SAFETY: the caller must uphold the safety contract.
    //
    // atomic load by vmovdqa is always SeqCst.
    unsafe {
        let out: core::arch::x86_64::__m128;
        asm!(
            concat!("vmovdqa {out}, xmmword ptr [{src", ptr_modifier!(), "}]"),
            src = in(reg) src,
            out = out(xmm_reg) out,
            options(nostack, preserves_flags),
        );
        core::mem::transmute(out)
    }
}
#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
#[cfg(target_feature = "sse")]
#[target_feature(enable = "avx")]
#[inline]
unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_vmovdqa_atomic!();

    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        let val: core::arch::x86_64::__m128 = core::mem::transmute(val);
        match order {
            // Relaxed and Release stores are equivalent.
            Ordering::Relaxed | Ordering::Release => {
                asm!(
                    concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"),
                    dst = in(reg) dst,
                    val = in(xmm_reg) val,
                    options(nostack, preserves_flags),
                );
            }
            Ordering::SeqCst => {
                asm!(
                    concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"),
                    "mfence",
                    dst = in(reg) dst,
                    val = in(xmm_reg) val,
                    options(nostack, preserves_flags),
                );
            }
            _ => unreachable!("{:?}", order),
        }
    }
}

#[cfg(not(all(
    any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
    any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
)))]
macro_rules! load_store_detect {
    (
        vmovdqa = $vmovdqa:ident
        cmpxchg16b = $cmpxchg16b:ident
        fallback = $fallback:ident
    ) => {{
        let cpuid = detect::detect();
        #[cfg(not(any(
            target_feature = "cmpxchg16b",
            portable_atomic_target_feature = "cmpxchg16b",
        )))]
        {
            // Check CMPXCHG16B first to prevent mixing atomic and non-atomic access.
            if cpuid.has_cmpxchg16b() {
                // We do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
                #[cfg(target_feature = "sse")]
                {
                    if cpuid.has_vmovdqa_atomic() {
                        $vmovdqa
                    } else {
                        $cmpxchg16b
                    }
                }
                #[cfg(not(target_feature = "sse"))]
                {
                    $cmpxchg16b
                }
            } else {
                fallback::$fallback
            }
        }
        #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
        {
            if cpuid.has_vmovdqa_atomic() {
                $vmovdqa
            } else {
                $cmpxchg16b
            }
        }
    }};
}
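
// Illustration only: the selection logic above, written as a plain function
// over hypothetical booleans (the real macro folds the compile-time
// cmpxchg16b/SSE cfgs into the decision instead of taking them as arguments):
//
//     enum Impl { Vmovdqa, Cmpxchg16b, Fallback }
//     fn select(has_cmpxchg16b: bool, sse: bool, has_vmovdqa_atomic: bool) -> Impl {
//         if !has_cmpxchg16b {
//             return Impl::Fallback;
//         }
//         if sse && has_vmovdqa_atomic { Impl::Vmovdqa } else { Impl::Cmpxchg16b }
//     }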

#[inline]
unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 {
    // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
    // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
    // SGX doesn't support CPUID.
    #[cfg(all(
        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
    ))]
    // SAFETY: the caller must uphold the safety contract.
    // cfg guarantees that CMPXCHG16B is available at compile-time.
    unsafe {
        // cmpxchg16b is always SeqCst.
        atomic_load_cmpxchg16b(src)
    }
    #[cfg(not(all(
        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
    )))]
    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        ifunc!(unsafe fn(src: *mut u128) -> u128 {
            load_store_detect! {
                vmovdqa = atomic_load_vmovdqa
                cmpxchg16b = atomic_load_cmpxchg16b
                // Use SeqCst because cmpxchg16b and atomic load by vmovdqa are always SeqCst.
                fallback = atomic_load_seqcst
            }
        })
    }
}
#[cfg_attr(
    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
    target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 {
    debug_assert!(src as usize % 16 == 0);
    debug_assert_cmpxchg16b!();

    // SAFETY: the caller must guarantee that `src` is valid for both writes and
    // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
    // cfg guarantees that the CPU supports CMPXCHG16B.
    //
    // See the cmpxchg16b function for more.
    //
    // We could use a CAS loop via atomic_compare_exchange here, but using inline assembly
    // allows omitting the storing of condition flags and avoids the use of xchg to handle rbx.
    unsafe {
        // cmpxchg16b is always SeqCst.
        let (out_lo, out_hi);
        macro_rules! cmpxchg16b {
            ($rdi:tt) => {
                asm!(
                    // rbx is reserved by LLVM
                    "mov {rbx_tmp}, rbx",
                    "xor rbx, rbx", // zeroed rbx
                    concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
                    "mov rbx, {rbx_tmp}", // restore rbx
                    // set old/new args of cmpxchg16b to 0 (rbx is zeroed after being saved to rbx_tmp, to avoid xchg)
                    rbx_tmp = out(reg) _,
                    in("rcx") 0_u64,
                    inout("rax") 0_u64 => out_lo,
                    inout("rdx") 0_u64 => out_hi,
                    in($rdi) src,
                    // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
                    options(nostack),
                )
            };
        }
        #[cfg(target_pointer_width = "32")]
        cmpxchg16b!("edi");
        #[cfg(target_pointer_width = "64")]
        cmpxchg16b!("rdi");
        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
    }
}
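
// Illustration: the zeroed-argument CMPXCHG16B above acts as a load. If `*src`
// is 0, the CAS "succeeds" and stores 0 back (a no-op); otherwise it fails and
// loads the current value into rdx:rax. Either way, rdx:rax ends up holding
// the current value, equivalent to this sketch built on the cmpxchg16b
// function defined earlier in this file:
//
//     unsafe fn load_via_cas(src: *mut u128) -> u128 {
//         let (prev, _ok) = cmpxchg16b(src, 0, 0);
//         prev
//     }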

#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
    // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
    // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
    // SGX doesn't support CPUID.
    #[cfg(all(
        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
    ))]
    // SAFETY: the caller must uphold the safety contract.
    // cfg guarantees that CMPXCHG16B is available at compile-time.
    unsafe {
        // cmpxchg16b is always SeqCst.
        let _ = order;
        atomic_store_cmpxchg16b(dst, val);
    }
    #[cfg(not(all(
        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
    )))]
    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        #[cfg(target_feature = "sse")]
        fn_alias! {
            #[target_feature(enable = "avx")]
            unsafe fn(dst: *mut u128, val: u128);
            // atomic store by vmovdqa has at least release semantics.
            atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release);
            atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst);
        }
        match order {
            // Relaxed and Release stores are equivalent in all implementations
            // that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback).
            // core::arch's cmpxchg16b will never be called here.
            Ordering::Relaxed | Ordering::Release => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    load_store_detect! {
                        vmovdqa = atomic_store_vmovdqa_non_seqcst
                        cmpxchg16b = atomic_store_cmpxchg16b
                        fallback = atomic_store_non_seqcst
                    }
                });
            }
            Ordering::SeqCst => {
                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                    load_store_detect! {
                        vmovdqa = atomic_store_vmovdqa_seqcst
                        cmpxchg16b = atomic_store_cmpxchg16b
                        fallback = atomic_store_seqcst
                    }
                });
            }
            _ => unreachable!("{:?}", order),
        }
    }
}
#[cfg_attr(
    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
    target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) {
    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        // cmpxchg16b is always SeqCst.
        atomic_swap_cmpxchg16b(dst, val, Ordering::SeqCst);
    }
}

#[inline]
unsafe fn atomic_compare_exchange(
    dst: *mut u128,
    old: u128,
    new: u128,
    _success: Ordering,
    _failure: Ordering,
) -> Result<u128, u128> {
    #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
    // and cfg guarantees that CMPXCHG16B is available at compile-time.
    let (prev, ok) = unsafe { cmpxchg16b(dst, old, new) };
    #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
    // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses.
    let (prev, ok) = unsafe {
        ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
            if detect::detect().has_cmpxchg16b() {
                cmpxchg16b
            } else {
                // Use SeqCst because cmpxchg16b is always SeqCst.
                fallback::atomic_compare_exchange_seqcst
            }
        })
    };
    if ok {
        Ok(prev)
    } else {
        Err(prev)
    }
}

// cmpxchg16b is always strong.
use atomic_compare_exchange as atomic_compare_exchange_weak;

#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
use atomic_swap_cmpxchg16b as atomic_swap;
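
// Note on the weak alias above: a weak CAS is permitted to fail spuriously, so
// any correct strong CAS also satisfies the weak contract. For example, a
// caller's retry loop like this sketch (`dst` and `new` as in a typical
// caller, inside an unsafe context) behaves identically with either alias:
//
//     let mut prev = atomic_load(dst, Ordering::SeqCst);
//     while let Err(p) =
//         atomic_compare_exchange_weak(dst, prev, new, Ordering::SeqCst, Ordering::SeqCst)
//     {
//         prev = p;
//     }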

#[cfg_attr(
    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
    target_feature(enable = "cmpxchg16b")
)]
#[inline]
unsafe fn atomic_swap_cmpxchg16b(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_cmpxchg16b!();

    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
    // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
    // cfg guarantees that the CPU supports CMPXCHG16B.
    //
    // See the cmpxchg16b function for more.
    //
    // We could use a CAS loop via atomic_compare_exchange here, but using inline assembly
    // allows omitting the storing/comparing of condition flags and reduces the uses of
    // xchg/mov needed to handle rbx.
    //
    // Do not use atomic_rmw_cas_3 because it needs an extra MOV to implement swap.
    unsafe {
        // cmpxchg16b is always SeqCst.
        let val = U128 { whole: val };
        let (mut prev_lo, mut prev_hi);
        macro_rules! cmpxchg16b {
            ($rdi:tt) => {
                asm!(
                    // rbx is reserved by LLVM
                    "xchg {rbx_tmp}, rbx",
                    // These initial loads are not single-copy atomic, but that is fine
                    // because the subsequent CAS will check for consistency.
                    //
                    // This is based on the code generated for the first load in DW RMWs by LLVM.
                    //
                    // Note that the C++20 memory model does not allow mixed-sized atomic access,
                    // so we must use inline assembly to implement this
                    // (i.e., byte-wise atomic access based on the standard library's atomic
                    // types cannot be used here).
                    concat!("mov rax, qword ptr [", $rdi, "]"),
                    concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
                    "2:",
                        concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
                        "jne 2b",
                    "mov rbx, {rbx_tmp}", // restore rbx
                    rbx_tmp = inout(reg) val.pair.lo => _,
                    in("rcx") val.pair.hi,
                    out("rax") prev_lo,
                    out("rdx") prev_hi,
                    in($rdi) dst,
                    // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
                    options(nostack),
                )
            };
        }
        #[cfg(target_pointer_width = "32")]
        cmpxchg16b!("edi");
        #[cfg(target_pointer_width = "64")]
        cmpxchg16b!("rdi");
        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
    }
}
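
// Illustration: the swap loop above is equivalent to this CAS-loop sketch,
// where `unordered_load` is a hypothetical helper standing in for the two
// 64-bit MOVs (the CAS validates the possibly-torn snapshot):
//
//     unsafe fn swap_via_cas(dst: *mut u128, val: u128) -> u128 {
//         let mut prev = unordered_load(dst);
//         loop {
//             let (p, ok) = cmpxchg16b(dst, prev, val);
//             if ok {
//                 return p;
//             }
//             prev = p;
//         }
//     }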

/// Atomic RMW by CAS loop (3 arguments)
/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - rsi/r8 pair: val argument (read-only for `$op`)
/// - rax/rdx pair: previous value loaded (read-only for `$op`)
/// - rbx/rcx pair: new value that will be stored
// We could use a CAS loop via atomic_compare_exchange here, but using inline assembly
// allows omitting the storing/comparing of condition flags and reduces the uses of
// xchg/mov needed to handle rbx.
macro_rules! atomic_rmw_cas_3 {
    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
        #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
        use $name as $reexport_name;
        #[cfg_attr(
            not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
            target_feature(enable = "cmpxchg16b")
        )]
        #[inline]
        unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            debug_assert_cmpxchg16b!();

            // SAFETY: the caller must guarantee that `dst` is valid for both writes and
            // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
            // cfg guarantees that the CPU supports CMPXCHG16B.
            //
            // See the cmpxchg16b function for more.
            unsafe {
                // cmpxchg16b is always SeqCst.
                let val = U128 { whole: val };
                let (mut prev_lo, mut prev_hi);
                macro_rules! cmpxchg16b {
                    ($rdi:tt) => {
                        asm!(
                            // rbx is reserved by LLVM
                            "mov {rbx_tmp}, rbx",
                            // These initial loads are not single-copy atomic, but that is fine
                            // because the subsequent CAS will check for consistency.
                            //
                            // This is based on the code generated for the first load in DW RMWs by LLVM.
                            //
                            // Note that the C++20 memory model does not allow mixed-sized atomic access,
                            // so we must use inline assembly to implement this
                            // (i.e., byte-wise atomic access based on the standard library's atomic
                            // types cannot be used here).
                            concat!("mov rax, qword ptr [", $rdi, "]"),
                            concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
                            "2:",
                                $($op)*
                                concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
                                "jne 2b",
                            "mov rbx, {rbx_tmp}", // restore rbx
                            rbx_tmp = out(reg) _,
                            out("rcx") _,
                            out("rax") prev_lo,
                            out("rdx") prev_hi,
                            in($rdi) dst,
                            in("rsi") val.pair.lo,
                            in("r8") val.pair.hi,
                            // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
                            options(nostack),
                        )
                    };
                }
                #[cfg(target_pointer_width = "32")]
                cmpxchg16b!("edi");
                #[cfg(target_pointer_width = "64")]
                cmpxchg16b!("rdi");
                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
            }
        }
    };
}
/// Atomic RMW by CAS loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - rax/rdx pair: previous value loaded (read-only for `$op`)
/// - rbx/rcx pair: new value that will be stored
// We could use a CAS loop via atomic_compare_exchange here, but using inline assembly
// allows omitting the storing of condition flags and avoids the use of xchg to handle rbx.
macro_rules! atomic_rmw_cas_2 {
    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
        #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
        use $name as $reexport_name;
        #[cfg_attr(
            not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
            target_feature(enable = "cmpxchg16b")
        )]
        #[inline]
        unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            debug_assert_cmpxchg16b!();

            // SAFETY: the caller must guarantee that `dst` is valid for both writes and
            // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
            // cfg guarantees that the CPU supports CMPXCHG16B.
            //
            // See the cmpxchg16b function for more.
            unsafe {
                // cmpxchg16b is always SeqCst.
                let (mut prev_lo, mut prev_hi);
                macro_rules! cmpxchg16b {
                    ($rdi:tt) => {
                        asm!(
                            // rbx is reserved by LLVM
                            "mov {rbx_tmp}, rbx",
                            // These initial loads are not single-copy atomic, but that is fine
                            // because the subsequent CAS will check for consistency.
                            //
                            // This is based on the code generated for the first load in DW RMWs by LLVM.
                            //
                            // Note that the C++20 memory model does not allow mixed-sized atomic access,
                            // so we must use inline assembly to implement this
                            // (i.e., byte-wise atomic access based on the standard library's atomic
                            // types cannot be used here).
                            concat!("mov rax, qword ptr [", $rdi, "]"),
                            concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
                            "2:",
                                $($op)*
                                concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
                                "jne 2b",
                            "mov rbx, {rbx_tmp}", // restore rbx
                            rbx_tmp = out(reg) _,
                            out("rcx") _,
                            out("rax") prev_lo,
                            out("rdx") prev_hi,
                            in($rdi) dst,
                            // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
                            options(nostack),
                        )
                    };
                }
                #[cfg(target_pointer_width = "32")]
                cmpxchg16b!("edi");
                #[cfg(target_pointer_width = "64")]
                cmpxchg16b!("rdi");
                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
            }
        }
    };
}

atomic_rmw_cas_3! {
    atomic_add_cmpxchg16b as atomic_add,
    "mov rbx, rax",
    "add rbx, rsi",
    "mov rcx, rdx",
    "adc rcx, r8",
}
atomic_rmw_cas_3! {
    atomic_sub_cmpxchg16b as atomic_sub,
    "mov rbx, rax",
    "sub rbx, rsi",
    "mov rcx, rdx",
    "sbb rcx, r8",
}
atomic_rmw_cas_3! {
    atomic_and_cmpxchg16b as atomic_and,
    "mov rbx, rax",
    "and rbx, rsi",
    "mov rcx, rdx",
    "and rcx, r8",
}
atomic_rmw_cas_3! {
    atomic_nand_cmpxchg16b as atomic_nand,
    "mov rbx, rax",
    "and rbx, rsi",
    "not rbx",
    "mov rcx, rdx",
    "and rcx, r8",
    "not rcx",
}
atomic_rmw_cas_3! {
    atomic_or_cmpxchg16b as atomic_or,
    "mov rbx, rax",
    "or rbx, rsi",
    "mov rcx, rdx",
    "or rcx, r8",
}
atomic_rmw_cas_3! {
    atomic_xor_cmpxchg16b as atomic_xor,
    "mov rbx, rax",
    "xor rbx, rsi",
    "mov rcx, rdx",
    "xor rcx, r8",
}

atomic_rmw_cas_2! {
    atomic_not_cmpxchg16b as atomic_not,
    "mov rbx, rax",
    "not rbx",
    "mov rcx, rdx",
    "not rcx",
}
atomic_rmw_cas_2! {
    atomic_neg_cmpxchg16b as atomic_neg,
    "mov rbx, rax",
    "neg rbx",
    "mov rcx, 0",
    "sbb rcx, rdx",
}
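
// The min/max ops below compare 128-bit values with the standard two-word
// idiom: CMP on the low halves followed by SBB on the high halves computes
// the flags of the full 128-bit subtraction val - prev (the SBB result
// register is discarded; only the flags feed the CMOVs). E.g., for atomic_max
// the CMOVL pair selects the new value as in this sketch:
//
//     fn new_for_max(val: i128, prev: i128) -> i128 {
//         if val < prev { prev } else { val } // cmovl keeps prev when val < prev
//     }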
atomic_rmw_cas_3! {
    atomic_max_cmpxchg16b as atomic_max,
    "cmp rsi, rax",
    "mov rcx, r8",
    "sbb rcx, rdx",
    "mov rcx, r8",
    "cmovl rcx, rdx",
    "mov rbx, rsi",
    "cmovl rbx, rax",
}
atomic_rmw_cas_3! {
    atomic_umax_cmpxchg16b as atomic_umax,
    "cmp rsi, rax",
    "mov rcx, r8",
    "sbb rcx, rdx",
    "mov rcx, r8",
    "cmovb rcx, rdx",
    "mov rbx, rsi",
    "cmovb rbx, rax",
}
atomic_rmw_cas_3! {
    atomic_min_cmpxchg16b as atomic_min,
    "cmp rsi, rax",
    "mov rcx, r8",
    "sbb rcx, rdx",
    "mov rcx, r8",
    "cmovge rcx, rdx",
    "mov rbx, rsi",
    "cmovge rbx, rax",
}
atomic_rmw_cas_3! {
    atomic_umin_cmpxchg16b as atomic_umin,
    "cmp rsi, rax",
    "mov rcx, r8",
    "sbb rcx, rdx",
    "mov rcx, r8",
    "cmovae rcx, rdx",
    "mov rbx, rsi",
    "cmovae rbx, rax",
}

macro_rules! atomic_rmw_with_ifunc {
    (
        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
        cmpxchg16b = $cmpxchg16b_fn:ident;
        fallback = $seqcst_fallback_fn:ident;
    ) => {
        #[cfg(not(any(
            target_feature = "cmpxchg16b",
            portable_atomic_target_feature = "cmpxchg16b",
        )))]
        #[inline]
        unsafe fn $name($($arg)*, _order: Ordering) $(-> $ret_ty)? {
            fn_alias! {
                #[cfg_attr(
                    not(any(
                        target_feature = "cmpxchg16b",
                        portable_atomic_target_feature = "cmpxchg16b",
                    )),
                    target_feature(enable = "cmpxchg16b")
                )]
                unsafe fn($($arg)*) $(-> $ret_ty)?;
                // cmpxchg16b is always SeqCst.
                cmpxchg16b_seqcst_fn = $cmpxchg16b_fn(Ordering::SeqCst);
            }
            // SAFETY: the caller must uphold the safety contract.
            // We only call cmpxchg16b_seqcst_fn if CMPXCHG16B is available.
            unsafe {
                ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
                    if detect::detect().has_cmpxchg16b() {
                        cmpxchg16b_seqcst_fn
                    } else {
                        // Use SeqCst because cmpxchg16b is always SeqCst.
                        fallback::$seqcst_fallback_fn
                    }
                })
            }
        }
    };
}
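
// Illustration: each invocation below expands to roughly the following shape
// (sketch only; ifunc! actually resolves the function pointer once and caches
// it rather than re-running detection on every call):
//
//     unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
//         if detect::detect().has_cmpxchg16b() {
//             atomic_swap_cmpxchg16b(dst, val, Ordering::SeqCst)
//         } else {
//             fallback::atomic_swap_seqcst(dst, val)
//         }
//     }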
atomic_rmw_with_ifunc! {
    unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_swap_cmpxchg16b;
    fallback = atomic_swap_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_add_cmpxchg16b;
    fallback = atomic_add_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_sub_cmpxchg16b;
    fallback = atomic_sub_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_and_cmpxchg16b;
    fallback = atomic_and_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_nand_cmpxchg16b;
    fallback = atomic_nand_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_or_cmpxchg16b;
    fallback = atomic_or_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_xor_cmpxchg16b;
    fallback = atomic_xor_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_max_cmpxchg16b;
    fallback = atomic_max_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_umax_cmpxchg16b;
    fallback = atomic_umax_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_min_cmpxchg16b;
    fallback = atomic_min_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128;
    cmpxchg16b = atomic_umin_cmpxchg16b;
    fallback = atomic_umin_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_not(dst: *mut u128) -> u128;
    cmpxchg16b = atomic_not_cmpxchg16b;
    fallback = atomic_not_seqcst;
}
atomic_rmw_with_ifunc! {
    unsafe fn atomic_neg(dst: *mut u128) -> u128;
    cmpxchg16b = atomic_neg_cmpxchg16b;
    fallback = atomic_neg_seqcst;
}

#[inline]
fn is_lock_free() -> bool {
    #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
    {
        // CMPXCHG16B is available at compile-time.
        true
    }
    #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
    {
        detect::detect().has_cmpxchg16b()
    }
}
const IS_ALWAYS_LOCK_FREE: bool =
    cfg!(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"));

atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);

#[allow(clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)]
#[cfg(test)]
mod tests {
    use super::*;

    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    stress_test!(u128);
}