40 files changed, 20114 insertions, 0 deletions
diff --git a/vendor/portable-atomic/src/cfgs.rs b/vendor/portable-atomic/src/cfgs.rs
new file mode 100644
index 0000000..6722b98
--- /dev/null
+++ b/vendor/portable-atomic/src/cfgs.rs
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+#![allow(missing_docs)]
+
+#[cfg(not(all(
+    portable_atomic_no_atomic_load_store,
+    not(any(
+        target_arch = "avr",
+        target_arch = "msp430",
+        target_arch = "riscv32",
+        target_arch = "riscv64",
+        feature = "critical-section",
+    )),
+)))]
+#[macro_use]
+mod atomic_8_16_macros {
+    #[macro_export]
+    macro_rules! cfg_has_atomic_8 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_8 {
+        ($($tt:tt)*) => {};
+    }
+    #[macro_export]
+    macro_rules! cfg_has_atomic_16 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_16 {
+        ($($tt:tt)*) => {};
+    }
+}
+#[cfg(all(
+    portable_atomic_no_atomic_load_store,
+    not(any(
+        target_arch = "avr",
+        target_arch = "msp430",
+        target_arch = "riscv32",
+        target_arch = "riscv64",
+        feature = "critical-section",
+    )),
+))]
+#[macro_use]
+mod atomic_8_16_macros {
+    #[macro_export]
+    macro_rules! cfg_has_atomic_8 {
+        ($($tt:tt)*) => {};
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_8 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+    #[macro_export]
+    macro_rules! cfg_has_atomic_16 {
+        ($($tt:tt)*) => {};
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_16 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+}
+
+#[cfg(all(
+    any(not(target_pointer_width = "16"), feature = "fallback"),
+    not(all(
+        portable_atomic_no_atomic_load_store,
+        not(any(
+            target_arch = "avr",
+            target_arch = "msp430",
+            target_arch = "riscv32",
+            target_arch = "riscv64",
+            feature = "critical-section",
+        )),
+    )),
+))]
+#[macro_use]
+mod atomic_32_macros {
+    #[macro_export]
+    macro_rules! cfg_has_atomic_32 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_32 {
+        ($($tt:tt)*) => {};
+    }
+}
+#[cfg(not(all(
+    any(not(target_pointer_width = "16"), feature = "fallback"),
+    not(all(
+        portable_atomic_no_atomic_load_store,
+        not(any(
+            target_arch = "avr",
+            target_arch = "msp430",
+            target_arch = "riscv32",
+            target_arch = "riscv64",
+            feature = "critical-section",
+        )),
+    )),
+)))]
+#[macro_use]
+mod atomic_32_macros {
+    #[macro_export]
+    macro_rules! cfg_has_atomic_32 {
+        ($($tt:tt)*) => {};
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_32 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+}
+
+#[cfg_attr(
+    portable_atomic_no_cfg_target_has_atomic,
+    cfg(any(
+        all(
+            feature = "fallback",
+            any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                feature = "critical-section",
+                target_arch = "avr",
+                target_arch = "msp430",
+            ),
+        ),
+        not(portable_atomic_no_atomic_64),
+        not(any(target_pointer_width = "16", target_pointer_width = "32")),
+    ))
+)]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(any(
+        all(
+            feature = "fallback",
+            any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                feature = "critical-section",
+                target_arch = "avr",
+                target_arch = "msp430",
+            ),
+        ),
+        target_has_atomic = "64",
+        not(any(target_pointer_width = "16", target_pointer_width = "32")),
+    ))
+)]
+#[macro_use]
+mod atomic_64_macros {
+    #[macro_export]
+    macro_rules! cfg_has_atomic_64 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_64 {
+        ($($tt:tt)*) => {};
+    }
+}
+#[cfg_attr(
+    portable_atomic_no_cfg_target_has_atomic,
+    cfg(not(any(
+        all(
+            feature = "fallback",
+            any(
+                not(portable_atomic_no_atomic_cas),
+                portable_atomic_unsafe_assume_single_core,
+                feature = "critical-section",
+                target_arch = "avr",
+                target_arch = "msp430",
+            ),
+        ),
+        not(portable_atomic_no_atomic_64),
+        not(any(target_pointer_width = "16", target_pointer_width = "32")),
+    )))
+)]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(not(any(
+        all(
+            feature = "fallback",
+            any(
+                target_has_atomic = "ptr",
+                portable_atomic_unsafe_assume_single_core,
+                feature = "critical-section",
+                target_arch = "avr",
+                target_arch = "msp430",
+            ),
+        ),
+        target_has_atomic = "64",
+        not(any(target_pointer_width = "16", target_pointer_width = "32")),
+    )))
+)]
+#[macro_use]
+mod atomic_64_macros {
+    #[macro_export]
+    macro_rules! cfg_has_atomic_64 {
+        ($($tt:tt)*) => {};
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_64 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+}
+
+#[cfg_attr(
+    not(feature = "fallback"),
+    cfg(any(
+        all(
+            target_arch = "aarch64",
+            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+        ),
+        all(
+            target_arch = "x86_64",
+            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+            any(
+                target_feature = "cmpxchg16b",
+                portable_atomic_target_feature = "cmpxchg16b",
+                all(
+                    feature = "fallback",
+                    not(portable_atomic_no_cmpxchg16b_target_feature),
+                    not(portable_atomic_no_outline_atomics),
+                    not(any(target_env = "sgx", miri)),
+                ),
+            ),
+        ),
+        all(
+            target_arch = "powerpc64",
+            portable_atomic_unstable_asm_experimental_arch,
+            any(
+                target_feature = "quadword-atomics",
+                portable_atomic_target_feature = "quadword-atomics",
+                all(
+                    feature = "fallback",
+                    not(portable_atomic_no_outline_atomics),
+                    portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default
+                    any(
+                        all(
+                            target_os = "linux",
+                            any(
+                                target_env = "gnu",
+                                all(
+                                    any(target_env = "musl", target_env = "ohos"),
+                                    not(target_feature = "crt-static"),
+                                ),
+                                portable_atomic_outline_atomics,
+                            ),
+                        ),
+                        target_os = "android",
+                        target_os = "freebsd",
+                    ),
+                    not(any(miri, portable_atomic_sanitize_thread)),
+                ),
+            ),
+        ),
+        all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch),
+    ))
+)]
+#[cfg_attr(
+    all(feature = "fallback", portable_atomic_no_cfg_target_has_atomic),
+    cfg(any(
+        not(portable_atomic_no_atomic_cas),
+        portable_atomic_unsafe_assume_single_core,
+        feature = "critical-section",
+        target_arch = "avr",
+        target_arch = "msp430",
+    ))
+)]
+#[cfg_attr(
+    all(feature = "fallback", not(portable_atomic_no_cfg_target_has_atomic)),
+    cfg(any(
+        target_has_atomic = "ptr",
+        portable_atomic_unsafe_assume_single_core,
+        feature = "critical-section",
+        target_arch = "avr",
+        target_arch = "msp430",
+    ))
+)]
+#[macro_use]
+mod atomic_128_macros {
+    #[macro_export]
+    macro_rules! cfg_has_atomic_128 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_128 {
+        ($($tt:tt)*) => {};
+    }
+}
+#[cfg_attr(
+    not(feature = "fallback"),
+    cfg(not(any(
+        all(
+            target_arch = "aarch64",
+            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+        ),
+        all(
+            target_arch = "x86_64",
+            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+            any(
+                target_feature = "cmpxchg16b",
+                portable_atomic_target_feature = "cmpxchg16b",
+                all(
+                    feature = "fallback",
+                    not(portable_atomic_no_cmpxchg16b_target_feature),
+                    not(portable_atomic_no_outline_atomics),
+                    not(any(target_env = "sgx", miri)),
+                ),
+            ),
+        ),
+        all(
+            target_arch = "powerpc64",
+            portable_atomic_unstable_asm_experimental_arch,
+            any(
+                target_feature = "quadword-atomics",
+                portable_atomic_target_feature = "quadword-atomics",
+                all(
+                    feature = "fallback",
+                    not(portable_atomic_no_outline_atomics),
+                    portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default
+                    any(
+                        all(
+                            target_os = "linux",
+                            any(
+                                target_env = "gnu",
+                                all(
+                                    any(target_env = "musl", target_env = "ohos"),
+                                    not(target_feature = "crt-static"),
+                                ),
+                                portable_atomic_outline_atomics,
+                            ),
+                        ),
+                        target_os = "android",
+                        target_os = "freebsd",
+                    ),
+                    not(any(miri, portable_atomic_sanitize_thread)),
+                ),
+            ),
+        ),
+        all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch),
+    )))
+)]
+#[cfg_attr(
+    all(feature = "fallback", portable_atomic_no_cfg_target_has_atomic),
+    cfg(not(any(
+        not(portable_atomic_no_atomic_cas),
+        portable_atomic_unsafe_assume_single_core,
+        feature = "critical-section",
+        target_arch = "avr",
+        target_arch = "msp430",
+    )))
+)]
+#[cfg_attr(
+    all(feature = "fallback", not(portable_atomic_no_cfg_target_has_atomic)),
+    cfg(not(any(
+        target_has_atomic = "ptr",
+        portable_atomic_unsafe_assume_single_core,
+        feature = "critical-section",
+        target_arch = "avr",
+        target_arch = "msp430",
+    )))
+)]
+#[macro_use]
+mod atomic_128_macros {
+    #[macro_export]
+    macro_rules! cfg_has_atomic_128 {
+        ($($tt:tt)*) => {};
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_128 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+}
+
+#[cfg_attr(
+    portable_atomic_no_cfg_target_has_atomic,
+    cfg(any(
+        not(portable_atomic_no_atomic_cas),
+        portable_atomic_unsafe_assume_single_core,
+        feature = "critical-section",
+        target_arch = "avr",
+        target_arch = "msp430",
+    ))
+)]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(any(
+        target_has_atomic = "ptr",
+        portable_atomic_unsafe_assume_single_core,
+        feature = "critical-section",
+        target_arch = "avr",
+        target_arch = "msp430",
+    ))
+)]
+#[macro_use]
+mod atomic_cas_macros {
+    #[macro_export]
+    macro_rules! cfg_has_atomic_cas {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_cas {
+        ($($tt:tt)*) => {};
+    }
+}
+#[cfg_attr(
+    portable_atomic_no_cfg_target_has_atomic,
+    cfg(not(any(
+        not(portable_atomic_no_atomic_cas),
+        portable_atomic_unsafe_assume_single_core,
+        feature = "critical-section",
+        target_arch = "avr",
+        target_arch = "msp430",
+    )))
+)]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(not(any(
+        target_has_atomic = "ptr",
+        portable_atomic_unsafe_assume_single_core,
+        feature = "critical-section",
+        target_arch = "avr",
+        target_arch = "msp430",
+    )))
+)]
+#[macro_use]
+mod atomic_cas_macros {
+    #[macro_export]
+    macro_rules! cfg_has_atomic_cas {
+        ($($tt:tt)*) => {};
+    }
+    #[macro_export]
+    macro_rules! cfg_no_atomic_cas {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+}
+
+// Check that all cfg_ macros work.
+mod check {
+    crate::cfg_has_atomic_8! { type _Atomic8 = (); }
+    crate::cfg_no_atomic_8! { type _Atomic8 = (); }
+    crate::cfg_has_atomic_16! { type _Atomic16 = (); }
+    crate::cfg_no_atomic_16! { type _Atomic16 = (); }
+    crate::cfg_has_atomic_32! { type _Atomic32 = (); }
+    crate::cfg_no_atomic_32! { type _Atomic32 = (); }
+    crate::cfg_has_atomic_64! { type _Atomic64 = (); }
+    crate::cfg_no_atomic_64! { type _Atomic64 = (); }
+    crate::cfg_has_atomic_128! { type _Atomic128 = (); }
+    crate::cfg_no_atomic_128! { type _Atomic128 = (); }
+    crate::cfg_has_atomic_ptr! { type _AtomicPtr = (); }
+    crate::cfg_no_atomic_ptr! { type _AtomicPtr = (); }
+    crate::cfg_has_atomic_cas! { type __AtomicPtr = (); }
+    crate::cfg_no_atomic_cas! { type __AtomicPtr = (); }
+    #[allow(unused_imports)]
+    use {
+        _Atomic128 as _, _Atomic16 as _, _Atomic32 as _, _Atomic64 as _, _Atomic8 as _,
+        _AtomicPtr as _, __AtomicPtr as _,
+    };
+}
diff --git a/vendor/portable-atomic/src/gen/utils.rs b/vendor/portable-atomic/src/gen/utils.rs
new file mode 100644
index 0000000..61f6c99
--- /dev/null
+++ b/vendor/portable-atomic/src/gen/utils.rs
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+// This file is @generated by target_spec.sh.
+// It is not intended for manual editing.
+
+#![allow(unused_macros)]
+
+// On AArch64, the base register of load/store/atomic instructions must be 64-bit.
+// Passing a 32-bit value to `in(reg)` on AArch64 results in the upper bits
+// having an undefined value, but to work correctly with ILP32 ABI, the upper
+// bits must be zero, which is handled here by casting to u64. Another way to
+// handle this is to pass it as a pointer and clear the upper bits inside asm,
+// but that is easier to overlook than a cast, whose omission can be caught by
+// the asm_sub_register lint.
+// See also https://github.com/ARM-software/abi-aa/blob/2023Q1/aapcs64/aapcs64.rst#57pointers
+//
+// Except for x86_64, which can use 32-bit registers in the destination operand
+// (on x86_64, we use the ptr_modifier macro to handle this), we need to do the
+// same for ILP32 ABI on other 64-bit architectures. (At least, as far as I can
+// see from the assembly generated by LLVM, this is also required for MIPS64 N32
+// ABI. I don't know about the RISC-V s64ilp32 ABI for which a patch was
+// recently submitted to the kernel, but in any case, this should be a safe
+// default for such ABIs).
+//
+// Known architectures that have such ABI are x86_64 (X32), aarch64 (ILP32),
+// mips64 (N32), and riscv64 (s64ilp32, not merged yet though). (As of
+// 2023-06-05, only the former two are supported by rustc.) However, we list all
+// known 64-bit architectures because similar ABIs may exist or be added later for
+// other architectures.
+#[cfg(all(
+    target_pointer_width = "32",
+    any(
+        target_arch = "aarch64",
+        target_arch = "bpf",
+        target_arch = "loongarch64",
+        target_arch = "mips64",
+        target_arch = "mips64r6",
+        target_arch = "nvptx64",
+        target_arch = "powerpc64",
+        target_arch = "riscv64",
+        target_arch = "s390x",
+        target_arch = "sparc64",
+        target_arch = "wasm64",
+        target_arch = "x86_64",
+    ),
+))]
+macro_rules! ptr_reg {
+    ($ptr:ident) => {{
+        let _: *const _ = $ptr; // ensure $ptr is a pointer (*mut _ or *const _)
+        #[cfg(not(portable_atomic_no_asm_maybe_uninit))]
+        #[allow(clippy::ptr_as_ptr)]
+        {
+            // If we cast to u64 here, the provenance will be lost,
+            // so we convert to MaybeUninit<u64> via zero extend helper.
+            crate::utils::zero_extend64_ptr($ptr as *mut ())
+        }
+        #[cfg(portable_atomic_no_asm_maybe_uninit)]
+        {
+            // Use cast on old rustc because it does not support MaybeUninit
+            // registers. This is still permissive-provenance compatible and
+            // is sound.
+            $ptr as u64
+        }
+    }};
+}
+#[cfg(not(all(
+    target_pointer_width = "32",
+    any(
+        target_arch = "aarch64",
+        target_arch = "bpf",
+        target_arch = "loongarch64",
+        target_arch = "mips64",
+        target_arch = "mips64r6",
+        target_arch = "nvptx64",
+        target_arch = "powerpc64",
+        target_arch = "riscv64",
+        target_arch = "s390x",
+        target_arch = "sparc64",
+        target_arch = "wasm64",
+        target_arch = "x86_64",
+    ),
+)))]
+macro_rules! ptr_reg {
+    ($ptr:ident) => {{
+        let _: *const _ = $ptr; // ensure $ptr is a pointer (*mut _ or *const _)
+        $ptr // cast is unnecessary here.
+    }};
+}
+
+// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
+// AArch64 ILP32 ABI, MIPS64 N32 ABI). On those targets, AtomicU64 is available
+// and fast, so use it to implement a normal sequence lock.
+//
+// See ptr_reg macro for the reason why all known 64-bit architectures are listed.
+#[cfg(feature = "fallback")]
+#[cfg(any(
+    not(any(target_pointer_width = "16", target_pointer_width = "32")), // i.e., 64-bit or greater
+    target_arch = "aarch64",
+    target_arch = "bpf",
+    target_arch = "loongarch64",
+    target_arch = "mips64",
+    target_arch = "mips64r6",
+    target_arch = "nvptx64",
+    target_arch = "powerpc64",
+    target_arch = "riscv64",
+    target_arch = "s390x",
+    target_arch = "sparc64",
+    target_arch = "wasm64",
+    target_arch = "x86_64",
+))]
+#[macro_use]
+mod fast_atomic_64_macros {
+    macro_rules! cfg_has_fast_atomic_64 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+    macro_rules! cfg_no_fast_atomic_64 {
+        ($($tt:tt)*) => {};
+    }
+}
+#[cfg(feature = "fallback")]
+#[cfg(not(any(
+    not(any(target_pointer_width = "16", target_pointer_width = "32")), // i.e., 64-bit or greater
+    target_arch = "aarch64",
+    target_arch = "bpf",
+    target_arch = "loongarch64",
+    target_arch = "mips64",
+    target_arch = "mips64r6",
+    target_arch = "nvptx64",
+    target_arch = "powerpc64",
+    target_arch = "riscv64",
+    target_arch = "s390x",
+    target_arch = "sparc64",
+    target_arch = "wasm64",
+    target_arch = "x86_64",
+)))]
+#[macro_use]
+mod fast_atomic_64_macros {
+    macro_rules! cfg_has_fast_atomic_64 {
+        ($($tt:tt)*) => {};
+    }
+    macro_rules! cfg_no_fast_atomic_64 {
+        ($($tt:tt)*) => {
+            $($tt)*
+        };
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/arm_linux.rs b/vendor/portable-atomic/src/imp/arm_linux.rs
new file mode 100644
index 0000000..623a282
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/arm_linux.rs
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// 64-bit atomic implementation using kuser_cmpxchg64 on pre-v6 ARM Linux/Android.
+//
+// Refs:
+// - https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
+// - https://github.com/rust-lang/compiler-builtins/blob/0.1.88/src/arm_linux.rs
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use fallback implementation instead.
+
+// TODO: Since Rust 1.64, the Linux kernel requirement for Rust when using std is 3.2+, so it should
+// be possible to omit the dynamic kernel version check if the std feature is enabled on Rust 1.64+.
+// https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html
+
+#[path = "fallback/outline_atomics.rs"]
+mod fallback;
+
+use core::{arch::asm, cell::UnsafeCell, mem, sync::atomic::Ordering};
+
+use crate::utils::{Pair, U64};
+
+// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
+const KUSER_HELPER_VERSION: usize = 0xFFFF0FFC;
+// __kuser_helper_version >= 5 (kernel version 3.1+)
+const KUSER_CMPXCHG64: usize = 0xFFFF0F60;
+#[inline]
+fn __kuser_helper_version() -> i32 {
+    use core::sync::atomic::AtomicI32;
+
+    static CACHE: AtomicI32 = AtomicI32::new(0);
+    let mut v = CACHE.load(Ordering::Relaxed);
+    if v != 0 {
+        return v;
+    }
+    // SAFETY: core assumes that at least __kuser_memory_barrier (__kuser_helper_version >= 3) is
+    // available on this platform. __kuser_helper_version is always available on such a platform.
+    v = unsafe { (KUSER_HELPER_VERSION as *const i32).read() };
+    CACHE.store(v, Ordering::Relaxed);
+    v
+}
+#[inline]
+fn has_kuser_cmpxchg64() -> bool {
+    // Note: detect_false cfg is intended to make it easy for portable-atomic developers to
+    // test cases such as has_cmpxchg16b == false, has_lse == false,
+    // __kuser_helper_version < 5, etc., and is not a public API.
+    if cfg!(portable_atomic_test_outline_atomics_detect_false) {
+        return false;
+    }
+    __kuser_helper_version() >= 5
+}
+#[inline]
+unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool {
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let f: extern "C" fn(*const u64, *const u64, *mut u64) -> u32 =
+            mem::transmute(KUSER_CMPXCHG64 as *const ());
+        f(old_val, new_val, ptr) == 0
+    }
+}
+
+// 64-bit atomic load by two 32-bit atomic loads.
+#[inline]
+unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let (out_lo, out_hi);
+        asm!(
+            "ldr {out_lo}, [{src}]",
+            "ldr {out_hi}, [{src}, #4]",
+            src = in(reg) src,
+            out_lo = out(reg) out_lo,
+            out_hi = out(reg) out_hi,
+            options(pure, nostack, preserves_flags, readonly),
+        );
+        U64 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+    }
+}
+
+#[inline(always)]
+unsafe fn atomic_update_kuser_cmpxchg64<F>(dst: *mut u64, mut f: F) -> u64
+where
+    F: FnMut(u64) -> u64,
+{
+    debug_assert!(dst as usize % 8 == 0);
+    debug_assert!(has_kuser_cmpxchg64());
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        loop {
+            // This is not a single-copy atomic read, but that is ok because the
+            // subsequent CAS will check for consistency.
+            //
+            // Note that the C++20 memory model does not allow mixed-sized atomic access,
+            // so we must use inline assembly to implement byte_wise_atomic_load.
+            // (i.e., byte-wise atomic based on the standard library's atomic types
+            // cannot be used here).
+            let prev = byte_wise_atomic_load(dst);
+            let next = f(prev);
+            if __kuser_cmpxchg64(&prev, &next, dst) {
+                return prev;
+            }
+        }
+    }
+}
+
+macro_rules! atomic_with_ifunc {
+    (
+        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)? { $($kuser_cmpxchg64_fn_body:tt)* }
+        fallback = $seqcst_fallback_fn:ident
+    ) => {
+        #[inline]
+        unsafe fn $name($($arg)*) $(-> $ret_ty)? {
+            unsafe fn kuser_cmpxchg64_fn($($arg)*) $(-> $ret_ty)? {
+                $($kuser_cmpxchg64_fn_body)*
+            }
+            // SAFETY: the caller must uphold the safety contract.
+            // We only call __kuser_cmpxchg64 if it is available.
+            unsafe {
+                ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                    if has_kuser_cmpxchg64() {
+                        kuser_cmpxchg64_fn
+                    } else {
+                        // Use SeqCst because __kuser_cmpxchg64 is always SeqCst.
+                        // https://github.com/torvalds/linux/blob/v6.1/arch/arm/kernel/entry-armv.S#L918-L925
+                        fallback::$seqcst_fallback_fn
+                    }
+                })
+            }
+        }
+    };
+}
+
+atomic_with_ifunc! {
+    unsafe fn atomic_load(src: *mut u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(src, |old| old) }
+    }
+    fallback = atomic_load_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_store(dst: *mut u64, val: u64) {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |_| val); }
+    }
+    fallback = atomic_store_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_swap(dst: *mut u64, val: u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |_| val) }
+    }
+    fallback = atomic_swap_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_compare_exchange(dst: *mut u64, old: u64, new: u64) -> (u64, bool) {
+        // SAFETY: the caller must uphold the safety contract.
+        let prev = unsafe {
+            atomic_update_kuser_cmpxchg64(dst, |v| if v == old { new } else { v })
+        };
+        (prev, prev == old)
+    }
+    fallback = atomic_compare_exchange_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_add(dst: *mut u64, val: u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |x| x.wrapping_add(val)) }
+    }
+    fallback = atomic_add_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_sub(dst: *mut u64, val: u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |x| x.wrapping_sub(val)) }
+    }
+    fallback = atomic_sub_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_and(dst: *mut u64, val: u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |x| x & val) }
+    }
+    fallback = atomic_and_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_nand(dst: *mut u64, val: u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |x| !(x & val)) }
+    }
+    fallback = atomic_nand_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_or(dst: *mut u64, val: u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |x| x | val) }
+    }
+    fallback = atomic_or_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_xor(dst: *mut u64, val: u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |x| x ^ val) }
+    }
+    fallback = atomic_xor_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_max(dst: *mut u64, val: u64) -> u64 {
+        #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe {
+            atomic_update_kuser_cmpxchg64(dst, |x| core::cmp::max(x as i64, val as i64) as u64)
+        }
+    }
+    fallback = atomic_max_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_umax(dst: *mut u64, val: u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |x| core::cmp::max(x, val)) }
+    }
+    fallback = atomic_umax_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_min(dst: *mut u64, val: u64) -> u64 {
+        #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe {
+            atomic_update_kuser_cmpxchg64(dst, |x| core::cmp::min(x as i64, val as i64) as u64)
+        }
+    }
+    fallback = atomic_min_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_umin(dst: *mut u64, val: u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |x| core::cmp::min(x, val)) }
+    }
+    fallback = atomic_umin_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_not(dst: *mut u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, |x| !x) }
+    }
+    fallback = atomic_not_seqcst
+}
+atomic_with_ifunc! {
+    unsafe fn atomic_neg(dst: *mut u64) -> u64 {
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe { atomic_update_kuser_cmpxchg64(dst, u64::wrapping_neg) }
+    }
+    fallback = atomic_neg_seqcst
+}
+
+macro_rules! atomic64 {
+    ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => {
+        #[repr(C, align(8))]
+        pub(crate) struct $atomic_type {
+            v: UnsafeCell<$int_type>,
+        }
+
+        // Send is implicitly implemented.
+        // SAFETY: any data races are prevented by the kernel user helper or the lock.
+        unsafe impl Sync for $atomic_type {}
+
+        impl_default_no_fetch_ops!($atomic_type, $int_type);
+        impl_default_bit_opts!($atomic_type, $int_type);
+        impl $atomic_type {
+            #[inline]
+            pub(crate) const fn new(v: $int_type) -> Self {
+                Self { v: UnsafeCell::new(v) }
+            }
+
+            #[inline]
+            pub(crate) fn is_lock_free() -> bool {
+                has_kuser_cmpxchg64()
+            }
+            #[inline]
+            pub(crate) const fn is_always_lock_free() -> bool {
+                false
+            }
+
+            #[inline]
+            pub(crate) fn get_mut(&mut self) -> &mut $int_type {
+                // SAFETY: the mutable reference guarantees unique ownership.
+                // (UnsafeCell::get_mut requires Rust 1.50)
+                unsafe { &mut *self.v.get() }
+            }
+
+            #[inline]
+            pub(crate) fn into_inner(self) -> $int_type {
+                self.v.into_inner()
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn load(&self, order: Ordering) -> $int_type {
+                crate::utils::assert_load_ordering(order);
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_load(self.v.get().cast::<u64>()) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
+                crate::utils::assert_store_ordering(order);
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_store(self.v.get().cast::<u64>(), val as u64) }
+            }
+
+            #[inline]
+            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_swap(self.v.get().cast::<u64>(), val as u64) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn compare_exchange(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure);
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    let (prev, ok) = atomic_compare_exchange(
+                        self.v.get().cast::<u64>(),
+                        current as u64,
+                        new as u64,
+                    );
+                    if ok {
+                        Ok(prev as $int_type)
+                    } else {
+                        Err(prev as $int_type)
+                    }
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn compare_exchange_weak(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                self.compare_exchange(current, new, success, failure)
+            }
+
+            #[inline]
+            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_add(self.v.get().cast::<u64>(), val as u64) as $int_type }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_sub(self.v.get().cast::<u64>(), val as u64) as $int_type }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_and(self.v.get().cast::<u64>(), val as u64) as $int_type }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_nand(self.v.get().cast::<u64>(), val as u64) as $int_type }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_or(self.v.get().cast::<u64>(), val as u64) as $int_type }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_xor(self.v.get().cast::<u64>(), val as u64) as $int_type }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { $atomic_max(self.v.get().cast::<u64>(), val as u64) as $int_type }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { $atomic_min(self.v.get().cast::<u64>(), val as u64) as $int_type }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_not(self.v.get().cast::<u64>()) as $int_type }
+            }
+            #[inline]
+            pub(crate) fn not(&self, order: Ordering) {
+                self.fetch_not(order);
+            }
+
+            #[inline]
+            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by the kernel user helper or the lock
+                // and the raw pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_neg(self.v.get().cast::<u64>()) as $int_type }
+            }
+            #[inline]
+            pub(crate) fn neg(&self, order: Ordering) {
+                self.fetch_neg(order);
+            }
+
+            #[inline]
+            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
+                self.v.get()
+            }
+        }
+    };
+}
+
+atomic64!(AtomicI64, i64, atomic_max, atomic_min);
+atomic64!(AtomicU64, u64, atomic_umax, atomic_umin);
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn kuser_helper_version() {
+        let version = __kuser_helper_version();
+        assert!(version >= 5, "{:?}", version);
+        assert_eq!(version, unsafe { (KUSER_HELPER_VERSION as *const i32).read() });
+    }
+
+    test_atomic_int!(i64);
+    test_atomic_int!(u64);
+
+    // load/store/swap implementation is not affected by signedness, so it is
+    // enough to test only unsigned types.
+    stress_test!(u64);
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/README.md b/vendor/portable-atomic/src/imp/atomic128/README.md
new file mode 100644
index 0000000..799a49b
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/README.md
@@ -0,0 +1,59 @@
+# Implementation of 128-bit atomics
+
+## 128-bit atomics instructions
+
+Here is the table of targets that support 128-bit atomics and the instructions used:
+
+| target_arch | load | store | CAS | RMW | note |
+| ----------- | ---- | ----- | --- | --- | ---- |
+| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel or AMD CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ when cmpxchg16b target feature is enabled at compile-time, otherwise requires rustc 1.69+ |
+| aarch64 | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, ldiapp/stilp requires lse2 and rcpc3 target features, swpp/ldclrp/ldsetp requires lse128 target feature. <br> Both compile-time and run-time detection are supported for lse and lse2. Others are currently compile-time detection only. <br> Requires rustc 1.59+ |
+| powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Requires target-cpu pwr8+ (powerpc64le is pwr8 by default). Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires nightly |
+| s390x | lpq | stpq | cdsg | cdsg | Requires nightly |
+
+On compiler versions or platforms where these are not supported, the fallback implementation is used.
+
+See [aarch64.rs](aarch64.rs) module-level comments for more details on the instructions used on aarch64.
+
+## Comparison with core::intrinsics::atomic_\* (core::sync::atomic::Atomic{I,U}128)
+
+This directory has target-specific implementations with inline assembly ([aarch64.rs](aarch64.rs), [x86_64.rs](x86_64.rs), [powerpc64.rs](powerpc64.rs), [s390x.rs](s390x.rs)) and an implementation without inline assembly ([intrinsics.rs](intrinsics.rs)). The latter currently always needs nightly compilers and is only used for Miri and ThreadSanitizer, which do not support inline assembly.
+
+Implementations with inline assembly generate assemblies almost equivalent to the `core::intrinsics::atomic_*` (used in `core::sync::atomic::Atomic{I,U}128`) for many operations, but some operations may or may not generate more efficient code. For example:
+
+- On x86_64, implementation with inline assembly contains additional optimizations (e.g., [#16](https://github.com/taiki-e/portable-atomic/pull/16)) and is much faster for some operations.
+- On aarch64, implementation with inline assembly supports outline-atomics on more operating systems, and may be faster in environments where outline-atomics can improve performance.
+- On powerpc64 and s390x, LLVM does not support generating some 128-bit atomic operations (see [intrinsics.rs](intrinsics.rs) module-level comments), and we use CAS loop to implement them, so implementation with inline assembly may be faster for those operations.
+- In implementations without inline assembly, the compiler may reuse condition flags that have changed as a result of the operation, or use immediate values instead of registers, depending on the situation.
+
+As 128-bit atomics-related APIs stabilize in the standard library, implementations with inline assembly are planned to be updated to get the benefits of both.
+
+## Run-time feature detection
+
+The [detect](detect) module has run-time feature detection implementations.
+
+Here is the table of targets that support run-time feature detection and the instruction or API used:
+
+| target_arch | target_os/target_env | instruction/API | features | note |
+| ----------- | -------------------- | --------------- | -------- | ---- |
+| x86_64      | all (except for sgx) | cpuid           | all      | Enabled by default |
+| aarch64     | linux                | getauxval       | all      | Only enabled by default on `*-linux-gnu*`, and `*-linux-musl*` (default is static linking)/`*-linux-ohos*` (default is dynamic linking) with dynamic linking enabled. |
+| aarch64     | android              | getauxval       | all      | Enabled by default |
+| aarch64     | freebsd              | elf_aux_info    | lse, lse2 | Enabled by default |
+| aarch64     | netbsd               | sysctl          | all      | Enabled by default |
+| aarch64     | openbsd              | sysctl          | lse      | Enabled by default |
+| aarch64     | macos                | sysctl          | all      | Currently only used in tests because FEAT_LSE and FEAT_LSE2 are always available at compile-time. |
+| aarch64     | windows              | IsProcessorFeaturePresent | lse | Enabled by default |
+| aarch64     | fuchsia              | zx_system_get_features | lse | Enabled by default |
+| powerpc64   | linux                | getauxval       | all      | Disabled by default |
+| powerpc64   | freebsd              | elf_aux_info    | all      | Disabled by default |
+
+Run-time detection is enabled by default on most targets and can be disabled with `--cfg portable_atomic_no_outline_atomics`.
+
+On some targets, run-time detection is disabled by default mainly for compatibility with older versions of operating systems or incomplete build environments, and can be enabled by `--cfg portable_atomic_outline_atomics`. (When both cfg are enabled, `*_no_*` cfg is preferred.)
+
+For targets not included in the above table, run-time detection is always disabled and works the same as when `--cfg portable_atomic_no_outline_atomics` is set.
+
+See [detect/auxv.rs](detect/auxv.rs) module-level comments for more details on Linux/Android/FreeBSD.
+
+See also [docs on `portable_atomic_no_outline_atomics`](https://github.com/taiki-e/portable-atomic/blob/HEAD/README.md#optional-cfg-no-outline-atomics) in the top-level readme.
diff --git a/vendor/portable-atomic/src/imp/atomic128/aarch64.rs b/vendor/portable-atomic/src/imp/atomic128/aarch64.rs
new file mode 100644
index 0000000..32528a7
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/aarch64.rs
@@ -0,0 +1,1708 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on AArch64.
+//
+// There are a few ways to implement 128-bit atomic operations in AArch64.
+//
+// - LDXP/STXP loop (DW LL/SC)
+// - CASP (DWCAS) added as FEAT_LSE (mandatory from armv8.1-a)
+// - LDP/STP (DW load/store) if FEAT_LSE2 (optional from armv8.2-a, mandatory from armv8.4-a) is available
+// - LDIAPP/STILP (DW acquire-load/release-store) added as FEAT_LRCPC3 (optional from armv8.9-a/armv9.4-a) (if FEAT_LSE2 is also available)
+// - LDCLRP/LDSETP/SWPP (DW RMW) added as FEAT_LSE128 (optional from armv9.4-a)
+//
+// If outline-atomics is not enabled and FEAT_LSE is not available at
+// compile-time, we use LDXP/STXP loop.
+// If outline-atomics is enabled and FEAT_LSE is not available at
+// compile-time, we use CASP for CAS if FEAT_LSE is available
+// at run-time, otherwise, use LDXP/STXP loop.
+// If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW.
+// However, when portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP
+// loop for RMW (by default, it is set on Apple hardware; see build script for details).
+// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
+// If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap/{release,seqcst}-store.
+// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store.
+//
+// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
+//
+// Note that we do not separate LL and SC into separate functions, but handle
+// them within a single asm block. This is because it is theoretically possible
+// for the compiler to insert operations that might clear the reservation between
+// LL and SC. Considering the type of operations we are providing and the fact
+// that [progress64](https://github.com/ARM-software/progress64) uses such code,
+// this is probably not a problem for aarch64, but it seems that aarch64 doesn't
+// guarantee it and hexagon is the only architecture with hardware guarantees
+// that such code works. See also:
+//
+// - https://yarchive.net/comp/linux/cmpxchg_ll_sc_portability.html
+// - https://lists.llvm.org/pipermail/llvm-dev/2016-May/099490.html
+// - https://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html
+//
+// Also, even when using a CAS loop to implement atomic RMW, include the loop itself
+// in the asm block because it is more efficient for some codegen backends.
+// https://github.com/rust-lang/compiler-builtins/issues/339#issuecomment-1191260474
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - ARM Compiler armasm User Guide
+//   https://developer.arm.com/documentation/dui0801/latest
+// - Arm A-profile A64 Instruction Set Architecture
+//   https://developer.arm.com/documentation/ddi0602/latest
+// - Arm Architecture Reference Manual for A-profile architecture
+//   https://developer.arm.com/documentation/ddi0487/latest
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - aarch64 https://godbolt.org/z/5Mz1E33vz
+// - aarch64 msvc https://godbolt.org/z/P53d1MsGY
+// - aarch64 (+lse) https://godbolt.org/z/qvaE8n79K
+// - aarch64 msvc (+lse) https://godbolt.org/z/dj4aYerfr
+// - aarch64 (+lse,+lse2) https://godbolt.org/z/1E15jjxah
+// - aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/YreM4n84o
+// - aarch64 (+lse2,+lse128) https://godbolt.org/z/Kfeqs54ox
+// - aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/n6zhjE77s
+
+include!("macros.rs");
+
+// On musl with static linking, it seems that getauxval is not always available.
+// See detect/auxv.rs for more.
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(
+    test,
+    not(all(
+        any(target_feature = "lse2", portable_atomic_target_feature = "lse2"),
+        any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+    )),
+))]
+#[cfg(any(
+    all(
+        target_os = "linux",
+        any(
+            target_env = "gnu",
+            all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
+            portable_atomic_outline_atomics,
+        ),
+    ),
+    target_os = "android",
+    target_os = "freebsd",
+))]
+#[path = "detect/auxv.rs"]
+mod detect;
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg_attr(
+    target_os = "netbsd",
+    cfg(any(
+        test,
+        not(all(
+            any(target_feature = "lse2", portable_atomic_target_feature = "lse2"),
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+        )),
+    ))
+)]
+#[cfg_attr(
+    target_os = "openbsd",
+    cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))
+)]
+#[cfg(any(target_os = "netbsd", target_os = "openbsd"))]
+#[path = "detect/aarch64_aa64reg.rs"]
+mod detect;
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
+#[cfg(target_os = "fuchsia")]
+#[path = "detect/aarch64_fuchsia.rs"]
+mod detect;
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
+#[cfg(target_os = "windows")]
+#[path = "detect/aarch64_windows.rs"]
+mod detect;
+
+// test only
+#[cfg(test)]
+#[cfg(not(qemu))]
+#[cfg(not(valgrind))]
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))]
+#[path = "detect/aarch64_aa64reg.rs"]
+mod detect_aa64reg;
+#[cfg(test)]
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(target_os = "macos")]
+#[path = "detect/aarch64_macos.rs"]
+mod detect_macos;
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+use core::sync::atomic::Ordering;
+
+use crate::utils::{Pair, U128};
+
+#[cfg(any(
+    target_feature = "lse",
+    portable_atomic_target_feature = "lse",
+    not(portable_atomic_no_outline_atomics),
+))]
+macro_rules! debug_assert_lse {
+    () => {
+        #[cfg(all(
+            not(portable_atomic_no_outline_atomics),
+            any(
+                all(
+                    target_os = "linux",
+                    any(
+                        target_env = "gnu",
+                        all(
+                            any(target_env = "musl", target_env = "ohos"),
+                            not(target_feature = "crt-static"),
+                        ),
+                        portable_atomic_outline_atomics,
+                    ),
+                ),
+                target_os = "android",
+                target_os = "freebsd",
+                target_os = "netbsd",
+                target_os = "openbsd",
+                target_os = "fuchsia",
+                target_os = "windows",
+            ),
+        ))]
+        #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
+        {
+            debug_assert!(detect::detect().has_lse());
+        }
+    };
+}
+#[rustfmt::skip]
+#[cfg(any(
+    target_feature = "lse2",
+    portable_atomic_target_feature = "lse2",
+    not(portable_atomic_no_outline_atomics),
+))]
+macro_rules! debug_assert_lse2 {
+    () => {
+        #[cfg(all(
+            not(portable_atomic_no_outline_atomics),
+            any(
+                all(
+                    target_os = "linux",
+                    any(
+                        target_env = "gnu",
+                        all(
+                            any(target_env = "musl", target_env = "ohos"),
+                            not(target_feature = "crt-static"),
+                        ),
+                        portable_atomic_outline_atomics,
+                    ),
+                ),
+                target_os = "android",
+                target_os = "freebsd",
+                target_os = "netbsd",
+                // These don't support detection of FEAT_LSE2.
+                // target_os = "openbsd",
+                // target_os = "fuchsia",
+                // target_os = "windows",
+            ),
+        ))]
+        #[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
+        {
+            debug_assert!(detect::detect().has_lse2());
+        }
+    };
+}
+
+// Refs: https://developer.arm.com/documentation/100067/0612/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en
+//
+// This is similar to #[target_feature(enable = "lse")], except that there are
+// no compiler guarantees regarding (un)inlining, and the scope is within an asm
+// block rather than a function. We use this directive to support outline-atomics
+// on pre-1.61 rustc (aarch64_target_feature stabilized in Rust 1.61).
+//
+// The .arch_extension directive is effective until the end of the assembly block and
+// is not propagated to subsequent code, so the end_lse macro is unneeded.
+// https://godbolt.org/z/4oMEW8vWc
+// https://github.com/torvalds/linux/commit/e0d5896bd356cd577f9710a02d7a474cdf58426b
+// https://github.com/torvalds/linux/commit/dd1f6308b28edf0452dd5dc7877992903ec61e69
+// (It seems GCC effectively ignores this directive and always allows FEAT_LSE instructions: https://godbolt.org/z/W9W6rensG)
+//
+// The .arch directive has a similar effect, but we don't use it due to the following issue:
+// https://github.com/torvalds/linux/commit/dd1f6308b28edf0452dd5dc7877992903ec61e69
+//
+// This is also needed for compatibility with rustc_codegen_cranelift:
+// https://github.com/rust-lang/rustc_codegen_cranelift/issues/1400#issuecomment-1774599775
+//
+// Note: If FEAT_LSE is not available at compile-time, we must guarantee that
+// the function that uses it is not inlined into a function where it is not
+// clear whether FEAT_LSE is available. Otherwise, (even if we checked whether
+// FEAT_LSE is available at run-time) optimizations that reorder its
+// instructions across the if condition might introduce undefined behavior.
+// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
+// However, our code uses the ifunc helper macro that works with function pointers,
+// so we don't have to worry about this unless calling without helper macro.
+#[cfg(any(
+    target_feature = "lse",
+    portable_atomic_target_feature = "lse",
+    not(portable_atomic_no_outline_atomics),
+))]
+macro_rules! start_lse {
+    () => {
+        ".arch_extension lse"
+    };
+}
+
+#[cfg(target_endian = "little")]
+macro_rules! select_le_or_be {
+    ($le:expr, $be:expr) => {
+        $le
+    };
+}
+#[cfg(target_endian = "big")]
+macro_rules! select_le_or_be {
+    ($le:expr, $be:expr) => {
+        $be
+    };
+}
+
+macro_rules! atomic_rmw {
+    ($op:ident, $order:ident) => {
+        atomic_rmw!($op, $order, write = $order)
+    };
+    ($op:ident, $order:ident, write = $write:ident) => {
+        match $order {
+            Ordering::Relaxed => $op!("", "", ""),
+            Ordering::Acquire => $op!("a", "", ""),
+            Ordering::Release => $op!("", "l", ""),
+            Ordering::AcqRel => $op!("a", "l", ""),
+            // In MSVC environments, SeqCst stores/writes need fences after writes.
+            // https://reviews.llvm.org/D141748
+            #[cfg(target_env = "msvc")]
+            Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"),
+            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
+            Ordering::SeqCst => $op!("a", "l", ""),
+            _ => unreachable!("{:?}", $order),
+        }
+    };
+}
+
+// cfg guarantees that the CPU supports FEAT_LSE2.
+#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))]
+use _atomic_load_ldp as atomic_load;
+#[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
+#[inline]
+unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
+    #[inline]
+    unsafe fn atomic_load_no_lse2(src: *mut u128, order: Ordering) -> u128 {
+        #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+        // SAFETY: the caller must uphold the safety contract.
+        // cfg guarantees that the CPU supports FEAT_LSE.
+        unsafe {
+            _atomic_load_casp(src, order)
+        }
+        #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe {
+            _atomic_load_ldxp_stxp(src, order)
+        }
+    }
+    #[cfg(not(all(
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            // These don't support detection of FEAT_LSE2.
+            // target_os = "openbsd",
+            // target_os = "fuchsia",
+            // target_os = "windows",
+        ),
+    )))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        atomic_load_no_lse2(src, order)
+    }
+    #[cfg(all(
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            // These don't support detection of FEAT_LSE2.
+            // target_os = "openbsd",
+            // target_os = "fuchsia",
+            // target_os = "windows",
+        ),
+    ))]
+    {
+        fn_alias! {
+            // inline(never) is just a hint and also not strictly necessary
+            // because we use ifunc helper macro, but used for clarity.
+            #[inline(never)]
+            unsafe fn(src: *mut u128) -> u128;
+            atomic_load_lse2_relaxed = _atomic_load_ldp(Ordering::Relaxed);
+            atomic_load_lse2_acquire = _atomic_load_ldp(Ordering::Acquire);
+            atomic_load_lse2_seqcst = _atomic_load_ldp(Ordering::SeqCst);
+        }
+        fn_alias! {
+            unsafe fn(src: *mut u128) -> u128;
+            atomic_load_no_lse2_relaxed = atomic_load_no_lse2(Ordering::Relaxed);
+            atomic_load_no_lse2_acquire = atomic_load_no_lse2(Ordering::Acquire);
+            atomic_load_no_lse2_seqcst = atomic_load_no_lse2(Ordering::SeqCst);
+        }
+        // SAFETY: the caller must uphold the safety contract,
+        // and each ifunc picks the LDP variant only when run-time detection reports FEAT_LSE2.
+        unsafe {
+            match order {
+                Ordering::Relaxed => {
+                    ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_load_lse2_relaxed
+                        } else {
+                            atomic_load_no_lse2_relaxed
+                        }
+                    })
+                }
+                Ordering::Acquire => {
+                    ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_load_lse2_acquire
+                        } else {
+                            atomic_load_no_lse2_acquire
+                        }
+                    })
+                }
+                Ordering::SeqCst => {
+                    ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_load_lse2_seqcst
+                        } else {
+                            atomic_load_no_lse2_seqcst
+                        }
+                    })
+                }
+                _ => unreachable!("{:?}", order),
+            }
+        }
+    }
+}
+// If the CPU supports FEAT_LSE2, LDP/LDIAPP is a single-copy atomic read;
+// otherwise it is two single-copy atomic reads.
+// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile
+#[cfg(any(
+    target_feature = "lse2",
+    portable_atomic_target_feature = "lse2",
+    not(portable_atomic_no_outline_atomics),
+))]
+#[inline]
+unsafe fn _atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 {
+    debug_assert!(src as usize % 16 == 0);
+    debug_assert_lse2!();
+
+    // SAFETY: the caller must guarantee that `src` is valid for reads,
+    // 16-byte aligned, and that there are no concurrent non-atomic operations.
+    //
+    // Refs:
+    // - LDP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDP--A64-
+    unsafe {
+        let (out_lo, out_hi);
+        macro_rules! atomic_load_relaxed {
+            ($acquire:tt $(, $readonly:tt)?) => {
+                asm!(
+                    "ldp {out_lo}, {out_hi}, [{src}]",
+                    $acquire,
+                    src = in(reg) ptr_reg!(src),
+                    out_hi = lateout(reg) out_hi,
+                    out_lo = lateout(reg) out_lo,
+                    options(nostack, preserves_flags $(, $readonly)?),
+                )
+            };
+        }
+        match order {
+            Ordering::Relaxed => atomic_load_relaxed!("", readonly),
+            #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
+            Ordering::Acquire => {
+                // SAFETY: cfg guarantees that the CPU supports FEAT_LRCPC3.
+                // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers-
+                asm!(
+                    "ldiapp {out_lo}, {out_hi}, [{src}]",
+                    src = in(reg) ptr_reg!(src),
+                    out_hi = lateout(reg) out_hi,
+                    out_lo = lateout(reg) out_lo,
+                    options(nostack, preserves_flags),
+                );
+            }
+            #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
+            Ordering::Acquire => atomic_load_relaxed!("dmb ishld"),
+            Ordering::SeqCst => {
+                asm!(
+                    // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp.
+                    // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891 for details.
+                    "ldar {tmp}, [{src}]",
+                    "ldp {out_lo}, {out_hi}, [{src}]",
+                    "dmb ishld",
+                    src = in(reg) ptr_reg!(src),
+                    out_hi = lateout(reg) out_hi,
+                    out_lo = lateout(reg) out_lo,
+                    tmp = out(reg) _,
+                    options(nostack, preserves_flags),
+                );
+            }
+            _ => unreachable!("{:?}", order),
+        }
+        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+    }
+}
+// 128-bit load via CASP. Do not use _atomic_compare_exchange_casp because it needs an extra MOV to implement load.
+#[cfg(any(test, not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))))]
+#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+#[inline]
+unsafe fn _atomic_load_casp(src: *mut u128, order: Ordering) -> u128 {
+    debug_assert!(src as usize % 16 == 0);
+    debug_assert_lse!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that the CPU supports FEAT_LSE.
+    unsafe {
+        let (out_lo, out_hi);
+        macro_rules! atomic_load {
+            ($acquire:tt, $release:tt) => {
+                asm!(
+                    start_lse!(),
+                    concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"),
+                    src = in(reg) ptr_reg!(src),
+                    // must be allocated to an even/odd register pair
+                    inout("x2") 0_u64 => out_lo,
+                    inout("x3") 0_u64 => out_hi,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        match order {
+            Ordering::Relaxed => atomic_load!("", ""),
+            Ordering::Acquire => atomic_load!("a", ""),
+            Ordering::SeqCst => atomic_load!("a", "l"),
+            _ => unreachable!("{:?}", order),
+        }
+        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+    }
+}
+#[cfg(any(
+    test,
+    all(
+        not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")),
+        not(any(target_feature = "lse", portable_atomic_target_feature = "lse")),
+    ),
+))]
+#[inline]
+unsafe fn _atomic_load_ldxp_stxp(src: *mut u128, order: Ordering) -> u128 {
+    debug_assert!(src as usize % 16 == 0);
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let (mut out_lo, mut out_hi);
+        macro_rules! atomic_load {
+            ($acquire:tt, $release:tt) => {
+                asm!(
+                    "2:",
+                        concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{src}]"),
+                        concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{src}]"),
+                        // The value just loaded is written back; {r} is 0 if that store succeeded, 1 if no store was performed
+                        "cbnz {r:w}, 2b",
+                    src = in(reg) ptr_reg!(src),
+                    out_lo = out(reg) out_lo,
+                    out_hi = out(reg) out_hi,
+                    r = out(reg) _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        match order {
+            Ordering::Relaxed => atomic_load!("", ""),
+            Ordering::Acquire => atomic_load!("a", ""),
+            Ordering::SeqCst => atomic_load!("a", "l"),
+            _ => unreachable!("{:?}", order),
+        }
+        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+    }
+}
+
+// cfg guarantees that the CPU supports FEAT_LSE2.
+#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))]
+use _atomic_store_stp as atomic_store;
+#[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+    #[inline]
+    unsafe fn atomic_store_no_lse2(dst: *mut u128, val: u128, order: Ordering) {
+        // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+        // we use the CAS-based swap (previous value discarded); otherwise the LDXP/STXP store loop.
+        #[cfg(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        ))]
+        // SAFETY: the caller must uphold the safety contract.
+        // cfg guarantees that the CPU supports FEAT_LSE.
+        unsafe {
+            _atomic_swap_casp(dst, val, order);
+        }
+        #[cfg(not(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        )))]
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe {
+            _atomic_store_ldxp_stxp(dst, val, order);
+        }
+    }
+    #[cfg(not(all(
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            // These don't support detection of FEAT_LSE2.
+            // target_os = "openbsd",
+            // target_os = "fuchsia",
+            // target_os = "windows",
+        ),
+    )))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        atomic_store_no_lse2(dst, val, order);
+    }
+    #[cfg(all(
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            // These don't support detection of FEAT_LSE2.
+            // target_os = "openbsd",
+            // target_os = "fuchsia",
+            // target_os = "windows",
+        ),
+    ))]
+    {
+        fn_alias! {
+            // inline(never) is just a hint and also not strictly necessary
+            // because we use ifunc helper macro, but used for clarity.
+            #[inline(never)]
+            unsafe fn(dst: *mut u128, val: u128);
+            atomic_store_lse2_relaxed = _atomic_store_stp(Ordering::Relaxed);
+            atomic_store_lse2_release = _atomic_store_stp(Ordering::Release);
+            atomic_store_lse2_seqcst = _atomic_store_stp(Ordering::SeqCst);
+        }
+        fn_alias! {
+            unsafe fn(dst: *mut u128, val: u128);
+            atomic_store_no_lse2_relaxed = atomic_store_no_lse2(Ordering::Relaxed);
+            atomic_store_no_lse2_release = atomic_store_no_lse2(Ordering::Release);
+            atomic_store_no_lse2_seqcst = atomic_store_no_lse2(Ordering::SeqCst);
+        }
+        // SAFETY: the caller must uphold the safety contract,
+        // and we've checked whether FEAT_LSE2 is available.
+        unsafe {
+            match order {
+                Ordering::Relaxed => {
+                    ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_store_lse2_relaxed
+                        } else {
+                            atomic_store_no_lse2_relaxed
+                        }
+                    });
+                }
+                Ordering::Release => {
+                    ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_store_lse2_release
+                        } else {
+                            atomic_store_no_lse2_release
+                        }
+                    });
+                }
+                Ordering::SeqCst => {
+                    ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_store_lse2_seqcst
+                        } else {
+                            atomic_store_no_lse2_seqcst
+                        }
+                    });
+                }
+                _ => unreachable!("{:?}", order),
+            }
+        }
+    }
+}
+// 128-bit store via STP/STILP. If the CPU supports FEAT_LSE2, STP/STILP is a single-copy
+// atomic write; otherwise it is two separate single-copy atomic writes.
+// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile
+#[cfg(any(
+    target_feature = "lse2",
+    portable_atomic_target_feature = "lse2",
+    not(portable_atomic_no_outline_atomics),
+))]
+#[inline]
+unsafe fn _atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) {
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_lse2!();
+
+    // SAFETY: the caller must guarantee that `dst` is valid for writes,
+    // 16-byte aligned, and that there are no concurrent non-atomic operations.
+    //
+    // Refs:
+    // - STP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STP--A64-
+    unsafe {
+        #[rustfmt::skip]
+        macro_rules! atomic_store {
+            ($acquire:tt, $release:tt) => {{
+                let val = U128 { whole: val };
+                asm!(
+                    $release,
+                    "stp {val_lo}, {val_hi}, [{dst}]",
+                    $acquire,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = in(reg) val.pair.lo,
+                    val_hi = in(reg) val.pair.hi,
+                    options(nostack, preserves_flags),
+                );
+            }};
+        }
+        match order {
+            Ordering::Relaxed => atomic_store!("", ""),
+            #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
+            Ordering::Release => {
+                let val = U128 { whole: val };
+                // SAFETY: cfg guarantees that the CPU supports FEAT_LRCPC3.
+                // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers-
+                asm!(
+                    "stilp {val_lo}, {val_hi}, [{dst}]",
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = in(reg) val.pair.lo,
+                    val_hi = in(reg) val.pair.hi,
+                    options(nostack, preserves_flags),
+                );
+            }
+            #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
+            #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+            Ordering::Release => {
+                // Use swpp if stp requires fences.
+                // https://reviews.llvm.org/D143506
+                // SAFETY: cfg guarantees that the CPU supports FEAT_LSE128.
+                _atomic_swap_swpp(dst, val, order);
+            }
+            #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
+            #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+            Ordering::Release => atomic_store!("", "dmb ish"),
+            #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+            Ordering::SeqCst => {
+                // Use swpp if stp requires fences.
+                // https://reviews.llvm.org/D143506
+                // SAFETY: cfg guarantees that the CPU supports FEAT_LSE128.
+                _atomic_swap_swpp(dst, val, order);
+            }
+            #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+            Ordering::SeqCst => atomic_store!("dmb ish", "dmb ish"),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+// 128-bit store via an LDXP/STXP loop. Do not use _atomic_swap_ldxp_stxp because it needs extra registers to implement store.
+#[cfg(any(
+    test,
+    not(all(
+        any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+        not(portable_atomic_ll_sc_rmw),
+    ))
+))]
+#[inline]
+unsafe fn _atomic_store_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) {
+    debug_assert!(dst as usize % 16 == 0);
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let val = U128 { whole: val };
+        macro_rules! store {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    "2:",
+                        concat!("ld", $acquire, "xp xzr, {tmp}, [{dst}]"),
+                        concat!("st", $release, "xp {tmp:w}, {val_lo}, {val_hi}, [{dst}]"),
+                        // {tmp} is 0 if the store was successful, 1 if no store was performed
+                        "cbnz {tmp:w}, 2b",
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = in(reg) val.pair.lo,
+                    val_hi = in(reg) val.pair.hi,
+                    tmp = out(reg) _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(store, order);
+    }
+}
+
+#[inline]
+unsafe fn atomic_compare_exchange(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> Result<u128, u128> {
+    #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that the CPU supports FEAT_LSE.
+    let prev = unsafe { _atomic_compare_exchange_casp(dst, old, new, success, failure) };
+    #[cfg(not(all(
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            target_os = "openbsd",
+            target_os = "fuchsia",
+            target_os = "windows",
+        ),
+    )))]
+    #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
+    // SAFETY: the caller must uphold the safety contract.
+    let prev = unsafe { _atomic_compare_exchange_ldxp_stxp(dst, old, new, success, failure) };
+    #[cfg(all(
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            target_os = "openbsd",
+            target_os = "fuchsia",
+            target_os = "windows",
+        ),
+    ))]
+    #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
+    let prev = {
+        fn_alias! {
+            // inline(never) is just a hint and also not strictly necessary
+            // because we use ifunc helper macro, but used for clarity.
+            #[inline(never)]
+            unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128;
+            atomic_compare_exchange_casp_relaxed
+                = _atomic_compare_exchange_casp(Ordering::Relaxed, Ordering::Relaxed);
+            atomic_compare_exchange_casp_acquire
+                = _atomic_compare_exchange_casp(Ordering::Acquire, Ordering::Acquire);
+            atomic_compare_exchange_casp_release
+                = _atomic_compare_exchange_casp(Ordering::Release, Ordering::Relaxed);
+            atomic_compare_exchange_casp_acqrel
+                = _atomic_compare_exchange_casp(Ordering::AcqRel, Ordering::Acquire);
+            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
+            #[cfg(target_env = "msvc")]
+            atomic_compare_exchange_casp_seqcst
+                = _atomic_compare_exchange_casp(Ordering::SeqCst, Ordering::SeqCst);
+        }
+        fn_alias! {
+            unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128;
+            atomic_compare_exchange_ldxp_stxp_relaxed
+                = _atomic_compare_exchange_ldxp_stxp(Ordering::Relaxed, Ordering::Relaxed);
+            atomic_compare_exchange_ldxp_stxp_acquire
+                = _atomic_compare_exchange_ldxp_stxp(Ordering::Acquire, Ordering::Acquire);
+            atomic_compare_exchange_ldxp_stxp_release
+                = _atomic_compare_exchange_ldxp_stxp(Ordering::Release, Ordering::Relaxed);
+            atomic_compare_exchange_ldxp_stxp_acqrel
+                = _atomic_compare_exchange_ldxp_stxp(Ordering::AcqRel, Ordering::Acquire);
+            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
+            #[cfg(target_env = "msvc")]
+            atomic_compare_exchange_ldxp_stxp_seqcst
+                = _atomic_compare_exchange_ldxp_stxp(Ordering::SeqCst, Ordering::SeqCst);
+        }
+        // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+        // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+        // and we've checked whether FEAT_LSE is available.
+        unsafe {
+            let success = crate::utils::upgrade_success_ordering(success, failure);
+            match success {
+                Ordering::Relaxed => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_relaxed
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_relaxed
+                        }
+                    })
+                }
+                Ordering::Acquire => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_acquire
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_acquire
+                        }
+                    })
+                }
+                Ordering::Release => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_release
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_release
+                        }
+                    })
+                }
+                // AcqRel and SeqCst RMWs are equivalent in both implementations in non-MSVC environments.
+                #[cfg(not(target_env = "msvc"))]
+                Ordering::AcqRel | Ordering::SeqCst => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_acqrel
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_acqrel
+                        }
+                    })
+                }
+                #[cfg(target_env = "msvc")]
+                Ordering::AcqRel => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_acqrel
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_acqrel
+                        }
+                    })
+                }
+                #[cfg(target_env = "msvc")]
+                Ordering::SeqCst => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_seqcst
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_seqcst
+                        }
+                    })
+                }
+                _ => unreachable!("{:?}", success),
+            }
+        }
+    };
+    if prev == old {
+        Ok(prev)
+    } else {
+        Err(prev)
+    }
+}
+#[cfg(any(
+    target_feature = "lse",
+    portable_atomic_target_feature = "lse",
+    not(portable_atomic_no_outline_atomics),
+))]
+#[inline]
+unsafe fn _atomic_compare_exchange_casp(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_lse!();
+    let order = crate::utils::upgrade_success_ordering(success, failure);
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+    // and that the CPU supports FEAT_LSE.
+    //
+    // Refs:
+    // - https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL--A64-
+    // - https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-Swap-Pair-of-words-or-doublewords-in-memory-
+    unsafe {
+        let old = U128 { whole: old };
+        let new = U128 { whole: new };
+        let (prev_lo, prev_hi);
+        macro_rules! cmpxchg {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    start_lse!(),
+                    concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"),
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    // must be allocated to an even/odd register pair
+                    inout("x6") old.pair.lo => prev_lo,
+                    inout("x7") old.pair.hi => prev_hi,
+                    // must be allocated to an even/odd register pair
+                    in("x4") new.pair.lo,
+                    in("x5") new.pair.hi,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(cmpxchg, order, write = success);
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+    }
+}
+#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
+#[inline]
+unsafe fn _atomic_compare_exchange_ldxp_stxp(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+    let order = crate::utils::upgrade_success_ordering(success, failure);
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+    //
+    // Refs:
+    // - LDXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDXP--A64-
+    // - LDAXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDAXP--A64-
+    // - STXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STXP--A64-
+    // - STLXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STLXP--A64-
+    //
+    // Note: a Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic
+    // operation (even load/store), a corresponding Store-Exclusive pair must succeed.
+    // See Arm Architecture Reference Manual for A-profile architecture
+    // Section B2.2.1 "Requirements for single-copy atomicity", and
+    // Section B2.9 "Synchronization and semaphores" for more.
+    unsafe {
+        let old = U128 { whole: old };
+        let new = U128 { whole: new };
+        let (mut prev_lo, mut prev_hi);
+        macro_rules! cmpxchg {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    "2:",
+                        concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),
+                        "cmp {prev_lo}, {old_lo}",
+                        "cset {r:w}, ne",
+                        "cmp {prev_hi}, {old_hi}",
+                        "cinc {r:w}, {r:w}, ne",
+                        "cbz {r:w}, 3f",
+                        concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{dst}]"),
+                        // Mismatch path writes back the loaded value; {r} is 0 if the store was successful, 1 if no store was performed
+                        "cbnz {r:w}, 2b",
+                        "b 4f",
+                    "3:",
+                        concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"),
+                        // Match path stores the new value; {r} is 0 if the store was successful, 1 if no store was performed
+                        "cbnz {r:w}, 2b",
+                    "4:",
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    old_lo = in(reg) old.pair.lo,
+                    old_hi = in(reg) old.pair.hi,
+                    new_lo = in(reg) new.pair.lo,
+                    new_hi = in(reg) new.pair.hi,
+                    prev_lo = out(reg) prev_lo,
+                    prev_hi = out(reg) prev_hi,
+                    r = out(reg) _,
+                    // Do not use `preserves_flags` because CMP modifies the condition flags.
+                    options(nostack),
+                )
+            };
+        }
+        atomic_rmw!(cmpxchg, order, write = success);
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+    }
+}
+
+// casp is always strong, and ldxp requires a corresponding (successful) stxp for
+// its atomicity (see code comment in _atomic_compare_exchange_ldxp_stxp).
+// (i.e., aarch64 doesn't have 128-bit weak CAS)
+use self::atomic_compare_exchange as atomic_compare_exchange_weak;
+
+// If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+// we use CAS-based atomic RMW.
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+#[cfg(all(
+    any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+    not(portable_atomic_ll_sc_rmw),
+))]
+use _atomic_swap_casp as atomic_swap;
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+#[cfg(not(all(
+    any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+    not(portable_atomic_ll_sc_rmw),
+)))]
+use _atomic_swap_ldxp_stxp as atomic_swap;
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+use _atomic_swap_swpp as atomic_swap;
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+#[inline]
+unsafe fn _atomic_swap_swpp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+    // and that the CPU supports FEAT_LSE128.
+    //
+    // Refs:
+    // - SWPP: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/SWPP--SWPPA--SWPPAL--SWPPL--Swap-quadword-in-memory-?lang=en
+    unsafe {
+        let val = U128 { whole: val };
+        let (prev_lo, prev_hi);
+        macro_rules! swap {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"),
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = inout(reg) val.pair.lo => prev_lo,
+                    val_hi = inout(reg) val.pair.hi => prev_hi,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(swap, order);
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+    }
+}
+// 128-bit swap via a CASP retry loop. Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap.
+#[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
+#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+#[inline]
+unsafe fn _atomic_swap_casp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_lse!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that the CPU supports FEAT_LSE.
+    unsafe {
+        let val = U128 { whole: val };
+        let (mut prev_lo, mut prev_hi);
+        macro_rules! swap {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    start_lse!(),
+                    // If FEAT_LSE2 is not supported, this works like byte-wise atomic.
+                    // This is not a single-copy atomic read, but that is ok because the subsequent
+                    // CAS will check for consistency.
+                    "ldp x4, x5, [{dst}]",
+                    "2:",
+                        // casp writes the current value to the first register pair,
+                        // so copy the expected value (x4, x5) for later comparison.
+                        "mov {tmp_lo}, x4",
+                        "mov {tmp_hi}, x5",
+                        concat!("casp", $acquire, $release, " x4, x5, x2, x3, [{dst}]"),
+                        "cmp {tmp_hi}, x5",
+                        "ccmp {tmp_lo}, x4, #0, eq",
+                        "b.ne 2b",
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    tmp_lo = out(reg) _,
+                    tmp_hi = out(reg) _,
+                    // must be allocated to an even/odd register pair
+                    out("x4") prev_lo,
+                    out("x5") prev_hi,
+                    // must be allocated to an even/odd register pair
+                    in("x2") val.pair.lo,
+                    in("x3") val.pair.hi,
+                    // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
+                    options(nostack),
+                )
+            };
+        }
+        atomic_rmw!(swap, order);
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+    }
+}
+// LDXP/STXP-based swap. Do not use atomic_rmw_ll_sc_3 because it needs extra MOV to implement swap.
+#[cfg(any(
+    test,
+    not(all(
+        any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+        not(portable_atomic_ll_sc_rmw),
+    ))
+))]
+#[inline]
+unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let val = U128 { whole: val };
+        let (mut prev_lo, mut prev_hi);
+        macro_rules! swap {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    "2:",
+                        concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),
+                        concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"),
+                        // {r:w} is 0 if the store was successful, 1 if no store was performed
+                        "cbnz {r:w}, 2b",
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = in(reg) val.pair.lo,
+                    val_hi = in(reg) val.pair.hi,
+                    prev_lo = out(reg) prev_lo,
+                    prev_hi = out(reg) prev_hi,
+                    r = out(reg) _, // store status; only needed inside the loop
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(swap, order);
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+    }
+}
+
+/// Generates an atomic RMW function implemented as an LL/SC (LDXP/STXP) loop (3 arguments):
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+/// The generated function returns the previous value.
+/// `$op` can use the following registers:
+/// - val_lo/val_hi pair: val argument (read-only for `$op`)
+/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`)
+/// - new_lo/new_hi pair: new value that will be stored by sc
+macro_rules! atomic_rmw_ll_sc_3 {
+    ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => {
+        // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+        // we use the CAS-based atomic RMW generated by the atomic_rmw_cas_3! macro instead.
+        #[cfg(not(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        )))]
+        use $name as $reexport_name;
+        #[cfg(any(
+            test,
+            not(all(
+                any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+                not(portable_atomic_ll_sc_rmw),
+            ))
+        ))]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                let val = U128 { whole: val };
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt, $fence:tt) => {
+                        asm!(
+                            "2:",
+                                concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),
+                                $($op)* // computes new_lo/new_hi from prev_*/val_*
+                                concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"),
+                                // {r:w} is 0 if the store was successful, 1 if no store was performed
+                                "cbnz {r:w}, 2b",
+                            $fence,
+                            dst = in(reg) ptr_reg!(dst),
+                            val_lo = in(reg) val.pair.lo,
+                            val_hi = in(reg) val.pair.hi,
+                            prev_lo = out(reg) prev_lo,
+                            prev_hi = out(reg) prev_hi,
+                            new_lo = out(reg) _,
+                            new_hi = out(reg) _,
+                            r = out(reg) _,
+                            options(nostack $(, $preserves_flags)?), // flag option only if the invocation passes it
+                        )
+                    };
+                }
+                atomic_rmw!(op, order);
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+            }
+        }
+    };
+}
+/// Generates an atomic RMW function implemented as a CAS (CASP) loop (3 arguments):
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+/// The generated function returns the previous value.
+/// `$op` can use the following registers:
+/// - val_lo/val_hi pair: val argument (read-only for `$op`)
+/// - x6/x7 pair: previous value loaded (read-only for `$op`)
+/// - x4/x5 pair: new value that will be stored
+macro_rules! atomic_rmw_cas_3 {
+    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
+        // If FEAT_LSE is not available at compile-time or portable_atomic_ll_sc_rmw cfg is set,
+        // we use the LL/SC-based atomic RMW generated by the atomic_rmw_ll_sc_3! macro instead.
+        #[cfg(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        ))]
+        use $name as $reexport_name;
+        #[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
+        #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            debug_assert_lse!();
+            // SAFETY: the caller must uphold the safety contract.
+            // cfg guarantees that the CPU supports FEAT_LSE.
+            unsafe {
+                let val = U128 { whole: val };
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt, $fence:tt) => {
+                        asm!(
+                            start_lse!(),
+                            // If FEAT_LSE2 is not supported, this load behaves like a byte-wise atomic load.
+                            // It is not a single-copy atomic read, but that is ok because the subsequent
+                            // CAS will check for consistency.
+                            "ldp x6, x7, [{dst}]",
+                            "2:",
+                                // casp writes the current value to the first register pair (x6/x7),
+                                // so keep a copy of the expected value for the comparison after casp.
+                                "mov {tmp_lo}, x6",
+                                "mov {tmp_hi}, x7",
+                                $($op)*
+                                concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"),
+                                "cmp {tmp_hi}, x7",
+                                "ccmp {tmp_lo}, x6, #0, eq",
+                                "b.ne 2b", // retry with the freshly read value if the comparison failed
+                            $fence,
+                            dst = in(reg) ptr_reg!(dst),
+                            val_lo = in(reg) val.pair.lo,
+                            val_hi = in(reg) val.pair.hi,
+                            tmp_lo = out(reg) _,
+                            tmp_hi = out(reg) _,
+                            // must be allocated to even/odd register pair
+                            out("x6") prev_lo,
+                            out("x7") prev_hi,
+                            // must be allocated to even/odd register pair
+                            out("x4") _,
+                            out("x5") _,
+                            // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
+                            options(nostack),
+                        )
+                    };
+                }
+                atomic_rmw!(op, order);
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+            }
+        }
+    };
+}
+
+/// Generates an atomic RMW function implemented as an LL/SC (LDXP/STXP) loop (2 arguments):
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+/// The generated function returns the previous value.
+/// `$op` can use the following registers:
+/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`)
+/// - new_lo/new_hi pair: new value that will be stored by sc
+macro_rules! atomic_rmw_ll_sc_2 {
+    ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => {
+        // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+        // we use the CAS-based atomic RMW generated by the atomic_rmw_cas_2! macro instead.
+        #[cfg(not(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        )))]
+        use $name as $reexport_name;
+        #[cfg(any(
+            test,
+            not(all(
+                any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+                not(portable_atomic_ll_sc_rmw),
+            ))
+        ))]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt, $fence:tt) => {
+                        asm!(
+                            "2:",
+                                concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),
+                                $($op)* // computes new_lo/new_hi from prev_lo/prev_hi
+                                concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"),
+                                // {r:w} is 0 if the store was successful, 1 if no store was performed
+                                "cbnz {r:w}, 2b",
+                            $fence,
+                            dst = in(reg) ptr_reg!(dst),
+                            prev_lo = out(reg) prev_lo,
+                            prev_hi = out(reg) prev_hi,
+                            new_lo = out(reg) _,
+                            new_hi = out(reg) _,
+                            r = out(reg) _,
+                            options(nostack $(, $preserves_flags)?), // flag option only if the invocation passes it
+                        )
+                    };
+                }
+                atomic_rmw!(op, order);
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+            }
+        }
+    };
+}
+/// Generates an atomic RMW function implemented as a CAS (CASP) loop (2 arguments):
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+/// The generated function returns the previous value.
+/// `$op` can use the following registers:
+/// - x6/x7 pair: previous value loaded (read-only for `$op`)
+/// - x4/x5 pair: new value that will be stored
+macro_rules! atomic_rmw_cas_2 {
+    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
+        // If FEAT_LSE is not available at compile-time or portable_atomic_ll_sc_rmw cfg is set,
+        // we use the LL/SC-based atomic RMW generated by the atomic_rmw_ll_sc_2! macro instead.
+        #[cfg(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        ))]
+        use $name as $reexport_name;
+        #[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
+        #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            debug_assert_lse!();
+            // SAFETY: the caller must uphold the safety contract.
+            // cfg guarantees that the CPU supports FEAT_LSE.
+            unsafe {
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt, $fence:tt) => {
+                        asm!(
+                            start_lse!(),
+                            // If FEAT_LSE2 is not supported, this load behaves like a byte-wise atomic load.
+                            // It is not a single-copy atomic read, but that is ok because the subsequent
+                            // CAS will check for consistency.
+                            "ldp x6, x7, [{dst}]",
+                            "2:",
+                                // casp writes the current value to the first register pair (x6/x7),
+                                // so keep a copy of the expected value for the comparison after casp.
+                                "mov {tmp_lo}, x6",
+                                "mov {tmp_hi}, x7",
+                                $($op)*
+                                concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"),
+                                "cmp {tmp_hi}, x7",
+                                "ccmp {tmp_lo}, x6, #0, eq",
+                                "b.ne 2b", // retry with the freshly read value if the comparison failed
+                            $fence,
+                            dst = in(reg) ptr_reg!(dst),
+                            tmp_lo = out(reg) _,
+                            tmp_hi = out(reg) _,
+                            // must be allocated to even/odd register pair
+                            out("x6") prev_lo,
+                            out("x7") prev_hi,
+                            // must be allocated to even/odd register pair
+                            out("x4") _,
+                            out("x5") _,
+                            // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
+                            options(nostack),
+                        )
+                    };
+                }
+                atomic_rmw!(op, order);
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+            }
+        }
+    };
+}
+
+// Do not use `preserves_flags` because ADDS modifies the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_add_ldxp_stxp as atomic_add,
+    select_le_or_be!("adds {new_lo}, {prev_lo}, {val_lo}", "adds {new_hi}, {prev_hi}, {val_hi}"),
+    select_le_or_be!("adc {new_hi}, {prev_hi}, {val_hi}", "adc {new_lo}, {prev_lo}, {val_lo}"),
+}
+atomic_rmw_cas_3! {
+    _atomic_add_casp as atomic_add,
+    select_le_or_be!("adds x4, x6, {val_lo}", "adds x5, x7, {val_hi}"),
+    select_le_or_be!("adc x5, x7, {val_hi}", "adc x4, x6, {val_lo}"),
+}
+
+// Do not use `preserves_flags` because SUBS modifies the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_sub_ldxp_stxp as atomic_sub,
+    select_le_or_be!("subs {new_lo}, {prev_lo}, {val_lo}", "subs {new_hi}, {prev_hi}, {val_hi}"),
+    select_le_or_be!("sbc {new_hi}, {prev_hi}, {val_hi}", "sbc {new_lo}, {prev_lo}, {val_lo}"),
+}
+atomic_rmw_cas_3! {
+    _atomic_sub_casp as atomic_sub,
+    select_le_or_be!("subs x4, x6, {val_lo}", "subs x5, x7, {val_hi}"),
+    select_le_or_be!("sbc x5, x7, {val_hi}", "sbc x4, x6, {val_lo}"),
+}
+
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+atomic_rmw_ll_sc_3! {
+    _atomic_and_ldxp_stxp as atomic_and (preserves_flags),
+    "and {new_lo}, {prev_lo}, {val_lo}",
+    "and {new_hi}, {prev_hi}, {val_hi}",
+}
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+atomic_rmw_cas_3! {
+    _atomic_and_casp as atomic_and,
+    "and x4, x6, {val_lo}",
+    "and x5, x7, {val_hi}",
+}
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+#[inline]
+unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+    // and that the CPU supports FEAT_LSE128.
+    //
+    // Refs:
+    // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDCLRP--LDCLRPA--LDCLRPAL--LDCLRPL--Atomic-bit-clear-on-quadword-in-memory-?lang=en
+    unsafe {
+        let val = U128 { whole: !val }; // LDCLRP is a bit clear, so AND with `val` == clear the bits of `!val`
+        let (prev_lo, prev_hi);
+        macro_rules! and {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    concat!("ldclrp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"),
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = inout(reg) val.pair.lo => prev_lo, // operand in, previous value out
+                    val_hi = inout(reg) val.pair.hi => prev_hi,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(and, order);
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+    }
+}
+
+atomic_rmw_ll_sc_3! {
+    _atomic_nand_ldxp_stxp as atomic_nand (preserves_flags),
+    "and {new_lo}, {prev_lo}, {val_lo}",
+    "mvn {new_lo}, {new_lo}",
+    "and {new_hi}, {prev_hi}, {val_hi}",
+    "mvn {new_hi}, {new_hi}",
+}
+atomic_rmw_cas_3! {
+    _atomic_nand_casp as atomic_nand,
+    "and x4, x6, {val_lo}",
+    "mvn x4, x4",
+    "and x5, x7, {val_hi}",
+    "mvn x5, x5",
+}
+
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+atomic_rmw_ll_sc_3! {
+    _atomic_or_ldxp_stxp as atomic_or (preserves_flags),
+    "orr {new_lo}, {prev_lo}, {val_lo}",
+    "orr {new_hi}, {prev_hi}, {val_hi}",
+}
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+atomic_rmw_cas_3! {
+    _atomic_or_casp as atomic_or,
+    "orr x4, x6, {val_lo}",
+    "orr x5, x7, {val_hi}",
+}
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+#[inline]
+unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+    // and that the CPU supports FEAT_LSE128.
+    //
+    // Refs:
+    // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDSETP--LDSETPA--LDSETPAL--LDSETPL--Atomic-bit-set-on-quadword-in-memory-?lang=en
+    unsafe {
+        let val = U128 { whole: val };
+        let (prev_lo, prev_hi);
+        macro_rules! or {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    concat!("ldsetp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"),
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = inout(reg) val.pair.lo => prev_lo, // operand in, previous value out
+                    val_hi = inout(reg) val.pair.hi => prev_hi,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(or, order);
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+    }
+}
+
+atomic_rmw_ll_sc_3! {
+    _atomic_xor_ldxp_stxp as atomic_xor (preserves_flags),
+    "eor {new_lo}, {prev_lo}, {val_lo}",
+    "eor {new_hi}, {prev_hi}, {val_hi}",
+}
+atomic_rmw_cas_3! {
+    _atomic_xor_casp as atomic_xor,
+    "eor x4, x6, {val_lo}",
+    "eor x5, x7, {val_hi}",
+}
+
+atomic_rmw_ll_sc_2! {
+    _atomic_not_ldxp_stxp as atomic_not (preserves_flags),
+    "mvn {new_lo}, {prev_lo}",
+    "mvn {new_hi}, {prev_hi}",
+}
+atomic_rmw_cas_2! {
+    _atomic_not_casp as atomic_not,
+    "mvn x4, x6",
+    "mvn x5, x7",
+}
+
+// Do not use `preserves_flags` because NEGS modifies the condition flags.
+atomic_rmw_ll_sc_2! {
+    _atomic_neg_ldxp_stxp as atomic_neg,
+    select_le_or_be!("negs {new_lo}, {prev_lo}", "negs {new_hi}, {prev_hi}"),
+    select_le_or_be!("ngc {new_hi}, {prev_hi}", "ngc {new_lo}, {prev_lo}"),
+}
+atomic_rmw_cas_2! {
+    _atomic_neg_casp as atomic_neg,
+    select_le_or_be!("negs x4, x6", "negs x5, x7"),
+    select_le_or_be!("ngc x5, x7", "ngc x4, x6"),
+}
+
+// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_max_ldxp_stxp as atomic_max,
+    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
+    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+    "csel {new_hi}, {prev_hi}, {val_hi}, lt", // select hi 64-bit
+    "csel {new_lo}, {prev_lo}, {val_lo}, lt", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+    _atomic_max_casp as atomic_max,
+    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
+    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+    "csel x5, x7, {val_hi}, lt", // select hi 64-bit
+    "csel x4, x6, {val_lo}, lt", // select lo 64-bit
+}
+
+// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_umax_ldxp_stxp as atomic_umax,
+    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
+    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+    "csel {new_hi}, {prev_hi}, {val_hi}, lo", // select hi 64-bit
+    "csel {new_lo}, {prev_lo}, {val_lo}, lo", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+    _atomic_umax_casp as atomic_umax,
+    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
+    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+    "csel x5, x7, {val_hi}, lo", // select hi 64-bit
+    "csel x4, x6, {val_lo}, lo", // select lo 64-bit
+}
+
+// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_min_ldxp_stxp as atomic_min,
+    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
+    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+    "csel {new_hi}, {prev_hi}, {val_hi}, ge", // select hi 64-bit
+    "csel {new_lo}, {prev_lo}, {val_lo}, ge", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+    _atomic_min_casp as atomic_min,
+    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
+    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+    "csel x5, x7, {val_hi}, ge", // select hi 64-bit
+    "csel x4, x6, {val_lo}, ge", // select lo 64-bit
+}
+
+// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_umin_ldxp_stxp as atomic_umin,
+    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
+    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+    "csel {new_hi}, {prev_hi}, {val_hi}, hs", // select hi 64-bit
+    "csel {new_lo}, {prev_lo}, {val_lo}, hs", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+    _atomic_umin_casp as atomic_umin,
+    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
+    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+    "csel x5, x7, {val_hi}, hs", // select hi 64-bit
+    "csel x4, x6, {val_lo}, hs", // select lo 64-bit
+}
+
+#[inline]
+const fn is_lock_free() -> bool {
+    IS_ALWAYS_LOCK_FREE
+}
+const IS_ALWAYS_LOCK_FREE: bool = true;
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min);
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_atomic_int!(i128);
+    test_atomic_int!(u128);
+
+    // load/store/swap implementation is not affected by signedness, so it is
+    // enough to test only unsigned types.
+    stress_test!(u128);
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs
new file mode 100644
index 0000000..4cbdb51
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs
@@ -0,0 +1,628 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Run-time feature detection on aarch64 Linux/FreeBSD/NetBSD/OpenBSD by parsing system registers.
+//
+// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on NetBSD/OpenBSD.
+// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs
+// https://github.com/rust-lang/stdarch/pull/1374
+//
+// Refs:
+// - https://developer.arm.com/documentation/ddi0601/latest/AArch64-Registers
+// - https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
+// - https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/aarch64.rs
+//
+// Supported platforms:
+// - Linux 4.11+ (emulate mrs instruction)
+//   https://github.com/torvalds/linux/commit/77c97b4ee21290f5f083173d957843b615abbff2
+// - FreeBSD 12.0+ (emulate mrs instruction)
+//   https://github.com/freebsd/freebsd-src/commit/398810619cb32abf349f8de23f29510b2ee0839b
+// - NetBSD 9.0+ (through sysctl)
+//   https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb
+// - OpenBSD 7.1+ (through sysctl)
+//   https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
+//
+// For now, this module is only used on NetBSD/OpenBSD.
+// On Linux/FreeBSD, this module is test-only:
+// - On Linux, this approach requires a higher kernel version than Rust supports,
+//   and also does not work with qemu-user (as of QEMU 7.2) and Valgrind.
+//   (Looking into HWCAP_CPUID in auxvec, it appears that Valgrind is setting it
+//   to false correctly, but qemu-user is setting it to true.)
+// - On FreeBSD, this approach does not work on FreeBSD 12 on QEMU (confirmed on
+//   FreeBSD 12.{2,3,4}), and we got SIGILL (worked on FreeBSD 13 and 14).
+
+include!("common.rs");
+
+#[cfg_attr(test, derive(Debug, PartialEq))]
+struct AA64Reg { // raw ID register values consumed by `_detect`
+    aa64isar0: u64, // ID_AA64ISAR0_EL1
+    #[cfg(test)]
+    aa64isar1: u64, // ID_AA64ISAR1_EL1 (only read in tests)
+    // OpenBSD has an API to get ID_AA64MMFR2_EL1, but it currently always returns 0.
+    // https://github.com/openbsd/src/blob/6a233889798dc3ecb18acc52dce1e57862af2957/sys/arch/arm64/arm64/machdep.c#L371-L377
+    #[cfg_attr(target_os = "openbsd", cfg(test))]
+    aa64mmfr2: u64,
+}
+
+#[cold]
+fn _detect(info: &mut CpuInfo) {
+    let AA64Reg {
+        aa64isar0,
+        #[cfg(test)]
+        aa64isar1,
+        #[cfg_attr(target_os = "openbsd", cfg(test))]
+        aa64mmfr2,
+    } = imp::aa64reg();
+
+    // ID_AA64ISAR0_EL1, Instruction Set Attribute Register 0
+    // https://developer.arm.com/documentation/ddi0601/2023-06/AArch64-Registers/ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0?lang=en
+    let atomic = extract(aa64isar0, 23, 20); // Atomic field, bits [23:20]
+    if atomic >= 2 {
+        info.set(CpuInfo::HAS_LSE);
+        // We currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics, so FEAT_LSE128 is only detected in tests.
+        #[cfg(test)]
+        {
+            if atomic >= 3 {
+                info.set(CpuInfo::HAS_LSE128);
+            }
+        }
+    }
+    // We currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics, so FEAT_LRCPC3 is only detected in tests.
+    #[cfg(test)]
+    {
+        // ID_AA64ISAR1_EL1, Instruction Set Attribute Register 1
+        // https://developer.arm.com/documentation/ddi0601/2023-06/AArch64-Registers/ID-AA64ISAR1-EL1--AArch64-Instruction-Set-Attribute-Register-1?lang=en
+        if extract(aa64isar1, 23, 20) >= 3 { // LRCPC field, bits [23:20]
+            info.set(CpuInfo::HAS_RCPC3);
+        }
+    }
+    // OpenBSD has an API to get this, but it currently always returns 0, so it is test-only there.
+    // https://github.com/openbsd/src/blob/6a233889798dc3ecb18acc52dce1e57862af2957/sys/arch/arm64/arm64/machdep.c#L371-L377
+    #[cfg_attr(target_os = "openbsd", cfg(test))]
+    {
+        // ID_AA64MMFR2_EL1, AArch64 Memory Model Feature Register 2
+        // https://developer.arm.com/documentation/ddi0601/2023-06/AArch64-Registers/ID-AA64MMFR2-EL1--AArch64-Memory-Model-Feature-Register-2?lang=en
+        if extract(aa64mmfr2, 35, 32) >= 1 { // AT field, bits [35:32]
+            info.set(CpuInfo::HAS_LSE2);
+        }
+    }
+}
+
+fn extract(x: u64, high: usize, low: usize) -> u64 { // bits low..=high of x, right-aligned (low <= high <= 63)
+    (x >> low) & (!0_u64 >> (63 - (high - low))) // right-shifted mask: no shift overflow for a 64-bit field
+}
+
+#[cfg(not(any(target_os = "netbsd", target_os = "openbsd")))]
+mod imp {
+    // This module is test-only. See the comments at the top of this file for details.
+
+    #[cfg(not(portable_atomic_no_asm))]
+    use core::arch::asm;
+
+    use super::AA64Reg;
+
+    pub(super) fn aa64reg() -> AA64Reg {
+        // SAFETY: This is safe on FreeBSD 12.0+. FreeBSD 11 was EoL on 2021-09-30.
+        // Note that stdarch has been doing the same thing since before FreeBSD 11 was EoL.
+        // https://github.com/rust-lang/stdarch/pull/611
+        unsafe {
+            let aa64isar0: u64;
+            asm!(
+                "mrs {0}, ID_AA64ISAR0_EL1",
+                out(reg) aa64isar0,
+                options(pure, nomem, nostack, preserves_flags)
+            );
+            #[cfg(test)]
+            let aa64isar1: u64;
+            #[cfg(test)]
+            {
+                asm!(
+                    "mrs {0}, ID_AA64ISAR1_EL1",
+                    out(reg) aa64isar1,
+                    options(pure, nomem, nostack, preserves_flags)
+                );
+            }
+            let aa64mmfr2: u64; // always read: this module is not compiled on OpenBSD
+            asm!(
+                "mrs {0}, ID_AA64MMFR2_EL1",
+                out(reg) aa64mmfr2,
+                options(pure, nomem, nostack, preserves_flags)
+            );
+            AA64Reg {
+                aa64isar0,
+                #[cfg(test)]
+                aa64isar1,
+                aa64mmfr2,
+            }
+        }
+    }
+}
+#[cfg(target_os = "netbsd")]
+mod imp {
+    // NetBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl.
+    // https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb
+    // https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f
+
+    use core::ptr;
+
+    use super::AA64Reg;
+
+    // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+    #[allow(non_camel_case_types)]
+    pub(super) mod ffi {
+        pub(crate) use super::super::c_types::{c_char, c_int, c_size_t, c_void};
+
+        extern "C" {
+            // Defined in sys/sysctl.h.
+            // https://man.netbsd.org/sysctl.3
+            // https://github.com/NetBSD/src/blob/167403557cf60bed09a63fc84d941a1a4bd7d52e/sys/sys/sysctl.h
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/bsd/netbsdlike/netbsd/mod.rs#L2582
+            pub(crate) fn sysctlbyname(
+                name: *const c_char,
+                old_p: *mut c_void,
+                old_len_p: *mut c_size_t,
+                new_p: *const c_void,
+                new_len: c_size_t,
+            ) -> c_int;
+        }
+
+        // Defined in aarch64/armreg.h.
+        // https://github.com/NetBSD/src/blob/167403557cf60bed09a63fc84d941a1a4bd7d52e/sys/arch/aarch64/include/armreg.h#L1626
+        #[derive(Clone, Copy)]
+        #[repr(C)]
+        pub(crate) struct aarch64_sysctl_cpu_id {
+            // NetBSD 9.0+
+            // https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb
+            pub(crate) midr: u64,
+            pub(crate) revidr: u64,
+            pub(crate) mpidr: u64,
+            pub(crate) aa64dfr0: u64,
+            pub(crate) aa64dfr1: u64,
+            pub(crate) aa64isar0: u64,
+            pub(crate) aa64isar1: u64,
+            pub(crate) aa64mmfr0: u64,
+            pub(crate) aa64mmfr1: u64,
+            pub(crate) aa64mmfr2: u64,
+            pub(crate) aa64pfr0: u64,
+            pub(crate) aa64pfr1: u64,
+            pub(crate) aa64zfr0: u64,
+            pub(crate) mvfr0: u32,
+            pub(crate) mvfr1: u32,
+            pub(crate) mvfr2: u32,
+            // NetBSD 10.0+
+            // https://github.com/NetBSD/src/commit/0c7bdc13f0e332cccec56e307f023b4888638973
+            pub(crate) pad: u32,
+            pub(crate) clidr: u64,
+            pub(crate) ctr: u64,
+        }
+    }
+
+    pub(super) unsafe fn sysctl_cpu_id(name: &[u8]) -> Option<AA64Reg> {
+        const OUT_LEN: ffi::c_size_t =
+            core::mem::size_of::<ffi::aarch64_sysctl_cpu_id>() as ffi::c_size_t;
+
+        debug_assert_eq!(name.last(), Some(&0), "{:?}", name); // must be NUL-terminated
+        debug_assert_eq!(name.iter().filter(|&&v| v == 0).count(), 1, "{:?}", name); // no interior NUL
+
+        // SAFETY: all fields of aarch64_sysctl_cpu_id are zero-able and we only use
+        // the result when the machdep.cpuN.cpu_id sysctl was successful.
+        let mut buf: ffi::aarch64_sysctl_cpu_id = unsafe { core::mem::zeroed() };
+        let mut out_len = OUT_LEN;
+        // SAFETY:
+        // - the caller must guarantee that `name` is `machdep.cpuN.cpu_id` in a C string.
+        // - `out_len` does not exceed the size of the value at `buf`.
+        // - `sysctlbyname` is thread-safe.
+        let res = unsafe {
+            ffi::sysctlbyname(
+                name.as_ptr().cast::<ffi::c_char>(),
+                (&mut buf as *mut ffi::aarch64_sysctl_cpu_id).cast::<ffi::c_void>(),
+                &mut out_len,
+                ptr::null_mut(),
+                0,
+            )
+        };
+        if res != 0 { // non-zero return: the sysctl call failed
+            return None;
+        }
+        Some(AA64Reg {
+            aa64isar0: buf.aa64isar0,
+            #[cfg(test)]
+            aa64isar1: buf.aa64isar1,
+            aa64mmfr2: buf.aa64mmfr2,
+        })
+    }
+
+    pub(super) fn aa64reg() -> AA64Reg {
+        // Only cpu0 is queried; it is ok to check only cpu0, even if there are more CPUs.
+        // https://github.com/NetBSD/src/commit/bd9707e06ea7d21b5c24df6dfc14cb37c2819416
+        // https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f
+        //
+        // machdep.cpuN.cpu_id sysctl was added on NetBSD 9.0, so the query fails on older
+        // versions; in that case every register is reported as zero.
+        // SAFETY: we passed a valid name in a C string.
+        if let Some(regs) = unsafe { sysctl_cpu_id(b"machdep.cpu0.cpu_id\0") } {
+            return regs;
+        }
+        AA64Reg {
+            aa64isar0: 0,
+            #[cfg(test)]
+            aa64isar1: 0,
+            aa64mmfr2: 0,
+        }
+    }
+}
+#[cfg(target_os = "openbsd")]
+mod imp {
+    // OpenBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl.
+    // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
+    // https://github.com/golang/go/commit/cd54ef1f61945459486e9eea2f016d99ef1da925
+
+    use core::ptr;
+
+    use super::AA64Reg;
+
+    // Hand-written bindings: core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+    #[allow(non_camel_case_types)]
+    pub(super) mod ffi {
+        pub(crate) use super::super::c_types::{c_int, c_size_t, c_uint, c_void};
+
+        // Defined in sys/sysctl.h. Used by aa64reg as the first MIB component.
+        // https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/sys/sysctl.h#L82
+        pub(crate) const CTL_MACHDEP: c_int = 7;
+        // Defined in machine/cpu.h. Used by aa64reg as the second MIB component.
+        // https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/arch/arm64/include/cpu.h#L25-L40
+        pub(crate) const CPU_ID_AA64ISAR0: c_int = 2;
+        #[cfg(test)]
+        pub(crate) const CPU_ID_AA64ISAR1: c_int = 3;
+        // OpenBSD has an API to get this, but currently always returns 0.
+        // https://github.com/openbsd/src/blob/6a233889798dc3ecb18acc52dce1e57862af2957/sys/arch/arm64/arm64/machdep.c#L371-L377
+        #[cfg(test)]
+        pub(crate) const CPU_ID_AA64MMFR2: c_int = 7;
+
+        extern "C" {
+            // Binding for sysctl, which is defined in sys/sysctl.h.
+            // https://man.openbsd.org/sysctl.2
+            // https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/sys/sysctl.h
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/bsd/netbsdlike/openbsd/mod.rs#L1817-L1824
+            pub(crate) fn sysctl(
+                name: *const c_int, // pointer to the first MIB component
+                name_len: c_uint, // number of MIB components at `name`
+                old_p: *mut c_void, // buffer that receives the value
+                old_len_p: *mut c_size_t, // sysctl64 passes the size of the buffer at `old_p`
+                new_p: *mut c_void, // sysctl64 passes null
+                new_len: c_size_t, // sysctl64 passes 0
+            ) -> c_int; // sysctl64 treats -1 as failure
+        }
+    }
+
+    // ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 are supported on OpenBSD 7.1+.
+    // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
+    // Others are supported on OpenBSD 7.3+.
+    // https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c
+    // sysctl returns an unsupported error if operation is not supported,
+    // so we can safely use this function on older versions of OpenBSD.
+    pub(super) fn aa64reg() -> AA64Reg {
+        // Reads one machdep register; a failed query is reported as 0.
+        let read_or_zero = |id: ffi::c_int| sysctl64(&[ffi::CTL_MACHDEP, id]).unwrap_or(0);
+        // Only aa64isar0 is read in non-test builds; the other two registers
+        // are compiled in (and queried) for tests only.
+        // The registers are queried in field order.
+        AA64Reg {
+            aa64isar0: read_or_zero(ffi::CPU_ID_AA64ISAR0),
+            #[cfg(test)]
+            aa64isar1: read_or_zero(ffi::CPU_ID_AA64ISAR1),
+            #[cfg(test)]
+            aa64mmfr2: read_or_zero(ffi::CPU_ID_AA64MMFR2),
+        }
+    }
+
+    // Reads the u64 value of the sysctl node `mib`; `None` if `sysctl` returns -1.
+    fn sysctl64(mib: &[ffi::c_int]) -> Option<u64> {
+        const VALUE_SIZE: ffi::c_size_t = core::mem::size_of::<u64>() as ffi::c_size_t;
+        let mut value = 0_u64;
+        let mut written = VALUE_SIZE;
+        #[allow(clippy::cast_possible_truncation)]
+        let mib_len = mib.len() as ffi::c_uint;
+        // SAFETY: `mib_len` does not exceed the size of `mib`, `written` does not
+        // exceed the size of `value`, and `sysctl` is thread-safe.
+        let status = unsafe {
+            ffi::sysctl(
+                mib.as_ptr(),
+                mib_len,
+                (&mut value as *mut u64).cast::<ffi::c_void>(),
+                &mut written,
+                ptr::null_mut(),
+                0,
+            )
+        };
+        if status == -1 {
+            return None;
+        }
+        debug_assert_eq!(written, VALUE_SIZE);
+        Some(value)
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use std::{
+        process::Command,
+        string::{String, ToString},
+    };
+
+    use super::*;
+
+    #[test]
+    fn test_aa64reg() {
+        let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg();
+        std::eprintln!("aa64isar0={}", aa64isar0);
+        std::eprintln!("aa64isar1={}", aa64isar1);
+        std::eprintln!("aa64mmfr2={}", aa64mmfr2);
+
+        if cfg!(target_os = "openbsd") {
+            // Cross-check the values against the output of the `sysctl machdep` command.
+            let output = Command::new("sysctl").arg("machdep").output().unwrap();
+            assert!(output.status.success());
+            let stdout = String::from_utf8(output.stdout).unwrap();
+            // Value printed after `prefix`, or "0" if no line starts with `prefix`.
+            let reported = |prefix: &str| -> String {
+                stdout.lines().find_map(|s| s.strip_prefix(prefix)).unwrap_or("0").to_string()
+            };
+            // OpenBSD 7.1+
+            assert_eq!(reported("machdep.id_aa64isar0="), aa64isar0.to_string());
+            assert_eq!(reported("machdep.id_aa64isar1="), aa64isar1.to_string());
+            // OpenBSD 7.3+
+            assert_eq!(reported("machdep.id_aa64mmfr2="), aa64mmfr2.to_string());
+        }
+
+        // The raw register values must agree with the features reported by detect().
+        if detect().test(CpuInfo::HAS_LSE) {
+            // The expected field value depends on whether LSE128 was detected as well.
+            let expected = if detect().test(CpuInfo::HAS_LSE128) {
+                3
+            } else {
+                2
+            };
+            assert_eq!(extract(aa64isar0, 23, 20), expected);
+        }
+        if detect().test(CpuInfo::HAS_LSE2) {
+            assert_eq!(extract(aa64mmfr2, 35, 32), 1);
+        }
+        if detect().test(CpuInfo::HAS_RCPC3) {
+            assert_eq!(extract(aa64isar1, 23, 20), 3);
+        }
+    }
+
+    #[allow(clippy::cast_possible_wrap)]
+    #[cfg(target_os = "netbsd")]
+    #[test]
+    fn test_netbsd() {
+        use c_types::*;
+        use core::{arch::asm, mem, ptr};
+        use imp::ffi;
+        use test_helper::sys;
+
+        // Call the sysctl syscall directly using asm instead of going through libc.
+        // Note that NetBSD does not guarantee the stability of raw syscall as
+        // much as Linux does (It may actually be stable enough, though: https://lists.llvm.org/pipermail/llvm-dev/2019-June/133393.html).
+        //
+        // This is currently used only for testing.
+        unsafe fn sysctl_cpu_id_asm_syscall(name: &[&[u8]]) -> Result<AA64Reg, c_int> {
+            // https://github.com/golang/go/blob/4badad8d477ffd7a6b762c35bc69aed82faface7/src/syscall/asm_netbsd_arm64.s
+            #[inline]
+            unsafe fn sysctl(
+                name: *const c_int,
+                name_len: c_uint,
+                old_p: *mut c_void,
+                old_len_p: *mut c_size_t,
+                new_p: *const c_void,
+                new_len: c_size_t,
+            ) -> Result<c_int, c_int> {
+                #[allow(clippy::cast_possible_truncation)]
+                // SAFETY: the caller must uphold the safety contract.
+                unsafe {
+                    let mut n = sys::SYS___sysctl as u64; // syscall number in, error code out
+                    let r: i64;
+                    asm!(
+                        "svc 0",
+                        "b.cc 2f", // carry clear: skip the error path below
+                        "mov x17, x0", // error path: move x0 into x17 (read back as `n`)
+                        "mov x0, #-1", // error path: make the result -1
+                        "2:",
+                        inout("x17") n,
+                        inout("x0") ptr_reg!(name) => r,
+                        inout("x1") name_len as u64 => _,
+                        in("x2") ptr_reg!(old_p),
+                        in("x3") ptr_reg!(old_len_p),
+                        in("x4") ptr_reg!(new_p),
+                        in("x5") new_len as u64,
+                        options(nostack),
+                    );
+                    if r as c_int == -1 {
+                        Err(n as c_int)
+                    } else {
+                        Ok(r as c_int)
+                    }
+                }
+            }
+
+            // https://github.com/golang/sys/blob/4badad8d477ffd7a6b762c35bc69aed82faface7/cpu/cpu_netbsd_arm64.go
+            use std::{vec, vec::Vec};
+            fn sysctl_nodes(mib: &mut Vec<i32>) -> Result<Vec<sys::sysctlnode>, i32> {
+                mib.push(sys::CTL_QUERY);
+                let mut q_node = sys::sysctlnode {
+                    sysctl_flags: sys::SYSCTL_VERS_1,
+                    ..unsafe { mem::zeroed() }
+                };
+                let qp = (&mut q_node as *mut sys::sysctlnode).cast::<ffi::c_void>();
+                let sz = mem::size_of::<sys::sysctlnode>();
+                let mut olen = 0;
+                #[allow(clippy::cast_possible_truncation)]
+                unsafe {
+                    sysctl(mib.as_ptr(), mib.len() as c_uint, ptr::null_mut(), &mut olen, qp, sz)?; // null output buffer: only `olen` is filled in
+                }
+
+                let mut nodes = Vec::<sys::sysctlnode>::with_capacity(olen / sz);
+                let np = nodes.as_mut_ptr().cast::<ffi::c_void>();
+                #[allow(clippy::cast_possible_truncation)]
+                unsafe {
+                    sysctl(mib.as_ptr(), mib.len() as c_uint, np, &mut olen, qp, sz)?; // second call writes the nodes into `nodes`
+                    nodes.set_len(olen / sz); // `olen` is divided by the node size to get the count
+                }
+
+                mib.pop(); // pop CTL_QUERY
+                Ok(nodes)
+            }
+            fn name_to_mib(parts: &[&[u8]]) -> Result<Vec<i32>, i32> {
+                let mut mib = vec![];
+                for (part_no, &part) in parts.iter().enumerate() {
+                    let nodes = sysctl_nodes(&mut mib)?;
+                    for node in nodes {
+                        let mut n = vec![]; // node name with the zero bytes removed
+                        for b in node.sysctl_name {
+                            if b != 0 {
+                                n.push(b);
+                            }
+                        }
+                        if n == part {
+                            mib.push(node.sysctl_num);
+                            break;
+                        }
+                    }
+                    if mib.len() != part_no + 1 {
+                        return Err(0); // no node matched `part`
+                    }
+                }
+
+                Ok(mib)
+            }
+
+            const OUT_LEN: ffi::c_size_t =
+                core::mem::size_of::<ffi::aarch64_sysctl_cpu_id>() as ffi::c_size_t;
+
+            let mib = name_to_mib(name)?;
+
+            let mut buf: ffi::aarch64_sysctl_cpu_id = unsafe { core::mem::zeroed() };
+            let mut out_len = OUT_LEN;
+            #[allow(clippy::cast_possible_truncation)]
+            unsafe {
+                sysctl(
+                    mib.as_ptr(),
+                    mib.len() as c_uint,
+                    (&mut buf as *mut ffi::aarch64_sysctl_cpu_id).cast::<ffi::c_void>(),
+                    &mut out_len,
+                    ptr::null_mut(),
+                    0,
+                )?;
+            }
+            Ok(AA64Reg {
+                aa64isar0: buf.aa64isar0,
+                #[cfg(test)]
+                aa64isar1: buf.aa64isar1,
+                #[cfg(test)]
+                aa64mmfr2: buf.aa64mmfr2,
+            })
+        }
+
+        unsafe {
+            assert_eq!(
+                imp::sysctl_cpu_id(b"machdep.cpu0.cpu_id\0").unwrap(), // via libc's sysctlbyname
+                sysctl_cpu_id_asm_syscall(&[b"machdep", b"cpu0", b"cpu_id"]).unwrap() // via the raw syscall
+            );
+        }
+    }
+
+    // Static assertions for FFI bindings.
+    // This checks that FFI bindings defined in this crate, FFI bindings defined
+    // in libc, and FFI bindings generated for the platform's latest header file
+    // using bindgen have compatible signatures (or the same values if constants).
+    // Since this is static assertion, we can detect problems with
+    // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
+    // without actually running tests on these platforms.
+    // See also tools/codegen/src/ffi.rs.
+    // TODO(codegen): auto-generate this test
+    #[cfg(target_os = "netbsd")]
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::no_effect_underscore_binding,
+        clippy::used_underscore_binding
+    )]
+    const _: fn() = || {
+        use core::mem::{size_of, zeroed};
+        use imp::ffi;
+        use test_helper::{libc, sys};
+        // All three declarations of sysctlbyname must coerce to the same fn pointer type.
+        type SysctlByName = unsafe extern "C" fn(
+            *const ffi::c_char,
+            *mut ffi::c_void,
+            *mut ffi::c_size_t,
+            *const ffi::c_void,
+            ffi::c_size_t,
+        ) -> ffi::c_int;
+        let mut _sysctlbyname: SysctlByName = ffi::sysctlbyname;
+        _sysctlbyname = libc::sysctlbyname;
+        _sysctlbyname = sys::sysctlbyname;
+        // libc doesn't have aarch64_sysctl_cpu_id, so only the generated struct is compared.
+        static_assert!(
+            size_of::<ffi::aarch64_sysctl_cpu_id>() == size_of::<sys::aarch64_sysctl_cpu_id>()
+        );
+        // Building the generated struct from our fields checks the type of every field.
+        let ours: ffi::aarch64_sysctl_cpu_id = unsafe { zeroed() };
+        let _ = sys::aarch64_sysctl_cpu_id {
+            ac_midr: ours.midr,
+            ac_revidr: ours.revidr,
+            ac_mpidr: ours.mpidr,
+            ac_aa64dfr0: ours.aa64dfr0,
+            ac_aa64dfr1: ours.aa64dfr1,
+            ac_aa64isar0: ours.aa64isar0,
+            ac_aa64isar1: ours.aa64isar1,
+            ac_aa64mmfr0: ours.aa64mmfr0,
+            ac_aa64mmfr1: ours.aa64mmfr1,
+            ac_aa64mmfr2: ours.aa64mmfr2,
+            ac_aa64pfr0: ours.aa64pfr0,
+            ac_aa64pfr1: ours.aa64pfr1,
+            ac_aa64zfr0: ours.aa64zfr0,
+            ac_mvfr0: ours.mvfr0,
+            ac_mvfr1: ours.mvfr1,
+            ac_mvfr2: ours.mvfr2,
+            ac_pad: ours.pad,
+            ac_clidr: ours.clidr,
+            ac_ctr: ours.ctr,
+        };
+    };
+    #[cfg(target_os = "openbsd")]
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::no_effect_underscore_binding
+    )]
+    const _: fn() = || {
+        use imp::ffi;
+        use test_helper::{libc, sys};
+        let mut _sysctl: unsafe extern "C" fn(
+            *const ffi::c_int,
+            ffi::c_uint,
+            *mut ffi::c_void,
+            *mut ffi::c_size_t,
+            *mut ffi::c_void,
+            ffi::c_size_t,
+        ) -> ffi::c_int = ffi::sysctl;
+        _sysctl = libc::sysctl; // compiles only if libc's declaration has the same signature
+        _sysctl = sys::sysctl; // compiles only if the generated declaration has the same signature
+        static_assert!(ffi::CTL_MACHDEP == libc::CTL_MACHDEP);
+        static_assert!(ffi::CTL_MACHDEP == sys::CTL_MACHDEP as ffi::c_int);
+        // libc doesn't have CPU_ID_AA64ISAR0, so it is compared with the generated value only.
+        static_assert!(ffi::CPU_ID_AA64ISAR0 == sys::CPU_ID_AA64ISAR0 as ffi::c_int);
+        // libc doesn't have CPU_ID_AA64ISAR1, so it is compared with the generated value only.
+        static_assert!(ffi::CPU_ID_AA64ISAR1 == sys::CPU_ID_AA64ISAR1 as ffi::c_int);
+        // libc doesn't have CPU_ID_AA64MMFR2, so it is compared with the generated value only.
+        static_assert!(ffi::CPU_ID_AA64MMFR2 == sys::CPU_ID_AA64MMFR2 as ffi::c_int);
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs
new file mode 100644
index 0000000..978418c
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Run-time feature detection on aarch64 Fuchsia by using zx_system_get_features.
+//
+// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on Fuchsia.
+// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs
+//
+// Refs:
+// - https://fuchsia.dev/fuchsia-src/reference/syscalls/system_get_features
+// - https://github.com/llvm/llvm-project/commit/4e731abc55681751b5d736b613f7720e50eb1ad4
+
+include!("common.rs");
+
+#[allow(non_camel_case_types)]
+mod ffi {
+    // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/types.h
+    pub(crate) type zx_status_t = i32;
+
+    // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/errors.h
+    pub(crate) const ZX_OK: zx_status_t = 0; // any other status makes the wrapper below return 0
+    // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/features.h
+    pub(crate) const ZX_FEATURE_KIND_CPU: u32 = 0; // `kind` argument used by _detect
+    pub(crate) const ZX_ARM64_FEATURE_ISA_ATOMICS: u32 = 1 << 8; // bit that _detect maps to HAS_LSE
+
+    #[link(name = "zircon")]
+    extern "C" {
+        // https://fuchsia.dev/fuchsia-src/reference/syscalls/system_get_features
+        pub(crate) fn zx_system_get_features(kind: u32, features: *mut u32) -> zx_status_t;
+    }
+}
+
+// Returns the feature bits for `kind`, or 0 if the syscall does not return ZX_OK.
+fn zx_system_get_features(kind: u32) -> u32 {
+    let mut features = 0_u32;
+    // SAFETY: the pointer is valid because we got it from a reference.
+    match unsafe { ffi::zx_system_get_features(kind, &mut features) } {
+        ffi::ZX_OK => features,
+        _ => 0,
+    }
+}
+
+#[cold]
+fn _detect(info: &mut CpuInfo) {
+    // A failed query yields 0, so no feature is reported in that case.
+    if zx_system_get_features(ffi::ZX_FEATURE_KIND_CPU) & ffi::ZX_ARM64_FEATURE_ISA_ATOMICS != 0 {
+        info.set(CpuInfo::HAS_LSE);
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_fuchsia() {
+        let cpu_features = zx_system_get_features(ffi::ZX_FEATURE_KIND_CPU);
+        assert_ne!(cpu_features, 0);
+        std::eprintln!("features: {:b}", cpu_features);
+    }
+
+    // Static assertions for FFI bindings.
+    // This checks that FFI bindings defined in this crate and FFI bindings
+    // generated for the platform's latest header file using bindgen have
+    // compatible signatures (or the same values if constants).
+    // Since this is static assertion, we can detect problems with
+    // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
+    // without actually running tests on these platforms.
+    // See also tools/codegen/src/ffi.rs.
+    // TODO(codegen): auto-generate this test
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::cast_possible_truncation,
+        clippy::no_effect_underscore_binding
+    )]
+    const _: fn() = || {
+        use test_helper::sys;
+        // TODO(codegen): zx_system_get_features (the function signature is not checked yet)
+        let _: ffi::zx_status_t = 0 as sys::zx_status_t; // compiles only if both aliases are the same type
+        static_assert!(ffi::ZX_OK == sys::ZX_OK as ffi::zx_status_t);
+        static_assert!(ffi::ZX_FEATURE_KIND_CPU == sys::ZX_FEATURE_KIND_CPU);
+        static_assert!(ffi::ZX_ARM64_FEATURE_ISA_ATOMICS == sys::ZX_ARM64_FEATURE_ISA_ATOMICS);
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs
new file mode 100644
index 0000000..d6bf9d0
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Run-time feature detection on aarch64 macOS by using sysctl.
+//
+// This module is currently only enabled on tests because aarch64 macOS always supports FEAT_LSE and FEAT_LSE2.
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/include/llvm/TargetParser/AArch64TargetParser.h#L494
+//
+// If macOS supporting Armv9.4-a becomes popular in the future, this module will
+// be used to support outline-atomics for FEAT_LSE128/FEAT_LRCPC3.
+//
+// Refs: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
+//
+// Note that iOS doesn't support sysctl:
+// - https://developer.apple.com/forums/thread/9440
+// - https://nabla-c0d3.github.io/blog/2015/06/16/ios9-security-privacy
+
+include!("common.rs");
+
+use core::ptr;
+
+// Hand-written bindings: core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+#[allow(non_camel_case_types)]
+mod ffi {
+    pub(crate) use super::c_types::{c_char, c_int, c_size_t, c_void};
+
+    extern "C" {
+        // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname
+        // https://github.com/apple-oss-distributions/xnu/blob/5c2921b07a2480ab43ec66f5b9e41cb872bc554f/bsd/sys/sysctl.h
+        // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/bsd/apple/mod.rs#L5167-L5173
+        pub(crate) fn sysctlbyname(
+            name: *const c_char, // sysctl name; sysctlbyname32 passes a NUL-terminated string
+            old_p: *mut c_void, // buffer that receives the value
+            old_len_p: *mut c_size_t, // sysctlbyname32 passes the size of the buffer at `old_p`
+            new_p: *mut c_void, // sysctlbyname32 passes null
+            new_len: c_size_t, // sysctlbyname32 passes 0
+        ) -> c_int; // sysctlbyname32 treats any non-zero result as failure
+    }
+}
+
+// Reads the 32-bit value of the sysctl named `name`; `None` if `sysctlbyname` fails.
+// Safety: `name` must be a valid C string.
+unsafe fn sysctlbyname32(name: &[u8]) -> Option<u32> {
+    const VALUE_SIZE: ffi::c_size_t = core::mem::size_of::<u32>() as ffi::c_size_t;
+    debug_assert_eq!(name.last(), Some(&0), "{:?}", name);
+    debug_assert_eq!(name.iter().filter(|&&v| v == 0).count(), 1, "{:?}", name);
+
+    let mut value = 0_u32;
+    let value_ptr: *mut u32 = &mut value;
+    let mut written = VALUE_SIZE;
+    // SAFETY: the caller must guarantee that `name` is a valid C string, `written`
+    // does not exceed the size of `value`, and `sysctlbyname` is thread-safe.
+    let status = unsafe {
+        ffi::sysctlbyname(
+            name.as_ptr().cast::<ffi::c_char>(),
+            value_ptr.cast::<ffi::c_void>(),
+            &mut written,
+            ptr::null_mut(),
+            0,
+        )
+    };
+    if status != 0 {
+        return None;
+    }
+    debug_assert_eq!(written, VALUE_SIZE);
+    Some(value)
+}
+
+#[cold]
+fn _detect(info: &mut CpuInfo) {
+    // hw.optional.armv8_1_atomics is available on macOS 11+ (aarch64 support was added on macOS 11);
+    // hw.optional.arm.FEAT_* are only available on macOS 12+. Query both names in case future macOS removes the old one.
+    // https://github.com/golang/go/commit/c15593197453b8bf90fc3a9080ba2afeaf7934ea
+    // https://github.com/google/boringssl/commit/91e0b11eba517d83b910b20fe3740eeb39ecb37e
+    // SAFETY: we passed valid C strings.
+    let has_lse = unsafe {
+        sysctlbyname32(b"hw.optional.arm.FEAT_LSE\0").map_or(false, |v| v != 0)
+            || sysctlbyname32(b"hw.optional.armv8_1_atomics\0").map_or(false, |v| v != 0)
+    };
+    if has_lse {
+        info.set(CpuInfo::HAS_LSE);
+    }
+    // SAFETY: we passed a valid C string.
+    if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE2\0").map_or(false, |v| v != 0) } {
+        info.set(CpuInfo::HAS_LSE2);
+    }
+    // we currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics.
+    #[cfg(test)]
+    {
+        // SAFETY: we passed a valid C string.
+        if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE128\0").map_or(false, |v| v != 0) } {
+            info.set(CpuInfo::HAS_LSE128);
+        }
+        // SAFETY: we passed a valid C string.
+        if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC3\0").map_or(false, |v| v != 0) } {
+            info.set(CpuInfo::HAS_RCPC3);
+        }
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_macos() {
+        unsafe {
+            assert_eq!(sysctlbyname32(b"hw.optional.armv8_1_atomics\0"), Some(1));
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LSE\0"), Some(1));
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LSE2\0"), Some(1));
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LSE128\0"), None); // lookup is expected to fail
+            assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::NotFound); // must directly follow the failed lookup
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC\0"), Some(1));
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC2\0"), Some(1));
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC3\0"), None); // lookup is expected to fail
+            assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::NotFound); // must directly follow the failed lookup
+        }
+    }
+
+    // Static assertions for FFI bindings.
+    // This checks that FFI bindings defined in this crate, FFI bindings defined
+    // in libc, and FFI bindings generated for the platform's latest header file
+    // using bindgen have compatible signatures (or the same values if constants).
+    // Since this is static assertion, we can detect problems with
+    // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
+    // without actually running tests on these platforms.
+    // See also tools/codegen/src/ffi.rs.
+    // TODO(codegen): auto-generate this test
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::no_effect_underscore_binding
+    )]
+    const _: fn() = || {
+        use test_helper::{libc, sys};
+        let mut _sysctlbyname: unsafe extern "C" fn(
+            *const ffi::c_char,
+            *mut ffi::c_void,
+            *mut ffi::c_size_t,
+            *mut ffi::c_void,
+            ffi::c_size_t,
+        ) -> ffi::c_int = ffi::sysctlbyname;
+        _sysctlbyname = libc::sysctlbyname; // compiles only if libc's declaration has the same signature
+        _sysctlbyname = sys::sysctlbyname; // compiles only if the generated declaration has the same signature
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs
new file mode 100644
index 0000000..6ace866
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Run-time feature detection on aarch64 Windows by using IsProcessorFeaturePresent.
+//
+// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection of FEAT_LSE on Windows.
+// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/windows/aarch64.rs
+// https://github.com/rust-lang/stdarch/pull/1373
+//
+// Refs: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
+
+include!("common.rs");
+
+// Hand-written bindings: windows-sys requires Rust 1.56
+#[allow(clippy::upper_case_acronyms)]
+mod ffi {
+    pub(crate) type DWORD = u32;
+    pub(crate) type BOOL = i32;
+
+    pub(crate) const FALSE: BOOL = 0; // _detect treats any other result as "feature present"
+    // Defined in winnt.h of Windows SDK.
+    pub(crate) const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: DWORD = 34; // feature id that _detect maps to HAS_LSE
+
+    extern "system" {
+        // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
+        pub(crate) fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL;
+    }
+}
+
+#[cold]
+fn _detect(info: &mut CpuInfo) {
+    // SAFETY: calling IsProcessorFeaturePresent is safe, and FALSE is also
+    // returned if the HAL does not support detection of the specified feature.
+    let present =
+        unsafe { ffi::IsProcessorFeaturePresent(ffi::PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) };
+    if present != ffi::FALSE {
+        info.set(CpuInfo::HAS_LSE);
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Static assertions for FFI bindings.
+    // This checks that FFI bindings defined in this crate and FFI bindings defined
+    // in windows-sys have compatible signatures (or the same values if constants).
+    // Since this is static assertion, we can detect problems with
+    // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
+    // without actually running tests on these platforms.
+    // (Unlike libc, windows-sys programmatically generates bindings from Windows
+    // API metadata, so it should be enough to check compatibility with the
+    // windows-sys' signatures/values.)
+    // See also tools/codegen/src/ffi.rs.
+    // TODO(codegen): auto-generate this test
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::cast_possible_truncation,
+        clippy::no_effect_underscore_binding
+    )]
+    const _: fn() = || {
+        use test_helper::windows_sys::Win32::{Foundation, System::Threading};
+        let _: ffi::DWORD = 0 as Threading::PROCESSOR_FEATURE_ID;
+        let _: ffi::BOOL = 0 as Foundation::BOOL;
+        let mut _feature_present: unsafe extern "system" fn(ffi::DWORD) -> ffi::BOOL =
+            ffi::IsProcessorFeaturePresent;
+        _feature_present = Threading::IsProcessorFeaturePresent;
+        static_assert!(ffi::FALSE == Foundation::FALSE);
+        static_assert!(
+            ffi::PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
+                == Threading::PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
+        );
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs b/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs
new file mode 100644
index 0000000..1be3095
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Run-time feature detection on aarch64/powerpc64 Linux/Android/FreeBSD by parsing ELF auxiliary vectors.
+//
+// # Linux/Android
+//
+// As of nightly-2023-01-23, is_aarch64_feature_detected always uses dlsym by default
+// on aarch64 Linux/Android, but on the following platforms, we can safely assume
+// getauxval is linked to the binary.
+//
+// - On glibc (*-linux-gnu*), [aarch64 support is available on glibc 2.17+](https://sourceware.org/legacy-ml/libc-announce/2012/msg00001.html)
+//   and is newer than [glibc 2.16 that added getauxval](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html).
+// - On musl (*-linux-musl*, *-linux-ohos*), [aarch64 support is available on musl 1.1.7+](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.7#n1422)
+//   and is newer than [musl 1.1.0 that added getauxval](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.0#n1197).
+//   https://github.com/rust-lang/rust/commit/9a04ae4997493e9260352064163285cddc43de3c
+// - On bionic (*-android*), [64-bit architecture support is available on Android 5.0+ (API level 21+)](https://android-developers.googleblog.com/2014/10/whats-new-in-android-50-lollipop.html)
+//   and is newer than [Android 4.3 (API level 18) that added getauxval](https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49).
+//
+// However, on musl with static linking, it seems that getauxval is not always available, independent of version requirements: https://github.com/rust-lang/rust/issues/89626
+// (That problem may have been fixed in https://github.com/rust-lang/rust/commit/9a04ae4997493e9260352064163285cddc43de3c,
+// but even in the version containing that patch, [there is a report](https://github.com/rust-lang/rust/issues/89626#issuecomment-1242636038)
+// of the same error.)
+//
+// On other Linux targets, we cannot assume that getauxval is always available, so we don't enable
+// outline-atomics by default (can be enabled by `--cfg portable_atomic_outline_atomics`).
+//
+// - On musl with static linking. See the above for more.
+//   Also, in this case, dlsym(getauxval) always returns null.
+// - On uClibc-ng (*-linux-uclibc*, *-l4re-uclibc*), [uClibc-ng 1.0.43 (released in 2023-04-05) added getauxval](https://github.com/wbx-github/uclibc-ng/commit/d869bb1600942c01a77539128f9ba5b5b55ad647).
+// - On Picolibc, [Picolibc 1.4.6 added getauxval stub](https://github.com/picolibc/picolibc#picolibc-version-146).
+//
+// See also https://github.com/rust-lang/stdarch/pull/1375
+//
+// See tests::test_linux_like and aarch64_aa64reg.rs for (test-only) alternative implementations.
+//
+// # FreeBSD
+//
+// As of nightly-2023-01-23, is_aarch64_feature_detected always uses mrs on
+// aarch64 FreeBSD. However, mrs does not work on FreeBSD 12 on QEMU (confirmed
+// on FreeBSD 12.{2,3,4}), where we got SIGILL (it worked on FreeBSD 13 and 14).
+//
+// So use elf_aux_info instead of mrs like compiler-rt does.
+// https://man.freebsd.org/elf_aux_info(3)
+// https://reviews.llvm.org/D109330
+//
+// elf_aux_info is available on FreeBSD 12.0+ and 11.4+:
+// https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470
+// https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h
+// On FreeBSD, [aarch64 support is available on FreeBSD 11.0+](https://www.freebsd.org/releases/11.0R/relnotes/#hardware-arm),
+// but FreeBSD 11 (11.4) was EoL on 2021-09-30, and FreeBSD 11.3 was EoL on 2020-09-30:
+// https://www.freebsd.org/security/unsupported
+// See also https://github.com/rust-lang/stdarch/pull/611#issuecomment-445464613
+//
+// See tests::test_freebsd and aarch64_aa64reg.rs for (test-only) alternative implementations.
+//
+// # PowerPC64
+//
+// On PowerPC64, outline-atomics is currently disabled by default mainly for
+// compatibility with older versions of operating systems
+// (can be enabled by `--cfg portable_atomic_outline_atomics`).
+
+include!("common.rs");
+
+use os::ffi;
+#[cfg(any(target_os = "linux", target_os = "android"))]
+mod os {
+    // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+    #[cfg_attr(test, allow(dead_code))]
+    pub(super) mod ffi {
+        pub(crate) use super::super::c_types::c_ulong;
+        #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+        pub(crate) use super::super::c_types::{c_char, c_int};
+
+        extern "C" {
+            // https://man7.org/linux/man-pages/man3/getauxval.3.html
+            // https://github.com/bminor/glibc/blob/801af9fafd4689337ebf27260aa115335a0cb2bc/misc/sys/auxv.h
+            // https://github.com/bminor/musl/blob/7d756e1c04de6eb3f2b3d3e1141a218bb329fcfb/include/sys/auxv.h
+            // https://github.com/wbx-github/uclibc-ng/blob/cdb07d2cd52af39feb425e6d36c02b30916b9f0a/include/sys/auxv.h
+            // https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h
+            // https://github.com/picolibc/picolibc/blob/7a8a58aeaa5946cb662577a518051091b691af3a/newlib/libc/picolib/getauxval.c
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/linux/gnu/mod.rs#L1201
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/linux/musl/mod.rs#L744
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/android/b64/mod.rs#L333
+            pub(crate) fn getauxval(type_: c_ulong) -> c_ulong;
+
+            // Defined in sys/system_properties.h.
+            // https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/system_properties.h
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/android/mod.rs#L3471
+            #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+            pub(crate) fn __system_property_get(name: *const c_char, value: *mut c_char) -> c_int;
+        }
+
+        // https://github.com/torvalds/linux/blob/v6.1/include/uapi/linux/auxvec.h
+        #[cfg(any(test, target_arch = "aarch64"))]
+        pub(crate) const AT_HWCAP: c_ulong = 16;
+        #[cfg(any(test, target_arch = "powerpc64"))]
+        pub(crate) const AT_HWCAP2: c_ulong = 26;
+
+        // Defined in sys/system_properties.h.
+        // https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/system_properties.h
+        #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+        pub(crate) const PROP_VALUE_MAX: c_int = 92;
+    }
+
+    pub(super) fn getauxval(type_: ffi::c_ulong) -> ffi::c_ulong {
+        #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+        {
+            // Samsung Exynos 9810 has a bug that big and little cores have different
+            // ISAs. And on older Android (pre-9), the kernel incorrectly reports
+            // that features available only on some cores are available on all cores.
+            // https://reviews.llvm.org/D114523
+            let mut arch = [0_u8; ffi::PROP_VALUE_MAX as usize];
+            // SAFETY: we've passed a valid C string and a buffer with max length.
+            let len = unsafe {
+                ffi::__system_property_get(
+                    b"ro.arch\0".as_ptr().cast::<ffi::c_char>(),
+                    arch.as_mut_ptr().cast::<ffi::c_char>(),
+                )
+            };
+            // On Exynos, ro.arch is not available on Android 12+, but it is fine
+            // because Android 9+ includes the fix.
+            if len > 0 && arch.starts_with(b"exynos9810") {
+                return 0;
+            }
+        }
+
+        // SAFETY: `getauxval` is thread-safe. See also the module level docs.
+        unsafe { ffi::getauxval(type_) }
+    }
+}
+#[cfg(target_os = "freebsd")]
+mod os {
+    // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+    #[cfg_attr(test, allow(dead_code))]
+    pub(super) mod ffi {
+        pub(crate) use super::super::c_types::{c_int, c_ulong, c_void};
+
+        extern "C" {
+            // Defined in sys/auxv.h.
+            // https://man.freebsd.org/elf_aux_info(3)
+            // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/sys/auxv.h
+            pub(crate) fn elf_aux_info(aux: c_int, buf: *mut c_void, buf_len: c_int) -> c_int;
+        }
+
+        // Defined in sys/elf_common.h.
+        // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/sys/elf_common.h
+        #[cfg(any(test, target_arch = "aarch64"))]
+        pub(crate) const AT_HWCAP: c_int = 25;
+        #[cfg(any(test, target_arch = "powerpc64"))]
+        pub(crate) const AT_HWCAP2: c_int = 26;
+    }
+
+    pub(super) fn getauxval(aux: ffi::c_int) -> ffi::c_ulong {
+        #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
+        const OUT_LEN: ffi::c_int = core::mem::size_of::<ffi::c_ulong>() as ffi::c_int;
+        let mut out: ffi::c_ulong = 0;
+        // SAFETY:
+        // - the pointer is valid because we got it from a reference.
+        // - `OUT_LEN` is the same as the size of `out`.
+        // - `elf_aux_info` is thread-safe.
+        unsafe {
+            let res = ffi::elf_aux_info(
+                aux,
+                (&mut out as *mut ffi::c_ulong).cast::<ffi::c_void>(),
+                OUT_LEN,
+            );
+            // If elf_aux_info fails, `out` will be left at zero (which is the proper default value).
+            debug_assert!(res == 0 || out == 0);
+        }
+        out
+    }
+}
+
+// Basically, Linux and FreeBSD use the same hwcap values.
+// FreeBSD supports a subset of the hwcap values supported by Linux.
+use arch::_detect;
+#[cfg(target_arch = "aarch64")]
+mod arch {
+    use super::{ffi, os, CpuInfo};
+
+    // Linux
+    // https://github.com/torvalds/linux/blob/1c41041124bd14dd6610da256a3da4e5b74ce6b1/arch/arm64/include/uapi/asm/hwcap.h
+    // FreeBSD
+    // Defined in machine/elf.h.
+    // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/arm64/include/elf.h
+    // available on FreeBSD 13.0+ and 12.2+
+    // https://github.com/freebsd/freebsd-src/blob/release/13.0.0/sys/arm64/include/elf.h
+    // https://github.com/freebsd/freebsd-src/blob/release/12.2.0/sys/arm64/include/elf.h
+    pub(super) const HWCAP_ATOMICS: ffi::c_ulong = 1 << 8;
+    pub(super) const HWCAP_USCAT: ffi::c_ulong = 1 << 25;
+    #[cfg(any(target_os = "linux", target_os = "android"))]
+    #[cfg(target_pointer_width = "64")]
+    #[cfg(test)]
+    pub(super) const HWCAP2_LRCPC3: ffi::c_ulong = 1 << 46;
+    #[cfg(any(target_os = "linux", target_os = "android"))]
+    #[cfg(target_pointer_width = "64")]
+    #[cfg(test)]
+    pub(super) const HWCAP2_LSE128: ffi::c_ulong = 1 << 47;
+
+    #[cold]
+    pub(super) fn _detect(info: &mut CpuInfo) {
+        let hwcap = os::getauxval(ffi::AT_HWCAP);
+
+        if hwcap & HWCAP_ATOMICS != 0 {
+            info.set(CpuInfo::HAS_LSE);
+        }
+        if hwcap & HWCAP_USCAT != 0 {
+            info.set(CpuInfo::HAS_LSE2);
+        }
+        #[cfg(any(target_os = "linux", target_os = "android"))]
+        #[cfg(target_pointer_width = "64")]
+        #[cfg(test)]
+        {
+            let hwcap2 = os::getauxval(ffi::AT_HWCAP2);
+            if hwcap2 & HWCAP2_LRCPC3 != 0 {
+                info.set(CpuInfo::HAS_RCPC3);
+            }
+            if hwcap2 & HWCAP2_LSE128 != 0 {
+                info.set(CpuInfo::HAS_LSE128);
+            }
+        }
+    }
+}
+#[cfg(target_arch = "powerpc64")]
+mod arch {
+    use super::{ffi, os, CpuInfo};
+
+    // Linux
+    // https://github.com/torvalds/linux/blob/v6.1/arch/powerpc/include/uapi/asm/cputable.h
+    // FreeBSD
+    // Defined in machine/cpu.h.
+    // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/powerpc/include/cpu.h
+    // available on FreeBSD 11.0+
+    // https://github.com/freebsd/freebsd-src/commit/b0bf7fcd298133457991b27625bbed766e612730
+    pub(super) const PPC_FEATURE2_ARCH_2_07: ffi::c_ulong = 0x80000000;
+
+    #[cold]
+    pub(super) fn _detect(info: &mut CpuInfo) {
+        let hwcap2 = os::getauxval(ffi::AT_HWCAP2);
+
+        // power8
+        if hwcap2 & PPC_FEATURE2_ARCH_2_07 != 0 {
+            info.set(CpuInfo::HAS_QUADWORD_ATOMICS);
+        }
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[cfg(any(target_os = "linux", target_os = "android"))]
+    #[cfg(target_pointer_width = "64")]
+    #[test]
+    fn test_linux_like() {
+        use c_types::*;
+        use core::{arch::asm, mem};
+        use std::vec;
+        use test_helper::{libc, sys};
+
+        // Linux kernel 6.4 has added a way to read auxv without depending on either libc or mrs trap.
+        // https://github.com/torvalds/linux/commit/ddc65971bb677aa9f6a4c21f76d3133e106f88eb
+        //
+        // This is currently used only for testing.
+        fn getauxval_pr_get_auxv(type_: ffi::c_ulong) -> Result<ffi::c_ulong, c_int> {
+            #[cfg(target_arch = "aarch64")]
+            unsafe fn prctl_get_auxv(out: *mut c_void, len: usize) -> Result<usize, c_int> {
+                let r: i64;
+                unsafe {
+                    asm!(
+                        "svc 0",
+                        in("x8") sys::__NR_prctl as u64,
+                        inout("x0") sys::PR_GET_AUXV as u64 => r,
+                        in("x1") ptr_reg!(out),
+                        in("x2") len as u64,
+                        // arg4 and arg5 must be zero.
+                        in("x3") 0_u64,
+                        in("x4") 0_u64,
+                        options(nostack, preserves_flags)
+                    );
+                }
+                #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
+                if (r as c_int) < 0 {
+                    Err(r as c_int)
+                } else {
+                    Ok(r as usize)
+                }
+            }
+            #[cfg(target_arch = "powerpc64")]
+            unsafe fn prctl_get_auxv(out: *mut c_void, len: usize) -> Result<usize, c_int> {
+                let r: i64;
+                unsafe {
+                    asm!(
+                        "sc",
+                        "bns+ 2f",
+                        "neg %r3, %r3",
+                        "2:",
+                        inout("r0") sys::__NR_prctl as u64 => _,
+                        inout("r3") sys::PR_GET_AUXV as u64 => r,
+                        inout("r4") ptr_reg!(out) => _,
+                        inout("r5") len as u64 => _,
+                        // arg4 and arg5 must be zero.
+                        inout("r6") 0_u64 => _,
+                        inout("r7") 0_u64 => _,
+                        out("r8") _,
+                        out("r9") _,
+                        out("r10") _,
+                        out("r11") _,
+                        out("r12") _,
+                        out("cr0") _,
+                        options(nostack, preserves_flags)
+                    );
+                }
+                #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
+                if (r as c_int) < 0 {
+                    Err(r as c_int)
+                } else {
+                    Ok(r as usize)
+                }
+            }
+
+            let mut auxv = vec![unsafe { mem::zeroed::<sys::Elf64_auxv_t>() }; 38];
+
+            let old_len = auxv.len() * mem::size_of::<sys::Elf64_auxv_t>();
+
+            // SAFETY:
+            // - `old_len` does not exceed the size of `auxv`.
+            let _len = unsafe { prctl_get_auxv(auxv.as_mut_ptr().cast::<c_void>(), old_len)? };
+
+            for aux in &auxv {
+                if aux.a_type == type_ {
+                    // SAFETY: aux.a_un is #[repr(C)] union and all fields have
+                    // the same size and can be safely transmuted to integers.
+                    return Ok(unsafe { aux.a_un.a_val });
+                }
+            }
+            Err(0)
+        }
+
+        unsafe {
+            let mut u = mem::zeroed();
+            assert_eq!(libc::uname(&mut u), 0);
+            let release = std::ffi::CStr::from_ptr(u.release.as_ptr());
+            let release = core::str::from_utf8(release.to_bytes()).unwrap();
+            let mut digits = release.split('.');
+            let major = digits.next().unwrap().parse::<u32>().unwrap();
+            let minor = digits.next().unwrap().parse::<u32>().unwrap();
+            if (major, minor) < (6, 4) {
+                std::eprintln!("kernel version: {major}.{minor} (no pr_get_auxv)");
+                assert_eq!(getauxval_pr_get_auxv(ffi::AT_HWCAP).unwrap_err(), -22);
+                assert_eq!(getauxval_pr_get_auxv(ffi::AT_HWCAP2).unwrap_err(), -22);
+            } else {
+                std::eprintln!("kernel version: {major}.{minor} (has pr_get_auxv)");
+                assert_eq!(
+                    os::getauxval(ffi::AT_HWCAP),
+                    getauxval_pr_get_auxv(ffi::AT_HWCAP).unwrap()
+                );
+                assert_eq!(
+                    os::getauxval(ffi::AT_HWCAP2),
+                    getauxval_pr_get_auxv(ffi::AT_HWCAP2).unwrap()
+                );
+            }
+        }
+    }
+
+    #[allow(clippy::cast_sign_loss)]
+    #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+    #[test]
+    fn test_android() {
+        unsafe {
+            let mut arch = [1; ffi::PROP_VALUE_MAX as usize];
+            let len = ffi::__system_property_get(
+                b"ro.arch\0".as_ptr().cast::<ffi::c_char>(),
+                arch.as_mut_ptr().cast::<ffi::c_char>(),
+            );
+            assert!(len >= 0);
+            std::eprintln!("len={}", len);
+            std::eprintln!("arch={:?}", arch);
+            std::eprintln!(
+                "arch={:?}",
+                core::str::from_utf8(core::slice::from_raw_parts(arch.as_ptr(), len as usize))
+                    .unwrap()
+            );
+        }
+    }
+
+    #[allow(clippy::cast_possible_wrap)]
+    #[cfg(target_os = "freebsd")]
+    #[test]
+    fn test_freebsd() {
+        use c_types::*;
+        use core::{arch::asm, mem, ptr};
+        use test_helper::sys;
+
+        // This is almost equivalent to what elf_aux_info does.
+        // https://man.freebsd.org/elf_aux_info(3)
+        // On FreeBSD, [aarch64 support is available on FreeBSD 11.0+](https://www.freebsd.org/releases/11.0R/relnotes/#hardware-arm),
+        // but elf_aux_info is available on FreeBSD 12.0+ and 11.4+:
+        // https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470
+        // https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h
+        // so use sysctl instead of elf_aux_info.
+        // Note that FreeBSD 11 (11.4) was EoL on 2021-09-30, and FreeBSD 11.3 was EoL on 2020-09-30:
+        // https://www.freebsd.org/security/unsupported
+        //
+        // std_detect does it this way, but that appears to be somewhat incorrect
+        // (the type of arg4 of sysctl, auxv is smaller than AT_COUNT, etc.).
+        // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/freebsd/auxvec.rs#L52
+        //
+        // This is currently used only for testing.
+        // If you want us to use this implementation for compatibility with the older FreeBSD
+        // version that came to EoL a few years ago, please open an issue.
+        fn getauxval_sysctl_libc(type_: ffi::c_int) -> ffi::c_ulong {
+            let mut auxv: [sys::Elf64_Auxinfo; sys::AT_COUNT as usize] = unsafe { mem::zeroed() };
+
+            let mut len = core::mem::size_of_val(&auxv) as c_size_t;
+
+            // SAFETY: calling getpid is safe.
+            let pid = unsafe { sys::getpid() };
+            let mib = [
+                sys::CTL_KERN as c_int,
+                sys::KERN_PROC as c_int,
+                sys::KERN_PROC_AUXV as c_int,
+                pid,
+            ];
+
+            #[allow(clippy::cast_possible_truncation)]
+            // SAFETY:
+            // - `mib.len()` does not exceed the size of `mib`.
+            // - `len` does not exceed the size of `auxv`.
+            // - `sysctl` is thread-safe.
+            let res = unsafe {
+                sys::sysctl(
+                    mib.as_ptr(),
+                    mib.len() as c_uint,
+                    auxv.as_mut_ptr().cast::<c_void>(),
+                    &mut len,
+                    ptr::null_mut(),
+                    0,
+                )
+            };
+
+            if res != -1 {
+                for aux in &auxv {
+                    if aux.a_type == type_ as c_long {
+                        // SAFETY: aux.a_un is #[repr(C)] union and all fields have
+                        // the same size and can be safely transmuted to integers.
+                        return unsafe { aux.a_un.a_val as c_ulong };
+                    }
+                }
+            }
+            0
+        }
+        // Similar to the above, but call syscall using asm instead of libc.
+        // Note that FreeBSD does not guarantee the stability of raw syscall as
+        // much as Linux does (It may actually be stable enough, though:
+        // https://lists.llvm.org/pipermail/llvm-dev/2019-June/133393.html,
+        // https://github.com/ziglang/zig/issues/16590).
+        //
+        // This is currently used only for testing.
+        fn getauxval_sysctl_asm_syscall(type_: ffi::c_int) -> Result<ffi::c_ulong, c_int> {
+            #[allow(non_camel_case_types)]
+            type pid_t = c_int;
+
+            // https://github.com/freebsd/freebsd-src/blob/9888a79adad22ba06b5aff17d05abac0029c537a/lib/libc/aarch64/SYS.h
+            // https://github.com/golang/go/blob/4badad8d477ffd7a6b762c35bc69aed82faface7/src/syscall/asm_freebsd_arm64.s
+            #[cfg(target_arch = "aarch64")]
+            #[inline]
+            fn getpid() -> pid_t {
+                #[allow(clippy::cast_possible_truncation)]
+                // SAFETY: calling getpid is safe.
+                unsafe {
+                    let n = sys::SYS_getpid;
+                    let r: i64;
+                    asm!(
+                        "svc 0",
+                        in("x8") n as u64,
+                        out("x0") r,
+                        options(nostack, readonly),
+                    );
+                    r as pid_t
+                }
+            }
+            #[cfg(target_arch = "aarch64")]
+            #[inline]
+            unsafe fn sysctl(
+                name: *const c_int,
+                name_len: c_uint,
+                old_p: *mut c_void,
+                old_len_p: *mut c_size_t,
+                new_p: *const c_void,
+                new_len: c_size_t,
+            ) -> Result<c_int, c_int> {
+                #[allow(clippy::cast_possible_truncation)]
+                // SAFETY: the caller must uphold the safety contract.
+                unsafe {
+                    let mut n = sys::SYS___sysctl as u64;
+                    let r: i64;
+                    asm!(
+                        "svc 0",
+                        "b.cc 2f",
+                        "mov x8, x0",
+                        "mov x0, #-1",
+                        "2:",
+                        inout("x8") n,
+                        inout("x0") ptr_reg!(name) => r,
+                        inout("x1") name_len as u64 => _,
+                        in("x2") ptr_reg!(old_p),
+                        in("x3") ptr_reg!(old_len_p),
+                        in("x4") ptr_reg!(new_p),
+                        in("x5") new_len as u64,
+                        options(nostack),
+                    );
+                    if r as c_int == -1 {
+                        Err(n as c_int)
+                    } else {
+                        Ok(r as c_int)
+                    }
+                }
+            }
+
+            // https://github.com/freebsd/freebsd-src/blob/9888a79adad22ba06b5aff17d05abac0029c537a/lib/libc/powerpc64/SYS.h
+            #[cfg(target_arch = "powerpc64")]
+            #[inline]
+            fn getpid() -> pid_t {
+                #[allow(clippy::cast_possible_truncation)]
+                // SAFETY: calling getpid is safe.
+                unsafe {
+                    let n = sys::SYS_getpid;
+                    let r: i64;
+                    asm!(
+                        "sc",
+                        inout("r0") n as u64 => _,
+                        out("r3") r,
+                        out("r4") _,
+                        out("r5") _,
+                        out("r6") _,
+                        out("r7") _,
+                        out("r8") _,
+                        out("r9") _,
+                        out("r10") _,
+                        out("r11") _,
+                        out("r12") _,
+                        out("cr0") _,
+                        options(nostack, preserves_flags, readonly),
+                    );
+                    r as pid_t
+                }
+            }
+            #[cfg(target_arch = "powerpc64")]
+            #[inline]
+            unsafe fn sysctl(
+                name: *const c_int,
+                name_len: c_uint,
+                old_p: *mut c_void,
+                old_len_p: *mut c_size_t,
+                new_p: *const c_void,
+                new_len: c_size_t,
+            ) -> Result<c_int, c_int> {
+                #[allow(clippy::cast_possible_truncation)]
+                // SAFETY: the caller must uphold the safety contract.
+                unsafe {
+                    let mut n = sys::SYS___sysctl as u64;
+                    let r: i64;
+                    asm!(
+                        "sc",
+                        "bns+ 2f",
+                        "mr %r0, %r3",
+                        "li %r3, -1",
+                        "2:",
+                        inout("r0") n,
+                        inout("r3") ptr_reg!(name) => r,
+                        inout("r4") name_len as u64 => _,
+                        inout("r5") ptr_reg!(old_p) => _,
+                        inout("r6") ptr_reg!(old_len_p) => _,
+                        inout("r7") ptr_reg!(new_p) => _,
+                        inout("r8") new_len as u64 => _,
+                        out("r9") _,
+                        out("r10") _,
+                        out("r11") _,
+                        out("r12") _,
+                        out("cr0") _,
+                        options(nostack, preserves_flags)
+                    );
+                    if r as c_int == -1 {
+                        Err(n as c_int)
+                    } else {
+                        Ok(r as c_int)
+                    }
+                }
+            }
+
+            let mut auxv: [sys::Elf64_Auxinfo; sys::AT_COUNT as usize] = unsafe { mem::zeroed() };
+
+            let mut len = core::mem::size_of_val(&auxv) as c_size_t;
+
+            let pid = getpid();
+            let mib = [
+                sys::CTL_KERN as c_int,
+                sys::KERN_PROC as c_int,
+                sys::KERN_PROC_AUXV as c_int,
+                pid,
+            ];
+
+            #[allow(clippy::cast_possible_truncation)]
+            // SAFETY:
+            // - `mib.len()` does not exceed the size of `mib`.
+            // - `len` does not exceed the size of `auxv`.
+            // - `sysctl` is thread-safe.
+            unsafe {
+                sysctl(
+                    mib.as_ptr(),
+                    mib.len() as c_uint,
+                    auxv.as_mut_ptr().cast::<c_void>(),
+                    &mut len,
+                    ptr::null_mut(),
+                    0,
+                )?;
+            }
+
+            for aux in &auxv {
+                if aux.a_type == type_ as c_long {
+                    // SAFETY: aux.a_un is #[repr(C)] union and all fields have
+                    // the same size and can be safely transmuted to integers.
+                    return Ok(unsafe { aux.a_un.a_val as c_ulong });
+                }
+            }
+            Err(0)
+        }
+
+        assert_eq!(os::getauxval(ffi::AT_HWCAP), getauxval_sysctl_libc(ffi::AT_HWCAP));
+        assert_eq!(os::getauxval(ffi::AT_HWCAP2), getauxval_sysctl_libc(ffi::AT_HWCAP2));
+        assert_eq!(
+            os::getauxval(ffi::AT_HWCAP),
+            getauxval_sysctl_asm_syscall(ffi::AT_HWCAP).unwrap()
+        );
+        assert_eq!(
+            os::getauxval(ffi::AT_HWCAP2),
+            // AT_HWCAP2 is only available on FreeBSD 13+, at least for aarch64.
+            getauxval_sysctl_asm_syscall(ffi::AT_HWCAP2).unwrap_or(0)
+        );
+    }
+
+    // Static assertions for FFI bindings.
+    // This checks that FFI bindings defined in this crate, FFI bindings defined
+    // in libc, and FFI bindings generated for the platform's latest header file
+    // using bindgen have compatible signatures (or the same values if constants).
+    // Since this is a static assertion, we can detect problems with
+    // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
+    // without actually running tests on these platforms.
+    // See also tools/codegen/src/ffi.rs.
+    // TODO(codegen): auto-generate this test
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::cast_possible_truncation,
+        clippy::no_effect_underscore_binding
+    )]
+    const _: fn() = || {
+        use test_helper::{libc, sys};
+        #[cfg(not(target_os = "freebsd"))]
+        type AtType = ffi::c_ulong;
+        #[cfg(target_os = "freebsd")]
+        type AtType = ffi::c_int;
+        #[cfg(any(target_os = "linux", target_os = "android"))]
+        {
+            let mut _getauxval: unsafe extern "C" fn(ffi::c_ulong) -> ffi::c_ulong = ffi::getauxval;
+            _getauxval = libc::getauxval;
+            _getauxval = sys::getauxval;
+        }
+        #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+        {
+            let mut ___system_property_get: unsafe extern "C" fn(
+                *const ffi::c_char,
+                *mut ffi::c_char,
+            ) -> ffi::c_int = ffi::__system_property_get;
+            ___system_property_get = libc::__system_property_get;
+            ___system_property_get = sys::__system_property_get;
+            static_assert!(ffi::PROP_VALUE_MAX == libc::PROP_VALUE_MAX);
+            static_assert!(ffi::PROP_VALUE_MAX == sys::PROP_VALUE_MAX as ffi::c_int);
+        }
+        #[cfg(target_os = "freebsd")]
+        {
+            let mut _elf_aux_info: unsafe extern "C" fn(
+                ffi::c_int,
+                *mut ffi::c_void,
+                ffi::c_int,
+            ) -> ffi::c_int = ffi::elf_aux_info;
+            _elf_aux_info = libc::elf_aux_info;
+            _elf_aux_info = sys::elf_aux_info;
+        }
+        #[cfg(not(target_os = "freebsd"))] // libc doesn't have this on FreeBSD
+        static_assert!(ffi::AT_HWCAP == libc::AT_HWCAP);
+        static_assert!(ffi::AT_HWCAP == sys::AT_HWCAP as AtType);
+        #[cfg(not(target_os = "freebsd"))] // libc doesn't have this on FreeBSD
+        static_assert!(ffi::AT_HWCAP2 == libc::AT_HWCAP2);
+        static_assert!(ffi::AT_HWCAP2 == sys::AT_HWCAP2 as AtType);
+        #[cfg(target_arch = "aarch64")]
+        {
+            // static_assert!(arch::HWCAP_ATOMICS == libc::HWCAP_ATOMICS); // libc doesn't have this
+            static_assert!(arch::HWCAP_ATOMICS == sys::HWCAP_ATOMICS as ffi::c_ulong);
+            // static_assert!(HWCAP_USCAT == libc::HWCAP_USCAT); // libc doesn't have this
+            static_assert!(arch::HWCAP_USCAT == sys::HWCAP_USCAT as ffi::c_ulong);
+            #[cfg(any(target_os = "linux", target_os = "android"))]
+            #[cfg(target_pointer_width = "64")]
+            {
+                // static_assert!(HWCAP2_LRCPC3 == libc::HWCAP2_LRCPC3); // libc doesn't have this
+                static_assert!(arch::HWCAP2_LRCPC3 == sys::HWCAP2_LRCPC3 as ffi::c_ulong);
+                // static_assert!(HWCAP2_LSE128 == libc::HWCAP2_LSE128); // libc doesn't have this
+                static_assert!(arch::HWCAP2_LSE128 == sys::HWCAP2_LSE128 as ffi::c_ulong);
+            }
+        }
+        #[cfg(target_arch = "powerpc64")]
+        {
+            // static_assert!(arch::PPC_FEATURE2_ARCH_2_07 == libc::PPC_FEATURE2_ARCH_2_07); // libc doesn't have this
+            static_assert!(
+                arch::PPC_FEATURE2_ARCH_2_07 == sys::PPC_FEATURE2_ARCH_2_07 as ffi::c_ulong
+            );
+        }
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/common.rs b/vendor/portable-atomic/src/imp/atomic128/detect/common.rs
new file mode 100644
index 0000000..b87caa3
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/common.rs
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+#[derive(Clone, Copy)]
+pub(crate) struct CpuInfo(u32); // bit set; the flag constants on this type are bit indices
+
+impl CpuInfo {
+    const INIT: u32 = 0; // bit 0: set by `detect`, so a populated value is never zero
+
+    #[inline]
+    fn set(&mut self, bit: u32) {
+        self.0 = set(self.0, bit); // turn on the bit at index `bit`
+    }
+    #[inline]
+    fn test(self, bit: u32) -> bool {
+        test(self.0, bit) // true if the bit at index `bit` is on
+    }
+}
+
+#[inline]
+fn set(x: u32, bit: u32) -> u32 {
+    x | 1 << bit // `<<` binds tighter than `|`, so this is `x | (1 << bit)`
+}
+#[inline]
+fn test(x: u32, bit: u32) -> bool {
+    x & (1 << bit) != 0 // `bit` is an index (0 = least significant bit), not a mask
+}
+
+#[inline]
+pub(crate) fn detect() -> CpuInfo {
+    use core::sync::atomic::{AtomicU32, Ordering};
+
+    static CACHE: AtomicU32 = AtomicU32::new(0); // 0 means "nothing cached yet"
+    let mut info = CpuInfo(CACHE.load(Ordering::Relaxed));
+    if info.0 != 0 {
+        return info; // fast path: an earlier call already stored its result
+    }
+    info.set(CpuInfo::INIT); // keeps the cached value non-zero even if no feature is found
+    // Note: detect_false cfg is intended to make it easy for portable-atomic developers to
+    // test cases such as has_cmpxchg16b == false, has_lse == false,
+    // __kuser_helper_version < 5, etc., and is not a public API.
+    if !cfg!(portable_atomic_test_outline_atomics_detect_false) {
+        _detect(&mut info);
+    }
+    CACHE.store(info.0, Ordering::Relaxed); // plain store, no CAS: racing first calls may each detect
+    info
+}
+
+#[cfg(target_arch = "aarch64")]
+impl CpuInfo {
+    /// Bit index of the flag: whether FEAT_LSE is available
+    const HAS_LSE: u32 = 1;
+    /// Bit index of the flag: whether FEAT_LSE2 is available
+    #[cfg_attr(not(test), allow(dead_code))]
+    const HAS_LSE2: u32 = 2;
+    /// Bit index of the flag: whether FEAT_LSE128 is available
+    // This is currently only used in tests.
+    #[cfg(test)]
+    const HAS_LSE128: u32 = 3;
+    /// Bit index of the flag: whether FEAT_LRCPC3 is available
+    // This is currently only used in tests.
+    #[cfg(test)]
+    const HAS_RCPC3: u32 = 4;
+
+    #[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
+    #[inline]
+    pub(crate) fn has_lse(self) -> bool {
+        self.test(CpuInfo::HAS_LSE) // compiled only in tests or when `lse` is not statically enabled
+    }
+    #[cfg_attr(not(test), allow(dead_code))]
+    #[cfg(any(test, not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))))]
+    #[inline]
+    pub(crate) fn has_lse2(self) -> bool {
+        self.test(CpuInfo::HAS_LSE2) // compiled only in tests or when `lse2` is not statically enabled
+    }
+    #[cfg(test)]
+    #[inline]
+    pub(crate) fn has_lse128(self) -> bool {
+        self.test(CpuInfo::HAS_LSE128) // test-only accessor
+    }
+    #[cfg(test)]
+    #[inline]
+    pub(crate) fn has_rcpc3(self) -> bool {
+        self.test(CpuInfo::HAS_RCPC3) // test-only accessor
+    }
+}
+
+#[cfg(target_arch = "x86_64")]
+impl CpuInfo {
+    /// Bit index of the flag: whether CMPXCHG16B is available
+    const HAS_CMPXCHG16B: u32 = 1;
+    /// Bit index of the flag: whether VMOVDQA is atomic
+    const HAS_VMOVDQA_ATOMIC: u32 = 2;
+
+    #[cfg(any(
+        test,
+        not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+    ))]
+    #[inline]
+    pub(crate) fn has_cmpxchg16b(self) -> bool {
+        self.test(CpuInfo::HAS_CMPXCHG16B) // compiled only in tests or without static `cmpxchg16b`
+    }
+    #[inline]
+    pub(crate) fn has_vmovdqa_atomic(self) -> bool {
+        self.test(CpuInfo::HAS_VMOVDQA_ATOMIC) // always compiled on x86_64 (no cfg gate)
+    }
+}
+
+#[cfg(target_arch = "powerpc64")]
+impl CpuInfo {
+    /// Bit index of the flag: whether lqarx and stqcx. instructions are available
+    const HAS_QUADWORD_ATOMICS: u32 = 1;
+
+    #[cfg(any(
+        test,
+        not(any(
+            target_feature = "quadword-atomics",
+            portable_atomic_target_feature = "quadword-atomics",
+        )),
+    ))]
+    #[inline]
+    pub(crate) fn has_quadword_atomics(self) -> bool {
+        self.test(CpuInfo::HAS_QUADWORD_ATOMICS) // compiled only in tests or without the static feature
+    }
+}
+
+// core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+#[cfg(any(target_arch = "aarch64", target_arch = "powerpc64"))]
+#[cfg(not(windows))]
+#[allow(dead_code, non_camel_case_types)]
+mod c_types {
+    pub(crate) type c_void = core::ffi::c_void;
+    // c_{,u}int is {i,u}32 on non-16-bit architectures
+    // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L160
+    // (16-bit architectures currently don't use this module)
+    pub(crate) type c_int = i32;
+    pub(crate) type c_uint = u32;
+    // c_{,u}long is {i,u}64 on non-Windows 64-bit targets, otherwise is {i,u}32
+    // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L176
+    // (Windows currently doesn't use this module - this module is cfg(not(windows)))
+    #[cfg(target_pointer_width = "64")]
+    pub(crate) type c_long = i64;
+    #[cfg(not(target_pointer_width = "64"))]
+    pub(crate) type c_long = i32;
+    #[cfg(target_pointer_width = "64")]
+    pub(crate) type c_ulong = u64;
+    #[cfg(not(target_pointer_width = "64"))]
+    pub(crate) type c_ulong = u32;
+    // c_size_t is currently always usize
+    // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L88
+    pub(crate) type c_size_t = usize;
+    // c_char is u8 by default on most non-Apple/non-Windows ARM/PowerPC/RISC-V/s390x/Hexagon targets
+    // (Linux/Android/FreeBSD/NetBSD/OpenBSD/VxWorks/Fuchsia/QNX Neutrino/Horizon/AIX/z/OS)
+    // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L104
+    // https://github.com/llvm/llvm-project/blob/9734b2256d89cb4c61a4dbf4a3c3f3f942fe9b8c/lldb/source/Utility/ArchSpec.cpp#L712
+    // RISC-V https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/HEAD/riscv-cc.adoc#cc-type-representations
+    // Hexagon https://lists.llvm.org/pipermail/llvm-dev/attachments/20190916/21516a52/attachment-0001.pdf
+    // AIX https://www.ibm.com/docs/en/xl-c-aix/13.1.2?topic=descriptions-qchars
+    // z/OS https://www.ibm.com/docs/en/zos/2.5.0?topic=specifiers-character-types
+    // (macOS is currently the only Apple target that uses this module, and Windows currently doesn't use this module)
+    #[cfg(not(target_os = "macos"))]
+    pub(crate) type c_char = u8;
+    // c_char is i8 on all Apple targets
+    #[cfg(target_os = "macos")]
+    pub(crate) type c_char = i8;
+
+    // Compile-time checks: each `let` type-checks only if our alias equals the std/libc/sys type.
+    #[cfg(test)]
+    const _: fn() = || {
+        use test_helper::{libc, sys};
+        let _: c_int = 0 as std::os::raw::c_int;
+        let _: c_uint = 0 as std::os::raw::c_uint;
+        let _: c_long = 0 as std::os::raw::c_long;
+        let _: c_ulong = 0 as std::os::raw::c_ulong;
+        let _: c_size_t = 0 as libc::size_t; // std::os::raw::c_size_t is unstable
+        let _: c_char = 0 as std::os::raw::c_char;
+        let _: c_char = 0 as sys::c_char;
+    };
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests_common {
+    use super::*;
+
+    #[test]
+    fn test_bit_flags() {
+        let mut x = CpuInfo(0); // start with every flag off, then turn them on one at a time
+        #[cfg(target_arch = "aarch64")]
+        {
+            assert!(!x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_LSE));
+            assert!(!x.test(CpuInfo::HAS_LSE2));
+            assert!(!x.test(CpuInfo::HAS_LSE128));
+            assert!(!x.test(CpuInfo::HAS_RCPC3));
+            x.set(CpuInfo::INIT); // after each `set`, only the flags set so far must read back true
+            assert!(x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_LSE));
+            assert!(!x.test(CpuInfo::HAS_LSE2));
+            assert!(!x.test(CpuInfo::HAS_LSE128));
+            assert!(!x.test(CpuInfo::HAS_RCPC3));
+            x.set(CpuInfo::HAS_LSE);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_LSE));
+            assert!(!x.test(CpuInfo::HAS_LSE2));
+            assert!(!x.test(CpuInfo::HAS_LSE128));
+            assert!(!x.test(CpuInfo::HAS_RCPC3));
+            x.set(CpuInfo::HAS_LSE2);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_LSE));
+            assert!(x.test(CpuInfo::HAS_LSE2));
+            assert!(!x.test(CpuInfo::HAS_LSE128));
+            assert!(!x.test(CpuInfo::HAS_RCPC3));
+            x.set(CpuInfo::HAS_LSE128);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_LSE));
+            assert!(x.test(CpuInfo::HAS_LSE2));
+            assert!(x.test(CpuInfo::HAS_LSE128));
+            assert!(!x.test(CpuInfo::HAS_RCPC3));
+            x.set(CpuInfo::HAS_RCPC3);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_LSE));
+            assert!(x.test(CpuInfo::HAS_LSE2));
+            assert!(x.test(CpuInfo::HAS_LSE128));
+            assert!(x.test(CpuInfo::HAS_RCPC3));
+        }
+        #[cfg(target_arch = "x86_64")]
+        {
+            assert!(!x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_CMPXCHG16B));
+            assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+            x.set(CpuInfo::INIT); // after each `set`, only the flags set so far must read back true
+            assert!(x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_CMPXCHG16B));
+            assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+            x.set(CpuInfo::HAS_CMPXCHG16B);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_CMPXCHG16B));
+            assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+            x.set(CpuInfo::HAS_VMOVDQA_ATOMIC);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_CMPXCHG16B));
+            assert!(x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+        }
+        #[cfg(target_arch = "powerpc64")]
+        {
+            assert!(!x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_QUADWORD_ATOMICS));
+            x.set(CpuInfo::INIT); // after each `set`, only the flags set so far must read back true
+            assert!(x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_QUADWORD_ATOMICS));
+            x.set(CpuInfo::HAS_QUADWORD_ATOMICS);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_QUADWORD_ATOMICS));
+        }
+    }
+
+    #[test]
+    fn print_features() {
+        use std::{fmt::Write as _, io::Write, string::String};
+
+        let mut features = String::new(); // whole report is built here and written once at the end
+        macro_rules! print_feature {
+            ($name:expr, $enabled:expr $(,)?) => {{
+                let _ = writeln!(features, "  {}: {}", $name, $enabled);
+            }};
+        }
+        #[cfg(target_arch = "aarch64")]
+        {
+            features.push_str("run-time:\n");
+            print_feature!("lse", detect().test(CpuInfo::HAS_LSE));
+            print_feature!("lse2", detect().test(CpuInfo::HAS_LSE2));
+            print_feature!("lse128", detect().test(CpuInfo::HAS_LSE128));
+            print_feature!("rcpc3", detect().test(CpuInfo::HAS_RCPC3));
+            features.push_str("compile-time:\n");
+            print_feature!(
+                "lse",
+                cfg!(any(target_feature = "lse", portable_atomic_target_feature = "lse")),
+            );
+            print_feature!(
+                "lse2",
+                cfg!(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")),
+            );
+        }
+        #[cfg(target_arch = "x86_64")]
+        {
+            features.push_str("run-time:\n");
+            print_feature!("cmpxchg16b", detect().test(CpuInfo::HAS_CMPXCHG16B));
+            print_feature!("vmovdqa-atomic", detect().test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+            features.push_str("compile-time:\n");
+            print_feature!(
+                "cmpxchg16b",
+                cfg!(any(
+                    target_feature = "cmpxchg16b",
+                    portable_atomic_target_feature = "cmpxchg16b",
+                )),
+            );
+        }
+        #[cfg(target_arch = "powerpc64")]
+        {
+            features.push_str("run-time:\n");
+            print_feature!("quadword-atomics", detect().test(CpuInfo::HAS_QUADWORD_ATOMICS));
+            features.push_str("compile-time:\n");
+            print_feature!(
+                "quadword-atomics",
+                cfg!(any(
+                    target_feature = "quadword-atomics",
+                    portable_atomic_target_feature = "quadword-atomics",
+                )),
+            );
+        }
+        let stderr = std::io::stderr(); // the report goes to stderr, not stdout
+        let mut stderr = stderr.lock();
+        let _ = stderr.write_all(features.as_bytes()); // best-effort: a write error is ignored
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    #[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)]
+    fn test_detect() {
+        if detect().has_cmpxchg16b() { // each accessor must agree with the raw flag bit
+            assert!(detect().test(CpuInfo::HAS_CMPXCHG16B));
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_CMPXCHG16B));
+        }
+        if detect().has_vmovdqa_atomic() {
+            assert!(detect().test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+        }
+    }
+    #[cfg(target_arch = "aarch64")]
+    #[test]
+    #[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)]
+    fn test_detect() {
+        let proc_cpuinfo = test_helper::cpuinfo::ProcCpuinfo::new(); // cross-checked only when this is `Ok`
+        if detect().has_lse() {
+            assert!(detect().test(CpuInfo::HAS_LSE));
+            if let Ok(proc_cpuinfo) = proc_cpuinfo {
+                assert!(proc_cpuinfo.lse);
+            }
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_LSE));
+            if let Ok(proc_cpuinfo) = proc_cpuinfo {
+                assert!(!proc_cpuinfo.lse);
+            }
+        }
+        if detect().has_lse2() {
+            assert!(detect().test(CpuInfo::HAS_LSE)); // LSE2 is expected to imply LSE
+            assert!(detect().test(CpuInfo::HAS_LSE2));
+            if let Ok(test_helper::cpuinfo::ProcCpuinfo { lse2: Some(lse2), .. }) = proc_cpuinfo {
+                assert!(lse2);
+            }
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_LSE2));
+            if let Ok(test_helper::cpuinfo::ProcCpuinfo { lse2: Some(lse2), .. }) = proc_cpuinfo {
+                assert!(!lse2);
+            }
+        }
+        if detect().has_lse128() {
+            assert!(detect().test(CpuInfo::HAS_LSE)); // LSE128 is expected to imply LSE and LSE2
+            assert!(detect().test(CpuInfo::HAS_LSE2));
+            assert!(detect().test(CpuInfo::HAS_LSE128));
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_LSE128));
+        }
+        if detect().has_rcpc3() {
+            assert!(detect().test(CpuInfo::HAS_RCPC3));
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_RCPC3));
+        }
+    }
+    #[cfg(target_arch = "powerpc64")]
+    #[test]
+    #[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)]
+    fn test_detect() {
+        let proc_cpuinfo = test_helper::cpuinfo::ProcCpuinfo::new(); // cross-checked only when this is `Ok`
+        if detect().has_quadword_atomics() {
+            assert!(detect().test(CpuInfo::HAS_QUADWORD_ATOMICS));
+            if let Ok(proc_cpuinfo) = proc_cpuinfo {
+                assert!(proc_cpuinfo.power8); // the flag must match the helper's `power8` field
+            }
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_QUADWORD_ATOMICS));
+            if let Ok(proc_cpuinfo) = proc_cpuinfo {
+                assert!(!proc_cpuinfo.power8);
+            }
+        }
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs b/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs
new file mode 100644
index 0000000..80eefed
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Adapted from https://github.com/rust-lang/stdarch.
+
+#![cfg_attr(any(not(target_feature = "sse"), portable_atomic_sanitize_thread), allow(dead_code))]
+
+// Miri doesn't support inline assembly used in __cpuid: https://github.com/rust-lang/miri/issues/932
+// SGX doesn't support CPUID: https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs#L102-L105
+#[cfg(any(target_env = "sgx", miri))]
+compile_error!("internal error: this module is not supported on this environment");
+
+include!("common.rs");
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+use core::arch::x86_64::{CpuidResult, _xgetbv};
+
+// Workaround for https://github.com/rust-lang/rust/issues/101346
+// It is not clear if our use cases are affected, but we implement this just in case.
+//
+// Refs:
+// - https://www.felixcloutier.com/x86/cpuid
+// - https://en.wikipedia.org/wiki/CPUID
+// - https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs
+unsafe fn __cpuid(leaf: u32) -> CpuidResult {
+    let eax;
+    let mut ebx;
+    let ecx;
+    let edx;
+    // SAFETY: the caller must guarantee that CPU supports `cpuid`.
+    unsafe {
+        asm!(
+            // rbx is reserved by LLVM, so it is saved in a scratch register around `cpuid`
+            "mov {ebx_tmp:r}, rbx", // save rbx
+            "cpuid",
+            "xchg {ebx_tmp:r}, rbx", // restore rbx; the scratch register now holds cpuid's ebx
+            ebx_tmp = out(reg) ebx,
+            inout("eax") leaf => eax, // requested leaf in, result out
+            inout("ecx") 0 => ecx, // ECX input is always 0 here
+            out("edx") edx,
+            options(nostack, preserves_flags),
+        );
+    }
+    CpuidResult { eax, ebx, ecx, edx }
+}
+
+// Vendor ID strings, see https://en.wikipedia.org/wiki/CPUID
+const VENDOR_ID_INTEL: [u8; 12] = *b"GenuineIntel"; // compared with `_vendor_id()` in `_detect`
+const VENDOR_ID_AMD: [u8; 12] = *b"AuthenticAMD"; // compared with `_vendor_id()` in `_detect`
+
+unsafe fn _vendor_id() -> [u8; 12] {
+    // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L40-L59
+    // SAFETY: the caller must guarantee that CPU supports `cpuid`.
+    let CpuidResult { ebx, ecx, edx, .. } = unsafe { __cpuid(0) };
+    let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()]; // order: ebx, edx, ecx
+    // SAFETY: transmute is safe because `[u8; 12]` and `[[u8; 4]; 3]` have the same layout.
+    unsafe { core::mem::transmute(vendor_id) }
+}
+
+#[cold]
+fn _detect(info: &mut CpuInfo) {
+    // SAFETY: Calling `_vendor_id` is safe because the CPU has `cpuid` support.
+    let vendor_id = unsafe { _vendor_id() };
+
+    // SAFETY: Calling `__cpuid` is safe because the CPU has `cpuid` support.
+    let proc_info_ecx = unsafe { __cpuid(0x0000_0001_u32).ecx };
+
+    // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L111
+    if test(proc_info_ecx, 13) {
+        info.set(CpuInfo::HAS_CMPXCHG16B); // from CPUID leaf 1, ECX bit 13
+    }
+
+    // VMOVDQA is atomic on Intel and AMD CPUs with AVX.
+    // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details.
+    if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD {
+        // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L131-L224
+        let cpu_xsave = test(proc_info_ecx, 26);
+        if cpu_xsave {
+            let cpu_osxsave = test(proc_info_ecx, 27);
+            if cpu_osxsave {
+                // SAFETY: Calling `_xgetbv` is safe because the CPU has `xsave` support
+                // and OS has set `osxsave`.
+                let xcr0 = unsafe { _xgetbv(0) };
+                let os_avx_support = xcr0 & 6 == 6; // 6 == 0b110: XCR0 bits 1 and 2 must both be set
+                if os_avx_support && test(proc_info_ecx, 28) {
+                    info.set(CpuInfo::HAS_VMOVDQA_ATOMIC); // needs OS support and leaf 1 ECX bit 28
+                }
+            }
+        }
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    #[cfg(not(portable_atomic_test_outline_atomics_detect_false))]
+    use super::*;
+
+    #[cfg(not(portable_atomic_test_outline_atomics_detect_false))]
+    #[test]
+    fn test_cpuid() {
+        assert_eq!(std::is_x86_feature_detected!("cmpxchg16b"), detect().has_cmpxchg16b()); // must match std
+        let vendor_id = unsafe { _vendor_id() };
+        if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD {
+            assert_eq!(std::is_x86_feature_detected!("avx"), detect().has_vmovdqa_atomic());
+        } else {
+            assert!(!detect().has_vmovdqa_atomic()); // the flag is never set for other vendors
+        }
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs b/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs
new file mode 100644
index 0000000..21b5be2
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation without inline assembly.
+//
+// Note: This module is currently only enabled on Miri and ThreadSanitizer which
+// do not support inline assembly.
+//
+// This uses `core::arch::x86_64::cmpxchg16b` on x86_64 and
+// `core::intrinsics::atomic_*` on aarch64, powerpc64, and s390x.
+//
+// See README.md of this directory for performance comparison with the
+// implementation with inline assembly.
+//
+// Note:
+// - This currently needs Rust 1.70 on x86_64, otherwise nightly compilers.
+// - On powerpc64, this requires LLVM 15+ and pwr8+ (quadword-atomics LLVM target feature):
+//   https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
+// - On aarch64 big-endian, LLVM (as of 17) generates broken code. (wrong result in stress test)
+//   (on cfg(miri)/cfg(sanitize) it may be fine though)
+// - On s390x, LLVM (as of 17) generates libcalls for operations other than load/store/cmpxchg:
+//   https://godbolt.org/z/5a5T4hxMh
+//   https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll
+//   https://reviews.llvm.org/D146425
+// - On powerpc64, LLVM (as of 17) doesn't support 128-bit atomic min/max:
+//   https://github.com/llvm/llvm-project/issues/68390
+// - On powerpc64le, LLVM (as of 17) generates broken code. (wrong result from fetch_add)
+//
+// Refs: https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs
+
+include!("macros.rs");
+
+#[allow(dead_code)] // we only use compare_exchange.
+#[cfg(target_arch = "x86_64")]
+#[cfg(not(target_feature = "cmpxchg16b"))]
+#[path = "../fallback/outline_atomics.rs"]
+mod fallback;
+
+#[cfg(target_arch = "x86_64")]
+#[cfg(not(target_feature = "cmpxchg16b"))]
+#[path = "detect/x86_64.rs"]
+mod detect;
+
+use core::sync::atomic::Ordering;
+#[cfg(not(target_arch = "x86_64"))]
+use core::{
+    intrinsics,
+    sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst},
+};
+
+// https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs#L3128
+#[cfg(target_arch = "x86_64")]
+#[inline]
+fn strongest_failure_ordering(order: Ordering) -> Ordering {
+    match order { // picks the failure ordering to pair with success ordering `order`
+        Ordering::Release | Ordering::Relaxed => Ordering::Relaxed,
+        Ordering::SeqCst => Ordering::SeqCst,
+        Ordering::Acquire | Ordering::AcqRel => Ordering::Acquire,
+        _ => unreachable!("{:?}", order), // `Ordering` is non-exhaustive; all current variants are covered
+    }
+}
+
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
+    #[cfg(target_arch = "x86_64")]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let fail_order = strongest_failure_ordering(order);
+        match atomic_compare_exchange(src, 0, 0, order, fail_order) { // CAS 0 -> 0 never changes the value
+            Ok(v) | Err(v) => v, // both arms carry the value that was read
+        }
+    }
+    #[cfg(not(target_arch = "x86_64"))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order {
+            Acquire => intrinsics::atomic_load_acquire(src),
+            Relaxed => intrinsics::atomic_load_relaxed(src),
+            SeqCst => intrinsics::atomic_load_seqcst(src),
+            _ => unreachable!("{:?}", order), // any other ordering (e.g. Release, AcqRel) panics here
+        }
+    }
+}
+
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+    #[cfg(target_arch = "x86_64")]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        atomic_swap(dst, val, order); // store done as a swap; the previous value is discarded
+    }
+    #[cfg(not(target_arch = "x86_64"))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order {
+            Release => intrinsics::atomic_store_release(dst, val),
+            Relaxed => intrinsics::atomic_store_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_store_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // any other ordering (e.g. Acquire, AcqRel) panics here
+        }
+    }
+}
+
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_compare_exchange(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> Result<u128, u128> {
+    #[cfg(target_arch = "x86_64")]
+    let (val, ok) = {
+        #[cfg_attr(not(target_feature = "cmpxchg16b"), target_feature(enable = "cmpxchg16b"))]
+        #[cfg_attr(target_feature = "cmpxchg16b", inline)]
+        #[cfg_attr(not(target_feature = "cmpxchg16b"), inline(never))]
+        unsafe fn cmpxchg16b(
+            dst: *mut u128,
+            old: u128,
+            new: u128,
+            success: Ordering,
+            failure: Ordering,
+        ) -> (u128, bool) {
+            debug_assert!(dst as usize % 16 == 0);
+            #[cfg(not(target_feature = "cmpxchg16b"))]
+            {
+                debug_assert!(detect::detect().has_cmpxchg16b());
+            }
+            // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+            // reads, 16-byte aligned (required by CMPXCHG16B), that there are no
+            // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B.
+            let prev = unsafe { core::arch::x86_64::cmpxchg16b(dst, old, new, success, failure) };
+            (prev, prev == old) // success iff the value found equals `old`
+        }
+        // The stronger failure ordering in cmpxchg16b_intrinsic is actually supported
+        // before stabilization, but we do not have a specific cfg for it.
+        #[cfg(portable_atomic_unstable_cmpxchg16b_intrinsic)]
+        let success = crate::utils::upgrade_success_ordering(success, failure);
+        #[cfg(target_feature = "cmpxchg16b")]
+        // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+        // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+        // and cfg guarantees that CMPXCHG16B is available at compile-time.
+        unsafe {
+            cmpxchg16b(dst, old, new, success, failure)
+        }
+        #[cfg(not(target_feature = "cmpxchg16b"))]
+        // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+        // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses.
+        unsafe {
+            ifunc!(unsafe fn(
+                dst: *mut u128, old: u128, new: u128, success: Ordering, failure: Ordering
+            ) -> (u128, bool) {
+                if detect::detect().has_cmpxchg16b() {
+                    cmpxchg16b
+                } else {
+                    fallback::atomic_compare_exchange // chosen when CMPXCHG16B is not detected at run time
+                }
+            })
+        }
+    };
+    #[cfg(not(target_arch = "x86_64"))]
+    // SAFETY: the caller must uphold the safety contract.
+    let (val, ok) = unsafe {
+        match (success, failure) { // one intrinsic per (success, failure) ordering pair
+            (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed_relaxed(dst, old, new),
+            (Relaxed, Acquire) => intrinsics::atomic_cxchg_relaxed_acquire(dst, old, new),
+            (Relaxed, SeqCst) => intrinsics::atomic_cxchg_relaxed_seqcst(dst, old, new),
+            (Acquire, Relaxed) => intrinsics::atomic_cxchg_acquire_relaxed(dst, old, new),
+            (Acquire, Acquire) => intrinsics::atomic_cxchg_acquire_acquire(dst, old, new),
+            (Acquire, SeqCst) => intrinsics::atomic_cxchg_acquire_seqcst(dst, old, new),
+            (Release, Relaxed) => intrinsics::atomic_cxchg_release_relaxed(dst, old, new),
+            (Release, Acquire) => intrinsics::atomic_cxchg_release_acquire(dst, old, new),
+            (Release, SeqCst) => intrinsics::atomic_cxchg_release_seqcst(dst, old, new),
+            (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_relaxed(dst, old, new),
+            (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel_acquire(dst, old, new),
+            (AcqRel, SeqCst) => intrinsics::atomic_cxchg_acqrel_seqcst(dst, old, new),
+            (SeqCst, Relaxed) => intrinsics::atomic_cxchg_seqcst_relaxed(dst, old, new),
+            (SeqCst, Acquire) => intrinsics::atomic_cxchg_seqcst_acquire(dst, old, new),
+            (SeqCst, SeqCst) => intrinsics::atomic_cxchg_seqcst_seqcst(dst, old, new),
+            _ => unreachable!("{:?}, {:?}", success, failure), // e.g. a Release or AcqRel failure ordering
+        }
+    };
+    if ok {
+        Ok(val)
+    } else {
+        Err(val)
+    }
+}
+
+#[cfg(target_arch = "x86_64")]
+use atomic_compare_exchange as atomic_compare_exchange_weak; // x86_64: weak is the strong version
+#[cfg(not(target_arch = "x86_64"))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_compare_exchange_weak(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> Result<u128, u128> {
+    // SAFETY: the caller must uphold the safety contract.
+    let (val, ok) = unsafe {
+        match (success, failure) { // one `cxchgweak` intrinsic per (success, failure) ordering pair
+            (Relaxed, Relaxed) => intrinsics::atomic_cxchgweak_relaxed_relaxed(dst, old, new),
+            (Relaxed, Acquire) => intrinsics::atomic_cxchgweak_relaxed_acquire(dst, old, new),
+            (Relaxed, SeqCst) => intrinsics::atomic_cxchgweak_relaxed_seqcst(dst, old, new),
+            (Acquire, Relaxed) => intrinsics::atomic_cxchgweak_acquire_relaxed(dst, old, new),
+            (Acquire, Acquire) => intrinsics::atomic_cxchgweak_acquire_acquire(dst, old, new),
+            (Acquire, SeqCst) => intrinsics::atomic_cxchgweak_acquire_seqcst(dst, old, new),
+            (Release, Relaxed) => intrinsics::atomic_cxchgweak_release_relaxed(dst, old, new),
+            (Release, Acquire) => intrinsics::atomic_cxchgweak_release_acquire(dst, old, new),
+            (Release, SeqCst) => intrinsics::atomic_cxchgweak_release_seqcst(dst, old, new),
+            (AcqRel, Relaxed) => intrinsics::atomic_cxchgweak_acqrel_relaxed(dst, old, new),
+            (AcqRel, Acquire) => intrinsics::atomic_cxchgweak_acqrel_acquire(dst, old, new),
+            (AcqRel, SeqCst) => intrinsics::atomic_cxchgweak_acqrel_seqcst(dst, old, new),
+            (SeqCst, Relaxed) => intrinsics::atomic_cxchgweak_seqcst_relaxed(dst, old, new),
+            (SeqCst, Acquire) => intrinsics::atomic_cxchgweak_seqcst_acquire(dst, old, new),
+            (SeqCst, SeqCst) => intrinsics::atomic_cxchgweak_seqcst_seqcst(dst, old, new),
+            _ => unreachable!("{:?}, {:?}", success, failure), // e.g. a Release or AcqRel failure ordering
+        }
+    };
+    if ok {
+        Ok(val)
+    } else {
+        Err(val)
+    }
+}
+
+#[inline(always)]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128
+where
+    F: FnMut(u128) -> u128,
+{
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        // This is a private function and all instances of `f` only operate on the value
+        // loaded, so there is no need to synchronize the first load/failed CAS.
+        let mut prev = atomic_load(dst, Ordering::Relaxed);
+        loop {
+            let next = f(prev); // `f` may run several times, once per CAS attempt
+            match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) {
+                Ok(x) => return x, // the value that was replaced, not `next`
+                Err(x) => prev = x, // retry with the value the failed CAS observed
+            }
+        }
+    }
+}
+
+// On x86_64, we use core::arch::x86_64::cmpxchg16b instead of core::intrinsics.
+// On s390x, LLVM generates libcalls for operations other than load/store/cmpxchg (see also module-level comment).
+#[cfg(any(target_arch = "x86_64", target_arch = "s390x"))]
+atomic_rmw_by_atomic_update!();
+// On powerpc64, LLVM doesn't support 128-bit atomic min/max (see also module-level comment).
+#[cfg(target_arch = "powerpc64")]
+atomic_rmw_by_atomic_update!(cmp);
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // one `atomic_xchg_*` intrinsic per ordering; its result is returned as-is
+            Acquire => intrinsics::atomic_xchg_acquire(dst, val),
+            Release => intrinsics::atomic_xchg_release(dst, val),
+            AcqRel => intrinsics::atomic_xchg_acqrel(dst, val),
+            Relaxed => intrinsics::atomic_xchg_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_xchg_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // `Ordering` is non-exhaustive; all current variants are covered
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract (one `atomic_xadd_*` call per ordering).
+    unsafe {
+        match order {
+            Relaxed => intrinsics::atomic_xadd_relaxed(dst, val),
+            Acquire => intrinsics::atomic_xadd_acquire(dst, val),
+            Release => intrinsics::atomic_xadd_release(dst, val),
+            AcqRel => intrinsics::atomic_xadd_acqrel(dst, val),
+            SeqCst => intrinsics::atomic_xadd_seqcst(dst, val),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract (one `atomic_xsub_*` call per ordering).
+    unsafe {
+        match order {
+            Relaxed => intrinsics::atomic_xsub_relaxed(dst, val),
+            Acquire => intrinsics::atomic_xsub_acquire(dst, val),
+            Release => intrinsics::atomic_xsub_release(dst, val),
+            AcqRel => intrinsics::atomic_xsub_acqrel(dst, val),
+            SeqCst => intrinsics::atomic_xsub_seqcst(dst, val),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract (one `atomic_and_*` call per ordering).
+    unsafe {
+        match order {
+            Relaxed => intrinsics::atomic_and_relaxed(dst, val),
+            Acquire => intrinsics::atomic_and_acquire(dst, val),
+            Release => intrinsics::atomic_and_release(dst, val),
+            AcqRel => intrinsics::atomic_and_acqrel(dst, val),
+            SeqCst => intrinsics::atomic_and_seqcst(dst, val),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract (one `atomic_nand_*` call per ordering).
+    unsafe {
+        match order {
+            Relaxed => intrinsics::atomic_nand_relaxed(dst, val),
+            Acquire => intrinsics::atomic_nand_acquire(dst, val),
+            Release => intrinsics::atomic_nand_release(dst, val),
+            AcqRel => intrinsics::atomic_nand_acqrel(dst, val),
+            SeqCst => intrinsics::atomic_nand_seqcst(dst, val),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract (one `atomic_or_*` call per ordering).
+    unsafe {
+        match order {
+            Relaxed => intrinsics::atomic_or_relaxed(dst, val),
+            Acquire => intrinsics::atomic_or_acquire(dst, val),
+            Release => intrinsics::atomic_or_release(dst, val),
+            AcqRel => intrinsics::atomic_or_acqrel(dst, val),
+            SeqCst => intrinsics::atomic_or_seqcst(dst, val),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract (one `atomic_xor_*` call per ordering).
+    unsafe {
+        match order {
+            Relaxed => intrinsics::atomic_xor_relaxed(dst, val),
+            Acquire => intrinsics::atomic_xor_acquire(dst, val),
+            Release => intrinsics::atomic_xor_release(dst, val),
+            AcqRel => intrinsics::atomic_xor_acqrel(dst, val),
+            SeqCst => intrinsics::atomic_xor_seqcst(dst, val),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))]
+#[inline] // NOTE(review): returns i128; `atomic_rmw_by_atomic_update!(cmp)` defines it with u128.
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> i128 {
+    #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
+    // SAFETY: the caller must uphold the safety contract (`dst`/`val` are reinterpreted as i128).
+    unsafe {
+        match order {
+            Relaxed => intrinsics::atomic_max_relaxed(dst.cast::<i128>(), val as i128),
+            Acquire => intrinsics::atomic_max_acquire(dst.cast::<i128>(), val as i128),
+            Release => intrinsics::atomic_max_release(dst.cast::<i128>(), val as i128),
+            AcqRel => intrinsics::atomic_max_acqrel(dst.cast::<i128>(), val as i128),
+            SeqCst => intrinsics::atomic_max_seqcst(dst.cast::<i128>(), val as i128),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))]
+#[inline] // NOTE(review): returns i128; `atomic_rmw_by_atomic_update!(cmp)` defines it with u128.
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_min(dst: *mut u128, val: u128, order: Ordering) -> i128 {
+    #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
+    // SAFETY: the caller must uphold the safety contract (`dst`/`val` are reinterpreted as i128).
+    unsafe {
+        match order {
+            Relaxed => intrinsics::atomic_min_relaxed(dst.cast::<i128>(), val as i128),
+            Acquire => intrinsics::atomic_min_acquire(dst.cast::<i128>(), val as i128),
+            Release => intrinsics::atomic_min_release(dst.cast::<i128>(), val as i128),
+            AcqRel => intrinsics::atomic_min_acqrel(dst.cast::<i128>(), val as i128),
+            SeqCst => intrinsics::atomic_min_seqcst(dst.cast::<i128>(), val as i128),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract (one `atomic_umax_*` call per ordering).
+    unsafe {
+        match order {
+            Relaxed => intrinsics::atomic_umax_relaxed(dst, val),
+            Acquire => intrinsics::atomic_umax_acquire(dst, val),
+            Release => intrinsics::atomic_umax_release(dst, val),
+            AcqRel => intrinsics::atomic_umax_acqrel(dst, val),
+            SeqCst => intrinsics::atomic_umax_seqcst(dst, val),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract (one `atomic_umin_*` call per ordering).
+    unsafe {
+        match order {
+            Relaxed => intrinsics::atomic_umin_relaxed(dst, val),
+            Acquire => intrinsics::atomic_umin_acquire(dst, val),
+            Release => intrinsics::atomic_umin_release(dst, val),
+            AcqRel => intrinsics::atomic_umin_acqrel(dst, val),
+            SeqCst => intrinsics::atomic_umin_seqcst(dst, val),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract. XOR with all-ones is bitwise NOT.
+    unsafe { atomic_xor(dst, !0_u128, order) }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_neg(dst: *mut u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract. Negation runs as a CAS loop.
+    unsafe { atomic_update(dst, order, |current| current.wrapping_neg()) }
+}
+
+#[cfg(not(target_arch = "x86_64"))]
+const IS_ALWAYS_LOCK_FREE: bool = true;
+#[cfg(not(target_arch = "x86_64"))]
+#[inline]
+const fn is_lock_free() -> bool {
+    IS_ALWAYS_LOCK_FREE // outside x86_64 the answer is the compile-time constant
+}
+
+#[cfg(target_arch = "x86_64")]
+#[inline]
+fn is_lock_free() -> bool {
+    #[cfg(target_feature = "cmpxchg16b")]
+    {
+        // CMPXCHG16B is enabled at compile time, so no further check is needed.
+        true
+    }
+    #[cfg(not(target_feature = "cmpxchg16b"))]
+    {
+        detect::detect().has_cmpxchg16b() // otherwise defer to `detect` (presumably a runtime check)
+    }
+}
+#[cfg(target_arch = "x86_64")]
+const IS_ALWAYS_LOCK_FREE: bool = cfg!(target_feature = "cmpxchg16b");
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min); // signed comparison helpers
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); // unsigned comparison helpers
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_atomic_int!(i128);
+    test_atomic_int!(u128);
+
+    // The load/store/swap implementations do not depend on signedness, so
+    // stress-testing only the unsigned type is sufficient.
+    stress_test!(u128);
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/macros.rs b/vendor/portable-atomic/src/imp/atomic128/macros.rs
new file mode 100644
index 0000000..d32217e
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/macros.rs
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+macro_rules! atomic128 { // generates `$atomic_type`, a 128-bit atomic wrapping `$int_type`
+    ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => {
+        #[repr(C, align(16))]
+        pub(crate) struct $atomic_type {
+            v: core::cell::UnsafeCell<$int_type>,
+        }
+
+        // Send is implicitly implemented (auto trait); only Sync needs a manual impl.
+        // SAFETY: any data races are prevented by atomic intrinsics.
+        unsafe impl Sync for $atomic_type {}
+
+        impl_default_no_fetch_ops!($atomic_type, $int_type);
+        impl_default_bit_opts!($atomic_type, $int_type);
+        impl $atomic_type {
+            #[inline]
+            pub(crate) const fn new(v: $int_type) -> Self {
+                Self { v: core::cell::UnsafeCell::new(v) }
+            }
+
+            #[inline]
+            pub(crate) fn is_lock_free() -> bool {
+                is_lock_free() // free function supplied by the module that invokes this macro
+            }
+            #[inline]
+            pub(crate) const fn is_always_lock_free() -> bool {
+                IS_ALWAYS_LOCK_FREE
+            }
+
+            #[inline]
+            pub(crate) fn get_mut(&mut self) -> &mut $int_type {
+                // SAFETY: the mutable reference guarantees unique ownership.
+                // (UnsafeCell::get_mut is not used here because it requires Rust 1.50.)
+                unsafe { &mut *self.v.get() }
+            }
+
+            #[inline]
+            pub(crate) fn into_inner(self) -> $int_type {
+                self.v.into_inner()
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn load(&self, order: Ordering) -> $int_type {
+                crate::utils::assert_load_ordering(order);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_load(self.v.get().cast::<u128>(), order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
+                crate::utils::assert_store_ordering(order);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_store(self.v.get().cast::<u128>(), val as u128, order) }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_swap(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn compare_exchange(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    match atomic_compare_exchange(
+                        self.v.get().cast::<u128>(),
+                        current as u128,
+                        new as u128,
+                        success,
+                        failure,
+                    ) {
+                        Ok(v) => Ok(v as $int_type), // cast the raw u128 back to the public type
+                        Err(v) => Err(v as $int_type),
+                    }
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn compare_exchange_weak(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    match atomic_compare_exchange_weak(
+                        self.v.get().cast::<u128>(),
+                        current as u128,
+                        new as u128,
+                        success,
+                        failure,
+                    ) {
+                        Ok(v) => Ok(v as $int_type), // cast the raw u128 back to the public type
+                        Err(v) => Err(v as $int_type),
+                    }
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_add(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_sub(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_and(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_nand(&self, val: $int_type, order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_nand(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_or(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_xor(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type {
+                // SAFETY: atomic intrinsics prevent data races; the pointer comes from a reference.
+                // `$atomic_max` is the signed or unsigned helper named by the macro caller.
+                unsafe { $atomic_max(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type {
+                // SAFETY: atomic intrinsics prevent data races; the pointer comes from a reference.
+                // `$atomic_min` is the signed or unsigned helper named by the macro caller.
+                unsafe { $atomic_min(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_not(self.v.get().cast::<u128>(), order) as $int_type }
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn not(&self, order: Ordering) {
+                self.fetch_not(order); // previous value is intentionally discarded
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_neg(self.v.get().cast::<u128>(), order) as $int_type }
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn neg(&self, order: Ordering) {
+                self.fetch_neg(order); // previous value is intentionally discarded
+            }
+
+            #[inline]
+            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
+                self.v.get() // raw pointer to the cell's contents; no access happens here
+            }
+        }
+    };
+}
+
+#[cfg(any(target_arch = "powerpc64", target_arch = "s390x", target_arch = "x86_64"))]
+#[allow(unused_macros)] // also used by intrinsics.rs
+macro_rules! atomic_rmw_by_atomic_update {
+    () => {
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |_current| val) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |current| current.wrapping_add(val)) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |current| current.wrapping_sub(val)) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |current| current & val) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |current| !(current & val)) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |current| current | val) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |current| current ^ val) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |current| !current) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_neg(dst: *mut u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |current| current.wrapping_neg()) }
+        }
+        atomic_rmw_by_atomic_update!(cmp);
+    };
+    (cmp) => {
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                atomic_update(dst, order, |current| (current as i128).max(val as i128) as u128)
+            }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |current| current.max(val)) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_min(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                atomic_update(dst, order, |current| (current as i128).min(val as i128) as u128)
+            }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |current| current.min(val)) }
+        }
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs b/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs
new file mode 100644
index 0000000..5edc147
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs
@@ -0,0 +1,947 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on PowerPC64.
+//
+// powerpc64 on pwr8+ supports 128-bit atomics:
+// https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+//
+// powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/PowerPC/PPC.td#L663
+// See also https://github.com/rust-lang/rust/issues/59932
+//
+// Note that we do not separate LL and SC into separate functions, but handle
+// them within a single asm block. This is because it is theoretically possible
+// for the compiler to insert operations that might clear the reservation between
+// LL and SC. See aarch64.rs for details.
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - Power ISA https://openpowerfoundation.org/specifications/isa
+// - AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - powerpc64 (pwr8) https://godbolt.org/z/nG5dGa38a
+// - powerpc64le https://godbolt.org/z/6c99s75e4
+
+include!("macros.rs");
+
+#[cfg(not(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+)))]
+#[path = "../fallback/outline_atomics.rs"]
+mod fallback;
+
+// On musl with static linking, it seems that getauxval is not always available.
+// See detect/auxv.rs for more.
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(test, portable_atomic_outline_atomics))] // TODO(powerpc64): currently disabled by default
+#[cfg(any(
+    test,
+    not(any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+    )),
+))]
+#[cfg(any(
+    all(
+        target_os = "linux",
+        any(
+            target_env = "gnu",
+            all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
+            portable_atomic_outline_atomics,
+        ),
+    ),
+    target_os = "android",
+    target_os = "freebsd",
+))]
+#[path = "detect/auxv.rs"]
+mod detect;
+
+use core::{arch::asm, sync::atomic::Ordering};
+
+use crate::utils::{Pair, U128};
+
+macro_rules! debug_assert_pwr8 { // debug-build check that quadword atomics were detected at run time
+    () => {
+        #[cfg(not(any(
+            target_feature = "quadword-atomics",
+            portable_atomic_target_feature = "quadword-atomics",
+        )))] // compiled out when the feature is already enabled at compile time
+        {
+            debug_assert!(detect::detect().has_quadword_atomics());
+        }
+    };
+}
+
+// Refs: https://www.ibm.com/docs/en/aix/7.3?topic=ops-machine-pseudo-op
+//
+// This is similar to #[target_feature(enable = "quadword-atomics")], except that there are
+// no compiler guarantees regarding (un)inlining, and the scope is within an asm
+// block rather than a function. We use this directive because #[target_feature(enable = "quadword-atomics")]
+// is not supported as of Rust 1.70-nightly.
+//
+// start_pwr8 and end_pwr8 must be used in pairs.
+//
+// Note: If power8 instructions are not available at compile-time, we must guarantee that
+// the function that uses it is not inlined into a function where it is not
+// clear whether power8 instructions are available. Otherwise, (even if we checked whether
+// power8 instructions are available at run-time) optimizations that reorder its
+// instructions across the if condition might introduce undefined behavior.
+// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
+// However, our code uses the ifunc helper macro, which works with function pointers,
+// so we don't have to worry about this unless a function is called without the helper macro.
+macro_rules! start_pwr8 { // asm prologue; every use must be closed by end_pwr8!()
+    () => {
+        ".machine push\n.machine power8" // save the current .machine setting, then select power8
+    };
+}
+macro_rules! end_pwr8 { // asm epilogue; must follow a matching start_pwr8!()
+    () => {
+        ".machine pop" // restore the .machine setting saved by start_pwr8!()
+    };
+}
+
+macro_rules! atomic_rmw { // expands to $op!(<acquire fence>, <release fence>) for the given ordering
+    ($op:ident, $order:ident) => {
+        match $order {
+            Ordering::Relaxed => $op!("", ""), // no fences
+            Ordering::Acquire => $op!("lwsync", ""), // acquire fence only
+            Ordering::Release => $op!("", "lwsync"), // release fence only
+            Ordering::AcqRel => $op!("lwsync", "lwsync"), // both fences
+            Ordering::SeqCst => $op!("lwsync", "sync"), // full "sync" as the release fence
+            _ => unreachable!("{:?}", $order), // Ordering is non-exhaustive; no other variant is expected
+        }
+    };
+}
+
+// Returns true if the EQ bit of cr0 is set in `r`, a condition-register copy obtained via `mfcr`.
+#[inline]
+fn extract_cr0(r: u64) -> bool {
+    r & 0x20000000 != 0 // 0x20000000 masks the EQ bit of the cr0 field
+}
+
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_load_pwr8 as atomic_load;
+#[cfg(not(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+)))] // run-time dispatched variant, used when the feature is not enabled at compile time
+#[inline]
+unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
+    fn_alias! {
+        // inline(never) is just a hint and also not strictly necessary
+        // because we use the ifunc helper macro, but it is used for clarity.
+        #[inline(never)]
+        unsafe fn(src: *mut u128) -> u128;
+        atomic_load_pwr8_relaxed = atomic_load_pwr8(Ordering::Relaxed); // one alias per supported ordering
+        atomic_load_pwr8_acquire = atomic_load_pwr8(Ordering::Acquire);
+        atomic_load_pwr8_seqcst = atomic_load_pwr8(Ordering::SeqCst);
+    }
+    // SAFETY: the caller must uphold the safety contract.
+    // We only call atomic_load_pwr8 if quadword-atomics is available.
+    unsafe {
+        match order { // each arm picks the pwr8 alias or the fallback based on run-time detection
+            Ordering::Relaxed => {
+                ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_load_pwr8_relaxed
+                    } else {
+                        fallback::atomic_load_non_seqcst // shared by Relaxed and Acquire
+                    }
+                })
+            }
+            Ordering::Acquire => {
+                ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_load_pwr8_acquire
+                    } else {
+                        fallback::atomic_load_non_seqcst
+                    }
+                })
+            }
+            Ordering::SeqCst => {
+                ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_load_pwr8_seqcst
+                    } else {
+                        fallback::atomic_load_seqcst
+                    }
+                })
+            }
+            _ => unreachable!("{:?}", order), // Release and AcqRel are not handled for loads
+        }
+    }
+}
+#[inline]
+unsafe fn atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128 {
+    debug_assert!(src as usize % 16 == 0); // src must be 16-byte aligned
+    debug_assert_pwr8!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
+    unsafe {
+        let (out_hi, out_lo);
+        macro_rules! atomic_load_acquire {
+            ($release:tt) => {
+                asm!(
+                    start_pwr8!(),
+                    $release, // "" for Acquire, "sync" for SeqCst (see the match below)
+                    "lq %r4, 0({src})",
+                    // Lightweight acquire sync
+                    // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62
+                    "cmpd %cr7, %r4, %r4", // compare the loaded register with itself
+                    "bne- %cr7, 2f", // branch on that compare; the target is the next instruction
+                    "2:",
+                    "isync",
+                    end_pwr8!(),
+                    src = in(reg_nonzero) ptr_reg!(src),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    out("r4") out_hi,
+                    out("r5") out_lo,
+                    out("cr7") _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        match order {
+            Ordering::Relaxed => {
+                asm!(
+                    start_pwr8!(),
+                    "lq %r4, 0({src})", // plain quadword load, no fences
+                    end_pwr8!(),
+                    src = in(reg_nonzero) ptr_reg!(src),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    out("r4") out_hi,
+                    out("r5") out_lo,
+                    options(nostack, preserves_flags, readonly),
+                );
+            }
+            Ordering::Acquire => atomic_load_acquire!(""),
+            Ordering::SeqCst => atomic_load_acquire!("sync"),
+            _ => unreachable!("{:?}", order), // Release and AcqRel are not handled for loads
+        }
+        U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole // reassemble the two 64-bit halves
+    }
+}
+
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_store_pwr8 as atomic_store;
+#[cfg(not(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+)))] // run-time dispatched variant, used when the feature is not enabled at compile time
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+    fn_alias! {
+        // inline(never) is just a hint and also not strictly necessary
+        // because we use the ifunc helper macro, but it is used for clarity.
+        #[inline(never)]
+        unsafe fn(dst: *mut u128, val: u128);
+        atomic_store_pwr8_relaxed = atomic_store_pwr8(Ordering::Relaxed); // one alias per supported ordering
+        atomic_store_pwr8_release = atomic_store_pwr8(Ordering::Release);
+        atomic_store_pwr8_seqcst = atomic_store_pwr8(Ordering::SeqCst);
+    }
+    // SAFETY: the caller must uphold the safety contract.
+    // We only call atomic_store_pwr8 if quadword-atomics is available.
+    unsafe {
+        match order { // each arm picks the pwr8 alias or the fallback based on run-time detection
+            Ordering::Relaxed => {
+                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_store_pwr8_relaxed
+                    } else {
+                        fallback::atomic_store_non_seqcst // shared by Relaxed and Release
+                    }
+                });
+            }
+            Ordering::Release => {
+                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_store_pwr8_release
+                    } else {
+                        fallback::atomic_store_non_seqcst
+                    }
+                });
+            }
+            Ordering::SeqCst => {
+                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_store_pwr8_seqcst
+                    } else {
+                        fallback::atomic_store_seqcst
+                    }
+                });
+            }
+            _ => unreachable!("{:?}", order), // Acquire and AcqRel are not handled for stores
+        }
+    }
+}
+#[inline]
+unsafe fn atomic_store_pwr8(dst: *mut u128, val: u128, order: Ordering) {
+    debug_assert!(dst as usize % 16 == 0); // dst must be 16-byte aligned
+    debug_assert_pwr8!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
+    unsafe {
+        let val = U128 { whole: val }; // split into hi/lo halves for the r4/r5 register pair
+        macro_rules! atomic_store {
+            ($release:tt) => {
+                asm!(
+                    start_pwr8!(),
+                    $release, // fence emitted before the store ("" for Relaxed)
+                    "stq %r4, 0({dst})",
+                    end_pwr8!(),
+                    dst = in(reg_nonzero) ptr_reg!(dst),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    in("r4") val.pair.hi,
+                    in("r5") val.pair.lo,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        match order {
+            Ordering::Relaxed => atomic_store!(""),
+            Ordering::Release => atomic_store!("lwsync"),
+            Ordering::SeqCst => atomic_store!("sync"),
+            _ => unreachable!("{:?}", order), // Acquire and AcqRel are not handled for stores
+        }
+    }
+}
+
+#[inline]
+unsafe fn atomic_compare_exchange(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> Result<u128, u128> {
+    let success = crate::utils::upgrade_success_ordering(success, failure); // the single ordering passed on below
+
+    #[cfg(any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+    ))]
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that quadword atomics instructions are available at compile-time.
+    let (prev, ok) = unsafe { atomic_compare_exchange_pwr8(dst, old, new, success) };
+    #[cfg(not(any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+    )))]
+    // SAFETY: the caller must uphold the safety contract.
+    let (prev, ok) = unsafe { atomic_compare_exchange_ifunc(dst, old, new, success) }; // run-time dispatched
+    if ok { // `prev` is returned in both cases; `ok` selects Ok vs Err
+        Ok(prev)
+    } else {
+        Err(prev)
+    }
+}
+#[inline]
+unsafe fn atomic_compare_exchange_pwr8(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    order: Ordering,
+) -> (u128, bool) { // (value loaded from dst, whether the exchange happened)
+    debug_assert!(dst as usize % 16 == 0); // dst must be 16-byte aligned
+    debug_assert_pwr8!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
+    unsafe {
+        let old = U128 { whole: old };
+        let new = U128 { whole: new };
+        let (mut prev_hi, mut prev_lo);
+        let mut r;
+        macro_rules! cmpxchg {
+            ($acquire:tt, $release:tt) => {
+                asm!(
+                    start_pwr8!(),
+                    $release,
+                    "2:",
+                        "lqarx %r8, 0, {dst}", // load the current value into r8 (hi) / r9 (lo)
+                        "xor {tmp_lo}, %r9, {old_lo}",
+                        "xor {tmp_hi}, %r8, {old_hi}",
+                        "or. {tmp_lo}, {tmp_lo}, {tmp_hi}", // result is zero (EQ set in cr0) only if both halves match
+                        "bne %cr0, 3f", // jump if compare failed
+                        "stqcx. %r6, 0, {dst}", // try to store new from r6 (hi) / r7 (lo)
+                        "bne %cr0, 2b", // continue loop if store failed
+                    "3:",
+                    // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set.
+                    "mfcr {tmp_lo}", // copy the condition register; extract_cr0 tests it below
+                    $acquire,
+                    end_pwr8!(),
+                    dst = in(reg_nonzero) ptr_reg!(dst),
+                    old_hi = in(reg) old.pair.hi,
+                    old_lo = in(reg) old.pair.lo,
+                    tmp_hi = out(reg) _,
+                    tmp_lo = out(reg) r,
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    in("r6") new.pair.hi,
+                    in("r7") new.pair.lo,
+                    out("r8") prev_hi,
+                    out("r9") prev_lo,
+                    out("cr0") _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(cmpxchg, order);
+        (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
+    }
+}
+
+// Always use strong CAS for outline-atomics.
+#[cfg(not(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+)))]
+use atomic_compare_exchange as atomic_compare_exchange_weak;
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))] // only defined with compile-time support; otherwise the strong CAS is aliased to this name
+#[inline]
+unsafe fn atomic_compare_exchange_weak(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> Result<u128, u128> {
+    let success = crate::utils::upgrade_success_ordering(success, failure); // the single ordering passed on below
+
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that quadword atomics instructions are available at compile-time.
+    let (prev, ok) = unsafe { atomic_compare_exchange_weak_pwr8(dst, old, new, success) };
+    if ok { // `prev` is returned in both cases; `ok` selects Ok vs Err
+        Ok(prev)
+    } else {
+        Err(prev)
+    }
+}
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+#[inline]
+unsafe fn atomic_compare_exchange_weak_pwr8(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    order: Ordering,
+) -> (u128, bool) { // (value loaded from dst, whether the exchange happened)
+    debug_assert!(dst as usize % 16 == 0); // dst must be 16-byte aligned
+    debug_assert_pwr8!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
+    unsafe {
+        let old = U128 { whole: old };
+        let new = U128 { whole: new };
+        let (mut prev_hi, mut prev_lo);
+        let mut r;
+        macro_rules! cmpxchg_weak {
+            ($acquire:tt, $release:tt) => {
+                asm!(
+                    start_pwr8!(),
+                    $release,
+                    "lqarx %r8, 0, {dst}", // load the current value into r8 (hi) / r9 (lo)
+                    "xor {tmp_lo}, %r9, {old_lo}",
+                    "xor {tmp_hi}, %r8, {old_hi}",
+                    "or. {tmp_lo}, {tmp_lo}, {tmp_hi}", // result is zero (EQ set in cr0) only if both halves match
+                    "bne %cr0, 3f", // jump if compare failed
+                    "stqcx. %r6, 0, {dst}", // single store attempt; unlike the strong variant there is no retry loop
+                    "3:",
+                    // if compare or stqcx failed EQ bit is cleared, if stqcx succeeds EQ bit is set.
+                    "mfcr {tmp_lo}", // copy the condition register; extract_cr0 tests it below
+                    $acquire,
+                    end_pwr8!(),
+                    dst = in(reg_nonzero) ptr_reg!(dst),
+                    old_hi = in(reg) old.pair.hi,
+                    old_lo = in(reg) old.pair.lo,
+                    tmp_hi = out(reg) _,
+                    tmp_lo = out(reg) r,
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    in("r6") new.pair.hi,
+                    in("r7") new.pair.lo,
+                    out("r8") prev_hi,
+                    out("r9") prev_lo,
+                    out("cr0") _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(cmpxchg_weak, order);
+        (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
+    }
+}
+
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_swap_pwr8 as atomic_swap;
+// Do not use atomic_rmw_ll_sc_3 because it needs extra MR to implement swap.
+#[inline]
+unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0); // dst must be 16-byte aligned
+    debug_assert_pwr8!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let val = U128 { whole: val };
+        let (mut prev_hi, mut prev_lo);
+        macro_rules! swap {
+            ($acquire:tt, $release:tt) => {
+                asm!(
+                    start_pwr8!(),
+                    $release,
+                    "2:",
+                        "lqarx %r6, 0, {dst}", // load the previous value into r6 (hi) / r7 (lo)
+                        "stqcx. %r8, 0, {dst}", // try to store val from r8 (hi) / r9 (lo)
+                        "bne %cr0, 2b", // retry if the store failed
+                    $acquire,
+                    end_pwr8!(),
+                    dst = in(reg_nonzero) ptr_reg!(dst),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    out("r6") prev_hi,
+                    out("r7") prev_lo,
+                    in("r8") val.pair.hi,
+                    in("r9") val.pair.lo,
+                    out("cr0") _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(swap, order);
+        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole // the value held before the swap
+    }
+}
+
+/// Defines `$name`, an atomic RMW implemented as an LL/SC loop (3 arguments), returning the previous value:
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+///
+/// $op can use the following registers:
+/// - val_hi/val_lo pair: val argument (read-only for `$op`)
+/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
+/// - r8/r9 pair: new value that will be stored by sc
+macro_rules! atomic_rmw_ll_sc_3 {
+    ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => {
+        #[cfg(any(
+            target_feature = "quadword-atomics",
+            portable_atomic_target_feature = "quadword-atomics",
+        ))] // with compile-time support, $name is used directly under the generic name
+        use $name as $reexport_name;
+        #[inline]
+        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0); // dst must be 16-byte aligned
+            debug_assert_pwr8!();
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                let val = U128 { whole: val };
+                let (mut prev_hi, mut prev_lo);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt) => {
+                        asm!(
+                            start_pwr8!(),
+                            $release,
+                            "2:",
+                                "lqarx %r6, 0, {dst}", // load the previous value into r6 (hi) / r7 (lo)
+                                $($op)* // caller-supplied instructions that fill r8/r9 with the new value
+                                "stqcx. %r8, 0, {dst}", // try to store the new value from r8 (hi) / r9 (lo)
+                                "bne %cr0, 2b", // retry if the store failed
+                            $acquire,
+                            end_pwr8!(),
+                            dst = in(reg_nonzero) ptr_reg!(dst),
+                            val_hi = in(reg) val.pair.hi,
+                            val_lo = in(reg) val.pair.lo,
+                            $($reg)* // extra operands/clobbers supplied by the caller
+                            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                            // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                            out("r6") prev_hi,
+                            out("r7") prev_lo,
+                            out("r8") _, // new (hi)
+                            out("r9") _, // new (lo)
+                            out("cr0") _,
+                            options(nostack, preserves_flags),
+                        )
+                    };
+                }
+                atomic_rmw!(op, order);
+                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+            }
+        }
+    };
+}
+/// Defines `$name`, an atomic RMW implemented as an LL/SC loop (2 arguments), returning the previous value:
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+///
+/// $op can use the following registers:
+/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
+/// - r8/r9 pair: new value that will be stored by sc
+macro_rules! atomic_rmw_ll_sc_2 {
+    ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => {
+        #[cfg(any(
+            target_feature = "quadword-atomics",
+            portable_atomic_target_feature = "quadword-atomics",
+        ))] // with compile-time support, $name is used directly under the generic name
+        use $name as $reexport_name;
+        #[inline]
+        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0); // dst must be 16-byte aligned
+            debug_assert_pwr8!();
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                let (mut prev_hi, mut prev_lo);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt) => {
+                        asm!(
+                            start_pwr8!(),
+                            $release,
+                            "2:",
+                                "lqarx %r6, 0, {dst}", // load the previous value into r6 (hi) / r7 (lo)
+                                $($op)* // caller-supplied instructions that fill r8/r9 with the new value
+                                "stqcx. %r8, 0, {dst}", // try to store the new value from r8 (hi) / r9 (lo)
+                                "bne %cr0, 2b", // retry if the store failed
+                            $acquire,
+                            end_pwr8!(),
+                            dst = in(reg_nonzero) ptr_reg!(dst),
+                            $($reg)* // extra operands/clobbers supplied by the caller
+                            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                            // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                            out("r6") prev_hi,
+                            out("r7") prev_lo,
+                            out("r8") _, // new (hi)
+                            out("r9") _, // new (lo)
+                            out("cr0") _,
+                            options(nostack, preserves_flags),
+                        )
+                    };
+                }
+                atomic_rmw!(op, order);
+                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+            }
+        }
+    };
+}
+
+atomic_rmw_ll_sc_3! {
+    atomic_add_pwr8 as atomic_add, [out("xer") _,], // xer is clobbered by the carrying add
+    "addc %r9, {val_lo}, %r7", // add lo 64-bit, recording the carry
+    "adde %r8, {val_hi}, %r6", // add hi 64-bit plus the carry from lo
+}
+atomic_rmw_ll_sc_3! {
+    atomic_sub_pwr8 as atomic_sub, [out("xer") _,], // xer is clobbered by the carrying subtract
+    "subc %r9, %r7, {val_lo}", // lo = prev_lo - val_lo, recording the carry (borrow)
+    "subfe %r8, {val_hi}, %r6", // hi = prev_hi - val_hi, taking the borrow from lo into account
+}
+atomic_rmw_ll_sc_3! {
+    atomic_and_pwr8 as atomic_and, [],
+    "and %r9, {val_lo}, %r7", // lo 64-bit
+    "and %r8, {val_hi}, %r6", // hi 64-bit
+}
+atomic_rmw_ll_sc_3! {
+    atomic_nand_pwr8 as atomic_nand, [],
+    "nand %r9, {val_lo}, %r7", // lo 64-bit
+    "nand %r8, {val_hi}, %r6", // hi 64-bit
+}
+atomic_rmw_ll_sc_3! {
+    atomic_or_pwr8 as atomic_or, [],
+    "or %r9, {val_lo}, %r7", // lo 64-bit
+    "or %r8, {val_hi}, %r6", // hi 64-bit
+}
+atomic_rmw_ll_sc_3! {
+    atomic_xor_pwr8 as atomic_xor, [],
+    "xor %r9, {val_lo}, %r7", // lo 64-bit
+    "xor %r8, {val_hi}, %r6", // hi 64-bit
+}
+atomic_rmw_ll_sc_3! {
+    atomic_max_pwr8 as atomic_max, [out("cr1") _,], // cr1 holds the signed compare of the hi half
+    "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
+    "iselgt %r9, %r7, {val_lo}",  // select lo 64-bit based on GT bit in cr0
+    "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
+    "isel %r8, %r7, {val_lo}, 5", // select lo 64-bit based on GT bit in cr1
+    "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
+    "iseleq %r9, %r9, %r8",       // select lo 64-bit based on EQ bit in cr0
+    "isel %r8, %r6, {val_hi}, 5", // select hi 64-bit based on GT bit in cr1
+}
+atomic_rmw_ll_sc_3! {
+    atomic_umax_pwr8 as atomic_umax, [], // unsigned: both halves are compared with cmpld, cr0 only
+    "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
+    "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
+    "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
+    "iselgt %r8, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0
+    "iseleq %r9, %r9, %r8",      // select lo 64-bit based on EQ bit in cr0
+    "iselgt %r8, %r6, {val_hi}", // select hi 64-bit based on GT bit in cr0
+}
+atomic_rmw_ll_sc_3! {
+    atomic_min_pwr8 as atomic_min, [out("cr1") _,], // cr1 holds the signed compare of the hi half
+    "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
+    "isellt %r9, %r7, {val_lo}",  // select lo 64-bit based on LT bit in cr0
+    "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
+    "isel %r8, %r7, {val_lo}, 4", // select lo 64-bit based on LT bit in cr1
+    "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
+    "iseleq %r9, %r9, %r8",       // select lo 64-bit based on EQ bit in cr0
+    "isel %r8, %r6, {val_hi}, 4", // select hi 64-bit based on LT bit in cr1
+}
+atomic_rmw_ll_sc_3! {
+    atomic_umin_pwr8 as atomic_umin, [], // unsigned: both halves are compared with cmpld, cr0 only
+    "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
+    "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
+    "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
+    "isellt %r8, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0
+    "iseleq %r9, %r9, %r8",      // select lo 64-bit based on EQ bit in cr0
+    "isellt %r8, %r6, {val_hi}", // select hi 64-bit based on LT bit in cr0
+}
+
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_not_pwr8 as atomic_not;
+#[inline]
+unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe { atomic_xor_pwr8(dst, !0, order) } // NOT is implemented as XOR with all ones
+}
+
+#[cfg(portable_atomic_llvm_16)]
+atomic_rmw_ll_sc_2! {
+    atomic_neg_pwr8 as atomic_neg, [out("xer") _,], // xer is clobbered by the carrying subtract
+    "subfic %r9, %r7, 0", // lo = 0 - prev_lo, recording the carry (borrow)
+    "subfze %r8, %r6", // hi = 0 - prev_hi, taking the borrow from lo into account
+}
+// Variant without subfic (LLVM 15 miscompiles it): subtract from a register holding zero instead.
+#[cfg(not(portable_atomic_llvm_16))]
+atomic_rmw_ll_sc_2! {
+    atomic_neg_pwr8 as atomic_neg, [zero = in(reg) 0_u64, out("xer") _,],
+    "subc %r9, {zero}, %r7", // lo = 0 - prev_lo, recording the carry (borrow)
+    "subfze %r8, %r6", // hi = 0 - prev_hi, taking the borrow from lo into account
+}
+
+macro_rules! atomic_rmw_with_ifunc { // defines $name(<args>, order), dispatching at run time to $pwr8_fn or a fallback
+    (
+        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
+        pwr8 = $pwr8_fn:ident;
+        non_seqcst_fallback = $non_seqcst_fallback_fn:ident;
+        seqcst_fallback = $seqcst_fallback_fn:ident;
+    ) => {
+        #[cfg(not(any(
+            target_feature = "quadword-atomics",
+            portable_atomic_target_feature = "quadword-atomics",
+        )))] // nothing is generated when the feature is enabled at compile time
+        #[inline]
+        unsafe fn $name($($arg)*, order: Ordering) $(-> $ret_ty)? {
+            fn_alias! {
+                // inline(never) is just a hint and also not strictly necessary
+                // because we use the ifunc helper macro, but it is used for clarity.
+                #[inline(never)]
+                unsafe fn($($arg)*) $(-> $ret_ty)?;
+                pwr8_relaxed_fn = $pwr8_fn(Ordering::Relaxed); // one alias per ordering
+                pwr8_acquire_fn = $pwr8_fn(Ordering::Acquire);
+                pwr8_release_fn = $pwr8_fn(Ordering::Release);
+                pwr8_acqrel_fn = $pwr8_fn(Ordering::AcqRel);
+                pwr8_seqcst_fn = $pwr8_fn(Ordering::SeqCst);
+            }
+            // SAFETY: the caller must uphold the safety contract.
+            // We only call pwr8_fn if quadword-atomics is available.
+            unsafe {
+                match order { // each arm picks the pwr8 alias or the fallback based on run-time detection
+                    Ordering::Relaxed => {
+                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                            if detect::detect().has_quadword_atomics() {
+                                pwr8_relaxed_fn
+                            } else {
+                                fallback::$non_seqcst_fallback_fn // shared by every ordering except SeqCst
+                            }
+                        })
+                    }
+                    Ordering::Acquire => {
+                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                            if detect::detect().has_quadword_atomics() {
+                                pwr8_acquire_fn
+                            } else {
+                                fallback::$non_seqcst_fallback_fn
+                            }
+                        })
+                    }
+                    Ordering::Release => {
+                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                            if detect::detect().has_quadword_atomics() {
+                                pwr8_release_fn
+                            } else {
+                                fallback::$non_seqcst_fallback_fn
+                            }
+                        })
+                    }
+                    Ordering::AcqRel => {
+                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                            if detect::detect().has_quadword_atomics() {
+                                pwr8_acqrel_fn
+                            } else {
+                                fallback::$non_seqcst_fallback_fn
+                            }
+                        })
+                    }
+                    Ordering::SeqCst => {
+                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                            if detect::detect().has_quadword_atomics() {
+                                pwr8_seqcst_fn
+                            } else {
+                                fallback::$seqcst_fallback_fn
+                            }
+                        })
+                    }
+                    _ => unreachable!("{:?}", order), // Ordering is non-exhaustive; no other variant is expected
+                }
+            }
+        }
+    };
+}
+
+atomic_rmw_with_ifunc! { // Defines `atomic_compare_exchange_ifunc`, which takes the arguments below plus a trailing `order: Ordering`.
+    unsafe fn atomic_compare_exchange_ifunc(dst: *mut u128, old: u128, new: u128) -> (u128, bool);
+    pwr8 = atomic_compare_exchange_pwr8; // chosen (with the requested ordering) when detect::detect().has_quadword_atomics() is true
+    non_seqcst_fallback = atomic_compare_exchange_non_seqcst; // chosen otherwise, for every ordering except SeqCst
+    seqcst_fallback = atomic_compare_exchange_seqcst; // chosen otherwise, for Ordering::SeqCst
+}
+atomic_rmw_with_ifunc! { // Every invocation below follows the same scheme: `pwr8` when quadword atomics are detected, else a `fallback::` function.
+    unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_swap_pwr8;
+    non_seqcst_fallback = atomic_swap_non_seqcst;
+    seqcst_fallback = atomic_swap_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_add_pwr8;
+    non_seqcst_fallback = atomic_add_non_seqcst;
+    seqcst_fallback = atomic_add_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_sub_pwr8;
+    non_seqcst_fallback = atomic_sub_non_seqcst;
+    seqcst_fallback = atomic_sub_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_and_pwr8;
+    non_seqcst_fallback = atomic_and_non_seqcst;
+    seqcst_fallback = atomic_and_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_nand_pwr8;
+    non_seqcst_fallback = atomic_nand_non_seqcst;
+    seqcst_fallback = atomic_nand_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_or_pwr8;
+    non_seqcst_fallback = atomic_or_non_seqcst;
+    seqcst_fallback = atomic_or_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_xor_pwr8;
+    non_seqcst_fallback = atomic_xor_non_seqcst;
+    seqcst_fallback = atomic_xor_seqcst;
+}
+atomic_rmw_with_ifunc! { // atomic_max/atomic_min are the pair handed to atomic128!(AtomicI128, ..) further down.
+    unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_max_pwr8;
+    non_seqcst_fallback = atomic_max_non_seqcst;
+    seqcst_fallback = atomic_max_seqcst;
+}
+atomic_rmw_with_ifunc! { // atomic_umax/atomic_umin are the pair handed to atomic128!(AtomicU128, ..) further down.
+    unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_umax_pwr8;
+    non_seqcst_fallback = atomic_umax_non_seqcst;
+    seqcst_fallback = atomic_umax_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_min_pwr8;
+    non_seqcst_fallback = atomic_min_non_seqcst;
+    seqcst_fallback = atomic_min_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_umin_pwr8;
+    non_seqcst_fallback = atomic_umin_non_seqcst;
+    seqcst_fallback = atomic_umin_seqcst;
+}
+atomic_rmw_with_ifunc! { // The two unary operations (not/neg) take only `dst`.
+    unsafe fn atomic_not(dst: *mut u128) -> u128;
+    pwr8 = atomic_not_pwr8;
+    non_seqcst_fallback = atomic_not_non_seqcst;
+    seqcst_fallback = atomic_not_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_neg(dst: *mut u128) -> u128;
+    pwr8 = atomic_neg_pwr8;
+    non_seqcst_fallback = atomic_neg_non_seqcst;
+    seqcst_fallback = atomic_neg_seqcst;
+}
+
+#[inline]
+fn is_lock_free() -> bool { // `true` when quadword atomics are usable: known at compile time, or else detected at run time.
+    #[cfg(any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+    ))]
+    {
+        // lqarx and stqcx. instructions are statically available, so no run-time check is needed.
+        true
+    }
+    #[cfg(not(any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+    )))]
+    {
+        detect::detect().has_quadword_atomics() // not enabled at compile time: defer to run-time detection
+    }
+}
+const IS_ALWAYS_LOCK_FREE: bool = cfg!(any( // compile-time answer only: `true` iff quadword-atomics is statically enabled
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+));
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min); // NOTE(review): atomic128! is not defined in the visible part of this file; the last two arguments name the max/min functions to use
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); // the u128 type is paired with the `u`-prefixed max/min functions
+
+#[cfg(test)]
+mod tests { // Instantiates test macros (not defined in the visible part of this file) for the 128-bit integer types.
+    use super::*;
+
+    test_atomic_int!(i128);
+    test_atomic_int!(u128);
+
+    // load/store/swap implementation is not affected by signedness, so it is
+    // enough to run the stress test for the unsigned type only.
+    stress_test!(u128);
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/s390x.rs b/vendor/portable-atomic/src/imp/atomic128/s390x.rs
new file mode 100644
index 0000000..37c2063
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/s390x.rs
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on s390x.
+//
+// s390x supports 128-bit atomic load/store/cmpxchg:
+// https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc
+//
+// LLVM's minimal supported architecture level is z10:
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/SystemZ/SystemZProcessors.td
+// This does not appear to have changed since the current s390x backend was added in LLVM 3.3:
+// https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf
+// - z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - s390x https://godbolt.org/z/b11znnEh4
+// - s390x (z196) https://godbolt.org/z/s5n9PGcv6
+// - s390x (z15) https://godbolt.org/z/Wf49h7bPf
+
+include!("macros.rs");
+
+use core::{arch::asm, sync::atomic::Ordering};
+
+use crate::utils::{Pair, U128};
+
+// Builds asm text for `$a0 = $a1 <op> $a2`: use distinct operands (the "<op>k" form) on z196 or later, otherwise split to lgr and $op.
+#[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))]
+macro_rules! distinct_op {
+    ($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
+        concat!($op, "k ", $a0, ", ", $a1, ", ", $a2)
+    };
+}
+#[cfg(not(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops")))]
+macro_rules! distinct_op {
+    ($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
+        concat!("lgr ", $a0, ", ", $a1, "\n", $op, " ", $a0, ", ", $a2) // copies $a1 into $a0 first, so $a0 must not be the same register as $a2
+    };
+}
+
+// Builds asm text for `$a0 = if $cond { $a1 } else { $a2 }`: use selgr$cond on z15 or later, otherwise split to lgr and locgr$cond.
+#[cfg(any(
+    target_feature = "miscellaneous-extensions-3",
+    portable_atomic_target_feature = "miscellaneous-extensions-3",
+))]
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+macro_rules! select_op {
+    ($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
+        concat!("selgr", $cond, " ", $a0, ", ", $a1, ", ", $a2)
+    };
+}
+#[cfg(not(any(
+    target_feature = "miscellaneous-extensions-3",
+    portable_atomic_target_feature = "miscellaneous-extensions-3",
+)))]
+#[cfg(any( // neither variant exists without load-store-on-cond; the users of select_op! below carry the same cfg
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+macro_rules! select_op {
+    ($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
+        concat!("lgr ", $a0, ", ", $a2, "\n", "locgr", $cond, " ", $a0, ", ", $a1) // default to $a2, then overwrite with $a1 if $cond holds
+    };
+}
+
+#[inline]
+unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 {
+    debug_assert!(src as usize % 16 == 0);
+
+    // SAFETY: the caller must uphold the safety contract (debug builds additionally assert 16-byte alignment above).
+    unsafe {
+        // atomic load (LPQ) is always SeqCst, which is why `_order` is ignored.
+        let (out_hi, out_lo);
+        asm!(
+            "lpq %r0, 0({src})",
+            src = in(reg) ptr_reg!(src),
+            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+            out("r0") out_hi, // even register of the pair receives the high 64 bits
+            out("r1") out_lo, // odd register of the pair receives the low 64 bits
+            options(nostack, preserves_flags),
+        );
+        U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
+    }
+}
+
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+    debug_assert!(dst as usize % 16 == 0);
+
+    // SAFETY: the caller must uphold the safety contract (debug builds additionally assert 16-byte alignment above).
+    unsafe {
+        let val = U128 { whole: val };
+        macro_rules! atomic_store { // STPQ from the r0/r1 pair, followed by the asm line passed as `$fence` (may be empty)
+            ($fence:tt) => {
+                asm!(
+                    "stpq %r0, 0({dst})",
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    in("r0") val.pair.hi,
+                    in("r1") val.pair.lo,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        match order {
+            // Relaxed and Release stores are equivalent: the store alone, with an empty fence line.
+            Ordering::Relaxed | Ordering::Release => atomic_store!(""),
+            // bcr 14,0 (fast-BCR-serialization) requires z196 or later; without it `bcr 15, 0` is used instead.
+            #[cfg(any(
+                target_feature = "fast-serialization",
+                portable_atomic_target_feature = "fast-serialization",
+            ))]
+            Ordering::SeqCst => atomic_store!("bcr 14, 0"),
+            #[cfg(not(any(
+                target_feature = "fast-serialization",
+                portable_atomic_target_feature = "fast-serialization",
+            )))]
+            Ordering::SeqCst => atomic_store!("bcr 15, 0"),
+            _ => unreachable!("{:?}", order), // any other ordering is treated as a caller bug and panics
+        }
+    }
+}
+
+#[inline]
+unsafe fn atomic_compare_exchange(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    _success: Ordering,
+    _failure: Ordering,
+) -> Result<u128, u128> {
+    debug_assert!(dst as usize % 16 == 0);
+
+    // SAFETY: the caller must uphold the safety contract (debug builds additionally assert 16-byte alignment above).
+    let prev = unsafe {
+        // atomic CAS (CDSG) is always SeqCst, which is why `_success` and `_failure` are ignored.
+        let old = U128 { whole: old }; // shadows `old` only inside this block
+        let new = U128 { whole: new };
+        let (prev_hi, prev_lo);
+        asm!(
+            "cdsg %r0, %r12, 0({dst})",
+            dst = in(reg) ptr_reg!(dst),
+            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+            inout("r0") old.pair.hi => prev_hi,
+            inout("r1") old.pair.lo => prev_lo,
+            in("r12") new.pair.hi,
+            in("r13") new.pair.lo,
+            // Do not use `preserves_flags` because CDSG modifies the condition code.
+            options(nostack),
+        );
+        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+    };
+    if prev == old { // `old` is the original u128 argument here; success is decided by value comparison, not by the condition code
+        Ok(prev)
+    } else {
+        Err(prev)
+    }
+}
+
+// cdsg is always strong.
+use atomic_compare_exchange as atomic_compare_exchange_weak;
+
+#[cfg(not(any( // only compiled when load-store-on-cond is unavailable (the asm-based min/max below need it)
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+)))]
+#[inline(always)]
+unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128
+where
+    F: FnMut(u128) -> u128,
+{
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        // This is a private function and all instances of `f` only operate on the value
+        // loaded, so there is no need to synchronize the first load/failed CAS.
+        let mut prev = atomic_load(dst, Ordering::Relaxed);
+        loop {
+            let next = f(prev); // `f` may run more than once: it is re-applied after every failed CAS
+            match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) {
+                Ok(x) => return x, // returns the value that was replaced (the previous value)
+                Err(x) => prev = x, // retry with the value the CAS observed
+            }
+        }
+    }
+}
+
+#[inline]
+unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // We could use atomic_update here, but using an inline assembly allows omitting
+    // the comparison of results and the storing/comparing of condition flags.
+    //
+    // Do not use atomic_rmw_cas_3 because it needs extra LGR to implement swap.
+    unsafe {
+        // atomic swap is always SeqCst, which is why `_order` is ignored.
+        let val = U128 { whole: val };
+        let (mut prev_hi, mut prev_lo);
+        asm!(
+            "lpq %r0, 0({dst})", // initial guess for the current value, loaded into r0/r1
+            "2:",
+                "cdsg %r0, %r12, 0({dst})", // try to replace r0/r1 (expected) with r12/r13 (`val`)
+                "jl 2b", // CAS failed: r0/r1 now hold the value seen in memory, so retry
+            dst = in(reg) ptr_reg!(dst),
+            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+            out("r0") prev_hi,
+            out("r1") prev_lo,
+            in("r12") val.pair.hi,
+            in("r13") val.pair.lo,
+            // Do not use `preserves_flags` because CDSG modifies the condition code.
+            options(nostack),
+        );
+        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+    }
+}
+
+/// Atomic RMW by CAS loop (3 arguments)
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+///
+/// `$op` can use the following registers:
+/// - val_hi/val_lo pair: val argument (read-only for `$op`)
+/// - r0/r1 pair: previous value loaded (read-only for `$op`)
+/// - r12/r13 pair: new value that will be stored
+// We could use atomic_update here, but using an inline assembly allows omitting
+// the comparison of results and the storing/comparing of condition flags.
+macro_rules! atomic_rmw_cas_3 {
+    ($name:ident, [$($reg:tt)*], $($op:tt)*) => { // `$reg`: extra asm operands (e.g. a scratch register); `$op`: asm lines computing r12/r13
+        #[inline]
+        unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                // atomic RMW is always SeqCst, which is why `_order` is ignored.
+                let val = U128 { whole: val };
+                let (mut prev_hi, mut prev_lo);
+                asm!(
+                    "lpq %r0, 0({dst})", // initial value into r0/r1
+                    "2:",
+                        $($op)*
+                        "cdsg %r0, %r12, 0({dst})", // publish r12/r13 if memory still equals r0/r1
+                        "jl 2b", // CAS failed: r0/r1 were refreshed, so recompute `$op` and retry
+                    dst = in(reg) ptr_reg!(dst),
+                    val_hi = in(reg) val.pair.hi,
+                    val_lo = in(reg) val.pair.lo,
+                    $($reg)*
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    out("r0") prev_hi,
+                    out("r1") prev_lo,
+                    out("r12") _, // clobbered: holds the high half of the new value
+                    out("r13") _, // clobbered: holds the low half of the new value
+                    // Do not use `preserves_flags` because CDSG modifies the condition code.
+                    options(nostack),
+                );
+                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+            }
+        }
+    };
+}
+/// Atomic RMW by CAS loop (2 arguments)
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+///
+/// `$op` can use the following registers:
+/// - r0/r1 pair: previous value loaded (read-only for `$op`)
+/// - r12/r13 pair: new value that will be stored
+// We could use atomic_update here, but using an inline assembly allows omitting
+// the comparison of results and the storing/comparing of condition flags.
+macro_rules! atomic_rmw_cas_2 {
+    ($name:ident, $($op:tt)*) => { // `$op`: asm lines computing the new value in r12/r13 from r0/r1
+        #[inline]
+        unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                // atomic RMW is always SeqCst, which is why `_order` is ignored.
+                let (mut prev_hi, mut prev_lo);
+                asm!(
+                    "lpq %r0, 0({dst})", // initial value into r0/r1
+                    "2:",
+                        $($op)*
+                        "cdsg %r0, %r12, 0({dst})", // publish r12/r13 if memory still equals r0/r1
+                        "jl 2b", // CAS failed: r0/r1 were refreshed, so recompute `$op` and retry
+                    dst = in(reg) ptr_reg!(dst),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    out("r0") prev_hi,
+                    out("r1") prev_lo,
+                    out("r12") _,
+                    out("r13") _,
+                    // Do not use `preserves_flags` because CDSG modifies the condition code.
+                    options(nostack),
+                );
+                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+            }
+        }
+    };
+}
+
+atomic_rmw_cas_3! {
+    atomic_add, [],
+    distinct_op!("algr", "%r13", "%r1", "{val_lo}"), // low halves: r13 = r1 + val_lo, leaving the carry in the condition code
+    "lgr %r12, %r0", // copy of the previous high half
+    "alcgr %r12, {val_hi}", // high halves: add with the carry from the low-half addition
+}
+atomic_rmw_cas_3! {
+    atomic_sub, [],
+    distinct_op!("slgr", "%r13", "%r1", "{val_lo}"), // low halves: r13 = r1 - val_lo, leaving the borrow in the condition code
+    "lgr %r12, %r0", // copy of the previous high half
+    "slbgr %r12, {val_hi}", // high halves: subtract with the borrow from the low-half subtraction
+}
+atomic_rmw_cas_3! {
+    atomic_and, [],
+    distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), // bitwise ops have no cross-half dependency: low half
+    distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), // high half
+}
+
+// Use nngrk on z15 or later; otherwise AND each half and then invert it with xihf/xilf.
+#[cfg(any(
+    target_feature = "miscellaneous-extensions-3",
+    portable_atomic_target_feature = "miscellaneous-extensions-3",
+))]
+atomic_rmw_cas_3! {
+    atomic_nand, [],
+    "nngrk %r13, %r1, {val_lo}",
+    "nngrk %r12, %r0, {val_hi}",
+}
+#[cfg(not(any(
+    target_feature = "miscellaneous-extensions-3",
+    portable_atomic_target_feature = "miscellaneous-extensions-3",
+)))]
+atomic_rmw_cas_3! {
+    atomic_nand, [],
+    distinct_op!("ngr", "%r13", "%r1", "{val_lo}"),
+    "xihf %r13, 4294967295", // XOR the upper 32 bits with 0xFFFF_FFFF ...
+    "xilf %r13, 4294967295", // ... and the lower 32 bits: together a 64-bit NOT
+    distinct_op!("ngr", "%r12", "%r0", "{val_hi}"),
+    "xihf %r12, 4294967295",
+    "xilf %r12, 4294967295",
+}
+
+atomic_rmw_cas_3! {
+    atomic_or, [],
+    distinct_op!("ogr", "%r13", "%r1", "{val_lo}"), // low half
+    distinct_op!("ogr", "%r12", "%r0", "{val_hi}"), // high half
+}
+atomic_rmw_cas_3! {
+    atomic_xor, [],
+    distinct_op!("xgr", "%r13", "%r1", "{val_lo}"), // low half
+    distinct_op!("xgr", "%r12", "%r0", "{val_hi}"), // high half
+}
+
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+    atomic_max, [],
+    "clgr %r1, {val_lo}", // unsigned compare of the low halves
+    select_op!("h", "%r12", "%r1", "{val_lo}"), // r12 (scratch for now) = larger low half
+    "cgr %r0, {val_hi}", // signed compare of the high halves
+    select_op!("h", "%r13", "%r1", "{val_lo}"), // new low = low half of the operand with the larger high half
+    "locgre %r13, %r12", // high halves equal: take the larger low half instead
+    select_op!("h", "%r12", "%r0", "{val_hi}"), // new high = larger high half (condition code is still from cgr)
+}
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+    atomic_umax, [tmp = out(reg) _,],
+    "clgr %r1, {val_lo}", // unsigned compare of the low halves
+    select_op!("h", "{tmp}", "%r1", "{val_lo}"), // tmp = larger low half
+    "clgr %r0, {val_hi}", // unsigned compare of the high halves
+    select_op!("h", "%r12", "%r0", "{val_hi}"), // new high = larger high half
+    select_op!("h", "%r13", "%r1", "{val_lo}"), // new low = low half of the operand with the larger high half
+    "cgr %r0, {val_hi}", // compare again; only the "equal" outcome is consumed below
+    "locgre %r13, {tmp}", // high halves equal: take the larger low half instead
+}
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+    atomic_min, [],
+    "clgr %r1, {val_lo}", // unsigned compare of the low halves
+    select_op!("l", "%r12", "%r1", "{val_lo}"), // r12 (scratch for now) = smaller low half
+    "cgr %r0, {val_hi}", // signed compare of the high halves
+    select_op!("l", "%r13", "%r1", "{val_lo}"), // new low = low half of the operand with the smaller high half
+    "locgre %r13, %r12", // high halves equal: take the smaller low half instead
+    select_op!("l", "%r12", "%r0", "{val_hi}"), // new high = smaller high half (condition code is still from cgr)
+}
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+    atomic_umin, [tmp = out(reg) _,],
+    "clgr %r1, {val_lo}", // unsigned compare of the low halves
+    select_op!("l", "{tmp}", "%r1", "{val_lo}"), // tmp = smaller low half
+    "clgr %r0, {val_hi}", // unsigned compare of the high halves
+    select_op!("l", "%r12", "%r0", "{val_hi}"), // new high = smaller high half
+    select_op!("l", "%r13", "%r1", "{val_lo}"), // new low = low half of the operand with the smaller high half
+    "cgr %r0, {val_hi}", // compare again; only the "equal" outcome is consumed below
+    "locgre %r13, {tmp}", // high halves equal: take the smaller low half instead
+}
+// We use atomic_update for atomic min/max on pre-z196 because
+// z10 doesn't seem to have a good way to implement 128-bit min/max.
+// loc{,g}r requires z196 or later.
+// https://godbolt.org/z/j8KG9q5oq
+#[cfg(not(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+)))]
+atomic_rmw_by_atomic_update!(cmp);
+
+atomic_rmw_cas_2! {
+    atomic_not,
+    "lgr %r13, %r1", // copy the previous low half ...
+    "xihf %r13, 4294967295", // ... and flip all 64 bits: upper 32 bits
+    "xilf %r13, 4294967295", // lower 32 bits
+    "lgr %r12, %r0", // same for the high half
+    "xihf %r12, 4294967295",
+    "xilf %r12, 4294967295",
+}
+atomic_rmw_cas_2! {
+    atomic_neg,
+    "lghi %r13, 0",
+    "slgr %r13, %r1", // low half: 0 - r1, leaving the borrow in the condition code
+    "lghi %r12, 0",
+    "slbgr %r12, %r0", // high half: 0 - r0 - borrow
+}
+
+#[inline]
+const fn is_lock_free() -> bool { // no run-time detection in this file: simply forwards the constant below
+    IS_ALWAYS_LOCK_FREE
+}
+const IS_ALWAYS_LOCK_FREE: bool = true; // unconditional: this file uses lpq/stpq/cdsg without any feature gate
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min); // NOTE(review): atomic128! is not defined in the visible part of this file; the last two arguments name the max/min functions to use
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); // the u128 type is paired with the `u`-prefixed max/min functions
+
+#[cfg(test)]
+mod tests { // Instantiates test macros (not defined in the visible part of this file) for the 128-bit integer types.
+    use super::*;
+
+    test_atomic_int!(i128);
+    test_atomic_int!(u128);
+
+    // load/store/swap implementation is not affected by signedness, so it is
+    // enough to run the stress test for the unsigned type only.
+    stress_test!(u128);
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/x86_64.rs b/vendor/portable-atomic/src/imp/atomic128/x86_64.rs
new file mode 100644
index 0000000..3b9d141
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/x86_64.rs
@@ -0,0 +1,854 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on x86_64 using CMPXCHG16B (DWCAS).
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - x86 and amd64 instruction reference https://www.felixcloutier.com/x86
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - x86_64 (+cmpxchg16b) https://godbolt.org/z/55n54WeKr
+
+include!("macros.rs");
+
+#[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
+#[path = "../fallback/outline_atomics.rs"]
+mod fallback;
+
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(not(target_env = "sgx"))]
+#[path = "detect/x86_64.rs"]
+mod detect;
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+use core::sync::atomic::Ordering;
+
+use crate::utils::{Pair, U128};
+
+// Debug-asserts that CMPXCHG16B was detected at run time; expands to nothing when the feature is enabled at compile time.
+macro_rules! debug_assert_cmpxchg16b {
+    () => {
+        #[cfg(not(any(
+            target_feature = "cmpxchg16b",
+            portable_atomic_target_feature = "cmpxchg16b",
+        )))]
+        {
+            debug_assert!(detect::detect().has_cmpxchg16b());
+        }
+    };
+}
+#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] // same cfg pair as the vmovdqa load/store functions that use this macro
+#[cfg(target_feature = "sse")]
+macro_rules! debug_assert_vmovdqa_atomic {
+    () => {{
+        debug_assert_cmpxchg16b!();
+        debug_assert!(detect::detect().has_vmovdqa_atomic()); // unlike the CMPXCHG16B check, this one is always a run-time check
+    }};
+}
+
+#[allow(unused_macros)] // only referenced by the vmovdqa load/store functions, which are themselves cfg-gated
+#[cfg(target_pointer_width = "32")]
+macro_rules! ptr_modifier {
+    () => {
+        ":e" // spliced into `{src...}`/`{dst...}` so the operand is printed with its 32-bit register name
+    };
+}
+#[allow(unused_macros)]
+#[cfg(target_pointer_width = "64")]
+macro_rules! ptr_modifier {
+    () => {
+        "" // 64-bit pointers: no template modifier needed
+    };
+}
+
+#[cfg_attr(
+    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+    target_feature(enable = "cmpxchg16b")
+)]
+#[inline]
+unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_cmpxchg16b!();
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned (required by CMPXCHG16B), that there are no
+    // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B.
+    //
+    // If the value at `dst` (destination operand) and rdx:rax are equal, the
+    // 128-bit value in rcx:rbx is stored in the `dst`, otherwise the value at
+    // `dst` is loaded to rdx:rax.
+    //
+    // The ZF flag is set if the value at `dst` and rdx:rax are equal,
+    // otherwise it is cleared. Other flags are unaffected.
+    //
+    // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
+    unsafe {
+        // cmpxchg16b is always SeqCst.
+        let r: u8; // receives ZF via `sete`: non-zero when the exchange happened
+        let old = U128 { whole: old };
+        let new = U128 { whole: new };
+        let (prev_lo, prev_hi);
+        macro_rules! cmpxchg16b {
+            ($rdi:tt) => {
+                asm!(
+                    // rbx is reserved by LLVM, so swap `new.lo` into it and keep the original rbx in rbx_tmp
+                    "xchg {rbx_tmp}, rbx",
+                    concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                    "sete r8b",
+                    "mov rbx, {rbx_tmp}", // restore rbx
+                    rbx_tmp = inout(reg) new.pair.lo => _,
+                    in("rcx") new.pair.hi,
+                    inout("rax") old.pair.lo => prev_lo,
+                    inout("rdx") old.pair.hi => prev_hi,
+                    in($rdi) dst,
+                    out("r8b") r,
+                    // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                    options(nostack),
+                )
+            };
+        }
+        #[cfg(target_pointer_width = "32")]
+        cmpxchg16b!("edi"); // 32-bit pointers: address the destination through edi
+        #[cfg(target_pointer_width = "64")]
+        cmpxchg16b!("rdi");
+        (U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole, r != 0) // (value left in rdx:rax by the instruction, whether the exchange happened)
+    }
+}
+
+// VMOVDQA is atomic on Intel and AMD CPUs with AVX.
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details.
+//
+// Refs: https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
+//
+// Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
+// https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
+#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
+#[cfg(target_feature = "sse")]
+#[target_feature(enable = "avx")]
+#[inline]
+unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 {
+    debug_assert!(src as usize % 16 == 0);
+    debug_assert_vmovdqa_atomic!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // atomic load by vmovdqa is always SeqCst, so this function takes no ordering argument.
+    unsafe {
+        let out: core::arch::x86_64::__m128; // 128-bit vector type, used only so the value can live in an xmm register
+        asm!(
+            concat!("vmovdqa {out}, xmmword ptr [{src", ptr_modifier!(), "}]"),
+            src = in(reg) src,
+            out = out(xmm_reg) out,
+            options(nostack, preserves_flags),
+        );
+        core::mem::transmute(out) // reinterpret the 16 loaded bytes as u128
+    }
+}
+#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
+#[cfg(target_feature = "sse")]
+#[target_feature(enable = "avx")]
+#[inline]
+unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) {
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_vmovdqa_atomic!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let val: core::arch::x86_64::__m128 = core::mem::transmute(val); // reinterpret as a vector so it can be passed in an xmm register
+        match order {
+            // Relaxed and Release stores are equivalent: a plain vmovdqa store.
+            Ordering::Relaxed | Ordering::Release => {
+                asm!(
+                    concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"),
+                    dst = in(reg) dst,
+                    val = in(xmm_reg) val,
+                    options(nostack, preserves_flags),
+                );
+            }
+            Ordering::SeqCst => {
+                asm!(
+                    concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"),
+                    "mfence", // SeqCst differs from the arm above only by this trailing fence
+                    dst = in(reg) dst,
+                    val = in(xmm_reg) val,
+                    options(nostack, preserves_flags),
+                );
+            }
+            _ => unreachable!("{:?}", order), // any other ordering is treated as a caller bug and panics
+        }
+    }
+}
+
+#[cfg(not(all( // defined only when run-time selection is possible (inverse of the static-CMPXCHG16B-only cfg used in atomic_load)
+    any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+    any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
+)))]
+macro_rules! load_store_detect {
+    (
+        vmovdqa = $vmovdqa:ident
+        cmpxchg16b = $cmpxchg16b:ident
+        fallback = $fallback:ident
+    ) => {{
+        let cpuid = detect::detect();
+        #[cfg(not(any(
+            target_feature = "cmpxchg16b",
+            portable_atomic_target_feature = "cmpxchg16b",
+        )))]
+        {
+            // Check CMPXCHG16B first to prevent mixing atomic and non-atomic access.
+            if cpuid.has_cmpxchg16b() {
+                // We do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
+                #[cfg(target_feature = "sse")]
+                {
+                    if cpuid.has_vmovdqa_atomic() {
+                        $vmovdqa
+                    } else {
+                        $cmpxchg16b
+                    }
+                }
+                #[cfg(not(target_feature = "sse"))]
+                {
+                    $cmpxchg16b
+                }
+            } else {
+                fallback::$fallback // no CMPXCHG16B at run time: use the function from the `fallback` module
+            }
+        }
+        #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+        { // CMPXCHG16B is statically available, so only the vmovdqa choice is made at run time
+            if cpuid.has_vmovdqa_atomic() {
+                $vmovdqa
+            } else {
+                $cmpxchg16b
+            }
+        }
+    }};
+}
+
+#[inline]
+unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 {
+    // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
+    // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
+    // SGX doesn't support CPUID, so no run-time detection is attempted there.
+    #[cfg(all(
+        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
+    ))]
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that CMPXCHG16B is available at compile-time.
+    unsafe {
+        // cmpxchg16b is always SeqCst, which is why `_order` is ignored.
+        atomic_load_cmpxchg16b(src)
+    }
+    #[cfg(not(all(
+        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
+    )))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        ifunc!(unsafe fn(src: *mut u128) -> u128 {
+            load_store_detect! {
+                vmovdqa = atomic_load_vmovdqa
+                cmpxchg16b = atomic_load_cmpxchg16b
+                // Use SeqCst because cmpxchg16b and atomic load by vmovdqa is always SeqCst.
+                fallback = atomic_load_seqcst
+            }
+        })
+    }
+}
+#[cfg_attr(
+    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+    target_feature(enable = "cmpxchg16b")
+)]
+#[inline]
+unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 {
+    debug_assert!(src as usize % 16 == 0);
+    debug_assert_cmpxchg16b!();
+
+    // SAFETY: the caller must guarantee that `src` is valid for both writes and
+    // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+    // cfg guarantees that the CPU supports CMPXCHG16B.
+    //
+    // See cmpxchg16b function for more.
+    //
+    // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows
+    // omitting the storing of condition flags and avoid use of xchg to handle rbx.
+    unsafe {
+        // cmpxchg16b is always SeqCst.
+        let (out_lo, out_hi);
+        macro_rules! cmpxchg16b {
+            ($rdi:tt) => {
+                asm!(
+                    // rbx is reserved by LLVM, so it is saved to rbx_tmp and restored after the instruction
+                    "mov {rbx_tmp}, rbx",
+                    "xor rbx, rbx", // zeroed rbx
+                    concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                    "mov rbx, {rbx_tmp}", // restore rbx
+                    // set old/new args of cmpxchg16b to 0 (rbx is zeroed after saved to rbx_tmp, to avoid xchg)
+                    rbx_tmp = out(reg) _,
+                    in("rcx") 0_u64,
+                    inout("rax") 0_u64 => out_lo, // expected = 0 in, current value out
+                    inout("rdx") 0_u64 => out_hi,
+                    in($rdi) src,
+                    // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                    options(nostack),
+                )
+            };
+        }
+        #[cfg(target_pointer_width = "32")]
+        cmpxchg16b!("edi");
+        #[cfg(target_pointer_width = "64")]
+        cmpxchg16b!("rdi");
+        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole // a stored 0 is rewritten as 0; otherwise rdx:rax received the current value
+    }
+}
+
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+    // Atomic 128-bit store. Do not use vector registers on targets such as x86_64-unknown-none
+    // unless SSE is explicitly enabled: https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
+    // SGX doesn't support CPUID.
+    #[cfg(all(
+        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
+    ))]
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that CMPXCHG16B is available at compile-time.
+    unsafe {
+        // cmpxchg16b is always SeqCst, so `order` is ignored on this path.
+        let _ = order;
+        atomic_store_cmpxchg16b(dst, val);
+    }
+    #[cfg(not(all(
+        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
+    )))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        #[cfg(target_feature = "sse")]
+        fn_alias! {
+            #[target_feature(enable = "avx")]
+            unsafe fn(dst: *mut u128, val: u128);
+            // atomic store by vmovdqa has at least release semantics.
+            atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release);
+            atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst);
+        }
+        match order {
+            // Relaxed and Release stores are equivalent in all implementations
+            // that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback).
+            // core::arch's cmpxchg16b will never be called here.
+            Ordering::Relaxed | Ordering::Release => {
+                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                    load_store_detect! {
+                        vmovdqa = atomic_store_vmovdqa_non_seqcst
+                        cmpxchg16b = atomic_store_cmpxchg16b
+                        fallback = atomic_store_non_seqcst
+                    }
+                });
+            }
+            Ordering::SeqCst => {
+                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                    load_store_detect! {
+                        vmovdqa = atomic_store_vmovdqa_seqcst
+                        cmpxchg16b = atomic_store_cmpxchg16b
+                        fallback = atomic_store_seqcst
+                    }
+                });
+            }
+            _ => unreachable!("{:?}", order), // any other ordering (e.g. Acquire/AcqRel, which are invalid for stores) is treated as a bug
+        }
+    }
+}
+#[cfg_attr(
+    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+    target_feature(enable = "cmpxchg16b")
+)]
+unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) {
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        // cmpxchg16b is always SeqCst. The store is implemented as a swap whose previous value is discarded.
+        atomic_swap_cmpxchg16b(dst, val, Ordering::SeqCst);
+    }
+}
+
+#[inline]
+unsafe fn atomic_compare_exchange(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    _success: Ordering, // ignored: cmpxchg16b is always SeqCst
+    _failure: Ordering, // ignored for the same reason
+) -> Result<u128, u128> {
+    #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+    // and cfg guarantees that CMPXCHG16B is available at compile-time.
+    let (prev, ok) = unsafe { cmpxchg16b(dst, old, new) };
+    #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses.
+    let (prev, ok) = unsafe {
+        ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
+            if detect::detect().has_cmpxchg16b() {
+                cmpxchg16b
+            } else {
+                // Use SeqCst because cmpxchg16b is always SeqCst.
+                fallback::atomic_compare_exchange_seqcst
+            }
+        })
+    };
+    if ok { // map the (value, success flag) pair onto the usual `Result` shape
+        Ok(prev)
+    } else {
+        Err(prev)
+    }
+}
+
+// cmpxchg16b is always strong.
+use atomic_compare_exchange as atomic_compare_exchange_weak;
+
+#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+use atomic_swap_cmpxchg16b as atomic_swap;
+#[cfg_attr(
+    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+    target_feature(enable = "cmpxchg16b")
+)]
+#[inline]
+unsafe fn atomic_swap_cmpxchg16b(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_cmpxchg16b!();
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+    // cfg guarantees that the CPU supports CMPXCHG16B.
+    //
+    // See cmpxchg16b function for more.
+    //
+    // We could use a CAS loop via atomic_compare_exchange here, but using inline assembly allows
+    // omitting the storing/comparing of condition flags and reduces the uses of xchg/mov to handle rbx.
+    //
+    // Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap.
+    unsafe {
+        // cmpxchg16b is always SeqCst, so `_order` is ignored.
+        let val = U128 { whole: val };
+        let (mut prev_lo, mut prev_hi);
+        macro_rules! cmpxchg16b {
+            ($rdi:tt) => {
+                asm!(
+                    // rbx is reserved by LLVM
+                    "xchg {rbx_tmp}, rbx", // rbx = val.lo, rbx_tmp = saved rbx
+                    // These two loads are not a single-copy atomic read, but that is OK because the
+                    // subsequent CAS will check for consistency.
+                    //
+                    // This is based on the code generated for the first load in DW RMWs by LLVM.
+                    //
+                    // Note that the C++20 memory model does not allow mixed-sized atomic access,
+                    // so we must use inline assembly to implement this.
+                    // (i.e., byte-wise atomic based on the standard library's atomic types
+                    // cannot be used here).
+                    concat!("mov rax, qword ptr [", $rdi, "]"),
+                    concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
+                    "2:",
+                        concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                        "jne 2b", // ZF clear means the CAS failed: retry
+                    "mov rbx, {rbx_tmp}", // restore rbx
+                    rbx_tmp = inout(reg) val.pair.lo => _, // low half of the new value in; exchanged with rbx above
+                    in("rcx") val.pair.hi, // high half of the new value
+                    out("rax") prev_lo, // low half of the previous value
+                    out("rdx") prev_hi, // high half of the previous value
+                    in($rdi) dst, // address operand
+                    // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                    options(nostack),
+                )
+            };
+        }
+        #[cfg(target_pointer_width = "32")]
+        cmpxchg16b!("edi"); // 32-bit pointers: address `dst` through edi
+        #[cfg(target_pointer_width = "64")]
+        cmpxchg16b!("rdi"); // 64-bit pointers: address `dst` through rdi
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // previous value, reassembled from its halves
+    }
+}
+
+/// Atomic RMW by CAS loop (3 arguments): defines `$name`, which applies `$op` in a CAS loop and returns the previous value.
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+///
+/// `$op` can use the following registers:
+/// - rsi/r8 pair: val argument (read-only for `$op`)
+/// - rax/rdx pair: previous value loaded (read-only for `$op`)
+/// - rbx/rcx pair: new value that will be stored (must be computed by `$op`)
+// We could use a CAS loop via atomic_compare_exchange here, but using inline assembly allows
+// omitting the storing/comparing of condition flags and reduces the uses of xchg/mov to handle rbx.
+macro_rules! atomic_rmw_cas_3 {
+    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
+        #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+        use $name as $reexport_name; // static alias when CMPXCHG16B is enabled at compile time
+        #[cfg_attr(
+            not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+            target_feature(enable = "cmpxchg16b")
+        )]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            debug_assert_cmpxchg16b!();
+            // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+            // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+            // cfg guarantees that the CPU supports CMPXCHG16B.
+            //
+            // See cmpxchg16b function for more.
+            unsafe {
+                // cmpxchg16b is always SeqCst, so `_order` is ignored.
+                let val = U128 { whole: val };
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! cmpxchg16b {
+                    ($rdi:tt) => {
+                        asm!(
+                            // rbx is reserved by LLVM
+                            "mov {rbx_tmp}, rbx", // save rbx
+                            // These two loads are not a single-copy atomic read, but that is OK because the
+                            // subsequent CAS will check for consistency.
+                            //
+                            // This is based on the code generated for the first load in DW RMWs by LLVM.
+                            //
+                            // Note that the C++20 memory model does not allow mixed-sized atomic access,
+                            // so we must use inline assembly to implement this.
+                            // (i.e., byte-wise atomic based on the standard library's atomic types
+                            // cannot be used here).
+                            concat!("mov rax, qword ptr [", $rdi, "]"),
+                            concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
+                            "2:",
+                                $($op)*
+                                concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                                "jne 2b", // ZF clear means the CAS failed: recompute and retry
+                            "mov rbx, {rbx_tmp}", // restore rbx
+                            rbx_tmp = out(reg) _, // scratch register that holds the saved rbx
+                            out("rcx") _, // high half of the new value, written by `$op`
+                            out("rax") prev_lo, // low half of the previous value
+                            out("rdx") prev_hi, // high half of the previous value
+                            in($rdi) dst, // address operand
+                            in("rsi") val.pair.lo, // low half of `val`
+                            in("r8") val.pair.hi, // high half of `val`
+                            // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                            options(nostack),
+                        )
+                    };
+                }
+                #[cfg(target_pointer_width = "32")]
+                cmpxchg16b!("edi"); // 32-bit pointers: address `dst` through edi
+                #[cfg(target_pointer_width = "64")]
+                cmpxchg16b!("rdi"); // 64-bit pointers: address `dst` through rdi
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // previous value, reassembled from its halves
+            }
+        }
+    };
+}
+/// Atomic RMW by CAS loop (2 arguments): defines `$name`, which applies `$op` in a CAS loop and returns the previous value.
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+///
+/// `$op` can use the following registers:
+/// - rax/rdx pair: previous value loaded (read-only for `$op`)
+/// - rbx/rcx pair: new value that will be stored (must be computed by `$op`)
+// We could use a CAS loop via atomic_compare_exchange here, but using inline assembly allows
+// omitting the storing of condition flags and avoids the use of xchg to handle rbx.
+macro_rules! atomic_rmw_cas_2 {
+    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
+        #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+        use $name as $reexport_name; // static alias when CMPXCHG16B is enabled at compile time
+        #[cfg_attr(
+            not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+            target_feature(enable = "cmpxchg16b")
+        )]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            debug_assert_cmpxchg16b!();
+            // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+            // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+            // cfg guarantees that the CPU supports CMPXCHG16B.
+            //
+            // See cmpxchg16b function for more.
+            unsafe {
+                // cmpxchg16b is always SeqCst, so `_order` is ignored.
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! cmpxchg16b {
+                    ($rdi:tt) => {
+                        asm!(
+                            // rbx is reserved by LLVM
+                            "mov {rbx_tmp}, rbx", // save rbx
+                            // These two loads are not a single-copy atomic read, but that is OK because the
+                            // subsequent CAS will check for consistency.
+                            //
+                            // This is based on the code generated for the first load in DW RMWs by LLVM.
+                            //
+                            // Note that the C++20 memory model does not allow mixed-sized atomic access,
+                            // so we must use inline assembly to implement this.
+                            // (i.e., byte-wise atomic based on the standard library's atomic types
+                            // cannot be used here).
+                            concat!("mov rax, qword ptr [", $rdi, "]"),
+                            concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
+                            "2:",
+                                $($op)*
+                                concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                                "jne 2b", // ZF clear means the CAS failed: recompute and retry
+                            "mov rbx, {rbx_tmp}", // restore rbx
+                            rbx_tmp = out(reg) _, // scratch register that holds the saved rbx
+                            out("rcx") _, // high half of the new value, written by `$op`
+                            out("rax") prev_lo, // low half of the previous value
+                            out("rdx") prev_hi, // high half of the previous value
+                            in($rdi) dst, // address operand
+                            // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                            options(nostack),
+                        )
+                    };
+                }
+                #[cfg(target_pointer_width = "32")]
+                cmpxchg16b!("edi"); // 32-bit pointers: address `dst` through edi
+                #[cfg(target_pointer_width = "64")]
+                cmpxchg16b!("rdi"); // 64-bit pointers: address `dst` through rdi
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // previous value, reassembled from its halves
+            }
+        }
+    };
+}
+
+atomic_rmw_cas_3! {
+    atomic_add_cmpxchg16b as atomic_add, // new = prev + val (128-bit, carry propagated)
+    "mov rbx, rax", // new_lo = prev_lo
+    "add rbx, rsi", // new_lo += val_lo (sets CF on carry)
+    "mov rcx, rdx", // new_hi = prev_hi (mov leaves CF intact)
+    "adc rcx, r8", // new_hi += val_hi + CF
+}
+atomic_rmw_cas_3! {
+    atomic_sub_cmpxchg16b as atomic_sub, // new = prev - val (128-bit, borrow propagated)
+    "mov rbx, rax", // new_lo = prev_lo
+    "sub rbx, rsi", // new_lo -= val_lo (sets CF on borrow)
+    "mov rcx, rdx", // new_hi = prev_hi (mov leaves CF intact)
+    "sbb rcx, r8", // new_hi -= val_hi + CF
+}
+atomic_rmw_cas_3! {
+    atomic_and_cmpxchg16b as atomic_and, // new = prev & val
+    "mov rbx, rax",
+    "and rbx, rsi",
+    "mov rcx, rdx",
+    "and rcx, r8",
+}
+atomic_rmw_cas_3! {
+    atomic_nand_cmpxchg16b as atomic_nand, // new = !(prev & val)
+    "mov rbx, rax",
+    "and rbx, rsi",
+    "not rbx",
+    "mov rcx, rdx",
+    "and rcx, r8",
+    "not rcx",
+}
+atomic_rmw_cas_3! {
+    atomic_or_cmpxchg16b as atomic_or, // new = prev | val
+    "mov rbx, rax",
+    "or rbx, rsi",
+    "mov rcx, rdx",
+    "or rcx, r8",
+}
+atomic_rmw_cas_3! {
+    atomic_xor_cmpxchg16b as atomic_xor, // new = prev ^ val
+    "mov rbx, rax",
+    "xor rbx, rsi",
+    "mov rcx, rdx",
+    "xor rcx, r8",
+}
+
+atomic_rmw_cas_2! {
+    atomic_not_cmpxchg16b as atomic_not, // new = !prev
+    "mov rbx, rax",
+    "not rbx",
+    "mov rcx, rdx",
+    "not rcx",
+}
+atomic_rmw_cas_2! {
+    atomic_neg_cmpxchg16b as atomic_neg, // new = 0 - prev (128-bit two's complement negation)
+    "mov rbx, rax", // new_lo = prev_lo
+    "neg rbx", // new_lo = -new_lo (CF is set iff the operand was non-zero)
+    "mov rcx, 0", // new_hi = 0; mov is used because it leaves CF intact
+    "sbb rcx, rdx", // new_hi = 0 - prev_hi - CF
+}
+
+atomic_rmw_cas_3! {
+    atomic_max_cmpxchg16b as atomic_max, // signed max: new = if val < prev { prev } else { val }
+    "cmp rsi, rax", // val_lo - prev_lo (sets CF on borrow)
+    "mov rcx, r8",
+    "sbb rcx, rdx", // val_hi - prev_hi - CF: CF/SF/OF now reflect the 128-bit `val - prev`
+    "mov rcx, r8", // rcx was overwritten by the sbb; reload val_hi (mov leaves flags intact)
+    "cmovl rcx, rdx", // signed val < prev: new_hi = prev_hi
+    "mov rbx, rsi", // new_lo = val_lo
+    "cmovl rbx, rax", // signed val < prev: new_lo = prev_lo
+}
+atomic_rmw_cas_3! {
+    atomic_umax_cmpxchg16b as atomic_umax, // unsigned max: new = if val < prev { prev } else { val }
+    "cmp rsi, rax", // val_lo - prev_lo (sets CF on borrow)
+    "mov rcx, r8",
+    "sbb rcx, rdx", // val_hi - prev_hi - CF: CF now reflects the 128-bit unsigned `val < prev`
+    "mov rcx, r8", // rcx was overwritten by the sbb; reload val_hi (mov leaves flags intact)
+    "cmovb rcx, rdx", // unsigned val < prev: new_hi = prev_hi
+    "mov rbx, rsi", // new_lo = val_lo
+    "cmovb rbx, rax", // unsigned val < prev: new_lo = prev_lo
+}
+atomic_rmw_cas_3! {
+    atomic_min_cmpxchg16b as atomic_min, // signed min: new = if val >= prev { prev } else { val }
+    "cmp rsi, rax", // val_lo - prev_lo (sets CF on borrow)
+    "mov rcx, r8",
+    "sbb rcx, rdx", // val_hi - prev_hi - CF: CF/SF/OF now reflect the 128-bit `val - prev`
+    "mov rcx, r8", // rcx was overwritten by the sbb; reload val_hi (mov leaves flags intact)
+    "cmovge rcx, rdx", // signed val >= prev: new_hi = prev_hi
+    "mov rbx, rsi", // new_lo = val_lo
+    "cmovge rbx, rax", // signed val >= prev: new_lo = prev_lo
+}
+atomic_rmw_cas_3! {
+    atomic_umin_cmpxchg16b as atomic_umin, // unsigned min: new = if val >= prev { prev } else { val }
+    "cmp rsi, rax", // val_lo - prev_lo (sets CF on borrow)
+    "mov rcx, r8",
+    "sbb rcx, rdx", // val_hi - prev_hi - CF: CF now reflects the 128-bit unsigned `val < prev`
+    "mov rcx, r8", // rcx was overwritten by the sbb; reload val_hi (mov leaves flags intact)
+    "cmovae rcx, rdx", // unsigned val >= prev: new_hi = prev_hi
+    "mov rbx, rsi", // new_lo = val_lo
+    "cmovae rbx, rax", // unsigned val >= prev: new_lo = prev_lo
+}
+
+macro_rules! atomic_rmw_with_ifunc { // defines `$name` as a runtime-dispatched wrapper around `$cmpxchg16b_fn`
+    (
+        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
+        cmpxchg16b = $cmpxchg16b_fn:ident;
+        fallback = $seqcst_fallback_fn:ident;
+    ) => {
+        #[cfg(not(any( // the wrapper exists only when CMPXCHG16B is not enabled at compile time
+            target_feature = "cmpxchg16b",
+            portable_atomic_target_feature = "cmpxchg16b",
+        )))]
+        #[inline]
+        unsafe fn $name($($arg)*, _order: Ordering) $(-> $ret_ty)? {
+            fn_alias! {
+                #[cfg_attr(
+                    not(any(
+                        target_feature = "cmpxchg16b",
+                        portable_atomic_target_feature = "cmpxchg16b",
+                    )),
+                    target_feature(enable = "cmpxchg16b")
+                )]
+                unsafe fn($($arg)*) $(-> $ret_ty)?;
+                // cmpxchg16b is always SeqCst.
+                cmpxchg16b_seqcst_fn = $cmpxchg16b_fn(Ordering::SeqCst);
+            }
+            // SAFETY: the caller must uphold the safety contract.
+            // We only call cmpxchg16b_fn if cmpxchg16b is available.
+            unsafe {
+                ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                    if detect::detect().has_cmpxchg16b() {
+                        cmpxchg16b_seqcst_fn
+                    } else {
+                        // Use SeqCst because cmpxchg16b is always SeqCst.
+                        fallback::$seqcst_fallback_fn
+                    }
+                })
+            }
+        }
+    };
+}
+
+atomic_rmw_with_ifunc! { // each invocation below pairs one cmpxchg16b-based RMW function with its SeqCst fallback
+    unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_swap_cmpxchg16b;
+    fallback = atomic_swap_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_add_cmpxchg16b;
+    fallback = atomic_add_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_sub_cmpxchg16b;
+    fallback = atomic_sub_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_and_cmpxchg16b;
+    fallback = atomic_and_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_nand_cmpxchg16b;
+    fallback = atomic_nand_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_or_cmpxchg16b;
+    fallback = atomic_or_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_xor_cmpxchg16b;
+    fallback = atomic_xor_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_max_cmpxchg16b;
+    fallback = atomic_max_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_umax_cmpxchg16b;
+    fallback = atomic_umax_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_min_cmpxchg16b;
+    fallback = atomic_min_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_umin_cmpxchg16b;
+    fallback = atomic_umin_seqcst;
+}
+atomic_rmw_with_ifunc! { // the two-argument form: no `val` operand
+    unsafe fn atomic_not(dst: *mut u128) -> u128;
+    cmpxchg16b = atomic_not_cmpxchg16b;
+    fallback = atomic_not_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_neg(dst: *mut u128) -> u128;
+    cmpxchg16b = atomic_neg_cmpxchg16b;
+    fallback = atomic_neg_seqcst;
+}
+
+#[inline]
+fn is_lock_free() -> bool { // whether CMPXCHG16B can be used: known statically or detected at run time
+    #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+    {
+        // CMPXCHG16B is available at compile-time.
+        true
+    }
+    #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
+    {
+        detect::detect().has_cmpxchg16b() // not known at compile time: ask the runtime detection
+    }
+}
+const IS_ALWAYS_LOCK_FREE: bool = // true only when CMPXCHG16B is enabled at compile time
+    cfg!(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"));
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min); // signed type: passes the signed max/min functions
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); // unsigned type: passes the unsigned max/min functions
+
+#[allow(clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_atomic_int!(i128);
+    test_atomic_int!(u128);
+
+    // The load/store/swap implementations are not affected by signedness, so
+    // stress-testing only the unsigned type is enough.
+    stress_test!(u128);
+}
diff --git a/vendor/portable-atomic/src/imp/core_atomic.rs b/vendor/portable-atomic/src/imp/core_atomic.rs
new file mode 100644
index 0000000..3525018
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/core_atomic.rs
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Wrap the standard library's atomic types in newtype.
+//
+// This is not a reexport, because we want to backport changes like
+// https://github.com/rust-lang/rust/pull/98383 to old compilers.
+
+use core::{cell::UnsafeCell, marker::PhantomData, sync::atomic::Ordering};
+
+// core::panic::RefUnwindSafe is only available on Rust 1.56+, so on pre-1.56
+// Rust, we implement RefUnwindSafe when the "std" feature is enabled.
+// However, on pre-1.56 Rust, the standard library's atomic types implement
+// RefUnwindSafe when "linked to std", which is behavior that our other atomic
+// implementations can't emulate, so we use PhantomData<NotRefUnwindSafe> to match
+// the conditions under which our other atomic implementations implement RefUnwindSafe.
+// If we did not do this, a downstream crate that is only tested on x86_64 could, for example,
+// incorrectly assume that AtomicU64 always implements RefUnwindSafe even on
+// older rustc, and then break on platforms where std AtomicU64 is not available.
+struct NotRefUnwindSafe(UnsafeCell<()>); // marker type; the UnsafeCell field is what keeps it from being RefUnwindSafe
+// SAFETY: this is a marker type and we'll never access the value.
+unsafe impl Sync for NotRefUnwindSafe {}
+
+#[repr(transparent)] // same layout as `inner`; the PhantomData field is zero-sized
+pub(crate) struct AtomicPtr<T> {
+    inner: core::sync::atomic::AtomicPtr<T>, // the wrapped standard-library atomic
+    // Prevent RefUnwindSafe from being propagated from the std atomic type. See NotRefUnwindSafe for more.
+    _not_ref_unwind_safe: PhantomData<NotRefUnwindSafe>,
+}
+impl<T> AtomicPtr<T> { // construction, load/store and raw access; this impl has no cfg gate
+    #[inline]
+    pub(crate) const fn new(v: *mut T) -> Self {
+        Self { inner: core::sync::atomic::AtomicPtr::new(v), _not_ref_unwind_safe: PhantomData }
+    }
+    #[inline]
+    pub(crate) fn is_lock_free() -> bool {
+        Self::is_always_lock_free()
+    }
+    #[inline]
+    pub(crate) const fn is_always_lock_free() -> bool {
+        true
+    }
+    #[inline]
+    pub(crate) fn get_mut(&mut self) -> &mut *mut T {
+        self.inner.get_mut()
+    }
+    #[inline]
+    pub(crate) fn into_inner(self) -> *mut T {
+        self.inner.into_inner()
+    }
+    #[inline]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub(crate) fn load(&self, order: Ordering) -> *mut T {
+        crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
+        self.inner.load(order)
+    }
+    #[inline]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub(crate) fn store(&self, ptr: *mut T, order: Ordering) {
+        crate::utils::assert_store_ordering(order); // for track_caller (compiler can omit double check)
+        self.inner.store(ptr, order);
+    }
+    const_fn! {
+        const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))]; // presumably makes the fn `const` only under this cfg; confirm against const_fn!
+        #[inline]
+        pub(crate) const fn as_ptr(&self) -> *mut *mut T {
+            // SAFETY: Self is #[repr(transparent)] over the std AtomicPtr<T>, which is internally UnsafeCell<*mut T>.
+            // See also https://github.com/rust-lang/rust/pull/66705 and
+            // https://github.com/rust-lang/rust/issues/66136#issuecomment-557867116.
+            unsafe { (*(self as *const Self as *const UnsafeCell<*mut T>)).get() }
+        }
+    }
+}
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))]
+#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
+impl<T> AtomicPtr<T> { // compare-and-swap operations, compiled only under the cfg gates above
+    #[inline]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub(crate) fn compare_exchange(
+        &self,
+        current: *mut T,
+        new: *mut T,
+        success: Ordering,
+        failure: Ordering,
+    ) -> Result<*mut T, *mut T> {
+        crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check)
+        #[cfg(portable_atomic_no_stronger_failure_ordering)] // presumably for compilers that reject a failure ordering stronger than success; confirm against build.rs
+        let success = crate::utils::upgrade_success_ordering(success, failure);
+        self.inner.compare_exchange(current, new, success, failure)
+    }
+    #[inline]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub(crate) fn compare_exchange_weak(
+        &self,
+        current: *mut T,
+        new: *mut T,
+        success: Ordering,
+        failure: Ordering,
+    ) -> Result<*mut T, *mut T> {
+        crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check)
+        #[cfg(portable_atomic_no_stronger_failure_ordering)] // same ordering adjustment as in compare_exchange
+        let success = crate::utils::upgrade_success_ordering(success, failure);
+        self.inner.compare_exchange_weak(current, new, success, failure)
+    }
+}
+impl<T> core::ops::Deref for AtomicPtr<T> { // exposes the wrapped std AtomicPtr so its other methods are reachable through deref
+    type Target = core::sync::atomic::AtomicPtr<T>;
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+macro_rules! atomic_int {
+    ($atomic_type:ident, $int_type:ident) => {
+        #[repr(transparent)]
+        pub(crate) struct $atomic_type {
+            inner: core::sync::atomic::$atomic_type,
+            // Prevent RefUnwindSafe from being propagated from the std atomic type. See NotRefUnwindSafe for more.
+            _not_ref_unwind_safe: PhantomData<NotRefUnwindSafe>,
+        }
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(not(portable_atomic_no_atomic_cas))
+        )]
+        #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
+        impl_default_no_fetch_ops!($atomic_type, $int_type);
+        #[cfg(not(all(
+            any(target_arch = "x86", target_arch = "x86_64"),
+            not(any(miri, portable_atomic_sanitize_thread)),
+            not(portable_atomic_no_asm),
+        )))]
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(not(portable_atomic_no_atomic_cas))
+        )]
+        #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
+        impl_default_bit_opts!($atomic_type, $int_type);
+        impl $atomic_type {
+            #[inline]
+            pub(crate) const fn new(v: $int_type) -> Self {
+                Self {
+                    inner: core::sync::atomic::$atomic_type::new(v),
+                    _not_ref_unwind_safe: PhantomData,
+                }
+            }
+            #[inline]
+            pub(crate) fn is_lock_free() -> bool {
+                Self::is_always_lock_free()
+            }
+            #[inline]
+            pub(crate) const fn is_always_lock_free() -> bool {
+                // ESP-IDF targets' 64-bit atomics are not lock-free.
+                // https://github.com/rust-lang/rust/pull/115577#issuecomment-1732259297
+                cfg!(not(all(
+                    any(target_arch = "riscv32", target_arch = "xtensa"),
+                    target_os = "espidf",
+                ))) | (core::mem::size_of::<$int_type>() < 8)
+            }
+            #[inline]
+            pub(crate) fn get_mut(&mut self) -> &mut $int_type {
+                self.inner.get_mut()
+            }
+            #[inline]
+            pub(crate) fn into_inner(self) -> $int_type {
+                self.inner.into_inner()
+            }
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn load(&self, order: Ordering) -> $int_type {
+                crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
+                self.inner.load(order)
+            }
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
+                crate::utils::assert_store_ordering(order); // for track_caller (compiler can omit double check)
+                self.inner.store(val, order);
+            }
+            const_fn! {
+                const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))];
+                #[inline]
+                pub(crate) const fn as_ptr(&self) -> *mut $int_type {
+                    // SAFETY: Self is #[repr(C)] and internally UnsafeCell<$int_type>.
+                    // See also https://github.com/rust-lang/rust/pull/66705 and
+                    // https://github.com/rust-lang/rust/issues/66136#issuecomment-557867116.
+                    unsafe {
+                        (*(self as *const Self as *const UnsafeCell<$int_type>)).get()
+                    }
+                }
+            }
+        }
+        #[cfg_attr(
+            portable_atomic_no_cfg_target_has_atomic,
+            cfg(not(portable_atomic_no_atomic_cas))
+        )]
+        #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
+        impl $atomic_type {
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller // applied only in debug builds (unless opted out via cfg) or under Miri
+            )]
+            pub(crate) fn compare_exchange(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check)
+                #[cfg(portable_atomic_no_stronger_failure_ordering)] // the next statement exists only under this cfg
+                let success = crate::utils::upgrade_success_ordering(success, failure); // shadows `success` with an ordering derived from both arguments
+                self.inner.compare_exchange(current, new, success, failure) // result comes straight from the wrapped core atomic
+            }
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller // applied only in debug builds (unless opted out via cfg) or under Miri
+            )]
+            pub(crate) fn compare_exchange_weak(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check)
+                #[cfg(portable_atomic_no_stronger_failure_ordering)] // the next statement exists only under this cfg
+                let success = crate::utils::upgrade_success_ordering(success, failure); // shadows `success` with an ordering derived from both arguments
+                self.inner.compare_exchange_weak(current, new, success, failure) // result comes straight from the wrapped core atomic
+            }
+            #[allow(dead_code)]
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            fn fetch_update_<F>(&self, order: Ordering, mut f: F) -> $int_type
+            where
+                F: FnMut($int_type) -> $int_type,
+            {
+                // CAS loop: retry `f` on the freshest value until the weak exchange succeeds, then
+                // yield the value that was replaced. Relaxed suffices for the initial load and for
+                // failed attempts: this private helper's closures only look at the loaded value.
+                let mut cur = self.load(Ordering::Relaxed);
+                loop {
+                    let res = self.compare_exchange_weak(cur, f(cur), order, Ordering::Relaxed);
+                    match res {
+                        Err(actual) => cur = actual,
+                        Ok(replaced) => break replaced,
+                    }
+                }
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type {
+                #[cfg(not(portable_atomic_no_atomic_min_max))] // this branch is compiled unless the cfg is set
+                {
+                    #[cfg(any(
+                        all(
+                            target_arch = "aarch64",
+                            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+                        ),
+                        all(
+                            target_arch = "arm",
+                            not(any(
+                                target_feature = "v6",
+                                portable_atomic_target_feature = "v6",
+                            )),
+                        ),
+                        target_arch = "mips",
+                        target_arch = "mips32r6",
+                        target_arch = "mips64",
+                        target_arch = "mips64r6",
+                        target_arch = "powerpc",
+                        target_arch = "powerpc64",
+                    ))]
+                    {
+                        // HACK: the following operations are currently broken (at least on qemu-user):
+                        // - aarch64's `AtomicI{8,16}::fetch_{max,min}` (release mode + lse)
+                        // - armv5te's `Atomic{I,U}{8,16}::fetch_{max,min}`
+                        // - mips's `AtomicI8::fetch_{max,min}` (release mode)
+                        // - mipsel's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
+                        // - mips64's `AtomicI8::fetch_{max,min}` (release mode)
+                        // - mips64el's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
+                        // - powerpc's `AtomicI{8,16}::fetch_{max,min}`
+                        // - powerpc64's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
+                        // - powerpc64le's `AtomicU{8,16}::fetch_{max,min}` (release mode + fat LTO)
+                        // See also:
+                        // https://github.com/llvm/llvm-project/issues/61880
+                        // https://github.com/llvm/llvm-project/issues/61881
+                        // https://github.com/llvm/llvm-project/issues/61882
+                        // https://github.com/taiki-e/portable-atomic/issues/2
+                        // https://github.com/rust-lang/rust/issues/100650
+                        if core::mem::size_of::<$int_type>() <= 2 { // only the 8- and 16-bit types take the CAS-loop detour
+                            return self.fetch_update_(order, |x| core::cmp::max(x, val));
+                        }
+                    }
+                    self.inner.fetch_max(val, order) // regular path: delegate to the wrapped core atomic
+                }
+                #[cfg(portable_atomic_no_atomic_min_max)] // otherwise the operation is always emulated
+                {
+                    self.fetch_update_(order, |x| core::cmp::max(x, val))
+                }
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type {
+                #[cfg(not(portable_atomic_no_atomic_min_max))] // this branch is compiled unless the cfg is set
+                {
+                    #[cfg(any(
+                        all(
+                            target_arch = "aarch64",
+                            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+                        ),
+                        all(
+                            target_arch = "arm",
+                            not(any(
+                                target_feature = "v6",
+                                portable_atomic_target_feature = "v6",
+                            )),
+                        ),
+                        target_arch = "mips",
+                        target_arch = "mips32r6",
+                        target_arch = "mips64",
+                        target_arch = "mips64r6",
+                        target_arch = "powerpc",
+                        target_arch = "powerpc64",
+                    ))]
+                    {
+                        // HACK: the following operations are currently broken (at least on qemu-user):
+                        // - aarch64's `AtomicI{8,16}::fetch_{max,min}` (release mode + lse)
+                        // - armv5te's `Atomic{I,U}{8,16}::fetch_{max,min}`
+                        // - mips's `AtomicI8::fetch_{max,min}` (release mode)
+                        // - mipsel's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
+                        // - mips64's `AtomicI8::fetch_{max,min}` (release mode)
+                        // - mips64el's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
+                        // - powerpc's `AtomicI{8,16}::fetch_{max,min}`
+                        // - powerpc64's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least)
+                        // - powerpc64le's `AtomicU{8,16}::fetch_{max,min}` (release mode + fat LTO)
+                        // See also:
+                        // https://github.com/llvm/llvm-project/issues/61880
+                        // https://github.com/llvm/llvm-project/issues/61881
+                        // https://github.com/llvm/llvm-project/issues/61882
+                        // https://github.com/taiki-e/portable-atomic/issues/2
+                        // https://github.com/rust-lang/rust/issues/100650
+                        if core::mem::size_of::<$int_type>() <= 2 { // only the 8- and 16-bit types take the CAS-loop detour
+                            return self.fetch_update_(order, |x| core::cmp::min(x, val));
+                        }
+                    }
+                    self.inner.fetch_min(val, order) // regular path: delegate to the wrapped core atomic
+                }
+                #[cfg(portable_atomic_no_atomic_min_max)] // otherwise the operation is always emulated
+                {
+                    self.fetch_update_(order, |x| core::cmp::min(x, val))
+                }
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type {
+                self.fetch_xor(!0, order) // XOR with all-ones flips every bit; returns the previous value
+            }
+            #[cfg(not(all( // omitted on x86/x86_64 when asm is usable; presumably an asm version is provided elsewhere
+                any(target_arch = "x86", target_arch = "x86_64"),
+                not(any(miri, portable_atomic_sanitize_thread)),
+                not(portable_atomic_no_asm),
+            )))]
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn not(&self, order: Ordering) {
+                self.fetch_not(order); // same as `fetch_not`, with the previous value discarded
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type {
+                self.fetch_update_(order, $int_type::wrapping_neg) // CAS loop storing the wrapping negation; returns the previous value
+            }
+            #[cfg(not(all( // omitted on x86/x86_64 when asm is usable; presumably an asm version is provided elsewhere
+                any(target_arch = "x86", target_arch = "x86_64"),
+                not(any(miri, portable_atomic_sanitize_thread)),
+                not(portable_atomic_no_asm),
+            )))]
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn neg(&self, order: Ordering) {
+                self.fetch_neg(order); // same as `fetch_neg`, with the previous value discarded
+            }
+        }
+        impl core::ops::Deref for $atomic_type { // exposes the wrapped core atomic, so its methods are reachable through auto-deref
+            type Target = core::sync::atomic::$atomic_type;
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            fn deref(&self) -> &Self::Target {
+                &self.inner
+            }
+        }
+    };
+}
+
+atomic_int!(AtomicIsize, isize); // pointer-sized wrappers carry no cfg gate of their own here
+atomic_int!(AtomicUsize, usize);
+#[cfg(not(portable_atomic_no_atomic_load_store))] // 8-bit to 32-bit wrappers are skipped when this cfg is set
+atomic_int!(AtomicI8, i8);
+#[cfg(not(portable_atomic_no_atomic_load_store))]
+atomic_int!(AtomicU8, u8);
+#[cfg(not(portable_atomic_no_atomic_load_store))]
+atomic_int!(AtomicI16, i16);
+#[cfg(not(portable_atomic_no_atomic_load_store))]
+atomic_int!(AtomicU16, u16);
+#[cfg(not(portable_atomic_no_atomic_load_store))]
+#[cfg(not(target_pointer_width = "16"))] // 32-bit wrappers are additionally skipped on 16-bit-pointer targets
+atomic_int!(AtomicI32, i32);
+#[cfg(not(portable_atomic_no_atomic_load_store))]
+#[cfg(not(target_pointer_width = "16"))]
+atomic_int!(AtomicU32, u32);
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_64)))] // gate used when `target_has_atomic` cannot be queried
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(any(
+        target_has_atomic = "64",
+        not(any(target_pointer_width = "16", target_pointer_width = "32")), // i.e. any pointer width other than 16 or 32
+    ))
+)]
+atomic_int!(AtomicI64, i64);
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_64)))] // same gate as AtomicI64
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(any(
+        target_has_atomic = "64",
+        not(any(target_pointer_width = "16", target_pointer_width = "32")),
+    ))
+)]
+atomic_int!(AtomicU64, u64);
diff --git a/vendor/portable-atomic/src/imp/fallback/mod.rs b/vendor/portable-atomic/src/imp/fallback/mod.rs
new file mode 100644
index 0000000..283c98c
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/fallback/mod.rs
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Fallback implementation using global locks.
+//
+// This implementation uses seqlock for global locks.
+//
+// This is basically based on global locks in crossbeam-utils's `AtomicCell`,
+// but seqlock is implemented in a way that does not depend on UB
+// (see comments in optimistic_read method in atomic! macro for details).
+//
+// Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic
+// type and the value type must be the same.
+
+#![cfg_attr(
+    any(
+        all(
+            target_arch = "x86_64",
+            not(portable_atomic_no_cmpxchg16b_target_feature),
+            not(portable_atomic_no_outline_atomics),
+            not(any(target_env = "sgx", miri)),
+        ),
+        all(
+            target_arch = "powerpc64",
+            feature = "fallback",
+            not(portable_atomic_no_outline_atomics),
+            portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default
+            any(
+                all(
+                    target_os = "linux",
+                    any(
+                        target_env = "gnu",
+                        all(
+                            any(target_env = "musl", target_env = "ohos"),
+                            not(target_feature = "crt-static"),
+                        ),
+                        portable_atomic_outline_atomics,
+                    ),
+                ),
+                target_os = "android",
+                target_os = "freebsd",
+            ),
+            not(any(miri, portable_atomic_sanitize_thread)),
+        ),
+        all(
+            target_arch = "arm",
+            not(portable_atomic_no_asm),
+            any(target_os = "linux", target_os = "android"),
+            not(portable_atomic_no_outline_atomics),
+        ),
+    ),
+    allow(dead_code)
+)]
+
+#[macro_use]
+pub(crate) mod utils;
+
+// Use the "wide" sequence lock if the pointer width is <= 32 to protect its counter against
+// wrap-around.
+//
+// On narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be
+// vulnerable to wrap-around. But that is mostly okay, since on such primitive hardware the
+// counter will not be incremented that fast.
+//
+// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
+// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast,
+// so use it to implement normal sequence lock.
+cfg_has_fast_atomic_64! {
+    mod seq_lock;
+}
+cfg_no_fast_atomic_64! {
+    #[path = "seq_lock_wide.rs"]
+    mod seq_lock;
+}
+
+use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};
+
+use seq_lock::{SeqLock, SeqLockWriteGuard};
+use utils::CachePadded;
+
+// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI,
+// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast,
+// so use it to reduce chunks of byte-wise atomic memcpy.
+use seq_lock::{AtomicChunk, Chunk};
+
+// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016.
+#[inline]
+#[must_use]
+fn lock(addr: usize) -> &'static SeqLock { // maps an address to one of the `LEN` global sequence locks
+    // The number of locks is a prime number because we want to make sure `addr % LEN` gets
+    // dispersed across all locks.
+    //
+    // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded,
+    // so the actual concurrency level will be smaller.
+    const LEN: usize = 67;
+    #[allow(clippy::declare_interior_mutable_const)] // `L` is only used as the initializer of each array element
+    const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
+    static LOCKS: [CachePadded<SeqLock>; LEN] = [
+        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
+        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
+        L, L, L, L, L, L, L,
+    ];
+
+    // If the modulus is a constant number, the compiler will use crazy math to transform this into
+    // a sequence of cheap arithmetic operations rather than using the slow modulo instruction.
+    &LOCKS[addr % LEN] // the index is always < LEN, so this never goes out of bounds
+}
+
+macro_rules! atomic {
+    ($atomic_type:ident, $int_type:ident, $align:literal) => {
+        #[repr(C, align($align))] // the alignment is supplied per type by the macro invocation
+        pub(crate) struct $atomic_type {
+            v: UnsafeCell<$int_type>, // the value itself; no per-object lock is stored (locks are global, see `lock`)
+        }
+
+        impl $atomic_type {
+            const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>(); // number of `Chunk`s that make up one value
+
+            #[inline]
+            unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] { // NOTE(review): the contract is not spelled out here; see the SAFETY notes at the call sites
+                static_assert!($atomic_type::LEN > 1);
+                static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0);
+
+                // SAFETY: the caller must uphold the safety contract for `chunks`.
+                unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) }
+            }
+
+            #[inline]
+            fn optimistic_read(&self) -> $int_type {
+                // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M
+                let mut dst: [Chunk; Self::LEN] = [0; Self::LEN];
+                // SAFETY:
+                // - There are no threads that perform non-atomic concurrent write operations.
+                // - There is no writer that updates the value using atomic operations of different granularity.
+                //
+                // If the atomic operation is not used here, it will cause a data race
+                // when `write` performs concurrent write operation.
+                // Such a data race is sometimes considered virtually unproblematic
+                // in SeqLock implementations:
+                //
+                // - https://github.com/Amanieu/seqlock/issues/2
+                // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116
+                // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F
+                //
+                // However, in our use case, the implementation that loads/stores the value as
+                // `Chunk`-sized pieces is fast enough and sound, so we use that implementation.
+                //
+                // See also atomic-memcpy crate, a generic implementation of this pattern:
+                // https://github.com/taiki-e/atomic-memcpy
+                let chunks = unsafe { self.chunks() };
+                for i in 0..Self::LEN {
+                    dst[i] = chunks[i].load(Ordering::Relaxed); // each chunk is loaded atomically; the value as a whole may still be torn
+                }
+                // SAFETY: integers are plain old data types so we can always transmute to them.
+                unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) }
+            }
+
+            #[inline]
+            fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type {
+                // This calls optimistic_read, which can return a torn value in general, but here the
+                // value is guaranteed not to be torn because we hold the lock to write.
+                self.optimistic_read()
+            }
+
+            #[inline]
+            fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) {
+                // SAFETY: integers are plain old data types so we can always transmute them to arrays of integers.
+                let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) };
+                // SAFETY:
+                // - The guard guarantees that we hold the lock to write.
+                // - There are no threads that perform non-atomic concurrent read or write operations.
+                //
+                // See optimistic_read for the reason that atomic operations are used here.
+                let chunks = unsafe { self.chunks() };
+                for i in 0..Self::LEN {
+                    chunks[i].store(val[i], Ordering::Relaxed); // chunk-wise atomic store, mirroring `optimistic_read`
+                }
+            }
+        }
+
+        // Send is implicitly implemented.
+        // SAFETY: any data races are prevented by the global lock and the chunk-wise atomic operations.
+        unsafe impl Sync for $atomic_type {}
+
+        impl_default_no_fetch_ops!($atomic_type, $int_type); // macro defined elsewhere in the crate
+        impl_default_bit_opts!($atomic_type, $int_type); // macro defined elsewhere in the crate
+        impl $atomic_type {
+            #[inline]
+            pub(crate) const fn new(v: $int_type) -> Self {
+                Self { v: UnsafeCell::new(v) }
+            }
+
+            #[inline]
+            pub(crate) fn is_lock_free() -> bool {
+                Self::is_always_lock_free() // same answer as the compile-time query
+            }
+            #[inline]
+            pub(crate) const fn is_always_lock_free() -> bool {
+                false // every shared-access operation in this impl goes through a global sequence lock
+            }
+
+            #[inline]
+            pub(crate) fn get_mut(&mut self) -> &mut $int_type {
+                // SAFETY: the mutable reference guarantees unique ownership.
+                // (UnsafeCell::get_mut requires Rust 1.50)
+                unsafe { &mut *self.v.get() }
+            }
+
+            #[inline]
+            pub(crate) fn into_inner(self) -> $int_type {
+                self.v.into_inner() // by-value `self`: no lock is taken
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn load(&self, order: Ordering) -> $int_type {
+                crate::utils::assert_load_ordering(order); // `order` is only validated; it is not used below
+                let lock = lock(self.v.get() as usize); // the lock is selected by this atomic's address
+
+                // Try doing an optimistic read first.
+                if let Some(stamp) = lock.optimistic_read() {
+                    let val = self.optimistic_read();
+
+                    if lock.validate_read(stamp) { // the snapshot is returned only if the lock validates the stamp
+                        return val;
+                    }
+                }
+
+                // Grab a regular write lock so that writers don't starve this load.
+                let guard = lock.write();
+                let val = self.read(&guard);
+                // The value hasn't been changed. Drop the guard without incrementing the stamp.
+                guard.abort();
+                val
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
+                crate::utils::assert_store_ordering(order); // `order` is only validated; it is not used below
+                let guard = lock(self.v.get() as usize).write();
+                self.write(val, &guard) // `guard` is dropped on return (no `abort`, unlike the unchanged-value paths)
+            }
+
+            #[inline]
+            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(val, &guard);
+                prev
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn compare_exchange(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure); // orderings are only validated
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                if prev == current {
+                    self.write(new, &guard);
+                    Ok(prev)
+                } else {
+                    // The value hasn't been changed. Drop the guard without incrementing the stamp.
+                    guard.abort();
+                    Err(prev)
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn compare_exchange_weak(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                self.compare_exchange(current, new, success, failure) // the weak form is simply the strong one here
+            }
+
+            #[inline]
+            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(prev.wrapping_add(val), &guard); // wraps around on overflow
+                prev
+            }
+
+            #[inline]
+            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(prev.wrapping_sub(val), &guard); // wraps around on overflow
+                prev
+            }
+
+            #[inline]
+            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(prev & val, &guard);
+                prev
+            }
+
+            #[inline]
+            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(!(prev & val), &guard);
+                prev
+            }
+
+            #[inline]
+            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(prev | val, &guard);
+                prev
+            }
+
+            #[inline]
+            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(prev ^ val, &guard);
+                prev
+            }
+
+            #[inline]
+            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(core::cmp::max(prev, val), &guard); // written back even when the value is unchanged
+                prev
+            }
+
+            #[inline]
+            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(core::cmp::min(prev, val), &guard); // written back even when the value is unchanged
+                prev
+            }
+
+            #[inline]
+            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(!prev, &guard);
+                prev
+            }
+            #[inline]
+            pub(crate) fn not(&self, order: Ordering) {
+                self.fetch_not(order); // previous value is discarded
+            }
+
+            #[inline]
+            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { // `_order` is ignored
+                let guard = lock(self.v.get() as usize).write();
+                let prev = self.read(&guard);
+                self.write(prev.wrapping_neg(), &guard); // wrapping negation
+                prev
+            }
+            #[inline]
+            pub(crate) fn neg(&self, order: Ordering) {
+                self.fetch_neg(order); // previous value is discarded
+            }
+
+            #[inline]
+            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
+                self.v.get() // raw pointer obtained from the `UnsafeCell`; no lock is taken here
+            }
+        }
+    };
+}
+
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(any(test, not(target_has_atomic = "64"))) // 64-bit fallback types: only in tests or without 64-bit atomics
+)]
+cfg_no_fast_atomic_64! {
+    atomic!(AtomicI64, i64, 8); // third argument is the alignment passed to `#[repr(align(..))]`
+    atomic!(AtomicU64, u64, 8);
+}
+
+atomic!(AtomicI128, i128, 16); // the 128-bit types carry no cfg gate of their own in this file
+atomic!(AtomicU128, u128, 16);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    cfg_no_fast_atomic_64! { // the 64-bit fallback types are instantiated under this same cfg macro
+        test_atomic_int!(i64);
+        test_atomic_int!(u64);
+    }
+    test_atomic_int!(i128);
+    test_atomic_int!(u128);
+
+    // The load/store/swap implementation is not affected by signedness, so it is
+    // enough to stress-test only the unsigned types.
+    cfg_no_fast_atomic_64! {
+        stress_test!(u64);
+    }
+    stress_test!(u128);
+}
diff --git a/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs b/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs
new file mode 100644
index 0000000..895b60c
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Helper for outline-atomics.
+//
+// On architectures where DW atomics are not supported on older CPUs, we use
+// fallback implementation when DW atomic instructions are not supported and
+// outline-atomics is enabled.
+//
+// This module provides helpers to implement them.
+
+use core::sync::atomic::Ordering;
+
+#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))]
+pub(crate) type Udw = u128; // unsigned double-word integer: 128 bits on these targets
+#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))]
+pub(crate) type AtomicUdw = super::super::fallback::AtomicU128; // lock-based fallback atomic of the same width
+#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))]
+pub(crate) type AtomicIdw = super::super::fallback::AtomicI128; // signed counterpart
+
+#[cfg(target_arch = "arm")]
+pub(crate) type Udw = u64; // unsigned double-word integer: 64 bits on arm
+#[cfg(target_arch = "arm")]
+pub(crate) type AtomicUdw = super::super::fallback::AtomicU64; // lock-based fallback atomic of the same width
+#[cfg(target_arch = "arm")]
+pub(crate) type AtomicIdw = super::super::fallback::AtomicI64; // signed counterpart
+
+// Debug-asserts the correct context: detection must report that DW atomics are unavailable.
+macro_rules! debug_assert_outline_atomics {
+    () => {
+        #[cfg(target_arch = "x86_64")]
+        {
+            debug_assert!(!super::detect::detect().has_cmpxchg16b()); // fallback expected only without cmpxchg16b
+        }
+        #[cfg(target_arch = "powerpc64")]
+        {
+            debug_assert!(!super::detect::detect().has_quadword_atomics()); // fallback expected only without quadword atomics
+        }
+        #[cfg(target_arch = "arm")]
+        {
+            debug_assert!(!super::has_kuser_cmpxchg64()); // fallback expected only without kuser_cmpxchg64
+        }
+    };
+}
+
+#[cold]
+pub(crate) unsafe fn atomic_load(src: *mut Udw, order: Ordering) -> Udw {
+    debug_assert_outline_atomics!();
+    // Reinterpret the raw double-word pointer as the lock-based fallback atomic.
+    #[allow(clippy::cast_ptr_alignment)]
+    let fallback = src as *const AtomicUdw;
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe { (*fallback).load(order) }
+}
+fn_alias! {
+    #[cold]
+    pub(crate) unsafe fn(src: *mut Udw) -> Udw;
+    // The fallback load is at least Acquire, so a single non-SeqCst alias is enough.
+    #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
+    atomic_load_non_seqcst = atomic_load(Ordering::Acquire);
+    atomic_load_seqcst = atomic_load(Ordering::SeqCst);
+}
+
+#[cold]
+pub(crate) unsafe fn atomic_store(dst: *mut Udw, val: Udw, order: Ordering) {
+    debug_assert_outline_atomics!();
+    // View the raw double-word pointer as the lock-based fallback atomic.
+    #[allow(clippy::cast_ptr_alignment)]
+    let fallback = dst as *const AtomicUdw;
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe { (*fallback).store(val, order) }
+}
+fn_alias! {
+    #[cold]
+    pub(crate) unsafe fn(dst: *mut Udw, val: Udw);
+    // The fallback store is at least Release, so a single non-SeqCst alias is enough.
+    #[cfg(not(target_arch = "arm"))]
+    atomic_store_non_seqcst = atomic_store(Ordering::Release);
+    atomic_store_seqcst = atomic_store(Ordering::SeqCst);
+}
+
+#[cold] // hint: this outlined path is expected to be called rarely
+pub(crate) unsafe fn atomic_compare_exchange(
+    dst: *mut Udw,
+    old: Udw, // value expected to be currently stored
+    new: Udw, // value to store if the comparison succeeds
+    success: Ordering,
+    failure: Ordering,
+) -> (Udw, bool) { // (value reported by the CAS, whether the exchange succeeded)
+    debug_assert_outline_atomics!(); // debug-only per-arch sanity check
+    #[allow(clippy::cast_ptr_alignment)] // for the `*mut Udw` -> `*const AtomicUdw` cast below
+    // SAFETY: the caller must uphold the safety contract (NOTE(review): presumably `dst` valid for `AtomicUdw` — confirm).
+    unsafe {
+        match (*(dst as *const AtomicUdw)).compare_exchange(old, new, success, failure) {
+            Ok(v) => (v, true), // flatten `Result` into a (value, flag) pair
+            Err(v) => (v, false),
+        }
+    }
+}
+fn_alias! { // NOTE(review): `fn_alias!` is defined elsewhere; presumably emits one fn per `alias = target(..)` entry
+    #[cold]
+    pub(crate) unsafe fn(dst: *mut Udw, old: Udw, new: Udw) -> (Udw, bool); // no ordering parameters
+    // fallback's atomic CAS has at least AcqRel semantics, so the non-SeqCst alias uses AcqRel/Acquire.
+    #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] // cfg sits on the next alias only
+    atomic_compare_exchange_non_seqcst
+        = atomic_compare_exchange(Ordering::AcqRel, Ordering::Acquire);
+    atomic_compare_exchange_seqcst
+        = atomic_compare_exchange(Ordering::SeqCst, Ordering::SeqCst);
+}
+
+macro_rules! atomic_rmw_3 { // generates an RMW fn taking (`dst`, `val`, `order`) plus its two fixed-ordering aliases
+    (
+        $name:ident($atomic_type:ident::$method_name:ident),
+        $non_seqcst_alias:ident, $seqcst_alias:ident
+    ) => {
+        #[cold]
+        pub(crate) unsafe fn $name(dst: *mut Udw, val: Udw, order: Ordering) -> Udw {
+            debug_assert_outline_atomics!(); // debug-only per-arch sanity check
+            #[allow(clippy::cast_ptr_alignment)] // for the pointer cast to `$atomic_type` below
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                // `val as _` / `as Udw` convert to and from `$atomic_type`'s value type (e.g. for `AtomicIdw`).
+                (*(dst as *const $atomic_type)).$method_name(val as _, order) as Udw
+            }
+        }
+        fn_alias! {
+            #[cold]
+            pub(crate) unsafe fn(dst: *mut Udw, val: Udw) -> Udw; // aliases drop the `order` parameter
+            // fallback's atomic RMW has at least AcqRel semantics, so the non-SeqCst alias uses AcqRel.
+            #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
+            $non_seqcst_alias = $name(Ordering::AcqRel);
+            $seqcst_alias = $name(Ordering::SeqCst);
+        }
+    };
+}
+macro_rules! atomic_rmw_2 { // generates an operand-less RMW fn taking (`dst`, `order`) plus its two aliases
+    (
+        $name:ident($atomic_type:ident::$method_name:ident),
+        $non_seqcst_alias:ident, $seqcst_alias:ident
+    ) => {
+        #[cold]
+        pub(crate) unsafe fn $name(dst: *mut Udw, order: Ordering) -> Udw {
+            debug_assert_outline_atomics!(); // debug-only per-arch sanity check
+            #[allow(clippy::cast_ptr_alignment)] // for the pointer cast to `$atomic_type` below
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                // The method's result is cast back to `Udw` for the uniform return type.
+                (*(dst as *const $atomic_type)).$method_name(order) as Udw
+            }
+        }
+        fn_alias! {
+            #[cold]
+            pub(crate) unsafe fn(dst: *mut Udw) -> Udw; // aliases drop the `order` parameter
+            // fallback's atomic RMW has at least AcqRel semantics, so the non-SeqCst alias uses AcqRel.
+            #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))]
+            $non_seqcst_alias = $name(Ordering::AcqRel);
+            $seqcst_alias = $name(Ordering::SeqCst);
+        }
+    };
+}
+
+atomic_rmw_3!(atomic_swap(AtomicUdw::swap), atomic_swap_non_seqcst, atomic_swap_seqcst); // value-taking RMWs
+atomic_rmw_3!(atomic_add(AtomicUdw::fetch_add), atomic_add_non_seqcst, atomic_add_seqcst);
+atomic_rmw_3!(atomic_sub(AtomicUdw::fetch_sub), atomic_sub_non_seqcst, atomic_sub_seqcst);
+atomic_rmw_3!(atomic_and(AtomicUdw::fetch_and), atomic_and_non_seqcst, atomic_and_seqcst);
+atomic_rmw_3!(atomic_nand(AtomicUdw::fetch_nand), atomic_nand_non_seqcst, atomic_nand_seqcst);
+atomic_rmw_3!(atomic_or(AtomicUdw::fetch_or), atomic_or_non_seqcst, atomic_or_seqcst);
+atomic_rmw_3!(atomic_xor(AtomicUdw::fetch_xor), atomic_xor_non_seqcst, atomic_xor_seqcst);
+atomic_rmw_3!(atomic_max(AtomicIdw::fetch_max), atomic_max_non_seqcst, atomic_max_seqcst); // max/min: `AtomicIdw`
+atomic_rmw_3!(atomic_umax(AtomicUdw::fetch_max), atomic_umax_non_seqcst, atomic_umax_seqcst); // umax/umin: `AtomicUdw`
+atomic_rmw_3!(atomic_min(AtomicIdw::fetch_min), atomic_min_non_seqcst, atomic_min_seqcst);
+atomic_rmw_3!(atomic_umin(AtomicUdw::fetch_min), atomic_umin_non_seqcst, atomic_umin_seqcst);
+
+atomic_rmw_2!(atomic_not(AtomicUdw::fetch_not), atomic_not_non_seqcst, atomic_not_seqcst); // operand-less RMWs
+atomic_rmw_2!(atomic_neg(AtomicUdw::fetch_neg), atomic_neg_non_seqcst, atomic_neg_seqcst);
diff --git a/vendor/portable-atomic/src/imp/fallback/seq_lock.rs b/vendor/portable-atomic/src/imp/fallback/seq_lock.rs
new file mode 100644
index 0000000..fb6803f
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/fallback/seq_lock.rs
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock.rs.
+
+use core::{
+    mem::ManuallyDrop,
+    sync::atomic::{self, Ordering},
+};
+
+use super::utils::Backoff;
+
+// Stamp type: 64-bit on 16/32-bit-pointer targets, pointer-sized elsewhere. See mod.rs for details.
+#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))]
+pub(super) use core::sync::atomic::AtomicU64 as AtomicStamp;
+#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
+pub(super) use core::sync::atomic::AtomicUsize as AtomicStamp;
+#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
+pub(super) type Stamp = usize;
+#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))]
+pub(super) type Stamp = u64;
+
+// In this variant the chunk types are the same as the stamp types. See mod.rs for details.
+pub(super) type AtomicChunk = AtomicStamp;
+pub(super) type Chunk = Stamp;
+
+/// A simple stamped lock: readers validate a stamp, writers take the lock exclusively.
+pub(super) struct SeqLock {
+    /// The current state of the lock.
+    ///
+    /// All bits except the least significant one hold the current stamp. When locked, the state
+    /// equals 1 and doesn't contain a valid stamp (the writer's guard keeps the previous stamp).
+    state: AtomicStamp, // starts at 0 (see `new`); advanced by 2 when a write guard is dropped
+}
+
+impl SeqLock {
+    #[inline]
+    pub(super) const fn new() -> Self { // unlocked, stamp 0
+        Self { state: AtomicStamp::new(0) }
+    }
+
+    /// If not locked, returns the current stamp; returns `None` while a writer holds the lock.
+    ///
+    /// This method should be called before optimistic reads.
+    #[inline]
+    pub(super) fn optimistic_read(&self) -> Option<Stamp> {
+        let state = self.state.load(Ordering::Acquire); // pairs with the release stores in the write guard
+        if state == 1 { // 1 is the "locked" marker, not a stamp
+            None
+        } else {
+            Some(state)
+        }
+    }
+
+    /// Returns `true` if the current stamp is equal to `stamp`.
+    ///
+    /// This method should be called after optimistic reads to check whether they are valid. The
+    /// argument `stamp` should correspond to the one returned by method `optimistic_read`.
+    #[inline]
+    pub(super) fn validate_read(&self, stamp: Stamp) -> bool {
+        atomic::fence(Ordering::Acquire); // fence is issued before the state is re-read
+        self.state.load(Ordering::Relaxed) == stamp // a locked state (1) or a newer stamp fails the check
+    }
+
+    /// Grabs the lock for writing, spinning with backoff while another writer holds it.
+    #[inline]
+    pub(super) fn write(&self) -> SeqLockWriteGuard<'_> {
+        let mut backoff = Backoff::new();
+        loop {
+            let previous = self.state.swap(1, Ordering::Acquire); // try to take the lock
+
+            if previous != 1 { // we won: `previous` is the stamp that was current before locking
+                atomic::fence(Ordering::Release); // NOTE(review): presumably pairs with the fence in `validate_read`
+
+                return SeqLockWriteGuard { lock: self, state: previous };
+            }
+
+            while self.state.load(Ordering::Relaxed) == 1 { // still locked: wait before retrying the swap
+                backoff.snooze();
+            }
+        }
+    }
+}
+
+/// An RAII guard that releases the lock and increments the stamp when dropped.
+#[must_use] // dropping the guard is what unlocks, so it must be bound or consumed
+pub(super) struct SeqLockWriteGuard<'a> {
+    /// The parent lock.
+    lock: &'a SeqLock,
+
+    /// The stamp before locking (the value `write` swapped out of the lock state).
+    state: Stamp,
+}
+
+impl SeqLockWriteGuard<'_> {
+    /// Releases the lock without incrementing the stamp.
+    #[inline]
+    pub(super) fn abort(self) {
+        // We specifically don't want to call drop(), since that's
+        // what increments the stamp; `ManuallyDrop` suppresses it.
+        let this = ManuallyDrop::new(self);
+
+        // Restore the stamp that was current before locking.
+        //
+        // Release ordering for synchronizing with `optimistic_read`.
+        this.lock.state.store(this.state, Ordering::Release);
+    }
+}
+
+impl Drop for SeqLockWriteGuard<'_> {
+    #[inline]
+    fn drop(&mut self) {
+        // Release the lock and increment the stamp (by 2, so the low "locked" bit stays clear).
+        //
+        // Release ordering for synchronizing with `optimistic_read`.
+        self.lock.state.store(self.state.wrapping_add(2), Ordering::Release);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::SeqLock;
+
+    #[test]
+    fn smoke() { // a completed write must invalidate an earlier stamp
+        let lock = SeqLock::new();
+        let before = lock.optimistic_read().unwrap();
+        assert!(lock.validate_read(before)); // nothing written yet: stamp still valid
+        {
+            let _guard = lock.write(); // guard dropped at end of scope -> stamp advances
+        }
+        assert!(!lock.validate_read(before));
+        let after = lock.optimistic_read().unwrap();
+        assert_ne!(before, after);
+    }
+
+    #[test]
+    fn test_abort() { // an aborted write must leave the stamp untouched
+        let lock = SeqLock::new();
+        let before = lock.optimistic_read().unwrap();
+        {
+            let guard = lock.write();
+            guard.abort(); // consumes the guard without running `drop`
+        }
+        let after = lock.optimistic_read().unwrap();
+        assert_eq!(before, after, "aborted write does not update the stamp");
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs b/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs
new file mode 100644
index 0000000..e12996f
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock_wide.rs.
+
+use core::{
+    mem::ManuallyDrop,
+    sync::atomic::{self, AtomicUsize, Ordering},
+};
+
+use super::utils::Backoff;
+
+// In this variant chunks are pointer-sized. See mod.rs for details.
+pub(super) type AtomicChunk = AtomicUsize;
+pub(super) type Chunk = usize;
+
+/// A simple stamped lock whose stamp is split across two words.
+///
+/// The state is represented as two `AtomicUsize`: `state_hi` for high bits and `state_lo` for low
+/// bits.
+pub(super) struct SeqLock {
+    /// The high bits of the current state of the lock (incremented when `state_lo` wraps to 0).
+    state_hi: AtomicUsize,
+
+    /// The low bits of the current state of the lock.
+    ///
+    /// All bits except the least significant one hold the current stamp. When locked, the state_lo
+    /// equals 1 and doesn't contain a valid stamp.
+    state_lo: AtomicUsize, // this word alone carries the lock marker; `write` only swaps `state_lo`
+}
+
+impl SeqLock {
+    #[inline]
+    pub(super) const fn new() -> Self { // unlocked, stamp (0, 0)
+        Self { state_hi: AtomicUsize::new(0), state_lo: AtomicUsize::new(0) }
+    }
+
+    /// If not locked, returns the current stamp as `(state_hi, state_lo)`.
+    ///
+    /// This method should be called before optimistic reads.
+    #[inline]
+    pub(super) fn optimistic_read(&self) -> Option<(usize, usize)> {
+        // The acquire loads from `state_hi` and `state_lo` synchronize with the release stores in
+        // `SeqLockWriteGuard::drop` and `SeqLockWriteGuard::abort`.
+        //
+        // As a consequence, we can make sure that (1) all writes within the era of `state_hi - 1`
+        // happen before now; and therefore, (2) if `state_lo` is even, all writes within the
+        // critical section of (`state_hi`, `state_lo`) happen before now.
+        let state_hi = self.state_hi.load(Ordering::Acquire);
+        let state_lo = self.state_lo.load(Ordering::Acquire);
+        if state_lo == 1 { // 1 is the "locked" marker, not a stamp
+            None
+        } else {
+            Some((state_hi, state_lo))
+        }
+    }
+
+    /// Returns `true` if the current stamp is equal to `stamp`.
+    ///
+    /// This method should be called after optimistic reads to check whether they are valid. The
+    /// argument `stamp` should correspond to the one returned by method `optimistic_read`.
+    #[inline]
+    pub(super) fn validate_read(&self, stamp: (usize, usize)) -> bool {
+        // Thanks to the fence, if we're noticing any modification to the data at the critical
+        // section of `(stamp.0, stamp.1)`, then the critical section's write of 1 to state_lo should be
+        // visible.
+        atomic::fence(Ordering::Acquire);
+
+        // So if `state_lo` coincides with `stamp.1`, then either (1) we're noticing no modification
+        // to the data after the critical section of `(stamp.0, stamp.1)`, or (2) `state_lo` wrapped
+        // around.
+        //
+        // If (2) is the case, the acquire ordering ensures we see the new value of `state_hi`.
+        let state_lo = self.state_lo.load(Ordering::Acquire);
+
+        // If (2) is the case and `state_hi` coincides with `stamp.0`, then `state_hi` also wrapped
+        // around; we give up on validating the read correctly in that (double wrap-around) case.
+        let state_hi = self.state_hi.load(Ordering::Relaxed);
+
+        // Except for the case that both `state_hi` and `state_lo` wrapped around, the following
+        // condition implies that we're noticing no modification to the data after the critical
+        // section of `(stamp.0, stamp.1)`.
+        (state_hi, state_lo) == stamp
+    }
+
+    /// Grabs the lock for writing, spinning with backoff while another writer holds it.
+    #[inline]
+    pub(super) fn write(&self) -> SeqLockWriteGuard<'_> {
+        let mut backoff = Backoff::new();
+        loop {
+            let previous = self.state_lo.swap(1, Ordering::Acquire); // try to take the lock
+
+            if previous != 1 { // we won: `previous` is the low stamp word from before locking
+                // To synchronize with the acquire fence in `validate_read` via any modification to
+                // the data at the critical section of `(state_hi, previous)`.
+                atomic::fence(Ordering::Release);
+
+                return SeqLockWriteGuard { lock: self, state_lo: previous };
+            }
+
+            while self.state_lo.load(Ordering::Relaxed) == 1 { // still locked: wait before retrying the swap
+                backoff.snooze();
+            }
+        }
+    }
+}
+
+/// An RAII guard that releases the lock and increments the stamp when dropped.
+#[must_use] // dropping the guard is what unlocks, so it must be bound or consumed
+pub(super) struct SeqLockWriteGuard<'a> {
+    /// The parent lock.
+    lock: &'a SeqLock,
+
+    /// The low word of the stamp before locking (the value `write` swapped out of `state_lo`).
+    state_lo: usize,
+}
+
+impl SeqLockWriteGuard<'_> {
+    /// Releases the lock without incrementing the stamp.
+    #[inline]
+    pub(super) fn abort(self) {
+        // We specifically don't want to call drop(), since that's
+        // what increments the stamp; `ManuallyDrop` suppresses it.
+        let this = ManuallyDrop::new(self);
+
+        // Restore the low stamp word; `state_hi` was never modified while locked.
+        //
+        // Release ordering for synchronizing with `optimistic_read`.
+        this.lock.state_lo.store(this.state_lo, Ordering::Release);
+    }
+}
+
+impl Drop for SeqLockWriteGuard<'_> {
+    #[inline]
+    fn drop(&mut self) {
+        let state_lo = self.state_lo.wrapping_add(2); // next low stamp; step of 2 keeps the lock bit clear
+
+        // Increase the high bits if the low bits wrap around.
+        //
+        // Release ordering for synchronizing with `optimistic_read`.
+        if state_lo == 0 {
+            let state_hi = self.lock.state_hi.load(Ordering::Relaxed); // done while `state_lo` is still 1
+            self.lock.state_hi.store(state_hi.wrapping_add(1), Ordering::Release);
+        }
+
+        // Release the lock and increment the stamp (stored after any `state_hi` update above).
+        //
+        // Release ordering for synchronizing with `optimistic_read`.
+        self.lock.state_lo.store(state_lo, Ordering::Release);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::SeqLock;
+
+    #[test]
+    fn smoke() { // a completed write must invalidate an earlier stamp
+        let lock = SeqLock::new();
+        let before = lock.optimistic_read().unwrap();
+        assert!(lock.validate_read(before)); // nothing written yet: stamp still valid
+        {
+            let _guard = lock.write(); // guard dropped at end of scope -> stamp advances
+        }
+        assert!(!lock.validate_read(before));
+        let after = lock.optimistic_read().unwrap();
+        assert_ne!(before, after);
+    }
+
+    #[test]
+    fn test_abort() { // an aborted write must leave the stamp untouched
+        let lock = SeqLock::new();
+        let before = lock.optimistic_read().unwrap();
+        {
+            let guard = lock.write();
+            guard.abort(); // consumes the guard without running `drop`
+        }
+        let after = lock.optimistic_read().unwrap();
+        assert_eq!(before, after, "aborted write does not update the stamp");
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/fallback/utils.rs b/vendor/portable-atomic/src/imp/fallback/utils.rs
new file mode 100644
index 0000000..e8ed0ba
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/fallback/utils.rs
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+use core::ops;
+
+// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/9384f1eb2b356364e201ad38545e03c837d55f3a/crossbeam-utils/src/cache_padded.rs.
+/// Pads and aligns a value to the length of a cache line (the length is chosen per `target_arch`).
+// Starting from Intel's Sandy Bridge, the spatial prefetcher pulls pairs of 64-byte cache
+// lines at a time, so we have to align to 128 bytes rather than 64.
+//
+// Sources:
+// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
+// - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107
+//
+// ARM's big.LITTLE architecture has asymmetric cores and "big" cores have 128-byte cache line size.
+//
+// Sources:
+// - https://www.mono-project.com/news/2016/09/12/arm64-icache/
+//
+// powerpc64 has 128-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/powerpc/include/asm/cache.h#L26
+#[cfg_attr(
+    any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64"),
+    repr(align(128))
+)]
+// arm, mips, mips64, sparc, and hexagon have 32-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12
+#[cfg_attr(
+    any(
+        target_arch = "arm",
+        target_arch = "mips",
+        target_arch = "mips32r6",
+        target_arch = "mips64",
+        target_arch = "mips64r6",
+        target_arch = "sparc",
+        target_arch = "hexagon",
+    ),
+    repr(align(32))
+)]
+// m68k has 16-byte cache line size.
+//
+// Sources:
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/m68k/include/asm/cache.h#L9
+#[cfg_attr(target_arch = "m68k", repr(align(16)))]
+// s390x has 256-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13
+#[cfg_attr(target_arch = "s390x", repr(align(256)))]
+// x86, wasm, riscv, and sparc64 have 64-byte cache line size.
+//
+// Sources:
+// - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9
+// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/riscv/include/asm/cache.h#L10
+// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19
+//
+// All others are assumed to have 64-byte cache line size; this list must mirror the arches named above.
+#[cfg_attr(
+    not(any(
+        target_arch = "x86_64",
+        target_arch = "aarch64",
+        target_arch = "powerpc64",
+        target_arch = "arm",
+        target_arch = "mips",
+        target_arch = "mips32r6",
+        target_arch = "mips64",
+        target_arch = "mips64r6",
+        target_arch = "sparc",
+        target_arch = "hexagon",
+        target_arch = "m68k",
+        target_arch = "s390x",
+    )),
+    repr(align(64))
+)]
+pub(crate) struct CachePadded<T> {
+    value: T, // the wrapped value; the alignment attributes above apply to the whole struct
+}
+
+impl<T> CachePadded<T> {
+    #[inline]
+    pub(crate) const fn new(value: T) -> Self { // wraps `value`; `const` so it can be used in const contexts
+        Self { value }
+    }
+}
+
+impl<T> ops::Deref for CachePadded<T> { // shared access to the wrapped value; no `DerefMut` in this span
+    type Target = T;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.value // borrow the inner value
+    }
+}
+
+// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/backoff.rs.
+// Adjusted to reduce spinning.
+/// Performs exponential backoff in spin loops.
+pub(crate) struct Backoff {
+    step: u32, // number of `snooze` calls so far, up to `SPIN_LIMIT + 1`; spin count is `1 << step`
+}
+
+// https://github.com/oneapi-src/oneTBB/blob/v2021.5.0/include/oneapi/tbb/detail/_utils.h#L46-L48
+const SPIN_LIMIT: u32 = 4; // largest `step` for which `snooze` still increments `step` afterwards
+
+impl Backoff {
+    #[inline]
+    pub(crate) const fn new() -> Self { // starts at step 0, i.e. a single spin hint on the first `snooze`
+        Self { step: 0 }
+    }
+
+    #[inline]
+    pub(crate) fn snooze(&mut self) { // waits a little; each call up to the limit waits twice as long
+        if self.step <= SPIN_LIMIT { // ramp-up phase
+            for _ in 0..1 << self.step { // `1 << step` spin hints
+                #[allow(deprecated)] // `spin_loop_hint` is the deprecated name; NOTE(review): presumably kept for MSRV
+                core::sync::atomic::spin_loop_hint();
+            }
+            self.step += 1; // stops growing once it reaches `SPIN_LIMIT + 1`
+        } else { // saturated phase: `step` is no longer incremented
+            #[cfg(not(feature = "std"))] // no `std`: keep spinning at the capped count
+            for _ in 0..1 << self.step {
+                #[allow(deprecated)]
+                core::sync::atomic::spin_loop_hint();
+            }
+
+            #[cfg(feature = "std")] // with `std`: yield the thread instead of spinning
+            std::thread::yield_now();
+        }
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/float.rs b/vendor/portable-atomic/src/imp/float.rs
new file mode 100644
index 0000000..965f983
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/float.rs
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// AtomicF{32,64} implementation based on AtomicU{32,64}.
+//
+// This module provides atomic float implementations using atomic integer.
+//
+// Note that most of `fetch_*` operations of atomic floats are implemented using
+// CAS loops, which can be slower than equivalent operations of atomic integers.
+//
+// GPU targets have atomic instructions for float, so GPU targets will use
+// architecture-specific implementations instead of this implementation in the
+// future: https://github.com/taiki-e/portable-atomic/issues/34
+
+#![cfg_attr(
+    all(target_pointer_width = "16", not(feature = "fallback")),
+    allow(unused_imports, unused_macros)
+)]
+
+use core::{cell::UnsafeCell, sync::atomic::Ordering};
+
+macro_rules! atomic_float { // defines `$atomic_type`, a float atomic whose operations go through `$atomic_int_type`
+    (
+        $atomic_type:ident, $float_type:ident, $atomic_int_type:ident, $int_type:ident,
+        $align:literal
+    ) => {
+        #[repr(C, align($align))]
+        pub(crate) struct $atomic_type {
+            v: UnsafeCell<$float_type>,
+        }
+
+        // Send is implicitly implemented.
+        // SAFETY: any data races are prevented by atomic operations.
+        unsafe impl Sync for $atomic_type {}
+
+        impl $atomic_type {
+            #[inline]
+            pub(crate) const fn new(v: $float_type) -> Self {
+                Self { v: UnsafeCell::new(v) }
+            }
+
+            #[inline]
+            pub(crate) fn is_lock_free() -> bool { // same answer as the backing integer atomic
+                crate::$atomic_int_type::is_lock_free()
+            }
+            #[inline]
+            pub(crate) const fn is_always_lock_free() -> bool { // same answer as the backing integer atomic
+                crate::$atomic_int_type::is_always_lock_free()
+            }
+
+            #[inline]
+            pub(crate) fn get_mut(&mut self) -> &mut $float_type {
+                // SAFETY: the mutable reference guarantees unique ownership.
+                // (UnsafeCell::get_mut requires Rust 1.50)
+                unsafe { &mut *self.v.get() }
+            }
+
+            #[inline]
+            pub(crate) fn into_inner(self) -> $float_type {
+                self.v.into_inner()
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn load(&self, order: Ordering) -> $float_type {
+                $float_type::from_bits(self.as_bits().load(order)) // integer load, then reinterpret the bits
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn store(&self, val: $float_type, order: Ordering) {
+                self.as_bits().store(val.to_bits(), order) // store the float's bit pattern as an integer
+            }
+
+            const_fn! {
+                const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))];
+                #[inline]
+                pub(crate) const fn as_bits(&self) -> &crate::$atomic_int_type {
+                    // SAFETY: $atomic_type and $atomic_int_type have the same layout,
+                    // and there is no concurrent access to the value that does not go through this method.
+                    unsafe { &*(self as *const Self as *const crate::$atomic_int_type) }
+                }
+            }
+
+            #[inline]
+            pub(crate) const fn as_ptr(&self) -> *mut $float_type {
+                self.v.get()
+            }
+        }
+
+        cfg_has_atomic_cas! {
+        impl $atomic_type {
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn swap(&self, val: $float_type, order: Ordering) -> $float_type {
+                $float_type::from_bits(self.as_bits().swap(val.to_bits(), order))
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn compare_exchange(
+                &self,
+                current: $float_type,
+                new: $float_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$float_type, $float_type> {
+                match self.as_bits().compare_exchange( // the comparison is on bit patterns (`to_bits`)
+                    current.to_bits(),
+                    new.to_bits(),
+                    success,
+                    failure,
+                ) {
+                    Ok(v) => Ok($float_type::from_bits(v)),
+                    Err(v) => Err($float_type::from_bits(v)),
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn compare_exchange_weak(
+                &self,
+                current: $float_type,
+                new: $float_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$float_type, $float_type> {
+                match self.as_bits().compare_exchange_weak( // the comparison is on bit patterns (`to_bits`)
+                    current.to_bits(),
+                    new.to_bits(),
+                    success,
+                    failure,
+                ) {
+                    Ok(v) => Ok($float_type::from_bits(v)),
+                    Err(v) => Err($float_type::from_bits(v)),
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_add(&self, val: $float_type, order: Ordering) -> $float_type {
+                self.fetch_update_(order, |x| x + val) // implemented as a CAS loop
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_sub(&self, val: $float_type, order: Ordering) -> $float_type {
+                self.fetch_update_(order, |x| x - val) // implemented as a CAS loop
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            fn fetch_update_<F>(&self, order: Ordering, mut f: F) -> $float_type
+            where
+                F: FnMut($float_type) -> $float_type,
+            {
+                // This is a private function and all instances of `f` only operate on the value
+                // loaded, so there is no need to synchronize the first load/failed CAS.
+                let mut prev = self.load(Ordering::Relaxed);
+                loop {
+                    let next = f(prev);
+                    match self.compare_exchange_weak(prev, next, order, Ordering::Relaxed) {
+                        Ok(x) => return x, // returns the previous value, like other `fetch_*` operations
+                        Err(next_prev) => prev = next_prev, // lost the race (or spurious failure): retry with the observed value
+                    }
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_max(&self, val: $float_type, order: Ordering) -> $float_type {
+                self.fetch_update_(order, |x| x.max(val)) // implemented as a CAS loop
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_min(&self, val: $float_type, order: Ordering) -> $float_type {
+                self.fetch_update_(order, |x| x.min(val)) // implemented as a CAS loop
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_neg(&self, order: Ordering) -> $float_type {
+                const NEG_MASK: $int_type = !0 / 2 + 1; // only the most significant (sign) bit set
+                $float_type::from_bits(self.as_bits().fetch_xor(NEG_MASK, order)) // single integer RMW: flip the sign bit
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_abs(&self, order: Ordering) -> $float_type {
+                const ABS_MASK: $int_type = !0 / 2; // every bit except the most significant (sign) bit
+                $float_type::from_bits(self.as_bits().fetch_and(ABS_MASK, order)) // single integer RMW: clear the sign bit
+            }
+        }
+        } // cfg_has_atomic_cas!
+    };
+}
+
+cfg_has_atomic_32! { // `AtomicF32` is defined only under this cfg macro
+    atomic_float!(AtomicF32, f32, AtomicU32, u32, 4); // last argument feeds `repr(align(..))`
+}
+cfg_has_atomic_64! { // `AtomicF64` is defined only under this cfg macro
+    atomic_float!(AtomicF64, f64, AtomicU64, u64, 8); // last argument feeds `repr(align(..))`
+}
diff --git a/vendor/portable-atomic/src/imp/interrupt/README.md b/vendor/portable-atomic/src/imp/interrupt/README.md
new file mode 100644
index 0000000..edc5fbf
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/interrupt/README.md
@@ -0,0 +1,27 @@
+# Implementation of disabling interrupts
+
+This module is used to provide atomic CAS for targets where atomic CAS is not available in the standard library.
+
+- MSP430 and AVR are always single-core, so this module is always used on those targets.
+- ARMv6-M (thumbv6m), pre-v6 ARM (e.g., thumbv4t, thumbv5te), RISC-V without A-extension, and Xtensa could be multi-core, so on those targets this module is used when the `unsafe-assume-single-core` feature is enabled.
+
+The implementation uses privileged instructions to disable interrupts, so it usually doesn't work in unprivileged mode.
+Enabling this feature in an environment where privileged instructions are not available, or where the instructions used are not sufficient to disable interrupts in the system, is also usually considered **unsound**, although the details are system-dependent.
+
+Consider using the [`critical-section` feature](../../../README.md#optional-features-critical-section) for systems that cannot use the `unsafe-assume-single-core` feature.
+
+For some targets, the implementation can be changed by explicitly enabling features.
+
+- On ARMv6-M, this disables interrupts by modifying the PRIMASK register.
+- On pre-v6 ARM, this disables interrupts by modifying the I (IRQ mask) bit of the CPSR.
+- On pre-v6 ARM with the `disable-fiq` feature, this disables interrupts by modifying the I (IRQ mask) bit and F (FIQ mask) bit of the CPSR.
+- On RISC-V (without A-extension), this disables interrupts by modifying the MIE (Machine Interrupt Enable) bit of the `mstatus` register.
+- On RISC-V (without A-extension) with the `s-mode` feature, this disables interrupts by modifying the SIE (Supervisor Interrupt Enable) bit of the `sstatus` register.
+- On RISC-V (without A-extension) with the `force-amo` feature, this uses AMO instructions for RMWs that have corresponding AMO instructions even if A-extension is disabled. For other RMWs, this disables interrupts as usual.
+- On MSP430, this disables interrupts by modifying the GIE (Global Interrupt Enable) bit of the status register (SR).
+- On AVR, this disables interrupts by modifying the I (Global Interrupt Enable) bit of the status register (SREG).
+- On Xtensa, this disables interrupts by modifying the PS special register.
+
+Some operations don't require disabling interrupts (loads and stores on targets except for AVR, but additionally on MSP430 {8,16}-bit `add,sub,and,or,xor,not`, on RISC-V with the `force-amo` feature 32-bit(RV32)/{32,64}-bit(RV64) `swap,fetch_{add,sub,and,or,xor,not,max,min},add,sub,and,or,xor,not` and {8,16}-bit `fetch_{and,or,xor,not},and,or,xor,not`). However, when the `critical-section` feature is enabled, critical sections are taken for all atomic operations.
+
+Feel free to submit an issue if your target is not supported yet.
diff --git a/vendor/portable-atomic/src/imp/interrupt/armv4t.rs b/vendor/portable-atomic/src/imp/interrupt/armv4t.rs
new file mode 100644
index 0000000..20f7089
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/interrupt/armv4t.rs
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Refs: https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/The-System-Level-Programmers--Model/ARM-processor-modes-and-ARM-core-registers/Program-Status-Registers--PSRs-?lang=en
+//
+// Generated asm:
+// - armv5te https://godbolt.org/z/Teh7WajMs
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+
+// - 0x80 - I (IRQ mask) bit (1 << 7)
+// - 0x40 - F (FIQ mask) bit (1 << 6)
+// We disable only IRQs by default. See also https://github.com/taiki-e/portable-atomic/pull/28#issuecomment-1214146912.
+#[cfg(not(portable_atomic_disable_fiq))]
+macro_rules! mask {
+    () => {
+        "0x80" // I bit only: FIQs are left untouched
+    };
+}
+#[cfg(portable_atomic_disable_fiq)]
+macro_rules! mask {
+    () => {
+        "0xC0" // 0x80 | 0x40: both the I (IRQ) and F (FIQ) mask bits
+    };
+}
+
+pub(super) type State = u32;
+
+/// Disables interrupts and returns the previous interrupt state (the CPSR value read before masking).
+#[inline]
+#[instruction_set(arm::a32)]
+pub(super) fn disable() -> State {
+    let cpsr: State;
+    // SAFETY: reading CPSR and disabling interrupts are safe.
+    // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
+    unsafe {
+        asm!(
+            "mrs {prev}, cpsr", // read the current CPSR
+            concat!("orr {new}, {prev}, ", mask!()), // set the mask bit(s) selected by `mask!`
+            "msr cpsr_c, {new}", // write back the control field only
+            prev = out(reg) cpsr,
+            new = out(reg) _,
+            // Do not use `nomem` or `readonly`: omitting them prevents subsequent memory accesses from being reordered before interrupts are disabled.
+            options(nostack, preserves_flags),
+        );
+    }
+    cpsr
+}
+
+/// Restores the previous interrupt state.
+///
+/// # Safety
+///
+/// The state must be the one retrieved by the previous `disable`.
+#[inline]
+#[instruction_set(arm::a32)]
+pub(super) unsafe fn restore(cpsr: State) {
+    // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`.
+    //
+    // This clobbers the control field mask byte of CPSR. See msp430.rs for the safety of this.
+    // (preserves_flags is fine because we only clobber the I, F, T, and M bits of CPSR.)
+    //
+    // Refs: https://developer.arm.com/documentation/dui0473/m/arm-and-thumb-instructions/msr--general-purpose-register-to-psr-
+    unsafe {
+        // Do not use `nomem` or `readonly`: omitting them prevents preceding memory accesses from being reordered after interrupts are enabled.
+        asm!("msr cpsr_c, {0}", in(reg) cpsr, options(nostack, preserves_flags));
+    }
+}
+
+// On pre-v6 ARM, we cannot use core::sync::atomic here because they call the
+// `__sync_*` builtins for non-relaxed load/store (because pre-v6 ARM doesn't
+// have Data Memory Barrier).
+//
+// Generated asm:
+// - armv5te https://godbolt.org/z/bMxK7M8Ta
+pub(crate) mod atomic { // load/store-only atomic types implemented with inline asm
+    #[cfg(not(portable_atomic_no_asm))]
+    use core::arch::asm;
+    use core::{cell::UnsafeCell, sync::atomic::Ordering};
+
+    macro_rules! atomic {
+        ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
+            #[repr(transparent)]
+            pub(crate) struct $atomic_type $(<$($generics)*>)? {
+                v: UnsafeCell<$value_type>,
+            }
+
+            // Send is implicitly implemented for atomic integers, but not for atomic pointers.
+            // SAFETY: any data races are prevented by atomic operations.
+            unsafe impl $(<$($generics)*>)? Send for $atomic_type $(<$($generics)*>)? {}
+            // SAFETY: any data races are prevented by atomic operations.
+            unsafe impl $(<$($generics)*>)? Sync for $atomic_type $(<$($generics)*>)? {}
+
+            impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
+                #[inline]
+                pub(crate) fn load(&self, order: Ordering) -> $value_type {
+                    let src = self.v.get();
+                    // SAFETY: any data races are prevented by the inline-asm atomic load and the raw
+                    // pointer passed in is valid because we got it from a reference.
+                    unsafe {
+                        let out;
+                        match order {
+                            Ordering::Relaxed => {
+                                asm!(
+                                    concat!("ldr", $asm_suffix, " {out}, [{src}]"),
+                                    src = in(reg) src,
+                                    out = lateout(reg) out,
+                                    options(nostack, preserves_flags, readonly),
+                                );
+                            }
+                            Ordering::Acquire | Ordering::SeqCst => {
+                                // Inline asm without nomem/readonly implies a compiler fence.
+                                // And a compiler fence is fine because the user explicitly declares that
+                                // the system is single-core by using an unsafe cfg.
+                                asm!(
+                                    concat!("ldr", $asm_suffix, " {out}, [{src}]"),
+                                    src = in(reg) src,
+                                    out = lateout(reg) out,
+                                    options(nostack, preserves_flags),
+                                );
+                            }
+                            _ => unreachable!("{:?}", order), // Release/AcqRel are not valid orderings for a load
+                        }
+                        out
+                    }
+                }
+
+                #[inline]
+                pub(crate) fn store(&self, val: $value_type, _order: Ordering) { // every ordering uses the same fenced store
+                    let dst = self.v.get();
+                    // SAFETY: any data races are prevented by the inline-asm atomic store and the raw
+                    // pointer passed in is valid because we got it from a reference.
+                    unsafe {
+                        // Inline asm without nomem/readonly implies a compiler fence.
+                        // And a compiler fence is fine because the user explicitly declares that
+                        // the system is single-core by using an unsafe cfg.
+                        asm!(
+                            concat!("str", $asm_suffix, " {val}, [{dst}]"),
+                            dst = in(reg) dst,
+                            val = in(reg) val,
+                            options(nostack, preserves_flags),
+                        );
+                    }
+                }
+            }
+        };
+    }
+
+    atomic!(AtomicI8, i8, "b"); // ldrb / strb
+    atomic!(AtomicU8, u8, "b"); // ldrb / strb
+    atomic!(AtomicI16, i16, "h"); // ldrh / strh
+    atomic!(AtomicU16, u16, "h"); // ldrh / strh
+    atomic!(AtomicI32, i32, ""); // ldr / str
+    atomic!(AtomicU32, u32, ""); // ldr / str
+    atomic!(AtomicIsize, isize, ""); // ldr / str
+    atomic!(AtomicUsize, usize, ""); // ldr / str
+    atomic!([T] AtomicPtr, *mut T, ""); // ldr / str
+}
diff --git a/vendor/portable-atomic/src/imp/interrupt/armv6m.rs b/vendor/portable-atomic/src/imp/interrupt/armv6m.rs
new file mode 100644
index 0000000..85037a3
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/interrupt/armv6m.rs
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Adapted from https://github.com/rust-embedded/cortex-m.
+//
+// Generated asm:
+// - armv6-m https://godbolt.org/z/YxME38xcM
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+
+pub(super) use core::sync::atomic;
+
+pub(super) type State = u32;
+
+/// Disables interrupts and returns the previous interrupt state (the PRIMASK value read before masking).
+#[inline]
+pub(super) fn disable() -> State {
+    let r: State;
+    // SAFETY: reading the priority mask register and disabling interrupts are safe.
+    // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
+    unsafe {
+        // Do not use `nomem` or `readonly`: omitting them prevents subsequent memory accesses from being reordered before interrupts are disabled.
+        asm!(
+            "mrs {0}, PRIMASK", // read the current PRIMASK value
+            "cpsid i", // then disable interrupts
+            out(reg) r,
+            options(nostack, preserves_flags),
+        );
+    }
+    r
+}
+
+/// Restores the previous interrupt state.
+///
+/// # Safety
+///
+/// The state must be the one retrieved by the previous `disable`.
+#[inline]
+pub(super) unsafe fn restore(r: State) {
+    if r & 0x1 == 0 { // bit 0 of the saved PRIMASK clear: interrupts were enabled before `disable`
+        // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
+        // and we've checked that interrupts were enabled before disabling interrupts.
+        unsafe {
+            // Do not use `nomem` or `readonly`: omitting them prevents preceding memory accesses from being reordered after interrupts are enabled.
+            asm!("cpsie i", options(nostack, preserves_flags));
+        }
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/interrupt/avr.rs b/vendor/portable-atomic/src/imp/interrupt/avr.rs
new file mode 100644
index 0000000..76d99c1
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/interrupt/avr.rs
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Adapted from https://github.com/Rahix/avr-device.
+//
+// Refs:
+// - AVR Instruction Set Manual https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+
+pub(super) type State = u8;
+
+/// Disables interrupts and returns the previous interrupt state (the SREG value read before masking).
+#[inline]
+pub(super) fn disable() -> State {
+    let sreg: State;
+    // SAFETY: reading the status register (SREG) and disabling interrupts are safe.
+    // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
+    unsafe {
+        // Do not use `nomem` or `readonly`: omitting them prevents subsequent memory accesses from being reordered before interrupts are disabled.
+        // Do not use `preserves_flags` because CLI modifies the I bit of the status register (SREG).
+        // Refs: https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf#page=58
+        #[cfg(not(portable_atomic_no_asm))]
+        asm!(
+            "in {0}, 0x3F", // read the status register (SREG)
+            "cli", // then disable interrupts
+            out(reg) sreg,
+            options(nostack),
+        );
+        #[cfg(portable_atomic_no_asm)]
+        {
+            llvm_asm!("in $0, 0x3F" : "=r"(sreg) ::: "volatile");
+            llvm_asm!("cli" ::: "memory" : "volatile");
+        }
+    }
+    sreg
+}
+
+/// Restores the previous interrupt state.
+///
+/// # Safety
+///
+/// The state must be the one retrieved by the previous `disable`.
+#[inline]
+pub(super) unsafe fn restore(sreg: State) {
+    // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`.
+    unsafe {
+        // This clobbers the entire status register. See msp430.rs for the safety of this.
+        //
+        // Do not use `nomem` or `readonly`: omitting them prevents preceding memory accesses from being reordered after interrupts are enabled.
+        // Do not use `preserves_flags` because OUT modifies the status register (SREG).
+        #[cfg(not(portable_atomic_no_asm))]
+        asm!("out 0x3F, {0}", in(reg) sreg, options(nostack));
+        #[cfg(portable_atomic_no_asm)]
+        llvm_asm!("out 0x3F, $0" :: "r"(sreg) : "memory" : "volatile");
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/interrupt/mod.rs b/vendor/portable-atomic/src/imp/interrupt/mod.rs
new file mode 100644
index 0000000..e0ed0f6
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/interrupt/mod.rs
@@ -0,0 +1,903 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Critical section based fallback implementations
+//
+// This module supports two different critical section implementations:
+// - Built-in "disable all interrupts".
+// - Call into the `critical-section` crate (which allows the user to plug any implementation).
+//
+// The `critical-section`-based fallback is enabled when the user asks for it with the `critical-section`
+// Cargo feature.
+//
+// The "disable interrupts" fallback is not sound on multi-core systems.
+// Also, this uses privileged instructions to disable interrupts, so it usually
+// doesn't work in unprivileged mode. Using this fallback in an environment where privileged
+// instructions are not available is also usually considered **unsound**,
+// although the details are system-dependent.
+//
+// Therefore, this implementation will only be enabled in one of the following cases:
+//
+// - When the user explicitly declares that the system is single-core and that
+//   privileged instructions are available using an unsafe cfg.
+// - When we can safely assume that the system is single-core and that
+//   privileged instructions are available on the system.
+//
+// AVR is considered the latter: it is single-core[^avr1], and LLVM also generates
+// code that disables interrupts[^avr2] in atomic ops by default.
+// The same applies to MSP430.
+//
+// See also README.md of this directory.
+//
+// [^avr1]: https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#L1074
+// [^avr2]: https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/AVR/atomics/load16.ll#L5
+
+// On some platforms, atomic load/store can be implemented in a more efficient
+// way than disabling interrupts. On MSP430, some RMWs that do not return the
+// previous value can also be optimized.
+//
+// Note: On single-core systems, it is okay to use critical section-based
+// CAS together with atomic load/store. The load/store will not be
+// called while interrupts are disabled, and since the load/store is
+// atomic, it is not affected by interrupts even if interrupts are enabled.
+#[cfg(not(any(target_arch = "avr", feature = "critical-section")))]
+use arch::atomic;
+
+#[cfg(not(feature = "critical-section"))]
+#[cfg_attr(
+    all(
+        target_arch = "arm",
+        any(target_feature = "mclass", portable_atomic_target_feature = "mclass"),
+    ),
+    path = "armv6m.rs"
+)]
+#[cfg_attr(
+    all(
+        target_arch = "arm",
+        not(any(target_feature = "mclass", portable_atomic_target_feature = "mclass")),
+    ),
+    path = "armv4t.rs"
+)]
+#[cfg_attr(target_arch = "avr", path = "avr.rs")]
+#[cfg_attr(target_arch = "msp430", path = "msp430.rs")]
+#[cfg_attr(any(target_arch = "riscv32", target_arch = "riscv64"), path = "riscv.rs")]
+#[cfg_attr(target_arch = "xtensa", path = "xtensa.rs")]
+mod arch;
+
+use core::{cell::UnsafeCell, sync::atomic::Ordering};
+
+// Critical section implementations might use locks internally.
+#[cfg(feature = "critical-section")]
+const IS_ALWAYS_LOCK_FREE: bool = false;
+
+// We consider atomic operations based on disabling interrupts on single-core
+// systems to be lock-free. (We likewise consider the pre-v6 ARM Linux's atomic operations
+// provided in a similar way by the Linux kernel to be lock-free.)
+#[cfg(not(feature = "critical-section"))]
+const IS_ALWAYS_LOCK_FREE: bool = true;
+
+#[cfg(feature = "critical-section")]
+#[inline]
+fn with<F, R>(f: F) -> R
+where
+    F: FnOnce() -> R,
+{
+    critical_section::with(|_| f()) // the argument supplied by `critical_section::with` is ignored
+}
+
+#[cfg(not(feature = "critical-section"))]
+#[inline]
+fn with<F, R>(f: F) -> R
+where
+    F: FnOnce() -> R,
+{
+    // Save the current interrupt state and disable interrupts.
+    let state = arch::disable();
+
+    let r = f(); // runs between `disable` and `restore`
+
+    // Restore the saved interrupt state.
+    // SAFETY: the state was retrieved by the previous `disable`.
+    unsafe { arch::restore(state) }
+
+    r
+}
+
+#[cfg_attr(target_pointer_width = "16", repr(C, align(2)))]
+#[cfg_attr(target_pointer_width = "32", repr(C, align(4)))]
+#[cfg_attr(target_pointer_width = "64", repr(C, align(8)))]
+#[cfg_attr(target_pointer_width = "128", repr(C, align(16)))]
+pub(crate) struct AtomicPtr<T> { // alignment follows the pointer width selected above
+    p: UnsafeCell<*mut T>,
+}
+
+// SAFETY: any data races are prevented by disabling interrupts or
+// atomic intrinsics (see module-level comments).
+unsafe impl<T> Send for AtomicPtr<T> {}
+// SAFETY: any data races are prevented by disabling interrupts or
+// atomic intrinsics (see module-level comments).
+unsafe impl<T> Sync for AtomicPtr<T> {}
+
+impl<T> AtomicPtr<T> {
+    #[inline]
+    pub(crate) const fn new(p: *mut T) -> Self {
+        Self { p: UnsafeCell::new(p) }
+    }
+
+    #[inline]
+    pub(crate) fn is_lock_free() -> bool {
+        Self::is_always_lock_free()
+    }
+    #[inline]
+    pub(crate) const fn is_always_lock_free() -> bool {
+        IS_ALWAYS_LOCK_FREE
+    }
+
+    #[inline]
+    pub(crate) fn get_mut(&mut self) -> &mut *mut T {
+        // SAFETY: the mutable reference guarantees unique ownership.
+        // (UnsafeCell::get_mut requires Rust 1.50)
+        unsafe { &mut *self.p.get() }
+    }
+
+    #[inline]
+    pub(crate) fn into_inner(self) -> *mut T {
+        self.p.into_inner()
+    }
+
+    #[inline]
+    #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+    pub(crate) fn load(&self, order: Ordering) -> *mut T {
+        crate::utils::assert_load_ordering(order);
+        #[cfg(not(any(target_arch = "avr", feature = "critical-section")))]
+        {
+            self.as_native().load(order)
+        }
+        #[cfg(any(target_arch = "avr", feature = "critical-section"))]
+        // SAFETY: any data races are prevented by disabling interrupts (see
+        // module-level comments) and the raw pointer is valid because we got it
+        // from a reference.
+        with(|| unsafe { self.p.get().read() })
+    }
+
+    #[inline]
+    #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+    pub(crate) fn store(&self, ptr: *mut T, order: Ordering) {
+        crate::utils::assert_store_ordering(order);
+        #[cfg(not(any(target_arch = "avr", feature = "critical-section")))]
+        {
+            self.as_native().store(ptr, order);
+        }
+        #[cfg(any(target_arch = "avr", feature = "critical-section"))]
+        // SAFETY: any data races are prevented by disabling interrupts (see
+        // module-level comments) and the raw pointer is valid because we got it
+        // from a reference.
+        with(|| unsafe { self.p.get().write(ptr) });
+    }
+
+    #[inline]
+    pub(crate) fn swap(&self, ptr: *mut T, order: Ordering) -> *mut T {
+        let _ = order; // `order` is only consumed by the `portable_atomic_force_amo` path below
+        #[cfg(portable_atomic_force_amo)]
+        {
+            self.as_native().swap(ptr, order)
+        }
+        #[cfg(not(portable_atomic_force_amo))]
+        // SAFETY: any data races are prevented by disabling interrupts (see
+        // module-level comments) and the raw pointer is valid because we got it
+        // from a reference.
+        with(|| unsafe {
+            let prev = self.p.get().read();
+            self.p.get().write(ptr);
+            prev
+        })
+    }
+
+    #[inline]
+    #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+    pub(crate) fn compare_exchange(
+        &self,
+        current: *mut T,
+        new: *mut T,
+        success: Ordering,
+        failure: Ordering,
+    ) -> Result<*mut T, *mut T> {
+        crate::utils::assert_compare_exchange_ordering(success, failure);
+        // SAFETY: any data races are prevented by disabling interrupts (see
+        // module-level comments) and the raw pointer is valid because we got it
+        // from a reference.
+        with(|| unsafe {
+            let prev = self.p.get().read();
+            if prev == current {
+                self.p.get().write(new);
+                Ok(prev)
+            } else {
+                Err(prev)
+            }
+        })
+    }
+
+    #[inline]
+    #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+    pub(crate) fn compare_exchange_weak(
+        &self,
+        current: *mut T,
+        new: *mut T,
+        success: Ordering,
+        failure: Ordering,
+    ) -> Result<*mut T, *mut T> {
+        self.compare_exchange(current, new, success, failure) // delegates to the strong version
+    }
+
+    #[inline]
+    pub(crate) const fn as_ptr(&self) -> *mut *mut T {
+        self.p.get()
+    }
+
+    #[cfg(not(any(target_arch = "avr", feature = "critical-section")))]
+    #[inline]
+    fn as_native(&self) -> &atomic::AtomicPtr<T> { // reinterprets `self` as the arch-provided atomic type
+        // SAFETY: AtomicPtr and atomic::AtomicPtr have the same layout and
+        // guarantee atomicity in a compatible way. (see module-level comments)
+        unsafe { &*(self as *const Self as *const atomic::AtomicPtr<T>) }
+    }
+}
+
+macro_rules! atomic_int {
+    (base, $atomic_type:ident, $int_type:ident, $align:literal) => {
+        #[repr(C, align($align))]
+        pub(crate) struct $atomic_type {
+            v: UnsafeCell<$int_type>,
+        }
+
+        // Send is implicitly implemented.
+        // SAFETY: any data races are prevented by disabling interrupts or
+        // atomic intrinsics (see module-level comments).
+        unsafe impl Sync for $atomic_type {}
+
+        impl $atomic_type {
+            #[inline]
+            pub(crate) const fn new(v: $int_type) -> Self {
+                Self { v: UnsafeCell::new(v) }
+            }
+
+            #[inline]
+            pub(crate) fn is_lock_free() -> bool {
+                Self::is_always_lock_free()
+            }
+            #[inline]
+            pub(crate) const fn is_always_lock_free() -> bool {
+                IS_ALWAYS_LOCK_FREE
+            }
+
+            #[inline]
+            pub(crate) fn get_mut(&mut self) -> &mut $int_type {
+                // SAFETY: the mutable reference guarantees unique ownership.
+                // (UnsafeCell::get_mut requires Rust 1.50)
+                unsafe { &mut *self.v.get() }
+            }
+
+            #[inline]
+            pub(crate) fn into_inner(self) -> $int_type {
+                self.v.into_inner()
+            }
+
+            #[inline]
+            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
+                self.v.get()
+            }
+        }
+    };
+    (load_store_atomic $([$kind:ident])?, $atomic_type:ident, $int_type:ident, $align:literal) => {
+        atomic_int!(base, $atomic_type, $int_type, $align);
+        #[cfg(not(portable_atomic_force_amo))]
+        atomic_int!(cas[emulate], $atomic_type, $int_type);
+        #[cfg(portable_atomic_force_amo)]
+        atomic_int!(cas $([$kind])?, $atomic_type, $int_type);
+        impl $atomic_type {
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn load(&self, order: Ordering) -> $int_type {
+                crate::utils::assert_load_ordering(order);
+                #[cfg(not(any(target_arch = "avr", feature = "critical-section")))]
+                {
+                    self.as_native().load(order)
+                }
+                #[cfg(any(target_arch = "avr", feature = "critical-section"))]
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe { self.v.get().read() })
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
+                crate::utils::assert_store_ordering(order);
+                #[cfg(not(any(target_arch = "avr", feature = "critical-section")))]
+                {
+                    self.as_native().store(val, order);
+                }
+                #[cfg(any(target_arch = "avr", feature = "critical-section"))]
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe { self.v.get().write(val) });
+            }
+
+            #[cfg(not(any(target_arch = "avr", feature = "critical-section")))]
+            #[inline]
+            fn as_native(&self) -> &atomic::$atomic_type {
+                // SAFETY: $atomic_type and atomic::$atomic_type have the same layout and
+                // guarantee atomicity in a compatible way. (see module-level comments)
+                unsafe { &*(self as *const Self as *const atomic::$atomic_type) }
+            }
+        }
+
+        #[cfg(not(all(target_arch = "msp430", not(feature = "critical-section"))))]
+        impl_default_no_fetch_ops!($atomic_type, $int_type);
+        impl_default_bit_opts!($atomic_type, $int_type);
+        #[cfg(not(all(target_arch = "msp430", not(feature = "critical-section"))))]
+        impl $atomic_type {
+            #[inline]
+            pub(crate) fn not(&self, order: Ordering) {
+                self.fetch_not(order);
+            }
+        }
+        #[cfg(all(target_arch = "msp430", not(feature = "critical-section")))]
+        impl $atomic_type {
+            #[inline]
+            pub(crate) fn add(&self, val: $int_type, order: Ordering) {
+                self.as_native().add(val, order);
+            }
+            #[inline]
+            pub(crate) fn sub(&self, val: $int_type, order: Ordering) {
+                self.as_native().sub(val, order);
+            }
+            #[inline]
+            pub(crate) fn and(&self, val: $int_type, order: Ordering) {
+                self.as_native().and(val, order);
+            }
+            #[inline]
+            pub(crate) fn or(&self, val: $int_type, order: Ordering) {
+                self.as_native().or(val, order);
+            }
+            #[inline]
+            pub(crate) fn xor(&self, val: $int_type, order: Ordering) {
+                self.as_native().xor(val, order);
+            }
+            #[inline]
+            pub(crate) fn not(&self, order: Ordering) {
+                self.as_native().not(order);
+            }
+        }
+    };
+    (load_store_critical_session, $atomic_type:ident, $int_type:ident, $align:literal) => {
+        atomic_int!(base, $atomic_type, $int_type, $align);
+        atomic_int!(cas[emulate], $atomic_type, $int_type);
+        impl_default_no_fetch_ops!($atomic_type, $int_type);
+        impl_default_bit_opts!($atomic_type, $int_type);
+        impl $atomic_type {
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn load(&self, order: Ordering) -> $int_type {
+                crate::utils::assert_load_ordering(order);
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe { self.v.get().read() })
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
+                crate::utils::assert_store_ordering(order);
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe { self.v.get().write(val) });
+            }
+
+            #[inline]
+            pub(crate) fn not(&self, order: Ordering) {
+                self.fetch_not(order);
+            }
+        }
+    };
+    (cas[emulate], $atomic_type:ident, $int_type:ident) => {
+        impl $atomic_type {
+            #[inline]
+            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(val);
+                    prev
+                })
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn compare_exchange(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure);
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    if prev == current {
+                        self.v.get().write(new);
+                        Ok(prev)
+                    } else {
+                        Err(prev)
+                    }
+                })
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn compare_exchange_weak(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                self.compare_exchange(current, new, success, failure)
+            }
+
+            #[inline]
+            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(prev.wrapping_add(val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(prev.wrapping_sub(val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(prev & val);
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(!(prev & val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(prev | val);
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(prev ^ val);
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(core::cmp::max(prev, val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(core::cmp::min(prev, val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(!prev);
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(prev.wrapping_neg());
+                    prev
+                })
+            }
+            #[inline]
+            pub(crate) fn neg(&self, order: Ordering) {
+                self.fetch_neg(order);
+            }
+        }
+    };
+    // cfg(portable_atomic_force_amo) 32-bit(RV32)/{32,64}-bit(RV64) RMW
+    (cas, $atomic_type:ident, $int_type:ident) => {
+        impl $atomic_type {
+            #[inline]
+            pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().swap(val, order)
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn compare_exchange(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure);
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    if prev == current {
+                        self.v.get().write(new);
+                        Ok(prev)
+                    } else {
+                        Err(prev)
+                    }
+                })
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn compare_exchange_weak(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                self.compare_exchange(current, new, success, failure)
+            }
+
+            #[inline]
+            pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().fetch_add(val, order)
+            }
+            #[inline]
+            pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().fetch_sub(val, order)
+            }
+            #[inline]
+            pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().fetch_and(val, order)
+            }
+
+            #[inline]
+            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(!(prev & val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().fetch_or(val, order)
+            }
+            #[inline]
+            pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().fetch_xor(val, order)
+            }
+            #[inline]
+            pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().fetch_max(val, order)
+            }
+            #[inline]
+            pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().fetch_min(val, order)
+            }
+            #[inline]
+            pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type {
+                self.as_native().fetch_not(order)
+            }
+
+            #[inline]
+            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(prev.wrapping_neg());
+                    prev
+                })
+            }
+            #[inline]
+            pub(crate) fn neg(&self, order: Ordering) {
+                self.fetch_neg(order);
+            }
+        }
+    };
+    // cfg(portable_atomic_force_amo) {8,16}-bit RMW
+    (cas[sub_word], $atomic_type:ident, $int_type:ident) => {
+        impl $atomic_type {
+            #[inline]
+            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(val);
+                    prev
+                })
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn compare_exchange(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure);
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    if prev == current {
+                        self.v.get().write(new);
+                        Ok(prev)
+                    } else {
+                        Err(prev)
+                    }
+                })
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn compare_exchange_weak(
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                self.compare_exchange(current, new, success, failure)
+            }
+
+            #[inline]
+            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(prev.wrapping_add(val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(prev.wrapping_sub(val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().fetch_and(val, order)
+            }
+
+            #[inline]
+            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(!(prev & val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().fetch_or(val, order)
+            }
+            #[inline]
+            pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type {
+                self.as_native().fetch_xor(val, order)
+            }
+
+            #[inline]
+            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(core::cmp::max(prev, val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(core::cmp::min(prev, val));
+                    prev
+                })
+            }
+
+            #[inline]
+            pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type {
+                self.as_native().fetch_not(order)
+            }
+
+            #[inline]
+            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
+                // SAFETY: any data races are prevented by disabling interrupts (see
+                // module-level comments) and the raw pointer is valid because we got it
+                // from a reference.
+                with(|| unsafe {
+                    let prev = self.v.get().read();
+                    self.v.get().write(prev.wrapping_neg());
+                    prev
+                })
+            }
+            #[inline]
+            pub(crate) fn neg(&self, order: Ordering) {
+                self.fetch_neg(order);
+            }
+        }
+    };
+}
+
+#[cfg(target_pointer_width = "16")]
+atomic_int!(load_store_atomic, AtomicIsize, isize, 2);
+#[cfg(target_pointer_width = "16")]
+atomic_int!(load_store_atomic, AtomicUsize, usize, 2);
+#[cfg(target_pointer_width = "32")]
+atomic_int!(load_store_atomic, AtomicIsize, isize, 4);
+#[cfg(target_pointer_width = "32")]
+atomic_int!(load_store_atomic, AtomicUsize, usize, 4);
+#[cfg(target_pointer_width = "64")]
+atomic_int!(load_store_atomic, AtomicIsize, isize, 8);
+#[cfg(target_pointer_width = "64")]
+atomic_int!(load_store_atomic, AtomicUsize, usize, 8);
+#[cfg(target_pointer_width = "128")]
+atomic_int!(load_store_atomic, AtomicIsize, isize, 16);
+#[cfg(target_pointer_width = "128")]
+atomic_int!(load_store_atomic, AtomicUsize, usize, 16);
+
+atomic_int!(load_store_atomic[sub_word], AtomicI8, i8, 1);
+atomic_int!(load_store_atomic[sub_word], AtomicU8, u8, 1);
+atomic_int!(load_store_atomic[sub_word], AtomicI16, i16, 2);
+atomic_int!(load_store_atomic[sub_word], AtomicU16, u16, 2);
+
+#[cfg(not(target_pointer_width = "16"))]
+atomic_int!(load_store_atomic, AtomicI32, i32, 4);
+#[cfg(not(target_pointer_width = "16"))]
+atomic_int!(load_store_atomic, AtomicU32, u32, 4);
+#[cfg(target_pointer_width = "16")]
+#[cfg(any(test, feature = "fallback"))]
+atomic_int!(load_store_critical_session, AtomicI32, i32, 4);
+#[cfg(target_pointer_width = "16")]
+#[cfg(any(test, feature = "fallback"))]
+atomic_int!(load_store_critical_session, AtomicU32, u32, 4);
+
+#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
+atomic_int!(load_store_atomic, AtomicI64, i64, 8);
+#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))]
+atomic_int!(load_store_atomic, AtomicU64, u64, 8);
+#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))]
+#[cfg(any(test, feature = "fallback"))]
+atomic_int!(load_store_critical_session, AtomicI64, i64, 8);
+#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))]
+#[cfg(any(test, feature = "fallback"))]
+atomic_int!(load_store_critical_session, AtomicU64, u64, 8);
+
+#[cfg(any(test, feature = "fallback"))]
+atomic_int!(load_store_critical_session, AtomicI128, i128, 16);
+#[cfg(any(test, feature = "fallback"))]
+atomic_int!(load_store_critical_session, AtomicU128, u128, 16);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_atomic_ptr_single_thread!();
+    test_atomic_int_single_thread!(i8);
+    test_atomic_int_single_thread!(u8);
+    test_atomic_int_single_thread!(i16);
+    test_atomic_int_single_thread!(u16);
+    test_atomic_int_single_thread!(i32);
+    test_atomic_int_single_thread!(u32);
+    test_atomic_int_single_thread!(i64);
+    test_atomic_int_single_thread!(u64);
+    test_atomic_int_single_thread!(i128);
+    test_atomic_int_single_thread!(u128);
+    test_atomic_int_single_thread!(isize);
+    test_atomic_int_single_thread!(usize);
+}
diff --git a/vendor/portable-atomic/src/imp/interrupt/msp430.rs b/vendor/portable-atomic/src/imp/interrupt/msp430.rs
new file mode 100644
index 0000000..8c1ca80
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/interrupt/msp430.rs
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Adapted from https://github.com/rust-embedded/msp430.
+//
+// See also src/imp/msp430.rs.
+//
+// Refs: https://www.ti.com/lit/ug/slau208q/slau208q.pdf
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+
+pub(super) use super::super::msp430 as atomic;
+
+pub(super) type State = u16;
+
+/// Disables interrupts and returns the previous interrupt state (the status register value read before `dint`).
+#[inline]
+pub(super) fn disable() -> State {
+    let r: State;
+    // SAFETY: reading the status register and disabling interrupts are safe.
+    // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
+    unsafe {
+        // Do not use `nomem` and `readonly` because we must prevent subsequent memory accesses from being reordered before interrupts are disabled.
+        // Do not use `preserves_flags` because DINT modifies the GIE (global interrupt enable) bit of the status register.
+        #[cfg(not(portable_atomic_no_asm))]
+        asm!(
+            "mov R2, {0}",
+            "dint {{ nop",
+            out(reg) r,
+            options(nostack),
+        );
+        #[cfg(portable_atomic_no_asm)]
+        {
+            llvm_asm!("mov R2, $0" : "=r"(r) ::: "volatile");
+            llvm_asm!("dint { nop" ::: "memory" : "volatile");
+        }
+    }
+    r
+}
+
+/// Restores the previous interrupt state by writing the saved value back to the status register.
+///
+/// # Safety
+///
+/// The state must be the one retrieved by the previous `disable`.
+#[inline]
+pub(super) unsafe fn restore(r: State) {
+    // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`.
+    unsafe {
+        // This clobbers the entire status register, but we never explicitly modify
+        // flags within a critical section, and the only flags that may be changed
+        // within a critical section are the arithmetic flags that are changed as
+        // a side effect of arithmetic operations, etc., which LLVM recognizes,
+        // so it is safe to clobber them here.
+        // See also the discussion at https://github.com/taiki-e/portable-atomic/pull/40.
+        //
+        // Do not use `nomem` and `readonly` because we must prevent preceding memory accesses from being reordered after interrupts are enabled.
+        // Do not use `preserves_flags` because MOV modifies the status register.
+        #[cfg(not(portable_atomic_no_asm))]
+        asm!("nop {{ mov {0}, R2 {{ nop", in(reg) r, options(nostack));
+        #[cfg(portable_atomic_no_asm)]
+        llvm_asm!("nop { mov $0, R2 { nop" :: "r"(r) : "memory" : "volatile");
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/interrupt/riscv.rs b/vendor/portable-atomic/src/imp/interrupt/riscv.rs
new file mode 100644
index 0000000..65b1af2
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/interrupt/riscv.rs
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Refs:
+// - https://five-embeddev.com/riscv-isa-manual/latest/machine.html#machine-status-registers-mstatus-and-mstatush
+// - https://five-embeddev.com/riscv-isa-manual/latest/supervisor.html#sstatus
+//
+// Generated asm:
+// - riscv64gc https://godbolt.org/z/osbzsT679
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+
+pub(super) use super::super::riscv as atomic;
+
+// Status register name spliced into the asm strings: "mstatus" by default, "sstatus" under cfg(portable_atomic_s_mode).
+#[cfg(not(portable_atomic_s_mode))]
+macro_rules! status {
+    () => {
+        "mstatus"
+    };
+}
+#[cfg(portable_atomic_s_mode)]
+macro_rules! status {
+    () => {
+        "sstatus"
+    };
+}
+
+// MIE (Machine Interrupt Enable) bit (1 << 3); `MASK` tests the saved state, `mask!()` is the same bit as text for the asm string.
+#[cfg(not(portable_atomic_s_mode))]
+const MASK: State = 0x8;
+#[cfg(not(portable_atomic_s_mode))]
+macro_rules! mask {
+    () => {
+        "0x8"
+    };
+}
+// SIE (Supervisor Interrupt Enable) bit (1 << 1); `MASK` and `mask!()` must stay in sync, as for the MIE variant.
+#[cfg(portable_atomic_s_mode)]
+const MASK: State = 0x2;
+#[cfg(portable_atomic_s_mode)]
+macro_rules! mask {
+    () => {
+        "0x2"
+    };
+}
+
+#[cfg(target_arch = "riscv32")]
+pub(super) type State = u32;
+#[cfg(target_arch = "riscv64")]
+pub(super) type State = u64;
+
+/// Disables interrupts and returns the previous interrupt state (the status register value before the enable bit was cleared).
+#[inline]
+pub(super) fn disable() -> State {
+    let r: State;
+    // SAFETY: reading the status register (mstatus, or sstatus in S-mode) and disabling interrupts is safe.
+    // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
+    unsafe {
+        // Do not use `nomem` and `readonly` because we must prevent subsequent memory accesses from being reordered before interrupts are disabled.
+        asm!(concat!("csrrci {0}, ", status!(), ", ", mask!()), out(reg) r, options(nostack, preserves_flags));
+    }
+    r
+}
+
+/// Restores the previous interrupt state: interrupts are re-enabled only if the saved state had the enable bit set.
+///
+/// # Safety
+///
+/// The state must be the one retrieved by the previous `disable`.
+#[inline]
+pub(super) unsafe fn restore(r: State) {
+    if r & MASK != 0 {
+        // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`,
+        // and we've checked that interrupts were enabled before disabling interrupts.
+        unsafe {
+            // Do not use `nomem` and `readonly` because we must prevent preceding memory accesses from being reordered after interrupts are enabled.
+            asm!(concat!("csrsi ", status!(), ", ", mask!()), options(nostack, preserves_flags));
+        }
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/interrupt/xtensa.rs b/vendor/portable-atomic/src/imp/interrupt/xtensa.rs
new file mode 100644
index 0000000..bc6d117
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/interrupt/xtensa.rs
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Refs:
+// - Xtensa Instruction Set Architecture (ISA) Reference Manual https://0x04.net/~mwk/doc/xtensa.pdf
+// - Linux kernel's Xtensa atomic implementation https://github.com/torvalds/linux/blob/v6.1/arch/xtensa/include/asm/atomic.h
+
+use core::arch::asm;
+
+pub(super) use core::sync::atomic;
+
+pub(super) type State = u32;
+
+/// Disables interrupts and returns the previous interrupt state (the value produced by `rsil`).
+#[inline]
+pub(super) fn disable() -> State {
+    let r: State;
+    // SAFETY: reading the PS special register and disabling all interrupts is safe.
+    // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions)
+    unsafe {
+        // Do not use `nomem` and `readonly` because we must prevent subsequent memory accesses from being reordered before interrupts are disabled.
+        // Interrupt level 15 to disable all interrupts.
+        // SYNC after RSIL is not required.
+        asm!("rsil {0}, 15", out(reg) r, options(nostack));
+    }
+    r
+}
+
+/// Restores the previous interrupt state by writing the saved value back to the PS special register.
+///
+/// # Safety
+///
+/// The state must be the one retrieved by the previous `disable`.
+#[inline]
+pub(super) unsafe fn restore(r: State) {
+    // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`;
+    // the saved PS value is written back unconditionally (no check is performed in this function).
+    unsafe {
+        // Do not use `nomem` and `readonly` because we must prevent preceding memory accesses from being reordered after interrupts are enabled.
+        // SYNC after WSR is required to guarantee that a subsequent RSIL reads the written value.
+        asm!(
+            "wsr.ps {0}",
+            "rsync",
+            in(reg) r,
+            options(nostack),
+        );
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/mod.rs b/vendor/portable-atomic/src/imp/mod.rs
new file mode 100644
index 0000000..cea71eb
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/mod.rs
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// -----------------------------------------------------------------------------
+// Lock-free implementations
+
+#[cfg(not(any(
+    all(
+        portable_atomic_no_atomic_load_store,
+        not(all(target_arch = "bpf", not(feature = "critical-section"))),
+    ),
+    portable_atomic_unsafe_assume_single_core,
+    target_arch = "avr",
+    target_arch = "msp430",
+)))]
+#[cfg_attr(
+    portable_atomic_no_cfg_target_has_atomic,
+    cfg(not(all(
+        any(target_arch = "riscv32", target_arch = "riscv64", feature = "critical-section"),
+        portable_atomic_no_atomic_cas,
+    )))
+)]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(not(all(
+        any(target_arch = "riscv32", target_arch = "riscv64", feature = "critical-section"),
+        not(target_has_atomic = "ptr"),
+    )))
+)]
+mod core_atomic;
+
+// aarch64 128-bit atomics
+#[cfg(all(
+    target_arch = "aarch64",
+    any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+))]
+// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly.
+#[cfg_attr(
+    all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics),
+    path = "atomic128/intrinsics.rs"
+)]
+#[cfg_attr(
+    not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics)),
+    path = "atomic128/aarch64.rs"
+)]
+mod aarch64;
+
+// x86_64 128-bit atomics
+#[cfg(all(
+    target_arch = "x86_64",
+    any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+    any(
+        target_feature = "cmpxchg16b",
+        portable_atomic_target_feature = "cmpxchg16b",
+        all(
+            feature = "fallback",
+            not(portable_atomic_no_cmpxchg16b_target_feature),
+            not(portable_atomic_no_outline_atomics),
+            not(any(target_env = "sgx", miri)),
+        ),
+    ),
+))]
+// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly.
+#[cfg_attr(any(miri, portable_atomic_sanitize_thread), path = "atomic128/intrinsics.rs")]
+#[cfg_attr(not(any(miri, portable_atomic_sanitize_thread)), path = "atomic128/x86_64.rs")]
+mod x86_64;
+
+// powerpc64 128-bit atomics
+#[cfg(all(
+    target_arch = "powerpc64",
+    portable_atomic_unstable_asm_experimental_arch,
+    any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+        all(
+            feature = "fallback",
+            not(portable_atomic_no_outline_atomics),
+            any(test, portable_atomic_outline_atomics), // TODO(powerpc64): currently disabled by default
+            any(
+                all(
+                    target_os = "linux",
+                    any(
+                        target_env = "gnu",
+                        all(
+                            any(target_env = "musl", target_env = "ohos"),
+                            not(target_feature = "crt-static"),
+                        ),
+                        portable_atomic_outline_atomics,
+                    ),
+                ),
+                target_os = "android",
+                target_os = "freebsd",
+            ),
+            not(any(miri, portable_atomic_sanitize_thread)),
+        ),
+    ),
+))]
+// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly.
+#[cfg_attr(
+    all(any(miri, portable_atomic_sanitize_thread), portable_atomic_llvm_15),
+    path = "atomic128/intrinsics.rs"
+)]
+#[cfg_attr(
+    not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_llvm_15)),
+    path = "atomic128/powerpc64.rs"
+)]
+mod powerpc64;
+
+// s390x 128-bit atomics
+#[cfg(all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch))]
+// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly.
+#[cfg_attr(any(miri, portable_atomic_sanitize_thread), path = "atomic128/intrinsics.rs")]
+#[cfg_attr(not(any(miri, portable_atomic_sanitize_thread)), path = "atomic128/s390x.rs")]
+mod s390x;
+
+// pre-v6 ARM Linux 64-bit atomics
+#[cfg(feature = "fallback")]
+// Miri and Sanitizer do not support inline assembly.
+#[cfg(all(
+    target_arch = "arm",
+    not(any(miri, portable_atomic_sanitize_thread)),
+    not(portable_atomic_no_asm),
+    any(target_os = "linux", target_os = "android"),
+    not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+    not(portable_atomic_no_outline_atomics),
+))]
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
+#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
+mod arm_linux;
+
+// MSP430 atomics
+#[cfg(target_arch = "msp430")]
+pub(crate) mod msp430;
+
+// atomic load/store for RISC-V without A-extension
+#[cfg(any(test, not(feature = "critical-section")))]
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_cas)))]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(any(test, not(target_has_atomic = "ptr")))
+)]
+#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+mod riscv;
+
+// x86-specific optimizations
+// Miri and Sanitizer do not support inline assembly.
+#[cfg(all(
+    any(target_arch = "x86", target_arch = "x86_64"),
+    not(any(miri, portable_atomic_sanitize_thread)),
+    not(portable_atomic_no_asm),
+))]
+mod x86;
+
+// -----------------------------------------------------------------------------
+// Lock-based fallback implementations
+
+#[cfg(feature = "fallback")]
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))]
+#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
+#[cfg(any(
+    test,
+    not(any(
+        all(
+            target_arch = "aarch64",
+            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+        ),
+        all(
+            target_arch = "x86_64",
+            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+            any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+        ),
+        all(
+            target_arch = "powerpc64",
+            portable_atomic_unstable_asm_experimental_arch,
+            any(
+                target_feature = "quadword-atomics",
+                portable_atomic_target_feature = "quadword-atomics",
+            ),
+        ),
+        all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch),
+    ))
+))]
+mod fallback;
+
+// -----------------------------------------------------------------------------
+// Critical section based fallback implementations
+
+// On AVR, we always use critical section based fallback implementation.
+// AVR can be safely assumed to be single-core, so this is sound.
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#L1074
+// MSP430 as well.
+#[cfg(any(
+    all(test, target_os = "none"),
+    portable_atomic_unsafe_assume_single_core,
+    feature = "critical-section",
+    target_arch = "avr",
+    target_arch = "msp430",
+))]
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_cas)))]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(any(test, not(target_has_atomic = "ptr")))
+)]
+#[cfg(any(
+    target_arch = "arm",
+    target_arch = "avr",
+    target_arch = "msp430",
+    target_arch = "riscv32",
+    target_arch = "riscv64",
+    target_arch = "xtensa",
+    feature = "critical-section",
+))]
+mod interrupt;
+
+// -----------------------------------------------------------------------------
+// Atomic float implementations
+
+#[cfg(feature = "float")]
+pub(crate) mod float;
+
+// -----------------------------------------------------------------------------
+
+#[cfg(not(any(
+    portable_atomic_no_atomic_load_store,
+    portable_atomic_unsafe_assume_single_core,
+    target_arch = "avr",
+    target_arch = "msp430",
+)))]
+#[cfg_attr(
+    portable_atomic_no_cfg_target_has_atomic,
+    cfg(not(all(
+        any(target_arch = "riscv32", target_arch = "riscv64", feature = "critical-section"),
+        portable_atomic_no_atomic_cas,
+    )))
+)]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(not(all(
+        any(target_arch = "riscv32", target_arch = "riscv64", feature = "critical-section"),
+        not(target_has_atomic = "ptr"),
+    )))
+)]
+items! {
+    pub(crate) use self::core_atomic::{
+        AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8,
+        AtomicUsize,
+    };
+    #[cfg_attr(
+        portable_atomic_no_cfg_target_has_atomic,
+        cfg(any(
+            not(portable_atomic_no_atomic_64),
+            not(any(target_pointer_width = "16", target_pointer_width = "32")),
+        ))
+    )]
+    #[cfg_attr(
+        not(portable_atomic_no_cfg_target_has_atomic),
+        cfg(any(
+            target_has_atomic = "64",
+            not(any(target_pointer_width = "16", target_pointer_width = "32")),
+        ))
+    )]
+    pub(crate) use self::core_atomic::{AtomicI64, AtomicU64};
+}
+// bpf
+#[cfg(all(
+    target_arch = "bpf",
+    portable_atomic_no_atomic_load_store,
+    not(feature = "critical-section"),
+))]
+pub(crate) use self::core_atomic::{AtomicI64, AtomicIsize, AtomicPtr, AtomicU64, AtomicUsize};
+
+// RISC-V without A-extension & !(assume single core | critical section)
+#[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))]
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
+#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
+#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+items! {
+    pub(crate) use self::riscv::{
+        AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8,
+        AtomicUsize,
+    };
+    #[cfg(target_arch = "riscv64")]
+    pub(crate) use self::riscv::{AtomicI64, AtomicU64};
+}
+
+// no core atomic CAS & (assume single core | critical section) => critical section based fallback
+#[cfg(any(
+    portable_atomic_unsafe_assume_single_core,
+    feature = "critical-section",
+    target_arch = "avr",
+    target_arch = "msp430",
+))]
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
+#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
+items! {
+    pub(crate) use self::interrupt::{
+        AtomicI16, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU8, AtomicUsize,
+    };
+    #[cfg(any(not(target_pointer_width = "16"), feature = "fallback"))]
+    pub(crate) use self::interrupt::{AtomicI32, AtomicU32};
+    #[cfg(any(
+        not(any(target_pointer_width = "16", target_pointer_width = "32")),
+        feature = "fallback",
+    ))]
+    pub(crate) use self::interrupt::{AtomicI64, AtomicU64};
+    #[cfg(feature = "fallback")]
+    pub(crate) use self::interrupt::{AtomicI128, AtomicU128};
+}
+
+// no core (64-bit | 128-bit) atomic & has CAS => use lock-based fallback
+#[cfg(feature = "fallback")]
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))]
+#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
+items! {
+    #[cfg(not(all(
+        target_arch = "arm",
+        not(any(miri, portable_atomic_sanitize_thread)),
+        not(portable_atomic_no_asm),
+        any(target_os = "linux", target_os = "android"),
+        not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+        not(portable_atomic_no_outline_atomics),
+    )))]
+    #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
+    #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
+    pub(crate) use self::fallback::{AtomicI64, AtomicU64};
+    #[cfg(not(any(
+        all(
+            target_arch = "aarch64",
+            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+        ),
+        all(
+            target_arch = "x86_64",
+            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+            any(
+                target_feature = "cmpxchg16b",
+                portable_atomic_target_feature = "cmpxchg16b",
+                all(
+                    feature = "fallback",
+                    not(portable_atomic_no_cmpxchg16b_target_feature),
+                    not(portable_atomic_no_outline_atomics),
+                    not(any(target_env = "sgx", miri)),
+                ),
+            ),
+        ),
+        all(
+            target_arch = "powerpc64",
+            portable_atomic_unstable_asm_experimental_arch,
+            any(
+                target_feature = "quadword-atomics",
+                portable_atomic_target_feature = "quadword-atomics",
+                all(
+                    feature = "fallback",
+                    not(portable_atomic_no_outline_atomics),
+                    portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default
+                    any(
+                        all(
+                            target_os = "linux",
+                            any(
+                                target_env = "gnu",
+                                all(
+                                    any(target_env = "musl", target_env = "ohos"),
+                                    not(target_feature = "crt-static"),
+                                ),
+                                portable_atomic_outline_atomics,
+                            ),
+                        ),
+                        target_os = "android",
+                        target_os = "freebsd",
+                    ),
+                    not(any(miri, portable_atomic_sanitize_thread)),
+                ),
+            ),
+        ),
+        all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch),
+    )))]
+    pub(crate) use self::fallback::{AtomicI128, AtomicU128};
+}
+
+// 64-bit atomics (platform-specific)
+// pre-v6 ARM Linux
+#[cfg(feature = "fallback")]
+#[cfg(all(
+    target_arch = "arm",
+    not(any(miri, portable_atomic_sanitize_thread)),
+    not(portable_atomic_no_asm),
+    any(target_os = "linux", target_os = "android"),
+    not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+    not(portable_atomic_no_outline_atomics),
+))]
+#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
+#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
+pub(crate) use self::arm_linux::{AtomicI64, AtomicU64};
+
+// 128-bit atomics (platform-specific)
+// aarch64
+#[cfg(all(
+    target_arch = "aarch64",
+    any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+))]
+pub(crate) use self::aarch64::{AtomicI128, AtomicU128};
+// x86_64 & (cmpxchg16b | outline-atomics)
+#[cfg(all(
+    target_arch = "x86_64",
+    any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
+    any(
+        target_feature = "cmpxchg16b",
+        portable_atomic_target_feature = "cmpxchg16b",
+        all(
+            feature = "fallback",
+            not(portable_atomic_no_cmpxchg16b_target_feature),
+            not(portable_atomic_no_outline_atomics),
+            not(any(target_env = "sgx", miri)),
+        ),
+    ),
+))]
+pub(crate) use self::x86_64::{AtomicI128, AtomicU128};
+// powerpc64 & (pwr8 | outline-atomics)
+#[cfg(all(
+    target_arch = "powerpc64",
+    portable_atomic_unstable_asm_experimental_arch,
+    any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+        all(
+            feature = "fallback",
+            not(portable_atomic_no_outline_atomics),
+            portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default
+            any(
+                all(
+                    target_os = "linux",
+                    any(
+                        target_env = "gnu",
+                        all(
+                            any(target_env = "musl", target_env = "ohos"),
+                            not(target_feature = "crt-static"),
+                        ),
+                        portable_atomic_outline_atomics,
+                    ),
+                ),
+                target_os = "android",
+                target_os = "freebsd",
+            ),
+            not(any(miri, portable_atomic_sanitize_thread)),
+        ),
+    ),
+))]
+pub(crate) use self::powerpc64::{AtomicI128, AtomicU128};
+// s390x
+#[cfg(all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch))]
+pub(crate) use self::s390x::{AtomicI128, AtomicU128};
diff --git a/vendor/portable-atomic/src/imp/msp430.rs b/vendor/portable-atomic/src/imp/msp430.rs
new file mode 100644
index 0000000..f6990dd
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/msp430.rs
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic load/store implementation on MSP430.
+//
+// Adapted from https://github.com/pftbest/msp430-atomic.
+// Including https://github.com/pftbest/msp430-atomic/pull/4 for a compile error fix.
+// Including https://github.com/pftbest/msp430-atomic/pull/5 for a soundness bug fix.
+//
+// Operations not supported here are provided by disabling interrupts.
+// See also src/imp/interrupt/msp430.rs.
+//
+// Note: Ordering is always SeqCst.
+//
+// Refs: https://www.ti.com/lit/ug/slau208q/slau208q.pdf
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+#[cfg(any(test, not(feature = "critical-section")))]
+use core::cell::UnsafeCell;
+use core::sync::atomic::Ordering;
+
+/// An atomic fence.
+///
+/// # Panics
+///
+/// Panics if `order` is [`Relaxed`](Ordering::Relaxed).
+#[inline]
+#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+pub fn fence(order: Ordering) {
+    match order {
+        Ordering::Relaxed => panic!("there is no such thing as a relaxed fence"),
+        // MSP430 is single-core and a compiler fence works as an atomic fence.
+        _ => compiler_fence(order),
+    }
+}
+
+/// A compiler memory fence.
+///
+/// # Panics
+///
+/// Panics if `order` is [`Relaxed`](Ordering::Relaxed).
+#[inline]
+#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+pub fn compiler_fence(order: Ordering) {
+    match order {
+        Ordering::Relaxed => panic!("there is no such thing as a relaxed compiler fence"),
+        _ => {}
+    }
+    // SAFETY: the asm template is empty, so no instruction is executed.
+    unsafe {
+        // Do not use `nomem` or `readonly`; without them the asm keeps memory accesses from being reordered across it.
+        #[cfg(not(portable_atomic_no_asm))]
+        asm!("", options(nostack, preserves_flags));
+        #[cfg(portable_atomic_no_asm)]
+        llvm_asm!("" ::: "memory" : "volatile");
+    }
+}
+
+macro_rules! atomic {
+    (load_store, $([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
+        #[cfg(any(test, not(feature = "critical-section")))]
+        #[repr(transparent)]
+        pub(crate) struct $atomic_type $(<$($generics)*>)? {
+            v: UnsafeCell<$value_type>,
+        }
+
+        #[cfg(any(test, not(feature = "critical-section")))]
+        // Send is implicitly implemented for atomic integers, but not for atomic pointers.
+        // SAFETY: any data races are prevented by atomic operations.
+        unsafe impl $(<$($generics)*>)? Send for $atomic_type $(<$($generics)*>)? {}
+        #[cfg(any(test, not(feature = "critical-section")))]
+        // SAFETY: any data races are prevented by atomic operations.
+        unsafe impl $(<$($generics)*>)? Sync for $atomic_type $(<$($generics)*>)? {}
+
+        #[cfg(any(test, not(feature = "critical-section")))]
+        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
+            #[cfg(test)]
+            #[inline]
+            pub(crate) const fn new(v: $value_type) -> Self {
+                Self { v: UnsafeCell::new(v) }
+            }
+
+            #[cfg(test)]
+            #[inline]
+            pub(crate) fn is_lock_free() -> bool {
+                Self::is_always_lock_free()
+            }
+            #[cfg(test)]
+            #[inline]
+            pub(crate) const fn is_always_lock_free() -> bool {
+                true
+            }
+
+            #[cfg(test)]
+            #[inline]
+            pub(crate) fn get_mut(&mut self) -> &mut $value_type {
+                // SAFETY: the mutable reference guarantees unique ownership.
+                // (UnsafeCell::get_mut requires Rust 1.50)
+                unsafe { &mut *self.v.get() }
+            }
+
+            #[cfg(test)]
+            #[inline]
+            pub(crate) fn into_inner(self) -> $value_type {
+                 self.v.into_inner()
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn load(&self, order: Ordering) -> $value_type {
+                crate::utils::assert_load_ordering(order);
+                let src = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    let out;
+                    #[cfg(not(portable_atomic_no_asm))]
+                    asm!(
+                        concat!("mov", $asm_suffix, " @{src}, {out}"),
+                        src = in(reg) src,
+                        out = lateout(reg) out,
+                        options(nostack, preserves_flags),
+                    );
+                    #[cfg(portable_atomic_no_asm)]
+                    llvm_asm!(
+                        concat!("mov", $asm_suffix, " $1, $0")
+                        : "=r"(out) : "*m"(src) : "memory" : "volatile"
+                    );
+                    out
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn store(&self, val: $value_type, order: Ordering) {
+                crate::utils::assert_store_ordering(order);
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    #[cfg(not(portable_atomic_no_asm))]
+                    asm!(
+                        concat!("mov", $asm_suffix, " {val}, 0({dst})"),
+                        dst = in(reg) dst,
+                        val = in(reg) val,
+                        options(nostack, preserves_flags),
+                    );
+                    #[cfg(portable_atomic_no_asm)]
+                    llvm_asm!(
+                        concat!("mov", $asm_suffix, " $1, $0")
+                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
+                    );
+                }
+            }
+        }
+    };
+    ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
+        atomic!(load_store, $([$($generics)*])? $atomic_type, $value_type, $asm_suffix);
+        #[cfg(any(test, not(feature = "critical-section")))]
+        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
+            #[inline]
+            pub(crate) fn add(&self, val: $value_type, _order: Ordering) {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    #[cfg(not(portable_atomic_no_asm))]
+                    asm!(
+                        concat!("add", $asm_suffix, " {val}, 0({dst})"),
+                        dst = in(reg) dst,
+                        val = in(reg) val,
+                        // Do not use `preserves_flags` because ADD modifies the V, N, Z, and C bits of the status register.
+                        options(nostack),
+                    );
+                    #[cfg(portable_atomic_no_asm)]
+                    llvm_asm!(
+                        concat!("add", $asm_suffix, " $1, $0")
+                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
+                    );
+                }
+            }
+
+            #[inline]
+            pub(crate) fn sub(&self, val: $value_type, _order: Ordering) {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    #[cfg(not(portable_atomic_no_asm))]
+                    asm!(
+                        concat!("sub", $asm_suffix, " {val}, 0({dst})"),
+                        dst = in(reg) dst,
+                        val = in(reg) val,
+                        // Do not use `preserves_flags` because SUB modifies the V, N, Z, and C bits of the status register.
+                        options(nostack),
+                    );
+                    #[cfg(portable_atomic_no_asm)]
+                    llvm_asm!(
+                        concat!("sub", $asm_suffix, " $1, $0")
+                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
+                    );
+                }
+            }
+
+            #[inline]
+            pub(crate) fn and(&self, val: $value_type, _order: Ordering) {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    #[cfg(not(portable_atomic_no_asm))]
+                    asm!(
+                        concat!("and", $asm_suffix, " {val}, 0({dst})"),
+                        dst = in(reg) dst,
+                        val = in(reg) val,
+                        // Do not use `preserves_flags` because AND modifies the V, N, Z, and C bits of the status register.
+                        options(nostack),
+                    );
+                    #[cfg(portable_atomic_no_asm)]
+                    llvm_asm!(
+                        concat!("and", $asm_suffix, " $1, $0")
+                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
+                    );
+                }
+            }
+
+            #[inline]
+            pub(crate) fn or(&self, val: $value_type, _order: Ordering) {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    #[cfg(not(portable_atomic_no_asm))]
+                    asm!(
+                        concat!("bis", $asm_suffix, " {val}, 0({dst})"),
+                        dst = in(reg) dst,
+                        val = in(reg) val,
+                        options(nostack, preserves_flags),
+                    );
+                    #[cfg(portable_atomic_no_asm)]
+                    llvm_asm!(
+                        concat!("bis", $asm_suffix, " $1, $0")
+                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
+                    );
+                }
+            }
+
+            #[inline]
+            pub(crate) fn xor(&self, val: $value_type, _order: Ordering) {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    #[cfg(not(portable_atomic_no_asm))]
+                    asm!(
+                        concat!("xor", $asm_suffix, " {val}, 0({dst})"),
+                        dst = in(reg) dst,
+                        val = in(reg) val,
+                        // Do not use `preserves_flags` because XOR modifies the V, N, Z, and C bits of the status register.
+                        options(nostack),
+                    );
+                    #[cfg(portable_atomic_no_asm)]
+                    llvm_asm!(
+                        concat!("xor", $asm_suffix, " $1, $0")
+                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
+                    );
+                }
+            }
+
+            #[inline]
+            pub(crate) fn not(&self, _order: Ordering) {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    #[cfg(not(portable_atomic_no_asm))]
+                    asm!(
+                        concat!("inv", $asm_suffix, " 0({dst})"),
+                        dst = in(reg) dst,
+                        // Do not use `preserves_flags` because INV modifies the V, N, Z, and C bits of the status register.
+                        options(nostack),
+                    );
+                    #[cfg(portable_atomic_no_asm)]
+                    llvm_asm!(
+                        concat!("inv", $asm_suffix, " $0")
+                        :: "*m"(dst) : "memory" : "volatile"
+                    );
+                }
+            }
+        }
+    }
+}
+
+atomic!(AtomicI8, i8, ".b");
+atomic!(AtomicU8, u8, ".b");
+atomic!(AtomicI16, i16, ".w");
+atomic!(AtomicU16, u16, ".w");
+atomic!(AtomicIsize, isize, ".w");
+atomic!(AtomicUsize, usize, ".w");
+atomic!(load_store, [T] AtomicPtr, *mut T, ".w");
diff --git a/vendor/portable-atomic/src/imp/riscv.rs b/vendor/portable-atomic/src/imp/riscv.rs
new file mode 100644
index 0000000..e4e5d16
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/riscv.rs
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic load/store implementation on RISC-V.
+//
+// This is for RISC-V targets without atomic CAS. (rustc doesn't provide atomics
+// at all on such targets. https://github.com/rust-lang/rust/pull/114499)
+//
+// Also, optionally provides RMW implementation when force-amo is enabled.
+//
+// Refs:
+// - RISC-V Instruction Set Manual Volume I: Unprivileged ISA
+//   https://riscv.org/wp-content/uploads/2019/12/riscv-spec-20191213.pdf
+// - RISC-V Atomics ABI Specification
+//   https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/HEAD/riscv-atomic.adoc
+// - "Mappings from C/C++ primitives to RISC-V primitives." table in RISC-V Instruction Set Manual:
+//   https://five-embeddev.com/riscv-isa-manual/latest/memory.html#sec:memory:porting
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - riscv64gc https://godbolt.org/z/EETebx7TE
+// - riscv32imac https://godbolt.org/z/8zzv73bKh
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+use core::{cell::UnsafeCell, sync::atomic::Ordering};
+
+#[cfg(any(test, portable_atomic_force_amo))]
+macro_rules! atomic_rmw_amo_order {
+    ($op:ident, $order:ident) => {
+        match $order {
+            Ordering::Relaxed => $op!(""),
+            Ordering::Acquire => $op!(".aq"),
+            Ordering::Release => $op!(".rl"),
+            // AcqRel and SeqCst RMWs are equivalent.
+            Ordering::AcqRel | Ordering::SeqCst => $op!(".aqrl"),
+            _ => unreachable!("{:?}", $order),
+        }
+    };
+}
+#[cfg(any(test, portable_atomic_force_amo))]
+macro_rules! atomic_rmw_amo {
+    ($op:ident, $dst:ident, $val:ident, $order:ident, $asm_suffix:tt) => {{
+        let out;
+        macro_rules! op {
+            ($asm_order:tt) => {
+                // SAFETY: The user guaranteed that AMO instructions are available on this
+                // system by setting both the portable_atomic_force_amo and
+                // portable_atomic_unsafe_assume_single_core cfgs.
+                // The caller of this macro must guarantee the validity of the pointer.
+                asm!(
+                    ".option push",
+                    // https://github.com/riscv-non-isa/riscv-asm-manual/blob/HEAD/riscv-asm.md#arch
+                    ".option arch, +a",
+                    concat!("amo", stringify!($op), ".", $asm_suffix, $asm_order, " {out}, {val}, 0({dst})"),
+                    ".option pop",
+                    dst = in(reg) ptr_reg!($dst),
+                    val = in(reg) $val,
+                    out = lateout(reg) out,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw_amo_order!(op, $order);
+        out
+    }};
+}
+// 32-bit left shift: like `val.wrapping_shl(shift)` but without the extra `& (u32::BITS - 1)` on `shift`.
+#[cfg(any(test, portable_atomic_force_amo))]
+#[inline]
+fn sllw(val: u32, shift: u32) -> u32 {
+    // SAFETY: sll{,w} only reads and writes registers (note `pure, nomem`), so calling it is safe.
+    unsafe {
+        let out;
+        #[cfg(target_arch = "riscv32")]
+        asm!("sll {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags));
+        #[cfg(target_arch = "riscv64")]
+        asm!("sllw {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags));
+        out
+    }
+}
+// 32-bit right shift: like `val.wrapping_shr(shift)` but without the extra `& (u32::BITS - 1)` on `shift`.
+#[cfg(any(test, portable_atomic_force_amo))]
+#[inline]
+fn srlw(val: u32, shift: u32) -> u32 {
+    // SAFETY: srl{,w} only reads and writes registers (note `pure, nomem`), so calling it is safe.
+    unsafe {
+        let out;
+        #[cfg(target_arch = "riscv32")]
+        asm!("srl {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags));
+        #[cfg(target_arch = "riscv64")]
+        asm!("srlw {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags));
+        out
+    }
+}
+
+macro_rules! atomic_load_store {
+    ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
+        #[repr(transparent)]
+        pub(crate) struct $atomic_type $(<$($generics)*>)? {
+            v: UnsafeCell<$value_type>,
+        }
+
+        // Send is implicitly implemented for atomic integers, but not for atomic pointers.
+        // SAFETY: any data races are prevented by atomic operations.
+        unsafe impl $(<$($generics)*>)? Send for $atomic_type $(<$($generics)*>)? {}
+        // SAFETY: any data races are prevented by atomic operations.
+        unsafe impl $(<$($generics)*>)? Sync for $atomic_type $(<$($generics)*>)? {}
+
+        #[cfg(any(test, not(portable_atomic_unsafe_assume_single_core)))]
+        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
+            #[inline]
+            pub(crate) const fn new(v: $value_type) -> Self {
+                Self { v: UnsafeCell::new(v) }
+            }
+
+            #[inline]
+            pub(crate) fn is_lock_free() -> bool {
+                Self::is_always_lock_free()
+            }
+            #[inline]
+            pub(crate) const fn is_always_lock_free() -> bool {
+                true
+            }
+
+            #[inline]
+            pub(crate) fn get_mut(&mut self) -> &mut $value_type {
+                // SAFETY: the mutable reference guarantees unique ownership.
+                // (UnsafeCell::get_mut requires Rust 1.50)
+                unsafe { &mut *self.v.get() }
+            }
+
+            #[inline]
+            pub(crate) fn into_inner(self) -> $value_type {
+                 self.v.into_inner()
+            }
+
+            #[inline]
+            pub(crate) const fn as_ptr(&self) -> *mut $value_type {
+                self.v.get()
+            }
+        }
+        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn load(&self, order: Ordering) -> $value_type {
+                crate::utils::assert_load_ordering(order);
+                let src = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    let out;
+                    match order {
+                        Ordering::Relaxed => {
+                            asm!(
+                                concat!("l", $asm_suffix, " {out}, 0({src})"),
+                                src = in(reg) ptr_reg!(src),
+                                out = lateout(reg) out,
+                                options(nostack, preserves_flags, readonly),
+                            );
+                        }
+                        Ordering::Acquire => {
+                            asm!(
+                                concat!("l", $asm_suffix, " {out}, 0({src})"),
+                                "fence r, rw",
+                                src = in(reg) ptr_reg!(src),
+                                out = lateout(reg) out,
+                                options(nostack, preserves_flags),
+                            );
+                        }
+                        Ordering::SeqCst => {
+                            asm!(
+                                "fence rw, rw",
+                                concat!("l", $asm_suffix, " {out}, 0({src})"),
+                                "fence r, rw",
+                                src = in(reg) ptr_reg!(src),
+                                out = lateout(reg) out,
+                                options(nostack, preserves_flags),
+                            );
+                        }
+                        _ => unreachable!("{:?}", order),
+                    }
+                    out
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+            pub(crate) fn store(&self, val: $value_type, order: Ordering) {
+                crate::utils::assert_store_ordering(order);
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    match order {
+                        Ordering::Relaxed => {
+                            asm!(
+                                concat!("s", $asm_suffix, " {val}, 0({dst})"),
+                                dst = in(reg) ptr_reg!(dst),
+                                val = in(reg) val,
+                                options(nostack, preserves_flags),
+                            );
+                        }
+                        // Release and SeqCst stores are equivalent.
+                        Ordering::Release | Ordering::SeqCst => {
+                            asm!(
+                                "fence rw, w",
+                                concat!("s", $asm_suffix, " {val}, 0({dst})"),
+                                dst = in(reg) ptr_reg!(dst),
+                                val = in(reg) val,
+                                options(nostack, preserves_flags),
+                            );
+                        }
+                        _ => unreachable!("{:?}", order),
+                    }
+                }
+            }
+        }
+    };
+}
+
+macro_rules! atomic_ptr { // Pointer flavour: load/store via `atomic_load_store!`, plus `swap` only under `portable_atomic_force_amo`.
+    ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
+        atomic_load_store!($([$($generics)*])? $atomic_type, $value_type, $asm_suffix);
+        #[cfg(portable_atomic_force_amo)] // unlike `atomic!`/`atomic_sub_word!`, `test` alone does not enable this impl
+        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
+            #[inline]
+            pub(crate) fn swap(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_rmw_amo!(swap, dst, val, order, $asm_suffix) } // `$asm_suffix` is forwarded unchanged ("w" or "d" at the call sites)
+            }
+        }
+    };
+}
+
+macro_rules! atomic { // Integer flavour: load/store via `atomic_load_store!`, plus RMW methods that each forward to `atomic_rmw_amo!`.
+    ($atomic_type:ident, $value_type:ty, $asm_suffix:tt, $max:tt, $min:tt) => {
+        atomic_load_store!($atomic_type, $value_type, $asm_suffix);
+        // There is no amo{sub,nand,neg}; fetch_sub and fetch_not below are built on fetch_add and fetch_xor.
+        #[cfg(any(test, portable_atomic_force_amo))]
+        impl $atomic_type {
+            #[inline]
+            pub(crate) fn swap(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_rmw_amo!(swap, dst, val, order, $asm_suffix) }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_add(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_rmw_amo!(add, dst, val, order, $asm_suffix) }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_sub(&self, val: $value_type, order: Ordering) -> $value_type {
+                self.fetch_add(val.wrapping_neg(), order) // x - y == x + (-y) under wrapping arithmetic
+            }
+
+            #[inline]
+            pub(crate) fn fetch_and(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_rmw_amo!(and, dst, val, order, $asm_suffix) }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_or(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_rmw_amo!(or, dst, val, order, $asm_suffix) }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_xor(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_rmw_amo!(xor, dst, val, order, $asm_suffix) }
+            }
+
+            #[inline]
+            pub(crate) fn fetch_not(&self, order: Ordering) -> $value_type {
+                self.fetch_xor(!0, order) // xor with all ones inverts every bit
+            }
+
+            #[inline]
+            pub(crate) fn fetch_max(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_rmw_amo!($max, dst, val, order, $asm_suffix) } // `$max` is `max` or `maxu`, chosen by the invocation
+            }
+
+            #[inline]
+            pub(crate) fn fetch_min(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_rmw_amo!($min, dst, val, order, $asm_suffix) } // `$min` is `min` or `minu`, chosen by the invocation
+            }
+        }
+    };
+}
+
+macro_rules! atomic_sub_word { // 8/16-bit flavour: load/store via `atomic_load_store!`; and/or/xor/not go through a 32-bit ("w") `atomic_rmw_amo!`.
+    ($atomic_type:ident, $value_type:ty, $unsigned_type:ty, $asm_suffix:tt) => {
+        atomic_load_store!($atomic_type, $value_type, $asm_suffix);
+        #[cfg(any(test, portable_atomic_force_amo))]
+        impl $atomic_type {
+            #[inline]
+            pub(crate) fn fetch_and(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                let (dst, shift, mask) = crate::utils::create_sub_word_mask_values(dst); // NOTE(review): helper lives in crate::utils; presumably yields the containing word's address, the bit offset and the lane mask — confirm
+                let mask = !sllw(mask as u32, shift as u32); // NOTE(review): assuming `sllw` is a 32-bit logical left shift, this is all ones outside the target lane
+                // TODO: use zero_extend helper instead of cast for val.
+                let val = sllw(val as $unsigned_type as u32, shift as u32);
+                let val = val | mask; // bits outside the lane are forced to 1 so the 32-bit AND leaves them as they were (same assumption)
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                let out: u32 = unsafe { atomic_rmw_amo!(and, dst, val, order, "w") };
+                srlw(out, shift as u32) as $unsigned_type as $value_type // presumably shifts the old word back down; the casts then truncate to the narrow type
+            }
+
+            #[inline]
+            pub(crate) fn fetch_or(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                let (dst, shift, _mask) = crate::utils::create_sub_word_mask_values(dst); // `_mask` unused: zero bits are a no-op for OR (assuming `sllw` zero-fills)
+                // TODO: use zero_extend helper instead of cast for val.
+                let val = sllw(val as $unsigned_type as u32, shift as u32);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                let out: u32 = unsafe { atomic_rmw_amo!(or, dst, val, order, "w") };
+                srlw(out, shift as u32) as $unsigned_type as $value_type
+            }
+
+            #[inline]
+            pub(crate) fn fetch_xor(&self, val: $value_type, order: Ordering) -> $value_type {
+                let dst = self.v.get();
+                let (dst, shift, _mask) = crate::utils::create_sub_word_mask_values(dst); // `_mask` unused: zero bits are a no-op for XOR (assuming `sllw` zero-fills)
+                // TODO: use zero_extend helper instead of cast for val.
+                let val = sllw(val as $unsigned_type as u32, shift as u32);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                let out: u32 = unsafe { atomic_rmw_amo!(xor, dst, val, order, "w") };
+                srlw(out, shift as u32) as $unsigned_type as $value_type
+            }
+
+            #[inline]
+            pub(crate) fn fetch_not(&self, order: Ordering) -> $value_type {
+                self.fetch_xor(!0, order) // xor with all ones inverts every bit
+            }
+        }
+    };
+}
+
+atomic_sub_word!(AtomicI8, i8, u8, "b"); // 8- and 16-bit types take the sub-word path; third argument is the same-width unsigned type
+atomic_sub_word!(AtomicU8, u8, u8, "b");
+atomic_sub_word!(AtomicI16, i16, u16, "h");
+atomic_sub_word!(AtomicU16, u16, u16, "h");
+atomic!(AtomicI32, i32, "w", max, min); // signed types pass max/min, unsigned types pass maxu/minu
+atomic!(AtomicU32, u32, "w", maxu, minu);
+#[cfg(target_arch = "riscv64")] // 64-bit integer atomics are only instantiated on riscv64
+atomic!(AtomicI64, i64, "d", max, min);
+#[cfg(target_arch = "riscv64")]
+atomic!(AtomicU64, u64, "d", maxu, minu);
+#[cfg(target_pointer_width = "32")] // pointer-sized types use the "w" suffix with 32-bit pointers
+atomic!(AtomicIsize, isize, "w", max, min);
+#[cfg(target_pointer_width = "32")]
+atomic!(AtomicUsize, usize, "w", maxu, minu);
+#[cfg(target_pointer_width = "32")]
+atomic_ptr!([T] AtomicPtr, *mut T, "w");
+#[cfg(target_pointer_width = "64")] // ... and the "d" suffix with 64-bit pointers
+atomic!(AtomicIsize, isize, "d", max, min);
+#[cfg(target_pointer_width = "64")]
+atomic!(AtomicUsize, usize, "d", maxu, minu);
+#[cfg(target_pointer_width = "64")]
+atomic_ptr!([T] AtomicPtr, *mut T, "d");
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_atomic_ptr_load_store!(); // load/store tests for the pointer type and every integer type; these test macros are defined outside this file
+    test_atomic_int_load_store!(i8);
+    test_atomic_int_load_store!(u8);
+    test_atomic_int_load_store!(i16);
+    test_atomic_int_load_store!(u16);
+    test_atomic_int_load_store!(i32);
+    test_atomic_int_load_store!(u32);
+    #[cfg(target_arch = "riscv64")] // mirrors the cfg on the AtomicI64/AtomicU64 instantiations above
+    test_atomic_int_load_store!(i64);
+    #[cfg(target_arch = "riscv64")]
+    test_atomic_int_load_store!(u64);
+    test_atomic_int_load_store!(isize);
+    test_atomic_int_load_store!(usize);
+
+    macro_rules! test_atomic_int_amo { // Property tests for swap and every fetch_* method, one quickcheck function per operation.
+        ($int_type:ident) => { // entry arm: wraps the tests in a per-type module and re-invokes with the `Atomic*` type name
+            paste::paste! {
+                #[allow(
+                    clippy::alloc_instead_of_core,
+                    clippy::std_instead_of_alloc,
+                    clippy::std_instead_of_core,
+                    clippy::undocumented_unsafe_blocks
+                )]
+                mod [<test_atomic_ $int_type _amo>] {
+                    use super::*;
+                    test_atomic_int_amo!([<Atomic $int_type:camel>], $int_type);
+                }
+            }
+        };
+        ($atomic_type:ty, $int_type:ident) => { // worker arm: the actual properties
+            ::quickcheck::quickcheck! {
+                fn quickcheck_swap(x: $int_type, y: $int_type) -> bool {
+                    for &order in &test_helper::SWAP_ORDERINGS { // each property is repeated for every ordering in this list
+                        let a = <$atomic_type>::new(x);
+                        assert_eq!(a.swap(y, order), x);
+                        assert_eq!(a.swap(x, order), y);
+                    }
+                    true
+                }
+                fn quickcheck_fetch_add(x: $int_type, y: $int_type) -> bool {
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        let a = <$atomic_type>::new(x);
+                        assert_eq!(a.fetch_add(y, order), x); // fetch_* returns the previous value ...
+                        assert_eq!(a.load(Ordering::Relaxed), x.wrapping_add(y)); // ... and stores the combined one
+                        let a = <$atomic_type>::new(y); // repeat with the operands swapped
+                        assert_eq!(a.fetch_add(x, order), y);
+                        assert_eq!(a.load(Ordering::Relaxed), y.wrapping_add(x));
+                    }
+                    true
+                }
+                fn quickcheck_fetch_sub(x: $int_type, y: $int_type) -> bool {
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        let a = <$atomic_type>::new(x);
+                        assert_eq!(a.fetch_sub(y, order), x);
+                        assert_eq!(a.load(Ordering::Relaxed), x.wrapping_sub(y));
+                        let a = <$atomic_type>::new(y);
+                        assert_eq!(a.fetch_sub(x, order), y);
+                        assert_eq!(a.load(Ordering::Relaxed), y.wrapping_sub(x));
+                    }
+                    true
+                }
+                fn quickcheck_fetch_and(x: $int_type, y: $int_type) -> bool {
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        let a = <$atomic_type>::new(x);
+                        assert_eq!(a.fetch_and(y, order), x);
+                        assert_eq!(a.load(Ordering::Relaxed), x & y);
+                        let a = <$atomic_type>::new(y);
+                        assert_eq!(a.fetch_and(x, order), y);
+                        assert_eq!(a.load(Ordering::Relaxed), y & x);
+                    }
+                    true
+                }
+                fn quickcheck_fetch_or(x: $int_type, y: $int_type) -> bool {
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        let a = <$atomic_type>::new(x);
+                        assert_eq!(a.fetch_or(y, order), x);
+                        assert_eq!(a.load(Ordering::Relaxed), x | y);
+                        let a = <$atomic_type>::new(y);
+                        assert_eq!(a.fetch_or(x, order), y);
+                        assert_eq!(a.load(Ordering::Relaxed), y | x);
+                    }
+                    true
+                }
+                fn quickcheck_fetch_xor(x: $int_type, y: $int_type) -> bool {
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        let a = <$atomic_type>::new(x);
+                        assert_eq!(a.fetch_xor(y, order), x);
+                        assert_eq!(a.load(Ordering::Relaxed), x ^ y);
+                        let a = <$atomic_type>::new(y);
+                        assert_eq!(a.fetch_xor(x, order), y);
+                        assert_eq!(a.load(Ordering::Relaxed), y ^ x);
+                    }
+                    true
+                }
+                fn quickcheck_fetch_max(x: $int_type, y: $int_type) -> bool {
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        let a = <$atomic_type>::new(x);
+                        assert_eq!(a.fetch_max(y, order), x);
+                        assert_eq!(a.load(Ordering::Relaxed), core::cmp::max(x, y));
+                        let a = <$atomic_type>::new(y);
+                        assert_eq!(a.fetch_max(x, order), y);
+                        assert_eq!(a.load(Ordering::Relaxed), core::cmp::max(y, x));
+                    }
+                    true
+                }
+                fn quickcheck_fetch_min(x: $int_type, y: $int_type) -> bool {
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        let a = <$atomic_type>::new(x);
+                        assert_eq!(a.fetch_min(y, order), x);
+                        assert_eq!(a.load(Ordering::Relaxed), core::cmp::min(x, y));
+                        let a = <$atomic_type>::new(y);
+                        assert_eq!(a.fetch_min(x, order), y);
+                        assert_eq!(a.load(Ordering::Relaxed), core::cmp::min(y, x));
+                    }
+                    true
+                }
+                fn quickcheck_fetch_not(x: $int_type) -> bool {
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        let a = <$atomic_type>::new(x);
+                        assert_eq!(a.fetch_not(order), x);
+                        assert_eq!(a.load(Ordering::Relaxed), !x);
+                        assert_eq!(a.fetch_not(order), !x); // applying not a second time must restore the original value
+                        assert_eq!(a.load(Ordering::Relaxed), x);
+                    }
+                    true
+                }
+            }
+        };
+    }
+    macro_rules! test_atomic_int_amo_sub_word { // Property tests for the 8/16-bit and/or/xor/not methods that also check neighbouring slots stay untouched.
+        ($int_type:ident) => { // entry arm: wraps the tests in a per-type module and re-invokes with the `Atomic*` type name
+            paste::paste! {
+                #[allow(
+                    clippy::alloc_instead_of_core,
+                    clippy::std_instead_of_alloc,
+                    clippy::std_instead_of_core,
+                    clippy::undocumented_unsafe_blocks
+                )]
+                mod [<test_atomic_ $int_type _amo>] {
+                    use super::*;
+                    test_atomic_int_amo_sub_word!([<Atomic $int_type:camel>], $int_type);
+                }
+            }
+        };
+        ($atomic_type:ty, $int_type:ident) => { // worker arm: the actual properties
+            use crate::tests::helper::*;
+            ::quickcheck::quickcheck! {
+                fn quickcheck_fetch_and(x: $int_type, y: $int_type) -> bool {
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        for base in [0, !0] { // neighbours are filled with all zeros, then all ones, so a stray change in either direction is visible
+                            let mut arr = Align16([ // NOTE(review): `Align16` is not defined in this file; presumably it forces 16-byte alignment — confirm
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                            ]);
+                            let a_idx = fastrand::usize(3..=6); // random interior slot of the 10-element array, so both sides have neighbours
+                            arr.0[a_idx] = <$atomic_type>::new(x);
+                            let a = &arr.0[a_idx];
+                            assert_eq!(a.fetch_and(y, order), x);
+                            assert_eq!(a.load(Ordering::Relaxed), x & y);
+                            for i in 0..a_idx { // slots before the target must still hold `base`
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                            for i in a_idx + 1..arr.0.len() { // slots after the target must still hold `base`
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                            arr.0[a_idx] = <$atomic_type>::new(y); // repeat with the operands swapped
+                            let a = &arr.0[a_idx];
+                            assert_eq!(a.fetch_and(x, order), y);
+                            assert_eq!(a.load(Ordering::Relaxed), y & x);
+                            for i in 0..a_idx {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                            for i in a_idx + 1..arr.0.len() {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                        }
+                    }
+                    true
+                }
+                fn quickcheck_fetch_or(x: $int_type, y: $int_type) -> bool { // same layout as quickcheck_fetch_and, with `|`
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        for base in [0, !0] {
+                            let mut arr = Align16([
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                            ]);
+                            let a_idx = fastrand::usize(3..=6);
+                            arr.0[a_idx] = <$atomic_type>::new(x);
+                            let a = &arr.0[a_idx];
+                            assert_eq!(a.fetch_or(y, order), x);
+                            assert_eq!(a.load(Ordering::Relaxed), x | y);
+                            for i in 0..a_idx {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                            for i in a_idx + 1..arr.0.len() {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                            arr.0[a_idx] = <$atomic_type>::new(y);
+                            let a = &arr.0[a_idx];
+                            assert_eq!(a.fetch_or(x, order), y);
+                            assert_eq!(a.load(Ordering::Relaxed), y | x);
+                            for i in 0..a_idx {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                            for i in a_idx + 1..arr.0.len() {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                        }
+                    }
+                    true
+                }
+                fn quickcheck_fetch_xor(x: $int_type, y: $int_type) -> bool { // same layout as quickcheck_fetch_and, with `^`
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        for base in [0, !0] {
+                            let mut arr = Align16([
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                            ]);
+                            let a_idx = fastrand::usize(3..=6);
+                            arr.0[a_idx] = <$atomic_type>::new(x);
+                            let a = &arr.0[a_idx];
+                            assert_eq!(a.fetch_xor(y, order), x);
+                            assert_eq!(a.load(Ordering::Relaxed), x ^ y);
+                            for i in 0..a_idx {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                            for i in a_idx + 1..arr.0.len() {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                            arr.0[a_idx] = <$atomic_type>::new(y);
+                            let a = &arr.0[a_idx];
+                            assert_eq!(a.fetch_xor(x, order), y);
+                            assert_eq!(a.load(Ordering::Relaxed), y ^ x);
+                            for i in 0..a_idx {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                            for i in a_idx + 1..arr.0.len() {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                        }
+                    }
+                    true
+                }
+                fn quickcheck_fetch_not(x: $int_type) -> bool { // single operand: not is applied twice and must round-trip
+                    for &order in &test_helper::SWAP_ORDERINGS {
+                        for base in [0, !0] {
+                            let mut arr = Align16([
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                                <$atomic_type>::new(base),
+                            ]);
+                            let a_idx = fastrand::usize(3..=6);
+                            arr.0[a_idx] = <$atomic_type>::new(x);
+                            let a = &arr.0[a_idx];
+                            assert_eq!(a.fetch_not(order), x);
+                            assert_eq!(a.load(Ordering::Relaxed), !x);
+                            assert_eq!(a.fetch_not(order), !x);
+                            assert_eq!(a.load(Ordering::Relaxed), x);
+                            for i in 0..a_idx {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                            for i in a_idx + 1..arr.0.len() {
+                                assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written");
+                            }
+                        }
+                    }
+                    true
+                }
+            }
+        };
+    }
+    test_atomic_int_amo_sub_word!(i8); // 8/16-bit types get the neighbour-checking variant
+    test_atomic_int_amo_sub_word!(u8);
+    test_atomic_int_amo_sub_word!(i16);
+    test_atomic_int_amo_sub_word!(u16);
+    test_atomic_int_amo!(i32); // 32-bit and wider types get the full swap/fetch_* suite
+    test_atomic_int_amo!(u32);
+    #[cfg(target_arch = "riscv64")] // mirrors the cfg on the AtomicI64/AtomicU64 instantiations above
+    test_atomic_int_amo!(i64);
+    #[cfg(target_arch = "riscv64")]
+    test_atomic_int_amo!(u64);
+    test_atomic_int_amo!(isize);
+    test_atomic_int_amo!(usize);
+}
diff --git a/vendor/portable-atomic/src/imp/x86.rs b/vendor/portable-atomic/src/imp/x86.rs
new file mode 100644
index 0000000..9ef4d74
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/x86.rs
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic operations implementation on x86/x86_64.
+//
+// This module provides atomic operations not supported by LLVM or optimizes
+// cases where LLVM code generation is not optimal.
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use CAS loop instead.
+//
+// Refs:
+// - x86 and amd64 instruction reference https://www.felixcloutier.com/x86
+//
+// Generated asm:
+// - x86_64 https://godbolt.org/z/d17eTs5Ec
+
+use core::{arch::asm, sync::atomic::Ordering};
+
+use super::core_atomic::{
+    AtomicI16, AtomicI32, AtomicI64, AtomicI8, AtomicIsize, AtomicU16, AtomicU32, AtomicU64,
+    AtomicU8, AtomicUsize,
+};
+
+#[cfg(target_pointer_width = "32")]
+macro_rules! ptr_modifier { // Expands to the template modifier spliced after `{dst` in the asm strings below.
+    () => {
+        ":e" // 32-bit pointers: format the address register with the `e` modifier
+    };
+}
+#[cfg(target_pointer_width = "64")]
+macro_rules! ptr_modifier { // 64-bit counterpart of the macro above.
+    () => {
+        "" // 64-bit pointers: no modifier, the default register formatting is used
+    };
+}
+
+macro_rules! atomic_int { // Adds in-place `not`/`neg` (no return value), implemented as `lock not` / `lock neg` on the memory operand.
+    ($atomic_type:ident, $ptr_size:tt) => {
+        impl $atomic_type {
+            #[inline]
+            pub(crate) fn not(&self, _order: Ordering) {
+                let dst = self.as_ptr();
+                // SAFETY: any data races are prevented by the `lock`-prefixed instruction and the raw
+                // pointer passed in is valid because we got it from a reference.
+                //
+                // https://www.felixcloutier.com/x86/not
+                unsafe {
+                    // atomic RMW is always SeqCst, so `_order` is not consulted.
+                    asm!(
+                        concat!("lock not ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}]"),
+                        dst = in(reg) dst,
+                        options(nostack, preserves_flags), // NOT affects no flags, so `preserves_flags` holds here
+                    );
+                }
+            }
+            #[inline]
+            pub(crate) fn neg(&self, _order: Ordering) {
+                let dst = self.as_ptr();
+                // SAFETY: any data races are prevented by the `lock`-prefixed instruction and the raw
+                // pointer passed in is valid because we got it from a reference.
+                //
+                // https://www.felixcloutier.com/x86/neg
+                unsafe {
+                    // atomic RMW is always SeqCst, so `_order` is not consulted.
+                    asm!(
+                        concat!("lock neg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}]"),
+                        dst = in(reg) dst,
+                        // Do not use `preserves_flags` because NEG modifies the CF, OF, SF, ZF, AF, and PF flags.
+                        options(nostack),
+                    );
+                }
+            }
+        }
+    };
+}
+
+atomic_int!(AtomicI8, "byte"); // second argument is the operand-size keyword placed before `ptr` in the asm template
+atomic_int!(AtomicU8, "byte");
+atomic_int!(AtomicI16, "word");
+atomic_int!(AtomicU16, "word");
+atomic_int!(AtomicI32, "dword");
+atomic_int!(AtomicU32, "dword");
+#[cfg(target_arch = "x86_64")] // the qword form is only instantiated on x86_64; x86 gets the hand-written impls below
+atomic_int!(AtomicI64, "qword");
+#[cfg(target_arch = "x86_64")]
+atomic_int!(AtomicU64, "qword");
+#[cfg(target_pointer_width = "32")] // pointer-sized types follow the pointer width
+atomic_int!(AtomicIsize, "dword");
+#[cfg(target_pointer_width = "32")]
+atomic_int!(AtomicUsize, "dword");
+#[cfg(target_pointer_width = "64")]
+atomic_int!(AtomicIsize, "qword");
+#[cfg(target_pointer_width = "64")]
+atomic_int!(AtomicUsize, "qword");
+
+#[cfg(target_arch = "x86")]
+impl AtomicI64 { // 32-bit x86: `atomic_int!` is not instantiated for 64-bit types here, so `not`/`neg` forward to the fetch_* methods.
+    #[inline]
+    pub(crate) fn not(&self, order: Ordering) {
+        self.fetch_not(order); // any value returned by fetch_not is discarded
+    }
+    #[inline]
+    pub(crate) fn neg(&self, order: Ordering) {
+        self.fetch_neg(order); // any value returned by fetch_neg is discarded
+    }
+}
+#[cfg(target_arch = "x86")]
+impl AtomicU64 { // 32-bit x86: `atomic_int!` is not instantiated for 64-bit types here, so `not`/`neg` forward to the fetch_* methods.
+    #[inline]
+    pub(crate) fn not(&self, order: Ordering) {
+        self.fetch_not(order); // any value returned by fetch_not is discarded
+    }
+    #[inline]
+    pub(crate) fn neg(&self, order: Ordering) {
+        self.fetch_neg(order); // any value returned by fetch_neg is discarded
+    }
+}
+
+macro_rules! atomic_bit_opts { // Adds bit_set/bit_clear/bit_toggle; the notes below explain which implementation is selected.
+    ($atomic_type:ident, $int_type:ident, $val_modifier:tt, $ptr_size:tt) => {
+        // LLVM 14 and older don't support generating `lock bt{s,r,c}`.
+        // LLVM 15 only supports generating `lock bt{s,r,c}` for immediate bit offsets.
+        // LLVM 16+ can generate `lock bt{s,r,c}` for both immediate and register bit offsets.
+        // https://godbolt.org/z/TGhr5z4ds
+        // So, use fetch_* based implementations on LLVM 16+, otherwise use asm based implementations.
+        #[cfg(portable_atomic_llvm_16)]
+        impl_default_bit_opts!($atomic_type, $int_type);
+        #[cfg(not(portable_atomic_llvm_16))]
+        impl $atomic_type {
+            #[inline]
+            pub(crate) fn bit_set(&self, bit: u32, _order: Ordering) -> bool {
+                let dst = self.as_ptr();
+                // SAFETY: any data races are prevented by the `lock`-prefixed instruction and the raw
+                // pointer passed in is valid because we got it from a reference.
+                // the masking by the bit size of the type ensures that we do not shift
+                // out of bounds.
+                //
+                // https://www.felixcloutier.com/x86/bts
+                unsafe {
+                    let r: u8;
+                    // atomic RMW is always SeqCst, so `_order` is not consulted.
+                    asm!(
+                        concat!("lock bts ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
+                        "setb {r}", // r = CF, which BTS loads with the bit's previous value
+                        dst = in(reg) dst,
+                        bit = in(reg) (bit & ($int_type::BITS - 1)) as $int_type, // masked to 0..BITS so the register offset stays inside the operand
+                        r = out(reg_byte) r,
+                        // Do not use `preserves_flags` because BTS modifies the CF flag.
+                        options(nostack),
+                    );
+                    r != 0
+                }
+            }
+            #[inline]
+            pub(crate) fn bit_clear(&self, bit: u32, _order: Ordering) -> bool {
+                let dst = self.as_ptr();
+                // SAFETY: any data races are prevented by the `lock`-prefixed instruction and the raw
+                // pointer passed in is valid because we got it from a reference.
+                // the masking by the bit size of the type ensures that we do not shift
+                // out of bounds.
+                //
+                // https://www.felixcloutier.com/x86/btr
+                unsafe {
+                    let r: u8;
+                    // atomic RMW is always SeqCst, so `_order` is not consulted.
+                    asm!(
+                        concat!("lock btr ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
+                        "setb {r}", // r = CF, which BTR loads with the bit's previous value
+                        dst = in(reg) dst,
+                        bit = in(reg) (bit & ($int_type::BITS - 1)) as $int_type, // masked to 0..BITS so the register offset stays inside the operand
+                        r = out(reg_byte) r,
+                        // Do not use `preserves_flags` because BTR modifies the CF flag.
+                        options(nostack),
+                    );
+                    r != 0
+                }
+            }
+            #[inline]
+            pub(crate) fn bit_toggle(&self, bit: u32, _order: Ordering) -> bool {
+                let dst = self.as_ptr();
+                // SAFETY: any data races are prevented by the `lock`-prefixed instruction and the raw
+                // pointer passed in is valid because we got it from a reference.
+                // the masking by the bit size of the type ensures that we do not shift
+                // out of bounds.
+                //
+                // https://www.felixcloutier.com/x86/btc
+                unsafe {
+                    let r: u8;
+                    // atomic RMW is always SeqCst, so `_order` is not consulted.
+                    asm!(
+                        concat!("lock btc ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
+                        "setb {r}", // r = CF, which BTC loads with the bit's previous value
+                        dst = in(reg) dst,
+                        bit = in(reg) (bit & ($int_type::BITS - 1)) as $int_type, // masked to 0..BITS so the register offset stays inside the operand
+                        r = out(reg_byte) r,
+                        // Do not use `preserves_flags` because BTC modifies the CF flag.
+                        options(nostack),
+                    );
+                    r != 0
+                }
+            }
+        }
+    };
+}
+
+impl_default_bit_opts!(AtomicI8, i8); // 8-bit types always use the default implementation: bt{s,r,c} has no byte-sized operand form
+impl_default_bit_opts!(AtomicU8, u8);
+atomic_bit_opts!(AtomicI16, i16, ":x", "word"); // third argument: register template modifier for the bit operand; fourth: operand-size keyword
+atomic_bit_opts!(AtomicU16, u16, ":x", "word");
+atomic_bit_opts!(AtomicI32, i32, ":e", "dword");
+atomic_bit_opts!(AtomicU32, u32, ":e", "dword");
+#[cfg(target_arch = "x86_64")]
+atomic_bit_opts!(AtomicI64, i64, "", "qword");
+#[cfg(target_arch = "x86_64")]
+atomic_bit_opts!(AtomicU64, u64, "", "qword");
+#[cfg(target_arch = "x86")] // 32-bit x86: 64-bit types use the default implementation instead of the asm path
+impl_default_bit_opts!(AtomicI64, i64);
+#[cfg(target_arch = "x86")]
+impl_default_bit_opts!(AtomicU64, u64);
+#[cfg(target_pointer_width = "32")] // pointer-sized types follow the pointer width
+atomic_bit_opts!(AtomicIsize, isize, ":e", "dword");
+#[cfg(target_pointer_width = "32")]
+atomic_bit_opts!(AtomicUsize, usize, ":e", "dword");
+#[cfg(target_pointer_width = "64")]
+atomic_bit_opts!(AtomicIsize, isize, "", "qword");
+#[cfg(target_pointer_width = "64")]
+atomic_bit_opts!(AtomicUsize, usize, "", "qword");
diff --git a/vendor/portable-atomic/src/lib.rs b/vendor/portable-atomic/src/lib.rs
new file mode 100644
index 0000000..b13aca8
--- /dev/null
+++ b/vendor/portable-atomic/src/lib.rs
@@ -0,0 +1,3976 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+/*!
+<!-- tidy:crate-doc:start -->
+Portable atomic types including support for 128-bit atomics, atomic float, etc.
+
+- Provide all atomic integer types (`Atomic{I,U}{8,16,32,64}`) for all targets that can use atomic CAS. (i.e., all targets that can use `std`, and most no-std targets)
+- Provide `AtomicI128` and `AtomicU128`.
+- Provide `AtomicF32` and `AtomicF64`. ([optional, requires the `float` feature](#optional-features-float))
+- Provide atomic load/store for targets where atomic is not available at all in the standard library. (RISC-V without A-extension, MSP430, AVR)
+- Provide atomic CAS for targets where atomic CAS is not available in the standard library. (thumbv6m, pre-v6 ARM, RISC-V without A-extension, MSP430, AVR, Xtensa, etc.) (always enabled for MSP430 and AVR, [optional](#optional-features-critical-section) otherwise)
+- Provide stable equivalents of the standard library's atomic types' unstable APIs, such as [`AtomicPtr::fetch_*`](https://github.com/rust-lang/rust/issues/99108), [`AtomicBool::fetch_not`](https://github.com/rust-lang/rust/issues/98485).
+- Make features that require newer compilers, such as [`fetch_{max,min}`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.fetch_max), [`fetch_update`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.fetch_update), [`as_ptr`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.as_ptr), [`from_ptr`](https://doc.rust-lang.org/std/sync/atomic/struct.AtomicUsize.html#method.from_ptr) and [stronger CAS failure ordering](https://github.com/rust-lang/rust/pull/98383) available on Rust 1.34+.
+- Provide workarounds for bugs in the standard library's atomic-related APIs, such as [rust-lang/rust#100650], `fence`/`compiler_fence` on MSP430 that cause an LLVM error, etc.
+
+<!-- TODO:
+- mention Atomic{I,U}*::fetch_neg, Atomic{I*,U*,Ptr}::bit_*, etc.
+- mention portable-atomic-util crate
+-->
+
+## Usage
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+portable-atomic = "1"
+```
+
+The default features are mainly for users who use atomics larger than the pointer width.
+If you don't need them, disabling the default features may reduce code size and compile time slightly.
+
+```toml
+[dependencies]
+portable-atomic = { version = "1", default-features = false }
+```
+
+If your crate supports no-std environments and requires atomic CAS, enabling the `require-cas` feature will allow `portable-atomic` to display a helpful error message to users on targets that require additional action on the user side to provide atomic CAS.
+
+```toml
+[dependencies]
+portable-atomic = { version = "1.3", default-features = false, features = ["require-cas"] }
+```
+
+*Compiler support: requires rustc 1.34+*
+
+## 128-bit atomics support
+
+Native 128-bit atomic operations are available on x86_64 (Rust 1.59+), aarch64 (Rust 1.59+), powerpc64 (nightly only), and s390x (nightly only), otherwise the fallback implementation is used.
+
+On x86_64, even if `cmpxchg16b` is not available at compile-time (note: `cmpxchg16b` target feature is enabled by default only on Apple targets), run-time detection checks whether `cmpxchg16b` is available. If `cmpxchg16b` is not available at either compile-time or run-time detection, the fallback implementation is used. See also [`portable_atomic_no_outline_atomics`](#optional-cfg-no-outline-atomics) cfg.
+
+They are usually implemented using inline assembly, and when using Miri or ThreadSanitizer that do not support inline assembly, core intrinsics are used instead of inline assembly if possible.
+
+See the [`atomic128` module's readme](https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/atomic128/README.md) for details.
+
+## Optional features
+
+- **`fallback`** *(enabled by default)*<br>
+  Enable fallback implementations.
+
+  Disabling this allows only atomic types for which the platform natively supports atomic operations.
+
+- <a name="optional-features-float"></a>**`float`**<br>
+  Provide `AtomicF{32,64}`.
+
+  Note that most of `fetch_*` operations of atomic floats are implemented using CAS loops, which can be slower than equivalent operations of atomic integers. ([GPU targets have atomic instructions for float, so we plan to use these instructions for GPU targets in the future.](https://github.com/taiki-e/portable-atomic/issues/34))
+
+- **`std`**<br>
+  Use `std`.
+
+- <a name="optional-features-require-cas"></a>**`require-cas`**<br>
+  Emit compile error if atomic CAS is not available. See [Usage](#usage) section and [#100](https://github.com/taiki-e/portable-atomic/pull/100) for more.
+
+- <a name="optional-features-serde"></a>**`serde`**<br>
+  Implement `serde::{Serialize,Deserialize}` for atomic types.
+
+  Note:
+  - The MSRV when this feature is enabled depends on the MSRV of [serde].
+
+- <a name="optional-features-critical-section"></a>**`critical-section`**<br>
+  When this feature is enabled, this crate uses [critical-section] to provide atomic CAS for targets where
+  it is not natively available. When enabling it, you should provide a suitable critical section implementation
+  for the current target, see the [critical-section] documentation for details on how to do so.
+
+  `critical-section` support is useful to get atomic CAS when the [`unsafe-assume-single-core` feature](#optional-features-unsafe-assume-single-core) can't be used,
+  such as multi-core targets, unprivileged code running under some RTOS, or environments where disabling interrupts
+  needs extra care due to e.g. real-time requirements.
+
+  Note that with the `critical-section` feature, critical sections are taken for all atomic operations, while with
+  [`unsafe-assume-single-core` feature](#optional-features-unsafe-assume-single-core) some operations don't require disabling interrupts (loads and stores, but
+  additionally on MSP430 `add`, `sub`, `and`, `or`, `xor`, `not`). Therefore, for better performance, if
+  all the `critical-section` implementation for your target does is disable interrupts, prefer using
+  `unsafe-assume-single-core` feature instead.
+
+  Note:
+  - The MSRV when this feature is enabled depends on the MSRV of [critical-section].
+  - It is usually *not* recommended to always enable this feature in dependencies of the library.
+
+    Enabling this feature will prevent the end user from having the chance to take advantage of other (potentially) efficient implementations ([Implementations provided by `unsafe-assume-single-core` feature, default implementations on MSP430 and AVR](#optional-features-unsafe-assume-single-core), implementation proposed in [#60], etc. Other systems may also be supported in the future).
+
+    The recommended approach for libraries is to leave it up to the end user whether or not to enable this feature. (However, it may make sense to enable this feature by default for libraries specific to a platform where other implementations are known not to work.)
+
+    As an example, the end-user's `Cargo.toml` that uses a crate that provides a critical-section implementation and a crate that depends on portable-atomic as an option would be expected to look like this:
+
+    ```toml
+    [dependencies]
+    portable-atomic = { version = "1", default-features = false, features = ["critical-section"] }
+    crate-provides-critical-section-impl = "..."
+    crate-uses-portable-atomic-as-feature = { version = "...", features = ["portable-atomic"] }
+    ```
+
+- <a name="optional-features-unsafe-assume-single-core"></a>**`unsafe-assume-single-core`**<br>
+  Assume that the target is single-core.
+  When this feature is enabled, this crate provides atomic CAS for targets where atomic CAS is not available in the standard library by disabling interrupts.
+
+  This feature is `unsafe`, and note the following safety requirements:
+  - Enabling this feature for multi-core systems is always **unsound**.
+  - This uses privileged instructions to disable interrupts, so it usually doesn't work in unprivileged mode.
+    Enabling this feature in an environment where privileged instructions are not available, or where the instructions used are not sufficient to disable interrupts in the system, is also usually considered **unsound**, although the details are system-dependent.
+
+    The following are known cases:
+    - On pre-v6 ARM, this disables only IRQs by default. For many systems (e.g., GBA) this is enough. If the system needs to disable both IRQs and FIQs, you need to enable the `disable-fiq` feature as well.
+    - On RISC-V without A-extension, this generates code for machine-mode (M-mode) by default. If you enable the `s-mode` feature as well, this generates code for supervisor-mode (S-mode). In particular, `qemu-system-riscv*` uses [OpenSBI](https://github.com/riscv-software-src/opensbi) as the default firmware.
+
+    See also [the `interrupt` module's readme](https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/interrupt/README.md).
+
+  Consider using the [`critical-section` feature](#optional-features-critical-section) for systems that cannot use this feature.
+
+  It is **very strongly discouraged** to enable this feature in libraries that depend on `portable-atomic`. The recommended approach for libraries is to leave it up to the end user whether or not to enable this feature. (However, it may make sense to enable this feature by default for libraries specific to a platform where it is guaranteed to always be sound, for example in a hardware abstraction layer targeting a single-core chip.)
+
+  ARMv6-M (thumbv6m), pre-v6 ARM (e.g., thumbv4t, thumbv5te), RISC-V without A-extension, and Xtensa are currently supported.
+
+  Since all MSP430 and AVR are single-core, we always provide atomic CAS for them without this feature.
+
+  Enabling this feature for targets that have atomic CAS will result in a compile error.
+
+  Feel free to submit an issue if your target is not supported yet.
+
+## Optional cfg
+
+One of the ways to enable cfg is to set [rustflags in the cargo config](https://doc.rust-lang.org/cargo/reference/config.html#targettriplerustflags):
+
+```toml
+# .cargo/config.toml
+[target.<target>]
+rustflags = ["--cfg", "portable_atomic_no_outline_atomics"]
+```
+
+Or set environment variable:
+
+```sh
+RUSTFLAGS="--cfg portable_atomic_no_outline_atomics" cargo ...
+```
+
+- <a name="optional-cfg-unsafe-assume-single-core"></a>**`--cfg portable_atomic_unsafe_assume_single_core`**<br>
+  Since 1.4.0, this cfg is an alias of [`unsafe-assume-single-core` feature](#optional-features-unsafe-assume-single-core).
+
+  Originally, we were providing these as cfgs instead of features, but based on a strong request from the embedded ecosystem, we have agreed to provide them as features as well. See [#94](https://github.com/taiki-e/portable-atomic/pull/94) for more.
+
+- <a name="optional-cfg-no-outline-atomics"></a>**`--cfg portable_atomic_no_outline_atomics`**<br>
+  Disable dynamic dispatching by run-time CPU feature detection.
+
+  If dynamic dispatching by run-time CPU feature detection is enabled, it allows maintaining support for older CPUs while using features that are not supported on older CPUs, such as CMPXCHG16B (x86_64) and FEAT_LSE (aarch64).
+
+  Note:
+  - Dynamic detection is currently only enabled in Rust 1.59+ for aarch64, in Rust 1.59+ (AVX) or 1.69+ (CMPXCHG16B) for x86_64, nightly only for powerpc64 (disabled by default), otherwise it works the same as when this cfg is set.
+  - If the required target features are enabled at compile-time, the atomic operations are inlined.
+  - This is compatible with no-std (as with all features except `std`).
+  - On some targets, run-time detection is disabled by default mainly for compatibility with older versions of operating systems or incomplete build environments, and can be enabled by `--cfg portable_atomic_outline_atomics`. (When both cfg are enabled, `*_no_*` cfg is preferred.)
+  - Some aarch64 targets enable LLVM's `outline-atomics` target feature by default, so if you set this cfg, you may want to disable that as well. (portable-atomic's outline-atomics does not depend on the compiler-rt symbols, so even if you need to disable LLVM's outline-atomics, you may not need to disable portable-atomic's outline-atomics.)
+
+  See also the [`atomic128` module's readme](https://github.com/taiki-e/portable-atomic/blob/HEAD/src/imp/atomic128/README.md).
+
+## Related Projects
+
+- [atomic-maybe-uninit]: Atomic operations on potentially uninitialized integers.
+- [atomic-memcpy]: Byte-wise atomic memcpy.
+
+[#60]: https://github.com/taiki-e/portable-atomic/issues/60
+[atomic-maybe-uninit]: https://github.com/taiki-e/atomic-maybe-uninit
+[atomic-memcpy]: https://github.com/taiki-e/atomic-memcpy
+[critical-section]: https://github.com/rust-embedded/critical-section
+[rust-lang/rust#100650]: https://github.com/rust-lang/rust/issues/100650
+[serde]: https://github.com/serde-rs/serde
+
+<!-- tidy:crate-doc:end -->
+*/
+
+#![no_std]
+#![doc(test(
+    no_crate_inject,
+    attr(
+        deny(warnings, rust_2018_idioms, single_use_lifetimes),
+        allow(dead_code, unused_variables)
+    )
+))]
+#![cfg_attr(not(portable_atomic_no_unsafe_op_in_unsafe_fn), warn(unsafe_op_in_unsafe_fn))] // unsafe_op_in_unsafe_fn requires Rust 1.52
+#![cfg_attr(portable_atomic_no_unsafe_op_in_unsafe_fn, allow(unused_unsafe))]
+#![warn(
+    // Lints that may help when writing public library.
+    missing_debug_implementations,
+    // missing_docs,
+    clippy::alloc_instead_of_core,
+    clippy::exhaustive_enums,
+    clippy::exhaustive_structs,
+    clippy::impl_trait_in_params,
+    clippy::missing_inline_in_public_items,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+)]
+#![cfg_attr(not(portable_atomic_no_asm), warn(missing_docs))] // module-level #![allow(missing_docs)] doesn't work for macros on old rustc
+#![allow(
+    clippy::cast_lossless,
+    clippy::inline_always,
+    clippy::naive_bytecount,
+    clippy::unreadable_literal
+)]
+// asm_experimental_arch
+// AVR, MSP430, and Xtensa are tier 3 platforms and require nightly anyway.
+// On tier 2 platforms (powerpc64 and s390x), we use cfg set by build script to
+// determine whether this feature is available or not.
+#![cfg_attr(
+    all(
+        not(portable_atomic_no_asm),
+        any(
+            target_arch = "avr",
+            target_arch = "msp430",
+            all(target_arch = "xtensa", portable_atomic_unsafe_assume_single_core),
+            all(target_arch = "powerpc64", portable_atomic_unstable_asm_experimental_arch),
+            all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch),
+        ),
+    ),
+    feature(asm_experimental_arch)
+)]
+// Old nightly only
+// These features are already stabilized or have already been removed from compilers,
+// and can safely be enabled for old nightly as long as version detection works.
+// - cfg(target_has_atomic)
+// - #[target_feature(enable = "cmpxchg16b")] on x86_64
+// - asm! on ARM, AArch64, RISC-V, x86_64
+// - llvm_asm! on AVR (tier 3) and MSP430 (tier 3)
+// - #[instruction_set] on non-Linux/Android pre-v6 ARM (tier 3)
+#![cfg_attr(portable_atomic_unstable_cfg_target_has_atomic, feature(cfg_target_has_atomic))]
+#![cfg_attr(
+    all(
+        target_arch = "x86_64",
+        portable_atomic_unstable_cmpxchg16b_target_feature,
+        not(portable_atomic_no_outline_atomics),
+        not(any(target_env = "sgx", miri)),
+        feature = "fallback",
+    ),
+    feature(cmpxchg16b_target_feature)
+)]
+#![cfg_attr(
+    all(
+        portable_atomic_unstable_asm,
+        any(
+            target_arch = "aarch64",
+            target_arch = "arm",
+            target_arch = "riscv32",
+            target_arch = "riscv64",
+            target_arch = "x86_64",
+        ),
+    ),
+    feature(asm)
+)]
+#![cfg_attr(
+    all(any(target_arch = "avr", target_arch = "msp430"), portable_atomic_no_asm),
+    feature(llvm_asm)
+)]
+#![cfg_attr(
+    all(
+        target_arch = "arm",
+        portable_atomic_unstable_isa_attribute,
+        any(test, portable_atomic_unsafe_assume_single_core),
+        not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+        not(target_has_atomic = "ptr"),
+    ),
+    feature(isa_attribute)
+)]
+// Miri and/or ThreadSanitizer only
+// They do not support inline assembly, so we need to use unstable features instead.
+// Since they require nightly compilers anyway, we can use the unstable features.
+#![cfg_attr(
+    all(
+        any(target_arch = "aarch64", target_arch = "powerpc64", target_arch = "s390x"),
+        any(miri, portable_atomic_sanitize_thread),
+    ),
+    feature(core_intrinsics)
+)]
+// This feature is only enabled for old nightly because cmpxchg16b_intrinsic has been stabilized.
+#![cfg_attr(
+    all(
+        target_arch = "x86_64",
+        portable_atomic_unstable_cmpxchg16b_intrinsic,
+        any(miri, portable_atomic_sanitize_thread),
+    ),
+    feature(stdsimd)
+)]
+// docs.rs only
+#![cfg_attr(portable_atomic_doc_cfg, feature(doc_cfg))]
+#![cfg_attr(
+    all(
+        portable_atomic_no_atomic_load_store,
+        not(any(
+            target_arch = "avr",
+            target_arch = "bpf",
+            target_arch = "msp430",
+            target_arch = "riscv32",
+            target_arch = "riscv64",
+            feature = "critical-section",
+        )),
+    ),
+    allow(unused_imports, unused_macros)
+)]
+
+// There are currently no 8-bit, 128-bit, or higher builtin targets.
+// (Although some of our generic code is written with the future
+// addition of 128-bit targets in mind.)
+// Note that Rust (and C99) pointers must be at least 16-bits: https://github.com/rust-lang/rust/pull/49305
+//
+// Any pointer width other than 16, 32, or 64 bits is rejected here at compile time,
+// with a message that points the user at the issue tracker.
+#[cfg(not(any(
+    target_pointer_width = "16",
+    target_pointer_width = "32",
+    target_pointer_width = "64",
+)))]
+compile_error!(
+    "portable-atomic currently only supports targets with {16,32,64}-bit pointer width; \
+     if you need support for others, \
+     please submit an issue at <https://github.com/taiki-e/portable-atomic>"
+);
+
+// `portable_atomic_unsafe_assume_single_core` is rejected when either the target
+// already provides atomic CAS or its architecture is not one of those listed below.
+//
+// The condition is written twice because `cfg(target_has_atomic)` cannot be used when
+// `portable_atomic_no_cfg_target_has_atomic` is set; in that case the
+// `portable_atomic_no_atomic_cas` cfg is consulted instead.
+#[cfg(portable_atomic_unsafe_assume_single_core)]
+#[cfg_attr(
+    portable_atomic_no_cfg_target_has_atomic,
+    cfg(any(
+        not(portable_atomic_no_atomic_cas),
+        not(any(
+            target_arch = "arm",
+            target_arch = "avr",
+            target_arch = "msp430",
+            target_arch = "riscv32",
+            target_arch = "riscv64",
+            target_arch = "xtensa",
+        )),
+    ))
+)]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(any(
+        target_has_atomic = "ptr",
+        not(any(
+            target_arch = "arm",
+            target_arch = "avr",
+            target_arch = "msp430",
+            target_arch = "riscv32",
+            target_arch = "riscv64",
+            target_arch = "xtensa",
+        )),
+    ))
+)]
+compile_error!(
+    "cfg(portable_atomic_unsafe_assume_single_core) is not compatible with this target;\n\
+     if you need cfg(portable_atomic_unsafe_assume_single_core) support for this target,\n\
+     please submit an issue at <https://github.com/taiki-e/portable-atomic>"
+);
+
+// Each optional cfg below is only meaningful on particular architectures; setting one
+// on any other target is reported as a compile error instead of being silently ignored.
+#[cfg(portable_atomic_no_outline_atomics)]
+#[cfg(not(any(
+    target_arch = "aarch64",
+    target_arch = "arm",
+    target_arch = "powerpc64",
+    target_arch = "x86_64",
+)))]
+compile_error!("cfg(portable_atomic_no_outline_atomics) is not compatible with this target");
+#[cfg(portable_atomic_outline_atomics)]
+#[cfg(not(any(target_arch = "aarch64", target_arch = "powerpc64")))]
+compile_error!("cfg(portable_atomic_outline_atomics) is not compatible with this target");
+// `disable_fiq` applies to ARM targets without the `mclass` target feature only.
+#[cfg(portable_atomic_disable_fiq)]
+#[cfg(not(all(
+    target_arch = "arm",
+    not(any(target_feature = "mclass", portable_atomic_target_feature = "mclass")),
+)))]
+compile_error!("cfg(portable_atomic_disable_fiq) is not compatible with this target");
+// `s_mode` and `force_amo` are RISC-V only.
+#[cfg(portable_atomic_s_mode)]
+#[cfg(not(any(target_arch = "riscv32", target_arch = "riscv64")))]
+compile_error!("cfg(portable_atomic_s_mode) is not compatible with this target");
+#[cfg(portable_atomic_force_amo)]
+#[cfg(not(any(target_arch = "riscv32", target_arch = "riscv64")))]
+compile_error!("cfg(portable_atomic_force_amo) is not compatible with this target");
+
+// `disable_fiq`, `s_mode`, and `force_amo` are only accepted in combination with
+// `portable_atomic_unsafe_assume_single_core`; using any of them on its own is an error.
+#[cfg(portable_atomic_disable_fiq)]
+#[cfg(not(portable_atomic_unsafe_assume_single_core))]
+compile_error!(
+    "cfg(portable_atomic_disable_fiq) may only be used together with cfg(portable_atomic_unsafe_assume_single_core)"
+);
+#[cfg(portable_atomic_s_mode)]
+#[cfg(not(portable_atomic_unsafe_assume_single_core))]
+compile_error!(
+    "cfg(portable_atomic_s_mode) may only be used together with cfg(portable_atomic_unsafe_assume_single_core)"
+);
+#[cfg(portable_atomic_force_amo)]
+#[cfg(not(portable_atomic_unsafe_assume_single_core))]
+compile_error!(
+    "cfg(portable_atomic_force_amo) may only be used together with cfg(portable_atomic_unsafe_assume_single_core)"
+);
+
+// The single-core cfg and the `critical-section` feature are mutually exclusive.
+#[cfg(all(portable_atomic_unsafe_assume_single_core, feature = "critical-section"))]
+compile_error!(
+    "you may not enable feature `critical-section` and cfg(portable_atomic_unsafe_assume_single_core) at the same time"
+);
+
+// With the `require-cas` feature enabled, fail the build unless atomic CAS can be
+// provided by at least one of:
+// - the target itself (`target_has_atomic = "ptr"`, or `portable_atomic_no_atomic_cas`
+//   not being set when `cfg(target_has_atomic)` cannot be used),
+// - `portable_atomic_unsafe_assume_single_core`,
+// - the `critical-section` feature,
+// - the target being AVR or MSP430.
+#[cfg(feature = "require-cas")]
+#[cfg_attr(
+    portable_atomic_no_cfg_target_has_atomic,
+    cfg(not(any(
+        not(portable_atomic_no_atomic_cas),
+        portable_atomic_unsafe_assume_single_core,
+        feature = "critical-section",
+        target_arch = "avr",
+        target_arch = "msp430",
+    )))
+)]
+#[cfg_attr(
+    not(portable_atomic_no_cfg_target_has_atomic),
+    cfg(not(any(
+        target_has_atomic = "ptr",
+        portable_atomic_unsafe_assume_single_core,
+        feature = "critical-section",
+        target_arch = "avr",
+        target_arch = "msp430",
+    )))
+)]
+compile_error!(
+    "dependents require atomic CAS but not available on this target by default;\n\
+    consider enabling one of the `unsafe-assume-single-core` or `critical-section` Cargo features.\n\
+    see <https://docs.rs/portable-atomic/latest/portable_atomic/#optional-features> for more."
+);
+
+#[cfg(any(test, feature = "std"))]
+extern crate std;
+
+#[macro_use]
+mod cfgs;
+#[cfg(target_pointer_width = "128")]
+pub use {cfg_has_atomic_128 as cfg_has_atomic_ptr, cfg_no_atomic_128 as cfg_no_atomic_ptr};
+#[cfg(target_pointer_width = "16")]
+pub use {cfg_has_atomic_16 as cfg_has_atomic_ptr, cfg_no_atomic_16 as cfg_no_atomic_ptr};
+#[cfg(target_pointer_width = "32")]
+pub use {cfg_has_atomic_32 as cfg_has_atomic_ptr, cfg_no_atomic_32 as cfg_no_atomic_ptr};
+#[cfg(target_pointer_width = "64")]
+pub use {cfg_has_atomic_64 as cfg_has_atomic_ptr, cfg_no_atomic_64 as cfg_no_atomic_ptr};
+
+#[macro_use]
+mod utils;
+
+#[cfg(test)]
+#[macro_use]
+mod tests;
+
+#[doc(no_inline)]
+pub use core::sync::atomic::Ordering;
+
+#[doc(no_inline)]
+// LLVM doesn't support fence/compiler_fence for MSP430.
+#[cfg(not(target_arch = "msp430"))]
+pub use core::sync::atomic::{compiler_fence, fence};
+#[cfg(target_arch = "msp430")]
+pub use imp::msp430::{compiler_fence, fence};
+
+mod imp;
+
+pub mod hint {
+    //! Re-export of the [`core::hint`] module.
+    //!
+    //! The only difference from the [`core::hint`] module is that [`spin_loop`]
+    //! is available in all rust versions that this crate supports.
+    //!
+    //! ```
+    //! use portable_atomic::hint;
+    //!
+    //! hint::spin_loop();
+    //! ```
+
+    // Glob re-export of everything in `core::hint`. The `spin_loop` function defined
+    // explicitly below takes precedence over any glob-imported item of the same name.
+    #[doc(no_inline)]
+    pub use core::hint::*;
+
+    /// Emits a machine instruction to signal the processor that it is running in
+    /// a busy-wait spin-loop ("spin lock").
+    ///
+    /// Upon receiving the spin-loop signal the processor can optimize its behavior by,
+    /// for example, saving power or switching hyper-threads.
+    ///
+    /// This function is different from [`thread::yield_now`] which directly
+    /// yields to the system's scheduler, whereas `spin_loop` does not interact
+    /// with the operating system.
+    ///
+    /// A common use case for `spin_loop` is implementing bounded optimistic
+    /// spinning in a CAS loop in synchronization primitives. To avoid problems
+    /// like priority inversion, it is strongly recommended that the spin loop is
+    /// terminated after a finite amount of iterations and an appropriate blocking
+    /// syscall is made.
+    ///
+    /// **Note:** On platforms that do not support receiving spin-loop hints this
+    /// function does not do anything at all.
+    ///
+    /// [`thread::yield_now`]: https://doc.rust-lang.org/std/thread/fn.yield_now.html
+    #[inline]
+    pub fn spin_loop() {
+        // The deprecated `spin_loop_hint` is called on purpose: as the module docs say,
+        // this function has to work on every Rust version the crate supports, including
+        // ones where `core::hint::spin_loop` is not available.
+        #[allow(deprecated)]
+        core::sync::atomic::spin_loop_hint();
+    }
+}
+
+#[cfg(doc)]
+use core::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst};
+use core::{fmt, ptr};
+
+#[cfg(miri)]
+use crate::utils::strict;
+
+cfg_has_atomic_8! {
+cfg_has_atomic_cas! {
+// `EMULATE_ATOMIC_BOOL` is `true` only on riscv32, riscv64, and loongarch64 targets
+// that have native atomics: `target_has_atomic = "8"` when `cfg(target_has_atomic)`
+// can be used, otherwise `portable_atomic_no_atomic_cas` not being set.
+// On every other target it is `false`.
+// NOTE(review): presumably read by the read-modify-write methods further down to
+// select a different implementation on these architectures — the uses are outside
+// this chunk; confirm there.
+//
+// See https://github.com/rust-lang/rust/pull/114034 for details.
+// https://github.com/rust-lang/rust/blob/9339f446a5302cd5041d3f3b5e59761f36699167/library/core/src/sync/atomic.rs#L134
+// https://godbolt.org/z/5W85abT58
+#[cfg(portable_atomic_no_cfg_target_has_atomic)]
+const EMULATE_ATOMIC_BOOL: bool = cfg!(all(
+    not(portable_atomic_no_atomic_cas),
+    any(target_arch = "riscv32", target_arch = "riscv64", target_arch = "loongarch64"),
+));
+#[cfg(not(portable_atomic_no_cfg_target_has_atomic))]
+const EMULATE_ATOMIC_BOOL: bool = cfg!(all(
+    target_has_atomic = "8",
+    any(target_arch = "riscv32", target_arch = "riscv64", target_arch = "loongarch64"),
+));
+} // cfg_has_atomic_cas!
+
+/// A boolean type which can be safely shared between threads.
+///
+/// This type has the same in-memory representation as a [`bool`].
+///
+/// If the compiler and the platform support atomic loads and stores of `u8`,
+/// this type is a wrapper for the standard library's
+/// [`AtomicBool`](core::sync::atomic::AtomicBool). If the platform supports it
+/// but the compiler does not, atomic operations are implemented using inline
+/// assembly.
+// A single byte with alignment 1, matching `bool`; `new` checks the layout against
+// `bool` via `static_assert_layout!`.
+#[repr(C, align(1))]
+pub struct AtomicBool {
+    // Stored as a `u8` rather than a `bool`: `new` writes `v as u8` (0 or 1) and
+    // readers compare against 0. `load`/`store` go through the 8-bit atomic view
+    // returned by `as_atomic_u8`.
+    v: core::cell::UnsafeCell<u8>,
+}
+
+impl Default for AtomicBool {
+    /// Returns an `AtomicBool` whose initial value is `false`.
+    #[inline]
+    fn default() -> Self {
+        AtomicBool::new(false)
+    }
+}
+
+impl From<bool> for AtomicBool {
+    /// Wraps the given `bool` in a new `AtomicBool`.
+    #[inline]
+    fn from(b: bool) -> Self {
+        AtomicBool::new(b)
+    }
+}
+
+// Send is implicitly implemented.
+// `Sync` is not: the `UnsafeCell` field opts the type out of it, so it is implemented
+// manually here.
+// SAFETY: any data races are prevented by disabling interrupts or
+// atomic intrinsics (see module-level comments).
+unsafe impl Sync for AtomicBool {}
+
+// UnwindSafe is implicitly implemented.
+// `RefUnwindSafe` is implemented through `core::panic` unless
+// `portable_atomic_no_core_unwind_safe` is set; in that case it is implemented through
+// `std::panic`, and only when the `std` feature is enabled (otherwise there is no impl).
+#[cfg(not(portable_atomic_no_core_unwind_safe))]
+impl core::panic::RefUnwindSafe for AtomicBool {}
+#[cfg(all(portable_atomic_no_core_unwind_safe, feature = "std"))]
+impl std::panic::RefUnwindSafe for AtomicBool {}
+
+// NOTE(review): judging by its name, this generates the `Debug` impl and the optional
+// serde impls; the macro is defined outside this chunk — confirm there.
+impl_debug_and_serde!(AtomicBool);
+
+impl AtomicBool {
+    /// Constructs an `AtomicBool` holding the given initial value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::AtomicBool;
+    ///
+    /// let atomic_true = AtomicBool::new(true);
+    /// let atomic_false = AtomicBool::new(false);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn new(v: bool) -> Self {
+        // Layout check of `AtomicBool` against `bool` (macro defined elsewhere).
+        static_assert_layout!(AtomicBool, bool);
+        // `false` becomes 0 and `true` becomes 1.
+        let raw = v as u8;
+        Self { v: core::cell::UnsafeCell::new(raw) }
+    }
+
+    /// Creates a new `AtomicBool` from a pointer.
+    ///
+    /// # Safety
+    ///
+    /// * `ptr` must be aligned to `align_of::<AtomicBool>()` (note that on some platforms this can
+    ///   be bigger than `align_of::<bool>()`).
+    /// * `ptr` must be [valid] for both reads and writes for the whole lifetime `'a`.
+    /// * If this atomic type is [lock-free](Self::is_lock_free), non-atomic accesses to the value
+    ///   behind `ptr` must have a happens-before relationship with atomic accesses via the returned
+    ///   value (or vice-versa).
+    ///   * In other words, time periods where the value is accessed atomically may not overlap
+    ///     with periods where the value is accessed non-atomically.
+    ///   * This requirement is trivially satisfied if `ptr` is never used non-atomically for the
+    ///     duration of lifetime `'a`. Most use cases should be able to follow this guideline.
+    ///   * This requirement is also trivially satisfied if all accesses (atomic or not) are done
+    ///     from the same thread.
+    /// * If this atomic type is *not* lock-free:
+    ///   * Any accesses to the value behind `ptr` must have a happens-before relationship
+    ///     with accesses via the returned value (or vice-versa).
+    ///   * Any concurrent accesses to the value behind `ptr` for the duration of lifetime `'a` must
+    ///     be compatible with operations performed by this atomic type.
+    /// * This method must not be used to create overlapping or mixed-size atomic accesses, as
+    ///   these are not supported by the memory model.
+    ///
+    /// [valid]: core::ptr#safety
+    #[inline]
+    #[must_use]
+    pub unsafe fn from_ptr<'a>(ptr: *mut bool) -> &'a Self {
+        // The cast only changes the pointee type: `AtomicBool` is documented to have the
+        // same in-memory representation as `bool`. The returned reference borrows the
+        // caller's memory for `'a`; nothing is copied.
+        #[allow(clippy::cast_ptr_alignment)]
+        // SAFETY: guaranteed by the caller
+        unsafe { &*(ptr as *mut Self) }
+    }
+
+    /// Returns `true` if operations on values of this type are lock-free.
+    ///
+    /// If the compiler or the platform doesn't support the necessary
+    /// atomic instructions, global locks for every potentially
+    /// concurrent atomic operation will be used.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::AtomicBool;
+    ///
+    /// let is_lock_free = AtomicBool::is_lock_free();
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn is_lock_free() -> bool {
+        // The value is stored as a `u8`, so the answer is whatever the 8-bit atomic
+        // implementation reports.
+        imp::AtomicU8::is_lock_free()
+    }
+
+    /// Returns `true` if operations on values of this type are lock-free.
+    ///
+    /// If the compiler or the platform doesn't support the necessary
+    /// atomic instructions, global locks for every potentially
+    /// concurrent atomic operation will be used.
+    ///
+    /// **Note:** If the atomic operation relies on dynamic CPU feature detection,
+    /// this type may be lock-free even if the function returns false.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::AtomicBool;
+    ///
+    /// const IS_ALWAYS_LOCK_FREE: bool = AtomicBool::is_always_lock_free();
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn is_always_lock_free() -> bool {
+        // `const` counterpart of `is_lock_free`; likewise delegated to the 8-bit atomic
+        // implementation because the value is stored as a `u8`.
+        imp::AtomicU8::is_always_lock_free()
+    }
+
+    /// Gives direct, non-atomic access to the stored [`bool`] through a mutable reference.
+    ///
+    /// No synchronization is required: holding `&mut self` guarantees that no other
+    /// thread is accessing the atomic data at the same time.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let mut some_bool = AtomicBool::new(true);
+    /// assert_eq!(*some_bool.get_mut(), true);
+    /// *some_bool.get_mut() = false;
+    /// assert_eq!(some_bool.load(Ordering::SeqCst), false);
+    /// ```
+    #[inline]
+    pub fn get_mut(&mut self) -> &mut bool {
+        // View the backing byte as a `bool`.
+        let raw = self.v.get() as *mut bool;
+        // SAFETY: the mutable reference guarantees unique ownership.
+        unsafe { &mut *raw }
+    }
+
+    // TODO: Add from_mut/get_mut_slice/from_mut_slice once it is stable on std atomic types.
+    // https://github.com/rust-lang/rust/issues/76314
+
+    /// Unwraps the atomic, yielding the `bool` it contained.
+    ///
+    /// Taking `self` by value means no other thread can be accessing the atomic data
+    /// concurrently, so no synchronization is needed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::AtomicBool;
+    ///
+    /// let some_bool = AtomicBool::new(true);
+    /// assert_eq!(some_bool.into_inner(), true);
+    /// ```
+    #[inline]
+    pub fn into_inner(self) -> bool {
+        let raw: u8 = self.v.into_inner();
+        // Any non-zero byte is reported as `true`.
+        raw != 0
+    }
+
+    /// Reads the current value of the bool.
+    ///
+    /// `load` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. Possible values are [`SeqCst`], [`Acquire`] and [`Relaxed`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `order` is [`Release`] or [`AcqRel`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// assert_eq!(flag.load(Ordering::Relaxed), true);
+    /// ```
+    #[inline]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub fn load(&self, order: Ordering) -> bool {
+        let raw = self.as_atomic_u8().load(order);
+        raw != 0
+    }
+
+    /// Writes a value into the bool.
+    ///
+    /// `store` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. Possible values are [`SeqCst`], [`Release`] and [`Relaxed`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `order` is [`Acquire`] or [`AcqRel`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let flag = AtomicBool::new(true);
+    ///
+    /// flag.store(false, Ordering::Relaxed);
+    /// assert_eq!(flag.load(Ordering::Relaxed), false);
+    /// ```
+    #[inline]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub fn store(&self, val: bool, order: Ordering) {
+        self.as_atomic_u8().store(u8::from(val), order);
+    }
+
+    cfg_has_atomic_cas! {
+    /// Replaces the value of the bool and hands back the value it held before.
+    ///
+    /// `swap` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let flag = AtomicBool::new(true);
+    ///
+    /// assert_eq!(flag.swap(false, Ordering::Relaxed), true);
+    /// assert_eq!(flag.load(Ordering::Relaxed), false);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn swap(&self, val: bool, order: Ordering) -> bool {
+        if !EMULATE_ATOMIC_BOOL {
+            return self.as_atomic_u8().swap(u8::from(val), order) != 0;
+        }
+        // Emulation: storing `true` is an OR with true, storing `false` an AND with false.
+        if val { self.fetch_or(true, order) } else { self.fetch_and(false, order) }
+    }
+
+    /// Stores a value into the [`bool`] if the current value is the same as the `current` value.
+    ///
+    /// The return value is a result indicating whether the new value was written and containing
+    /// the previous value. On success this value is guaranteed to be equal to `current`.
+    ///
+    /// `compare_exchange` takes two [`Ordering`] arguments to describe the memory
+    /// ordering of this operation. `success` describes the required ordering for the
+    /// read-modify-write operation that takes place if the comparison with `current` succeeds.
+    /// `failure` describes the required ordering for the load operation that takes place when
+    /// the comparison fails. Using [`Acquire`] as success ordering makes the store part
+    /// of this operation [`Relaxed`], and using [`Release`] makes the successful load
+    /// [`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `failure` is [`Release`] or [`AcqRel`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let some_bool = AtomicBool::new(true);
+    ///
+    /// assert_eq!(
+    ///     some_bool.compare_exchange(true, false, Ordering::Acquire, Ordering::Relaxed),
+    ///     Ok(true)
+    /// );
+    /// assert_eq!(some_bool.load(Ordering::Relaxed), false);
+    ///
+    /// assert_eq!(
+    ///     some_bool.compare_exchange(true, true, Ordering::SeqCst, Ordering::Acquire),
+    ///     Err(false)
+    /// );
+    /// assert_eq!(some_bool.load(Ordering::Relaxed), false);
+    /// ```
+    #[inline]
+    #[cfg_attr(portable_atomic_doc_cfg, doc(alias = "compare_and_swap"))]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub fn compare_exchange(
+        &self,
+        current: bool,
+        new: bool,
+        success: Ordering,
+        failure: Ordering,
+    ) -> Result<bool, bool> {
+        if EMULATE_ATOMIC_BOOL {
+            crate::utils::assert_compare_exchange_ordering(success, failure);
+            let order = crate::utils::upgrade_success_ordering(success, failure);
+            let old = if current == new {
+                // Storing `new` would not change anything here, but an RMW (OR with false)
+                // is still issued so that the requested memory ordering takes effect.
+                self.fetch_or(false, order)
+            } else {
+                // `current != new`: swap in `new` and get back the value it replaced.
+                self.swap(new, order)
+            };
+            if old == current { Ok(old) } else { Err(old) }
+        } else {
+            match self.as_atomic_u8().compare_exchange(current as u8, new as u8, success, failure) {
+                Ok(x) => Ok(x != 0),
+                Err(x) => Err(x != 0),
+            }
+        }
+    }
+
+    /// Stores a value into the [`bool`] if the current value is the same as the `current` value.
+    ///
+    /// Unlike [`AtomicBool::compare_exchange`], this function may fail spuriously even when the
+    /// comparison succeeds, which can result in more efficient code on some platforms. The
+    /// return value is a result indicating whether the new value was written and containing the
+    /// previous value.
+    ///
+    /// `compare_exchange_weak` takes two [`Ordering`] arguments to describe the memory
+    /// ordering of this operation. `success` describes the required ordering for the
+    /// read-modify-write operation that takes place if the comparison with `current` succeeds.
+    /// `failure` describes the required ordering for the load operation that takes place when
+    /// the comparison fails. Using [`Acquire`] as success ordering makes the store part
+    /// of this operation [`Relaxed`], and using [`Release`] makes the successful load
+    /// [`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `failure` is [`Release`] or [`AcqRel`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let val = AtomicBool::new(false);
+    ///
+    /// let new = true;
+    /// let mut old = val.load(Ordering::Relaxed);
+    /// loop {
+    ///     match val.compare_exchange_weak(old, new, Ordering::SeqCst, Ordering::Relaxed) {
+    ///         Ok(_) => break,
+    ///         Err(x) => old = x,
+    ///     }
+    /// }
+    /// ```
+    #[inline]
+    #[cfg_attr(portable_atomic_doc_cfg, doc(alias = "compare_and_swap"))]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub fn compare_exchange_weak(
+        &self,
+        current: bool,
+        new: bool,
+        success: Ordering,
+        failure: Ordering,
+    ) -> Result<bool, bool> {
+        if EMULATE_ATOMIC_BOOL {
+            self.compare_exchange(current, new, success, failure)
+        } else {
+            let (expected, desired) = (u8::from(current), u8::from(new));
+            self.as_atomic_u8()
+                .compare_exchange_weak(expected, desired, success, failure)
+                .map(|prev| prev != 0)
+                .map_err(|prev| prev != 0)
+        }
+    }
+
+    /// Applies logical "and" with a boolean value.
+    ///
+    /// Computes the logical "and" of the current value and the argument `val`, and stores
+    /// the result as the new value.
+    ///
+    /// Returns the previous value.
+    ///
+    /// `fetch_and` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// assert_eq!(flag.fetch_and(false, Ordering::SeqCst), true);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// assert_eq!(flag.fetch_and(true, Ordering::SeqCst), true);
+    /// assert_eq!(flag.load(Ordering::SeqCst), true);
+    ///
+    /// let flag = AtomicBool::new(false);
+    /// assert_eq!(flag.fetch_and(false, Ordering::SeqCst), false);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_and(&self, val: bool, order: Ordering) -> bool {
+        self.as_atomic_u8().fetch_and(u8::from(val), order) != 0
+    }
+
+    /// Applies logical "and" with a boolean value, discarding the old value.
+    ///
+    /// Computes the logical "and" of the current value and the argument `val`, and stores
+    /// the result as the new value.
+    ///
+    /// Unlike `fetch_and`, this does not return the previous value.
+    ///
+    /// `and` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// This function may generate more efficient code than `fetch_and` on some platforms.
+    ///
+    /// - x86/x86_64: `lock and` instead of `cmpxchg` loop
+    /// - MSP430: `and` instead of disabling interrupts
+    ///
+    /// Note: On x86/x86_64, the use of either function should not usually
+    /// affect the generated code, because LLVM can properly optimize the case
+    /// where the result is unused.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// flag.and(false, Ordering::SeqCst);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// flag.and(true, Ordering::SeqCst);
+    /// assert_eq!(flag.load(Ordering::SeqCst), true);
+    ///
+    /// let flag = AtomicBool::new(false);
+    /// flag.and(false, Ordering::SeqCst);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn and(&self, val: bool, order: Ordering) {
+        self.as_atomic_u8().and(u8::from(val), order);
+    }
+
+    /// Applies logical "nand" with a boolean value.
+    ///
+    /// Computes the logical "nand" of the current value and the argument `val`, and stores
+    /// the result as the new value.
+    ///
+    /// Returns the previous value.
+    ///
+    /// `fetch_nand` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// assert_eq!(flag.fetch_nand(false, Ordering::SeqCst), true);
+    /// assert_eq!(flag.load(Ordering::SeqCst), true);
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// assert_eq!(flag.fetch_nand(true, Ordering::SeqCst), true);
+    /// assert_eq!(flag.load(Ordering::SeqCst) as usize, 0);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    ///
+    /// let flag = AtomicBool::new(false);
+    /// assert_eq!(flag.fetch_nand(false, Ordering::SeqCst), false);
+    /// assert_eq!(flag.load(Ordering::SeqCst), true);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_nand(&self, val: bool, order: Ordering) -> bool {
+        // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs#L811-L825
+        if val {
+            // !(x & true) == !x
+            // We must invert the bool.
+            return self.fetch_xor(true, order);
+        }
+
+        // !(x & false) == true
+        // We must set the bool to true.
+        self.swap(true, order)
+    }
+
+    /// Applies logical "or" with a boolean value.
+    ///
+    /// Computes the logical "or" of the current value and the argument `val`, and stores the
+    /// result as the new value.
+    ///
+    /// Returns the previous value.
+    ///
+    /// `fetch_or` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// assert_eq!(flag.fetch_or(false, Ordering::SeqCst), true);
+    /// assert_eq!(flag.load(Ordering::SeqCst), true);
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// assert_eq!(flag.fetch_or(true, Ordering::SeqCst), true);
+    /// assert_eq!(flag.load(Ordering::SeqCst), true);
+    ///
+    /// let flag = AtomicBool::new(false);
+    /// assert_eq!(flag.fetch_or(false, Ordering::SeqCst), false);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_or(&self, val: bool, order: Ordering) -> bool {
+        self.as_atomic_u8().fetch_or(u8::from(val), order) != 0
+    }
+
+    /// Applies logical "or" with a boolean value, discarding the old value.
+    ///
+    /// Computes the logical "or" of the current value and the argument `val`, and stores the
+    /// result as the new value.
+    ///
+    /// Unlike `fetch_or`, this does not return the previous value.
+    ///
+    /// `or` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// This function may generate more efficient code than `fetch_or` on some platforms.
+    ///
+    /// - x86/x86_64: `lock or` instead of `cmpxchg` loop
+    /// - MSP430: `bis` instead of disabling interrupts
+    ///
+    /// Note: On x86/x86_64, the use of either function should not usually
+    /// affect the generated code, because LLVM can properly optimize the case
+    /// where the result is unused.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// flag.or(false, Ordering::SeqCst);
+    /// assert_eq!(flag.load(Ordering::SeqCst), true);
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// flag.or(true, Ordering::SeqCst);
+    /// assert_eq!(flag.load(Ordering::SeqCst), true);
+    ///
+    /// let flag = AtomicBool::new(false);
+    /// flag.or(false, Ordering::SeqCst);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn or(&self, val: bool, order: Ordering) {
+        self.as_atomic_u8().or(u8::from(val), order);
+    }
+
+    /// Applies logical "xor" with a boolean value.
+    ///
+    /// Computes the logical "xor" of the current value and the argument `val`, and stores
+    /// the result as the new value.
+    ///
+    /// Returns the previous value.
+    ///
+    /// `fetch_xor` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// assert_eq!(flag.fetch_xor(false, Ordering::SeqCst), true);
+    /// assert_eq!(flag.load(Ordering::SeqCst), true);
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// assert_eq!(flag.fetch_xor(true, Ordering::SeqCst), true);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    ///
+    /// let flag = AtomicBool::new(false);
+    /// assert_eq!(flag.fetch_xor(false, Ordering::SeqCst), false);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_xor(&self, val: bool, order: Ordering) -> bool {
+        self.as_atomic_u8().fetch_xor(u8::from(val), order) != 0
+    }
+
+    /// Applies logical "xor" with a boolean value, discarding the old value.
+    ///
+    /// Computes the logical "xor" of the current value and the argument `val`, and stores
+    /// the result as the new value.
+    ///
+    /// Unlike `fetch_xor`, this does not return the previous value.
+    ///
+    /// `xor` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// This function may generate more efficient code than `fetch_xor` on some platforms.
+    ///
+    /// - x86/x86_64: `lock xor` instead of `cmpxchg` loop
+    /// - MSP430: `xor` instead of disabling interrupts
+    ///
+    /// Note: On x86/x86_64, the use of either function should not usually
+    /// affect the generated code, because LLVM can properly optimize the case
+    /// where the result is unused.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// flag.xor(false, Ordering::SeqCst);
+    /// assert_eq!(flag.load(Ordering::SeqCst), true);
+    ///
+    /// let flag = AtomicBool::new(true);
+    /// flag.xor(true, Ordering::SeqCst);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    ///
+    /// let flag = AtomicBool::new(false);
+    /// flag.xor(false, Ordering::SeqCst);
+    /// assert_eq!(flag.load(Ordering::SeqCst), false);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn xor(&self, val: bool, order: Ordering) {
+        self.as_atomic_u8().xor(u8::from(val), order);
+    }
+
+    /// Logical "not" of the stored boolean value.
+    ///
+    /// Performs a logical "not" operation on the current value, and sets
+    /// the new value to the result. Implemented as `fetch_xor(true, order)`.
+    ///
+    /// Returns the previous value.
+    ///
+    /// `fetch_not` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let foo = AtomicBool::new(true);
+    /// assert_eq!(foo.fetch_not(Ordering::SeqCst), true);
+    /// assert_eq!(foo.load(Ordering::SeqCst), false);
+    ///
+    /// let foo = AtomicBool::new(false);
+    /// assert_eq!(foo.fetch_not(Ordering::SeqCst), false);
+    /// assert_eq!(foo.load(Ordering::SeqCst), true);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_not(&self, order: Ordering) -> bool {
+        self.fetch_xor(true, order)
+    }
+
+    /// Logical "not" of the stored boolean value, discarding the old value.
+    ///
+    /// Performs a logical "not" operation on the current value, and sets
+    /// the new value to the result. Implemented as `xor(true, order)`.
+    ///
+    /// Unlike `fetch_not`, this does not return the previous value.
+    ///
+    /// `not` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// This function may generate more efficient code than `fetch_not` on some platforms.
+    ///
+    /// - x86/x86_64: `lock xor` instead of `cmpxchg` loop
+    /// - MSP430: `xor` instead of disabling interrupts
+    ///
+    /// Note: On x86/x86_64, the use of either function should not usually
+    /// affect the generated code, because LLVM can properly optimize the case
+    /// where the result is unused.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let foo = AtomicBool::new(true);
+    /// foo.not(Ordering::SeqCst);
+    /// assert_eq!(foo.load(Ordering::SeqCst), false);
+    ///
+    /// let foo = AtomicBool::new(false);
+    /// foo.not(Ordering::SeqCst);
+    /// assert_eq!(foo.load(Ordering::SeqCst), true);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn not(&self, order: Ordering) {
+        self.xor(true, order);
+    }
+
+    /// Fetches the value, and applies a function to it that returns an optional
+    /// new value. Returns a `Result` of `Ok(previous_value)` if the function
+    /// returned `Some(_)`, else `Err(previous_value)`.
+    ///
+    /// Note: This may call the function multiple times if the value has been
+    /// changed from other threads in the meantime, as long as the function
+    /// returns `Some(_)`, but the function will have been applied only once to
+    /// the stored value.
+    ///
+    /// `fetch_update` takes two [`Ordering`] arguments to describe the memory
+    /// ordering of this operation. The first describes the required ordering for
+    /// when the operation finally succeeds while the second describes the
+    /// required ordering for loads. These correspond to the success and failure
+    /// orderings of [`compare_exchange`](Self::compare_exchange) respectively.
+    ///
+    /// Using [`Acquire`] as success ordering makes the store part of this
+    /// operation [`Relaxed`], and using [`Release`] makes the final successful
+    /// load [`Relaxed`]. The (failed) load ordering can only be [`SeqCst`],
+    /// [`Acquire`] or [`Relaxed`].
+    ///
+    /// # Considerations
+    ///
+    /// This method is not magic; it is not provided by the hardware.
+    /// It is implemented in terms of [`compare_exchange_weak`](Self::compare_exchange_weak),
+    /// and suffers from the same drawbacks.
+    /// In particular, this method will not circumvent the [ABA Problem].
+    ///
+    /// [ABA Problem]: https://en.wikipedia.org/wiki/ABA_problem
+    ///
+    /// # Panics
+    ///
+    /// Panics if `fetch_order` is [`Release`] or [`AcqRel`].
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use portable_atomic::{AtomicBool, Ordering};
+    ///
+    /// let x = AtomicBool::new(false);
+    /// assert_eq!(x.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |_| None), Err(false));
+    /// assert_eq!(x.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |x| Some(!x)), Ok(false));
+    /// assert_eq!(x.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |x| Some(!x)), Ok(true));
+    /// assert_eq!(x.load(Ordering::SeqCst), false);
+    /// ```
+    #[inline]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub fn fetch_update<F>(
+        &self,
+        set_order: Ordering,
+        fetch_order: Ordering,
+        mut f: F,
+    ) -> Result<bool, bool>
+    where
+        F: FnMut(bool) -> Option<bool>,
+    {
+        let mut observed = self.load(fetch_order);
+        while let Some(desired) = f(observed) {
+            match self.compare_exchange_weak(observed, desired, set_order, fetch_order) {
+                Ok(previous) => return Ok(previous),
+                Err(actual) => observed = actual,
+            }
+        }
+        Err(observed)
+    }
+    } // cfg_has_atomic_cas!
+
+    const_fn! {
+        // This function is actually `const fn`-compatible on Rust 1.32+,
+        // but it is made `const fn` only on Rust 1.58+ to match other atomic types.
+        const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))];
+        /// Returns a mutable pointer to the underlying [`bool`].
+        ///
+        /// Returning an `*mut` pointer from a shared reference to this atomic is
+        /// safe because the atomic types work with interior mutability. Any use of
+        /// the returned raw pointer requires an `unsafe` block and has to uphold
+        /// the safety requirements. If there is concurrent access, note the following
+        /// additional safety requirements:
+        ///
+        /// - If this atomic type is [lock-free](Self::is_lock_free), any concurrent
+        ///   operations on it must be atomic.
+        /// - Otherwise, any concurrent operations on it must be compatible with
+        ///   operations performed by this atomic type.
+        ///
+        /// This is a `const fn` on Rust 1.58+.
+        #[inline]
+        pub const fn as_ptr(&self) -> *mut bool {
+            self.v.get() as *mut bool
+        }
+    }
+
+    #[inline]
+    fn as_atomic_u8(&self) -> &imp::AtomicU8 {
+        let raw = self as *const Self as *const imp::AtomicU8;
+        // SAFETY: AtomicBool and imp::AtomicU8 have the same layout and access data the same way.
+        unsafe { &*raw }
+    }
+}
+} // cfg_has_atomic_8!
+
+cfg_has_atomic_ptr! {
+/// A raw pointer type which can be safely shared between threads.
+///
+/// This type has the same in-memory representation as a `*mut T`.
+///
+/// If the compiler and the platform support atomic loads and stores of pointers,
+/// this type is a wrapper for the standard library's
+/// [`AtomicPtr`](core::sync::atomic::AtomicPtr). If the platform supports it
+/// but the compiler does not, atomic operations are implemented using inline
+/// assembly.
+// #[repr(transparent)] would also work here, but #[repr(C, align(N))]
+// shows up more clearly in the generated docs.
+#[cfg_attr(target_pointer_width = "16", repr(C, align(2)))]
+#[cfg_attr(target_pointer_width = "32", repr(C, align(4)))]
+#[cfg_attr(target_pointer_width = "64", repr(C, align(8)))]
+#[cfg_attr(target_pointer_width = "128", repr(C, align(16)))]
+pub struct AtomicPtr<T> {
+    inner: imp::AtomicPtr<T>,
+}
+
+impl<T> Default for AtomicPtr<T> {
+    /// Returns an `AtomicPtr<T>` holding a null pointer.
+    #[inline]
+    fn default() -> Self {
+        AtomicPtr::new(ptr::null_mut::<T>())
+    }
+}
+
+impl<T> From<*mut T> for AtomicPtr<T> {
+    #[inline]
+    fn from(raw: *mut T) -> Self {
+        AtomicPtr::new(raw)
+    }
+}
+
+impl<T> fmt::Debug for AtomicPtr<T> {
+    #[allow(clippy::missing_inline_in_public_items)] // fmt is not hot path
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // std atomic types use Relaxed in Debug::fmt: https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs#L2024
+        <*mut T as fmt::Debug>::fmt(&self.load(Ordering::Relaxed), f)
+    }
+}
+
+impl<T> fmt::Pointer for AtomicPtr<T> {
+    #[allow(clippy::missing_inline_in_public_items)] // fmt is not hot path
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // std atomic types use Relaxed in Debug::fmt: https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs#L2024
+        <*mut T as fmt::Pointer>::fmt(&self.load(Ordering::Relaxed), f)
+    }
+}
+
+// UnwindSafe is implicitly implemented; only RefUnwindSafe is spelled out (core's or std's).
+#[cfg(not(portable_atomic_no_core_unwind_safe))]
+impl<T> core::panic::RefUnwindSafe for AtomicPtr<T> {}
+#[cfg(all(portable_atomic_no_core_unwind_safe, feature = "std"))]
+impl<T> std::panic::RefUnwindSafe for AtomicPtr<T> {}
+
+impl<T> AtomicPtr<T> {
+    /// Builds an `AtomicPtr` that initially holds `p`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::AtomicPtr;
+    ///
+    /// let ptr = &mut 5;
+    /// let atomic_ptr = AtomicPtr::new(ptr);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn new(p: *mut T) -> Self {
+        static_assert_layout!(AtomicPtr<()>, *mut ());
+        Self { inner: <imp::AtomicPtr<T>>::new(p) }
+    }
+
+    /// Creates a new reference to an `AtomicPtr` from a pointer.
+    ///
+    /// # Safety
+    ///
+    /// * `ptr` must be aligned to `align_of::<AtomicPtr<T>>()` (note that on some platforms this
+    ///   can be bigger than `align_of::<*mut T>()`).
+    /// * `ptr` must be [valid] for both reads and writes for the whole lifetime `'a`.
+    /// * If this atomic type is [lock-free](Self::is_lock_free), non-atomic accesses to the value
+    ///   behind `ptr` must have a happens-before relationship with atomic accesses via the returned
+    ///   value (or vice-versa).
+    ///   * In other words, time periods where the value is accessed atomically may not overlap
+    ///     with periods where the value is accessed non-atomically.
+    ///   * This requirement is trivially satisfied if `ptr` is never used non-atomically for the
+    ///     duration of lifetime `'a`. Most use cases should be able to follow this guideline.
+    ///   * This requirement is also trivially satisfied if all accesses (atomic or not) are done
+    ///     from the same thread.
+    /// * If this atomic type is *not* lock-free:
+    ///   * Any accesses to the value behind `ptr` must have a happens-before relationship
+    ///     with accesses via the returned value (or vice-versa).
+    ///   * Any concurrent accesses to the value behind `ptr` for the duration of lifetime `'a` must
+    ///     be compatible with operations performed by this atomic type.
+    /// * This method must not be used to create overlapping or mixed-size atomic accesses, as
+    ///   these are not supported by the memory model.
+    ///
+    /// [valid]: core::ptr#safety
+    #[inline]
+    #[must_use]
+    pub unsafe fn from_ptr<'a>(ptr: *mut *mut T) -> &'a Self {
+        #[allow(clippy::cast_ptr_alignment)]
+        // SAFETY: guaranteed by the caller (see the `# Safety` section of this method).
+        unsafe { &*(ptr as *mut Self) }
+    }
+
+    /// Tells whether operations on values of this type are lock-free.
+    ///
+    /// When the compiler or the platform lacks the necessary atomic
+    /// instructions, every potentially concurrent atomic operation
+    /// goes through global locks instead.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::AtomicPtr;
+    ///
+    /// let is_lock_free = AtomicPtr::<()>::is_lock_free();
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn is_lock_free() -> bool {
+        imp::AtomicPtr::<T>::is_lock_free()
+    }
+
+    /// Returns `true` if operations on values of this type are lock-free.
+    ///
+    /// If the compiler or the platform doesn't support the necessary
+    /// atomic instructions, global locks are used for every potentially
+    /// concurrent atomic operation instead.
+    ///
+    /// **Note:** If the atomic operation relies on dynamic CPU feature detection,
+    /// this type may be lock-free even if this function returns `false`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::AtomicPtr;
+    ///
+    /// const IS_ALWAYS_LOCK_FREE: bool = AtomicPtr::<()>::is_always_lock_free();
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn is_always_lock_free() -> bool {
+        <imp::AtomicPtr<T>>::is_always_lock_free() // delegates to the `imp` implementation
+    }
+
+    /// Returns a mutable reference to the underlying pointer.
+    ///
+    /// This is safe because holding `&mut self` guarantees that no other threads are
+    /// concurrently accessing the atomic data.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    ///
+    /// let mut data = 10;
+    /// let mut atomic_ptr = AtomicPtr::new(&mut data);
+    /// let mut other_data = 5;
+    /// *atomic_ptr.get_mut() = &mut other_data;
+    /// assert_eq!(unsafe { *atomic_ptr.load(Ordering::SeqCst) }, 5);
+    /// ```
+    #[inline]
+    pub fn get_mut(&mut self) -> &mut *mut T {
+        self.inner.get_mut() // forwarded to the inner atomic's `get_mut`
+    }
+
+    // TODO: Add from_mut/get_mut_slice/from_mut_slice once it is stable on std atomic types.
+    // https://github.com/rust-lang/rust/issues/76314
+
+    /// Consumes the atomic and returns the contained pointer value.
+    ///
+    /// This is safe because taking `self` by value guarantees that no other threads are
+    /// concurrently accessing the atomic data.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::AtomicPtr;
+    ///
+    /// let mut data = 5;
+    /// let atomic_ptr = AtomicPtr::new(&mut data);
+    /// assert_eq!(unsafe { *atomic_ptr.into_inner() }, 5);
+    /// ```
+    #[inline]
+    pub fn into_inner(self) -> *mut T {
+        self.inner.into_inner() // forwarded to the inner atomic's `into_inner`
+    }
+
+    /// Loads the pointer value currently stored in the atomic.
+    ///
+    /// `load` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. Possible values are [`SeqCst`], [`Acquire`] and [`Relaxed`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `order` is [`Release`] or [`AcqRel`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    ///
+    /// let ptr = &mut 5;
+    /// let some_ptr = AtomicPtr::new(ptr);
+    ///
+    /// let value = some_ptr.load(Ordering::Relaxed);
+    /// ```
+    #[inline]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )] // NOTE(review): presumably so the invalid-`order` panic is reported at the call site
+    pub fn load(&self, order: Ordering) -> *mut T {
+        self.inner.load(order)
+    }
+
+    /// Stores `ptr` as the new pointer value of the atomic.
+    ///
+    /// `store` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. Possible values are [`SeqCst`], [`Release`] and [`Relaxed`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `order` is [`Acquire`] or [`AcqRel`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    ///
+    /// let ptr = &mut 5;
+    /// let some_ptr = AtomicPtr::new(ptr);
+    ///
+    /// let other_ptr = &mut 10;
+    ///
+    /// some_ptr.store(other_ptr, Ordering::Relaxed);
+    /// ```
+    #[inline]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )] // NOTE(review): presumably so the invalid-`order` panic is reported at the call site
+    pub fn store(&self, ptr: *mut T, order: Ordering) {
+        self.inner.store(ptr, order);
+    }
+
+    cfg_has_atomic_cas! {
+    /// Stores `ptr` as the new pointer value, returning the previous value.
+    ///
+    /// `swap` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    ///
+    /// let ptr = &mut 5;
+    /// let some_ptr = AtomicPtr::new(ptr);
+    ///
+    /// let other_ptr = &mut 10;
+    ///
+    /// let value = some_ptr.swap(other_ptr, Ordering::Relaxed);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn swap(&self, ptr: *mut T, order: Ordering) -> *mut T {
+        self.inner.swap(ptr, order) // forwarded to the inner atomic's `swap`
+    }
+
+    /// Stores a value into the pointer if the current value is the same as the `current` value.
+    ///
+    /// The return value is a result indicating whether the new value was written and containing
+    /// the previous value. On success this value is guaranteed to be equal to `current`.
+    ///
+    /// `compare_exchange` takes two [`Ordering`] arguments to describe the memory
+    /// ordering of this operation. `success` describes the required ordering for the
+    /// read-modify-write operation that takes place if the comparison with `current` succeeds.
+    /// `failure` describes the required ordering for the load operation that takes place when
+    /// the comparison fails. Using [`Acquire`] as success ordering makes the store part
+    /// of this operation [`Relaxed`], and using [`Release`] makes the successful load
+    /// [`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `failure` is [`Release`] or [`AcqRel`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    ///
+    /// let ptr = &mut 5;
+    /// let some_ptr = AtomicPtr::new(ptr);
+    ///
+    /// let other_ptr = &mut 10;
+    ///
+    /// let value = some_ptr.compare_exchange(ptr, other_ptr, Ordering::SeqCst, Ordering::Relaxed);
+    /// ```
+    #[inline]
+    #[cfg_attr(portable_atomic_doc_cfg, doc(alias = "compare_and_swap"))]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub fn compare_exchange(
+        &self,
+        current: *mut T,
+        new: *mut T,
+        success: Ordering,
+        failure: Ordering,
+    ) -> Result<*mut T, *mut T> {
+        self.inner.compare_exchange(current, new, success, failure)
+    }
+
+    /// Stores a value into the pointer if the current value is the same as the `current` value.
+    ///
+    /// Unlike [`AtomicPtr::compare_exchange`], this function is allowed to spuriously fail even when the
+    /// comparison succeeds, which can result in more efficient code on some platforms. The
+    /// return value is a result indicating whether the new value was written and containing the
+    /// previous value.
+    ///
+    /// `compare_exchange_weak` takes two [`Ordering`] arguments to describe the memory
+    /// ordering of this operation. `success` describes the required ordering for the
+    /// read-modify-write operation that takes place if the comparison with `current` succeeds.
+    /// `failure` describes the required ordering for the load operation that takes place when
+    /// the comparison fails. Using [`Acquire`] as success ordering makes the store part
+    /// of this operation [`Relaxed`], and using [`Release`] makes the successful load
+    /// [`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `failure` is [`Release`] or [`AcqRel`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    ///
+    /// let some_ptr = AtomicPtr::new(&mut 5);
+    ///
+    /// let new = &mut 10;
+    /// let mut old = some_ptr.load(Ordering::Relaxed);
+    /// loop {
+    ///     match some_ptr.compare_exchange_weak(old, new, Ordering::SeqCst, Ordering::Relaxed) {
+    ///         Ok(_) => break,
+    ///         Err(x) => old = x,
+    ///     }
+    /// }
+    /// ```
+    #[inline]
+    #[cfg_attr(portable_atomic_doc_cfg, doc(alias = "compare_and_swap"))]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub fn compare_exchange_weak(
+        &self,
+        current: *mut T,
+        new: *mut T,
+        success: Ordering,
+        failure: Ordering,
+    ) -> Result<*mut T, *mut T> {
+        self.inner.compare_exchange_weak(current, new, success, failure)
+    }
+
+    /// Fetches the value, and applies a function to it that returns an optional
+    /// new value. Returns a `Result` of `Ok(previous_value)` if the function
+    /// returned `Some(_)`, else `Err(previous_value)`.
+    ///
+    /// Note: This may call the function multiple times if the value has been
+    /// changed from other threads in the meantime, as long as the function
+    /// returns `Some(_)`, but the function will have been applied only once to
+    /// the stored value.
+    ///
+    /// `fetch_update` takes two [`Ordering`] arguments to describe the memory
+    /// ordering of this operation. The first describes the required ordering for
+    /// when the operation finally succeeds while the second describes the
+    /// required ordering for loads. These correspond to the success and failure
+    /// orderings of [`compare_exchange`](Self::compare_exchange) respectively.
+    ///
+    /// Using [`Acquire`] as success ordering makes the store part of this
+    /// operation [`Relaxed`], and using [`Release`] makes the final successful
+    /// load [`Relaxed`]. The (failed) load ordering can only be [`SeqCst`],
+    /// [`Acquire`] or [`Relaxed`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `fetch_order` is [`Release`] or [`AcqRel`].
+    ///
+    /// # Considerations
+    ///
+    /// This method is not magic; it is not provided by the hardware.
+    /// It is implemented in terms of [`compare_exchange_weak`](Self::compare_exchange_weak),
+    /// and suffers from the same drawbacks.
+    /// In particular, this method will not circumvent the [ABA Problem].
+    ///
+    /// [ABA Problem]: https://en.wikipedia.org/wiki/ABA_problem
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    ///
+    /// let ptr: *mut _ = &mut 5;
+    /// let some_ptr = AtomicPtr::new(ptr);
+    ///
+    /// let new: *mut _ = &mut 10;
+    /// assert_eq!(some_ptr.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |_| None), Err(ptr));
+    /// let result = some_ptr.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |x| {
+    ///     if x == ptr {
+    ///         Some(new)
+    ///     } else {
+    ///         None
+    ///     }
+    /// });
+    /// assert_eq!(result, Ok(ptr));
+    /// assert_eq!(some_ptr.load(Ordering::SeqCst), new);
+    /// ```
+    #[inline]
+    #[cfg_attr(
+        any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+        track_caller
+    )]
+    pub fn fetch_update<F>(
+        &self,
+        set_order: Ordering,
+        fetch_order: Ordering,
+        mut f: F,
+    ) -> Result<*mut T, *mut T>
+    where
+        F: FnMut(*mut T) -> Option<*mut T>,
+    {
+        let mut prev = self.load(fetch_order);
+        while let Some(next) = f(prev) {
+            match self.compare_exchange_weak(prev, next, set_order, fetch_order) {
+                x @ Ok(_) => return x,
+                Err(next_prev) => prev = next_prev, // CAS failed: retry `f` on the value observed
+            }
+        }
+        Err(prev) // `f` returned `None`: nothing is stored, report the last value seen
+    }
+
+    #[cfg(miri)] // compiled only under Miri; called from the `#[cfg(miri)]` branches of `fetch_*`
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    fn fetch_update_<F>(&self, order: Ordering, mut f: F) -> *mut T
+    where
+        F: FnMut(*mut T) -> *mut T,
+    {
+        // This is a private function and all instances of `f` only operate on the value
+        // loaded, so there is no need to synchronize the first load/failed CAS.
+        let mut prev = self.load(Ordering::Relaxed);
+        loop {
+            let next = f(prev);
+            match self.compare_exchange_weak(prev, next, order, Ordering::Relaxed) {
+                Ok(x) => return x, // stored `next`; `x` is the previous value
+                Err(next_prev) => prev = next_prev, // retry with the value actually observed
+            }
+        }
+    }
+
+    /// Offsets the pointer's address by adding `val` (in units of `T`),
+    /// returning the previous pointer.
+    ///
+    /// This is equivalent to using [`wrapping_add`] to atomically perform the
+    /// equivalent of `ptr = ptr.wrapping_add(val);`.
+    ///
+    /// This method operates in units of `T` (`val` is scaled by `size_of::<T>()` with
+    /// wrapping multiplication), which means that it cannot be used to offset the
+    /// pointer by an amount which is not a multiple of `size_of::<T>()`. This can be
+    /// inconvenient if you want to work with a deliberately misaligned pointer; in
+    /// such cases, use the [`fetch_byte_add`](Self::fetch_byte_add) method instead.
+    ///
+    /// `fetch_ptr_add` takes an [`Ordering`] argument which describes the
+    /// memory ordering of this operation. All ordering modes are possible. Note
+    /// that using [`Acquire`] makes the store part of this operation
+    /// [`Relaxed`], and using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// [`wrapping_add`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.wrapping_add
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![allow(unstable_name_collisions)]
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    /// use sptr::Strict; // stable polyfill for strict provenance
+    ///
+    /// let atom = AtomicPtr::<i64>::new(core::ptr::null_mut());
+    /// assert_eq!(atom.fetch_ptr_add(1, Ordering::Relaxed).addr(), 0);
+    /// // Note: units of `size_of::<i64>()`.
+    /// assert_eq!(atom.load(Ordering::Relaxed).addr(), 8);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_ptr_add(&self, val: usize, order: Ordering) -> *mut T {
+        self.fetch_byte_add(val.wrapping_mul(core::mem::size_of::<T>()), order)
+    }
+
+    /// Offsets the pointer's address by subtracting `val` (in units of `T`),
+    /// returning the previous pointer.
+    ///
+    /// This is equivalent to using [`wrapping_sub`] to atomically perform the
+    /// equivalent of `ptr = ptr.wrapping_sub(val);`.
+    ///
+    /// This method operates in units of `T` (`val` is scaled by `size_of::<T>()` with
+    /// wrapping multiplication), which means that it cannot be used to offset the
+    /// pointer by an amount which is not a multiple of `size_of::<T>()`. This can be
+    /// inconvenient if you want to work with a deliberately misaligned pointer; in
+    /// such cases, use the [`fetch_byte_sub`](Self::fetch_byte_sub) method instead.
+    ///
+    /// `fetch_ptr_sub` takes an [`Ordering`] argument which describes the memory
+    /// ordering of this operation. All ordering modes are possible. Note that
+    /// using [`Acquire`] makes the store part of this operation [`Relaxed`],
+    /// and using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// [`wrapping_sub`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.wrapping_sub
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    ///
+    /// let array = [1i32, 2i32];
+    /// let atom = AtomicPtr::new(array.as_ptr().wrapping_add(1) as *mut _);
+    ///
+    /// assert!(core::ptr::eq(atom.fetch_ptr_sub(1, Ordering::Relaxed), &array[1],));
+    /// assert!(core::ptr::eq(atom.load(Ordering::Relaxed), &array[0]));
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_ptr_sub(&self, val: usize, order: Ordering) -> *mut T {
+        self.fetch_byte_sub(val.wrapping_mul(core::mem::size_of::<T>()), order)
+    }
+
+    /// Offsets the pointer's address by adding `val` *bytes*, returning the
+    /// previous pointer.
+    ///
+    /// This is equivalent to using [`wrapping_add`] and [`cast`] to atomically
+    /// perform `ptr = ptr.cast::<u8>().wrapping_add(val).cast::<T>()`.
+    ///
+    /// `fetch_byte_add` takes an [`Ordering`] argument which describes the
+    /// memory ordering of this operation. All ordering modes are possible. Note
+    /// that using [`Acquire`] makes the store part of this operation
+    /// [`Relaxed`], and using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// [`wrapping_add`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.wrapping_add
+    /// [`cast`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.cast
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![allow(unstable_name_collisions)]
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    /// use sptr::Strict; // stable polyfill for strict provenance
+    ///
+    /// let atom = AtomicPtr::<i64>::new(core::ptr::null_mut());
+    /// assert_eq!(atom.fetch_byte_add(1, Ordering::Relaxed).addr(), 0);
+    /// // Note: in units of bytes, not `size_of::<i64>()`.
+    /// assert_eq!(atom.load(Ordering::Relaxed).addr(), 1);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_byte_add(&self, val: usize, order: Ordering) -> *mut T {
+        // Ideally, we would always use AtomicPtr::fetch_* since it is strict-provenance
+        // compatible, but it is unstable. So, for now emulate it only on cfg(miri).
+        // Code using AtomicUsize::fetch_* via casts is still permissive-provenance
+        // compatible and is sound.
+        // TODO: Once `#![feature(strict_provenance_atomic_ptr)]` is stabilized,
+        // use AtomicPtr::fetch_* in all cases from the version in which it is stabilized.
+        #[cfg(miri)] // CAS loop (`fetch_update_`) offsetting the address via `strict::map_addr`
+        {
+            self.fetch_update_(order, |x| strict::map_addr(x, |x| x.wrapping_add(val)))
+        }
+        #[cfg(not(miri))] // integer `fetch_add` via `as_atomic_usize()`, cast back to `*mut T`
+        {
+            self.as_atomic_usize().fetch_add(val, order) as *mut T
+        }
+    }
+
+    /// Offsets the pointer's address by subtracting `val` *bytes*, returning the
+    /// previous pointer.
+    ///
+    /// This is equivalent to using [`wrapping_sub`] and [`cast`] to atomically
+    /// perform `ptr = ptr.cast::<u8>().wrapping_sub(val).cast::<T>()`.
+    ///
+    /// `fetch_byte_sub` takes an [`Ordering`] argument which describes the
+    /// memory ordering of this operation. All ordering modes are possible. Note
+    /// that using [`Acquire`] makes the store part of this operation
+    /// [`Relaxed`], and using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// [`wrapping_sub`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.wrapping_sub
+    /// [`cast`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.cast
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![allow(unstable_name_collisions)]
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    /// use sptr::Strict; // stable polyfill for strict provenance
+    ///
+    /// let atom = AtomicPtr::<i64>::new(sptr::invalid_mut(1));
+    /// assert_eq!(atom.fetch_byte_sub(1, Ordering::Relaxed).addr(), 1);
+    /// assert_eq!(atom.load(Ordering::Relaxed).addr(), 0);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_byte_sub(&self, val: usize, order: Ordering) -> *mut T {
+        // Ideally, we would always use AtomicPtr::fetch_* since it is strict-provenance
+        // compatible, but it is unstable. So, for now emulate it only on cfg(miri).
+        // Code using AtomicUsize::fetch_* via casts is still permissive-provenance
+        // compatible and is sound.
+        // TODO: Once `#![feature(strict_provenance_atomic_ptr)]` is stabilized,
+        // use AtomicPtr::fetch_* in all cases from the version in which it is stabilized.
+        #[cfg(miri)] // CAS loop (`fetch_update_`) offsetting the address via `strict::map_addr`
+        {
+            self.fetch_update_(order, |x| strict::map_addr(x, |x| x.wrapping_sub(val)))
+        }
+        #[cfg(not(miri))] // integer `fetch_sub` via `as_atomic_usize()`, cast back to `*mut T`
+        {
+            self.as_atomic_usize().fetch_sub(val, order) as *mut T
+        }
+    }
+
+    /// Performs a bitwise "or" operation on the address of the current pointer,
+    /// and the argument `val`, and stores a pointer with provenance of the
+    /// current pointer and the resulting address.
+    ///
+    /// This is equivalent to using [`map_addr`] to atomically perform
+    /// `ptr = ptr.map_addr(|a| a | val)`. This can be used in tagged
+    /// pointer schemes to atomically set tag bits.
+    ///
+    /// **Caveat**: This operation returns the previous value. To compute the
+    /// stored value without losing provenance, you may use [`map_addr`]. For
+    /// example: `a.fetch_or(val, order).map_addr(|a| a | val)`.
+    ///
+    /// `fetch_or` takes an [`Ordering`] argument which describes the memory
+    /// ordering of this operation. All ordering modes are possible. Note that
+    /// using [`Acquire`] makes the store part of this operation [`Relaxed`],
+    /// and using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// This API and its claimed semantics are part of the Strict Provenance
+    /// experiment, see the [module documentation for `ptr`][core::ptr] for
+    /// details.
+    ///
+    /// [`map_addr`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.map_addr
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![allow(unstable_name_collisions)]
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    /// use sptr::Strict; // stable polyfill for strict provenance
+    ///
+    /// let pointer = &mut 3i64 as *mut i64;
+    ///
+    /// let atom = AtomicPtr::<i64>::new(pointer);
+    /// // Tag the bottom bit of the pointer.
+    /// assert_eq!(atom.fetch_or(1, Ordering::Relaxed).addr() & 1, 0);
+    /// // Extract and untag.
+    /// let tagged = atom.load(Ordering::Relaxed);
+    /// assert_eq!(tagged.addr() & 1, 1);
+    /// assert_eq!(tagged.map_addr(|p| p & !1), pointer);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_or(&self, val: usize, order: Ordering) -> *mut T {
+        // Ideally, we would always use AtomicPtr::fetch_* since it is strict-provenance
+        // compatible, but it is unstable. So, for now emulate it only on cfg(miri).
+        // Code using AtomicUsize::fetch_* via casts is still permissive-provenance
+        // compatible and is sound.
+        // TODO: Once `#![feature(strict_provenance_atomic_ptr)]` is stabilized,
+        // use AtomicPtr::fetch_* in all cases from the version in which it is stabilized.
+        #[cfg(miri)] // CAS loop (`fetch_update_`) or-ing the address via `strict::map_addr`
+        {
+            self.fetch_update_(order, |x| strict::map_addr(x, |x| x | val))
+        }
+        #[cfg(not(miri))] // integer `fetch_or` via `as_atomic_usize()`, cast back to `*mut T`
+        {
+            self.as_atomic_usize().fetch_or(val, order) as *mut T
+        }
+    }
+
+    /// Performs a bitwise "and" operation on the address of the current
+    /// pointer, and the argument `val`, and stores a pointer with provenance of
+    /// the current pointer and the resulting address.
+    ///
+    /// This is equivalent to using [`map_addr`] to atomically perform
+    /// `ptr = ptr.map_addr(|a| a & val)`. This can be used in tagged
+    /// pointer schemes to atomically unset tag bits.
+    ///
+    /// **Caveat**: This operation returns the previous value. To compute the
+    /// stored value without losing provenance, you may use [`map_addr`]. For
+    /// example: `a.fetch_and(val, order).map_addr(|a| a & val)`.
+    ///
+    /// `fetch_and` takes an [`Ordering`] argument which describes the memory
+    /// ordering of this operation. All ordering modes are possible. Note that
+    /// using [`Acquire`] makes the store part of this operation [`Relaxed`],
+    /// and using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// This API and its claimed semantics are part of the Strict Provenance
+    /// experiment, see the [module documentation for `ptr`][core::ptr] for
+    /// details.
+    ///
+    /// [`map_addr`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.map_addr
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![allow(unstable_name_collisions)]
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    /// use sptr::Strict; // stable polyfill for strict provenance
+    ///
+    /// let pointer = &mut 3i64 as *mut i64;
+    /// // A tagged pointer
+    /// let atom = AtomicPtr::<i64>::new(pointer.map_addr(|a| a | 1));
+    /// assert_eq!(atom.fetch_or(1, Ordering::Relaxed).addr() & 1, 1);
+    /// // Untag, and extract the previously tagged pointer.
+    /// let untagged = atom.fetch_and(!1, Ordering::Relaxed).map_addr(|a| a & !1);
+    /// assert_eq!(untagged, pointer);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_and(&self, val: usize, order: Ordering) -> *mut T {
+        // Ideally, we would always use AtomicPtr::fetch_* since it is strict-provenance
+        // compatible, but it is unstable. So, for now emulate it only on cfg(miri).
+        // Code using AtomicUsize::fetch_* via casts is still permissive-provenance
+        // compatible and is sound.
+        // TODO: Once `#![feature(strict_provenance_atomic_ptr)]` is stabilized,
+        // use AtomicPtr::fetch_* in all cases from the version in which it is stabilized.
+        #[cfg(miri)] // CAS loop (`fetch_update_`) and-ing the address via `strict::map_addr`
+        {
+            self.fetch_update_(order, |x| strict::map_addr(x, |x| x & val))
+        }
+        #[cfg(not(miri))] // integer `fetch_and` via `as_atomic_usize()`, cast back to `*mut T`
+        {
+            self.as_atomic_usize().fetch_and(val, order) as *mut T
+        }
+    }
+
+    /// Performs a bitwise "xor" operation on the address of the current
+    /// pointer, and the argument `val`, and stores a pointer with provenance of
+    /// the current pointer and the resulting address.
+    ///
+    /// This is equivalent to using [`map_addr`] to atomically perform
+    /// `ptr = ptr.map_addr(|a| a ^ val)`. This can be used in tagged
+    /// pointer schemes to atomically toggle tag bits.
+    ///
+    /// **Caveat**: This operation returns the previous value. To compute the
+    /// stored value without losing provenance, you may use [`map_addr`]. For
+    /// example: `a.fetch_xor(val, order).map_addr(|a| a ^ val)`.
+    ///
+    /// `fetch_xor` takes an [`Ordering`] argument which describes the memory
+    /// ordering of this operation. All ordering modes are possible. Note that
+    /// using [`Acquire`] makes the store part of this operation [`Relaxed`],
+    /// and using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// This API and its claimed semantics are part of the Strict Provenance
+    /// experiment, see the [module documentation for `ptr`][core::ptr] for
+    /// details.
+    ///
+    /// [`map_addr`]: https://doc.rust-lang.org/std/primitive.pointer.html#method.map_addr
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![allow(unstable_name_collisions)]
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    /// use sptr::Strict; // stable polyfill for strict provenance
+    ///
+    /// let pointer = &mut 3i64 as *mut i64;
+    /// let atom = AtomicPtr::<i64>::new(pointer);
+    ///
+    /// // Toggle a tag bit on the pointer.
+    /// atom.fetch_xor(1, Ordering::Relaxed);
+    /// assert_eq!(atom.load(Ordering::Relaxed).addr() & 1, 1);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn fetch_xor(&self, val: usize, order: Ordering) -> *mut T {
+        // Ideally, we would always use AtomicPtr::fetch_* since it is strict-provenance
+        // compatible, but it is unstable. So, for now emulate it only on cfg(miri).
+        // Code using AtomicUsize::fetch_* via casts is still permissive-provenance
+        // compatible and is sound.
+        // TODO: Once `#![feature(strict_provenance_atomic_ptr)]` is stabilized,
+        // use AtomicPtr::fetch_* in all cases from the version in which it is stabilized.
+        #[cfg(miri)] // CAS loop (`fetch_update_`) xor-ing the address via `strict::map_addr`
+        {
+            self.fetch_update_(order, |x| strict::map_addr(x, |x| x ^ val))
+        }
+        #[cfg(not(miri))] // integer `fetch_xor` via `as_atomic_usize()`, cast back to `*mut T`
+        {
+            self.as_atomic_usize().fetch_xor(val, order) as *mut T
+        }
+    }
+
+    /// Sets the bit at the specified bit-position to 1.
+    ///
+    /// Returns `true` if the specified bit was previously set to 1.
+    ///
+    /// `bit_set` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// This corresponds to x86's `lock bts`, and the implementation uses it on x86/x86_64.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![allow(unstable_name_collisions)]
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    /// use sptr::Strict; // stable polyfill for strict provenance
+    ///
+    /// let pointer = &mut 3i64 as *mut i64;
+    ///
+    /// let atom = AtomicPtr::<i64>::new(pointer);
+    /// // Tag the bottom bit of the pointer.
+    /// assert!(!atom.bit_set(0, Ordering::Relaxed));
+    /// // Extract and untag.
+    /// let tagged = atom.load(Ordering::Relaxed);
+    /// assert_eq!(tagged.addr() & 1, 1);
+    /// assert_eq!(tagged.map_addr(|p| p & !1), pointer);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn bit_set(&self, bit: u32, order: Ordering) -> bool {
+        // Ideally, we would always use AtomicPtr::fetch_* since it is strict-provenance
+        // compatible, but it is unstable. So, for now emulate it only on cfg(miri).
+        // Code using AtomicUsize::fetch_* via casts is still permissive-provenance
+        // compatible and is sound.
+        // TODO: Once `#![feature(strict_provenance_atomic_ptr)]` is stabilized,
+        // use AtomicPtr::fetch_* in all cases from the version in which it is stabilized.
+        #[cfg(miri)]
+        {
+            let mask = 1_usize.wrapping_shl(bit); // shift amount wraps at the bit width of `usize`
+            self.fetch_or(mask, order) as usize & mask != 0 // test the bit in the previous value
+        }
+        #[cfg(not(miri))]
+        {
+            self.as_atomic_usize().bit_set(bit, order)
+        }
+    }
+
+    /// Clears the bit at the specified bit-position to 0.
+    ///
+    /// Returns `true` if the specified bit was previously set to 1.
+    ///
+    /// `bit_clear` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// This corresponds to x86's `lock btr`, and the implementation uses it on x86/x86_64.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![allow(unstable_name_collisions)]
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    /// use sptr::Strict; // stable polyfill for strict provenance
+    ///
+    /// let pointer = &mut 3i64 as *mut i64;
+    /// // A tagged pointer
+    /// let atom = AtomicPtr::<i64>::new(pointer.map_addr(|a| a | 1));
+    /// assert!(atom.bit_set(0, Ordering::Relaxed));
+    /// // Untag
+    /// assert!(atom.bit_clear(0, Ordering::Relaxed));
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn bit_clear(&self, bit: u32, order: Ordering) -> bool {
+        // Ideally, we would always use AtomicPtr::fetch_* since it is strict-provenance
+        // compatible, but it is unstable. So, for now emulate it only on cfg(miri).
+        // Code using AtomicUsize::fetch_* via casts is still permissive-provenance
+        // compatible and is sound.
+        // TODO: Once `#![feature(strict_provenance_atomic_ptr)]` is stabilized,
+        // use AtomicPtr::fetch_* in all cases from the version in which it is stabilized.
+        #[cfg(miri)]
+        {
+            let mask = 1_usize.wrapping_shl(bit); // shift amount wraps at the bit width of `usize`
+            self.fetch_and(!mask, order) as usize & mask != 0 // test the bit in the previous value
+        }
+        #[cfg(not(miri))]
+        {
+            self.as_atomic_usize().bit_clear(bit, order)
+        }
+    }
+
+    /// Flips the bit at the specified bit-position.
+    ///
+    /// Returns `true` if the specified bit was set to 1 before the flip.
+    ///
+    /// `bit_toggle` takes an [`Ordering`] argument which describes the memory ordering
+    /// of this operation. All ordering modes are possible. Note that using
+    /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+    /// using [`Release`] makes the load part [`Relaxed`].
+    ///
+    /// This corresponds to x86's `lock btc`, and the implementation calls them on x86/x86_64.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![allow(unstable_name_collisions)]
+    /// use portable_atomic::{AtomicPtr, Ordering};
+    /// use sptr::Strict; // stable polyfill for strict provenance
+    ///
+    /// let pointer = &mut 3i64 as *mut i64;
+    /// let atom = AtomicPtr::<i64>::new(pointer);
+    ///
+    /// // Toggle a tag bit on the pointer.
+    /// atom.bit_toggle(0, Ordering::Relaxed);
+    /// assert_eq!(atom.load(Ordering::Relaxed).addr() & 1, 1);
+    /// ```
+    #[inline]
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub fn bit_toggle(&self, bit: u32, order: Ordering) -> bool {
+        // AtomicPtr::fetch_* is strict-provenance compatible and would be preferred
+        // in every configuration, but it is unstable, so it is only emulated under
+        // cfg(miri). The AtomicUsize::fetch_* route via casts stays
+        // permissive-provenance compatible and sound.
+        // TODO: adopt AtomicPtr::fetch_* everywhere from the release in which
+        // `#![feature(strict_provenance_atomic_ptr)]` is stabilized.
+        #[cfg(not(miri))]
+        {
+            self.as_atomic_usize().bit_toggle(bit, order)
+        }
+        #[cfg(miri)]
+        {
+            let bit_mask = 1_usize.wrapping_shl(bit);
+            (self.fetch_xor(bit_mask, order) as usize) & bit_mask != 0
+        }
+    }
+
+    #[cfg(not(miri))]
+    #[inline]
+    fn as_atomic_usize(&self) -> &AtomicUsize {
+        static_assert!(
+            core::mem::align_of::<AtomicPtr<()>>() == core::mem::align_of::<AtomicUsize>()
+        );
+        static_assert!(
+            core::mem::size_of::<AtomicPtr<()>>() == core::mem::size_of::<AtomicUsize>()
+        );
+        let usize_view = self as *const Self as *const AtomicUsize;
+        // SAFETY: AtomicPtr and AtomicUsize have the same layout and access data the same way.
+        unsafe { &*usize_view }
+    }
+    } // cfg_has_atomic_cas!
+
+    const_fn! {
+        const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))]; // presumably gates `const`; confirm in const_fn!
+        /// Returns a mutable pointer to the underlying pointer.
+        ///
+        /// Returning an `*mut` pointer from a shared reference to this atomic is
+        /// safe because the atomic types work with interior mutability. Any use of
+        /// the returned raw pointer requires an `unsafe` block and has to uphold
+        /// the safety requirements. If there is concurrent access, note the following
+        /// additional safety requirements:
+        ///
+        /// - If this atomic type is [lock-free](Self::is_lock_free), any concurrent
+        ///   operations on it must be atomic.
+        /// - Otherwise, any concurrent operations on it must be compatible with
+        ///   operations performed by this atomic type.
+        ///
+        /// This is `const fn` on Rust 1.58+.
+        #[inline]
+        pub const fn as_ptr(&self) -> *mut *mut T {
+            self.inner.as_ptr() // forwards to the wrapped `inner` atomic
+        }
+    }
+}
+} // cfg_has_atomic_ptr!
+
+macro_rules! atomic_int {
+    // TODO: support AtomicF{16,128} once https://github.com/rust-lang/rust/issues/116909 is stabilized.
+    (AtomicU32, $int_type:ident, $align:literal) => {
+        atomic_int!(int, AtomicU32, $int_type, $align);
+        #[cfg(feature = "float")]
+        atomic_int!(float, AtomicF32, f32, AtomicU32, $int_type, $align);
+    };
+    (AtomicU64, $int_type:ident, $align:literal) => {
+        atomic_int!(int, AtomicU64, $int_type, $align);
+        #[cfg(feature = "float")]
+        atomic_int!(float, AtomicF64, f64, AtomicU64, $int_type, $align);
+    };
+    ($atomic_type:ident, $int_type:ident, $align:literal) => {
+        atomic_int!(int, $atomic_type, $int_type, $align);
+    };
+
+    // Atomic{I,U}* impls
+    (int, $atomic_type:ident, $int_type:ident, $align:literal) => {
+        doc_comment! {
+            concat!("An integer type which can be safely shared between threads.
+
+This type has the same in-memory representation as the underlying integer type,
+[`", stringify!($int_type), "`].
+
+If the compiler and the platform support atomic loads and stores of [`", stringify!($int_type),
+"`], this type is a wrapper for the standard library's `", stringify!($atomic_type),
+"`. If the platform supports it but the compiler does not, atomic operations are implemented using
+inline assembly. Otherwise, it synchronizes using global locks.
+You can call [`", stringify!($atomic_type), "::is_lock_free()`] to check whether
+atomic instructions or locks will be used.
+"
+            ),
+            // We can use #[repr(transparent)] here, but #[repr(C, align(N))]
+            // will show clearer docs.
+            #[repr(C, align($align))]
+            pub struct $atomic_type {
+                inner: imp::$atomic_type,
+            }
+        }
+
+        impl Default for $atomic_type {
+            #[inline]
+            fn default() -> Self {
+                Self::new($int_type::default())
+            }
+        }
+
+        impl From<$int_type> for $atomic_type {
+            #[inline]
+            fn from(v: $int_type) -> Self {
+                Self::new(v)
+            }
+        }
+
+        // UnwindSafe is implicitly implemented.
+        #[cfg(not(portable_atomic_no_core_unwind_safe))]
+        impl core::panic::RefUnwindSafe for $atomic_type {}
+        #[cfg(all(portable_atomic_no_core_unwind_safe, feature = "std"))]
+        impl std::panic::RefUnwindSafe for $atomic_type {}
+
+        impl_debug_and_serde!($atomic_type);
+
+        impl $atomic_type {
+            doc_comment! {
+                concat!(
+                    "Creates a new atomic integer.
+
+# Examples
+
+```
+use portable_atomic::", stringify!($atomic_type), ";
+
+let atomic_forty_two = ", stringify!($atomic_type), "::new(42);
+```"
+                ),
+                #[inline]
+                #[must_use]
+                pub const fn new(v: $int_type) -> Self {
+                    static_assert_layout!($atomic_type, $int_type);
+                    Self { inner: imp::$atomic_type::new(v) }
+                }
+            }
+
+            doc_comment! {
+                concat!("Creates a new reference to an atomic integer from a pointer.
+
+# Safety
+
+* `ptr` must be aligned to `align_of::<", stringify!($atomic_type), ">()` (note that on some platforms this
+  can be bigger than `align_of::<", stringify!($int_type), ">()`).
+* `ptr` must be [valid] for both reads and writes for the whole lifetime `'a`.
+* If this atomic type is [lock-free](Self::is_lock_free), non-atomic accesses to the value
+  behind `ptr` must have a happens-before relationship with atomic accesses via
+  the returned value (or vice-versa).
+  * In other words, time periods where the value is accessed atomically may not
+    overlap with periods where the value is accessed non-atomically.
+  * This requirement is trivially satisfied if `ptr` is never used non-atomically
+    for the duration of lifetime `'a`. Most use cases should be able to follow
+    this guideline.
+  * This requirement is also trivially satisfied if all accesses (atomic or not) are
+    done from the same thread.
+* If this atomic type is *not* lock-free:
+  * Any accesses to the value behind `ptr` must have a happens-before relationship
+    with accesses via the returned value (or vice-versa).
+  * Any concurrent accesses to the value behind `ptr` for the duration of lifetime `'a` must
+    be compatible with operations performed by this atomic type.
+* This method must not be used to create overlapping or mixed-size atomic
+  accesses, as these are not supported by the memory model.
+
+[valid]: core::ptr#safety"),
+                #[inline]
+                #[must_use]
+                pub unsafe fn from_ptr<'a>(ptr: *mut $int_type) -> &'a Self {
+                    #[allow(clippy::cast_ptr_alignment)]
+                    // SAFETY: guaranteed by the caller
+                    unsafe { &*(ptr as *mut Self) }
+                }
+            }
+
+            doc_comment! {
+                concat!("Returns `true` if operations on values of this type are lock-free.
+
+If the compiler or the platform doesn't support the necessary
+atomic instructions, global locks for every potentially
+concurrent atomic operation will be used.
+
+# Examples
+
+```
+use portable_atomic::", stringify!($atomic_type), ";
+
+let is_lock_free = ", stringify!($atomic_type), "::is_lock_free();
+```"),
+                #[inline]
+                #[must_use]
+                pub fn is_lock_free() -> bool {
+                    <imp::$atomic_type>::is_lock_free()
+                }
+            }
+
+            doc_comment! {
+                concat!("Returns `true` if operations on values of this type are lock-free.
+
+If the compiler or the platform doesn't support the necessary
+atomic instructions, global locks for every potentially
+concurrent atomic operation will be used.
+
+**Note:** If the atomic operation relies on dynamic CPU feature detection,
+this type may be lock-free even if the function returns false.
+
+# Examples
+
+```
+use portable_atomic::", stringify!($atomic_type), ";
+
+const IS_ALWAYS_LOCK_FREE: bool = ", stringify!($atomic_type), "::is_always_lock_free();
+```"),
+                #[inline]
+                #[must_use]
+                pub const fn is_always_lock_free() -> bool {
+                    <imp::$atomic_type>::is_always_lock_free()
+                }
+            }
+
+            doc_comment! {
+                concat!("Returns a mutable reference to the underlying integer.\n
+This is safe because the mutable reference guarantees that no other threads are
+concurrently accessing the atomic data.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let mut some_var = ", stringify!($atomic_type), "::new(10);
+assert_eq!(*some_var.get_mut(), 10);
+*some_var.get_mut() = 5;
+assert_eq!(some_var.load(Ordering::SeqCst), 5);
+```"),
+                #[inline]
+                pub fn get_mut(&mut self) -> &mut $int_type {
+                    self.inner.get_mut()
+                }
+            }
+
+            // TODO: Add from_mut/get_mut_slice/from_mut_slice once it is stable on std atomic types.
+            // https://github.com/rust-lang/rust/issues/76314
+
+            doc_comment! {
+                concat!("Consumes the atomic and returns the contained value.
+
+This is safe because passing `self` by value guarantees that no other threads are
+concurrently accessing the atomic data.
+
+# Examples
+
+```
+use portable_atomic::", stringify!($atomic_type), ";
+
+let some_var = ", stringify!($atomic_type), "::new(5);
+assert_eq!(some_var.into_inner(), 5);
+```"),
+                #[inline]
+                pub fn into_inner(self) -> $int_type {
+                    self.inner.into_inner()
+                }
+            }
+
+            doc_comment! {
+                concat!("Loads a value from the atomic integer.
+
+`load` takes an [`Ordering`] argument which describes the memory ordering of this operation.
+Possible values are [`SeqCst`], [`Acquire`] and [`Relaxed`].
+
+# Panics
+
+Panics if `order` is [`Release`] or [`AcqRel`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let some_var = ", stringify!($atomic_type), "::new(5);
+
+assert_eq!(some_var.load(Ordering::Relaxed), 5);
+```"),
+                #[inline]
+                #[cfg_attr(
+                    any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                    track_caller
+                )]
+                pub fn load(&self, order: Ordering) -> $int_type {
+                    self.inner.load(order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Stores a value into the atomic integer.
+
+`store` takes an [`Ordering`] argument which describes the memory ordering of this operation.
+Possible values are [`SeqCst`], [`Release`] and [`Relaxed`].
+
+# Panics
+
+Panics if `order` is [`Acquire`] or [`AcqRel`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let some_var = ", stringify!($atomic_type), "::new(5);
+
+some_var.store(10, Ordering::Relaxed);
+assert_eq!(some_var.load(Ordering::Relaxed), 10);
+```"),
+                #[inline]
+                #[cfg_attr(
+                    any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                    track_caller
+                )]
+                pub fn store(&self, val: $int_type, order: Ordering) {
+                    self.inner.store(val, order)
+                }
+            }
+
+            cfg_has_atomic_cas! {
+            doc_comment! {
+                concat!("Stores a value into the atomic integer, returning the previous value.
+
+`swap` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let some_var = ", stringify!($atomic_type), "::new(5);
+
+assert_eq!(some_var.swap(10, Ordering::Relaxed), 5);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn swap(&self, val: $int_type, order: Ordering) -> $int_type {
+                    self.inner.swap(val, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Stores a value into the atomic integer if the current value is the same as
+the `current` value.
+
+The return value is a result indicating whether the new value was written and
+containing the previous value. On success this value is guaranteed to be equal to
+`current`.
+
+`compare_exchange` takes two [`Ordering`] arguments to describe the memory
+ordering of this operation. `success` describes the required ordering for the
+read-modify-write operation that takes place if the comparison with `current` succeeds.
+`failure` describes the required ordering for the load operation that takes place when
+the comparison fails. Using [`Acquire`] as success ordering makes the store part
+of this operation [`Relaxed`], and using [`Release`] makes the successful load
+[`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+
+# Panics
+
+Panics if `failure` is [`Release`] or [`AcqRel`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let some_var = ", stringify!($atomic_type), "::new(5);
+
+assert_eq!(
+    some_var.compare_exchange(5, 10, Ordering::Acquire, Ordering::Relaxed),
+    Ok(5),
+);
+assert_eq!(some_var.load(Ordering::Relaxed), 10);
+
+assert_eq!(
+    some_var.compare_exchange(6, 12, Ordering::SeqCst, Ordering::Acquire),
+    Err(10),
+);
+assert_eq!(some_var.load(Ordering::Relaxed), 10);
+```"),
+                #[inline]
+                #[cfg_attr(portable_atomic_doc_cfg, doc(alias = "compare_and_swap"))]
+                #[cfg_attr(
+                    any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                    track_caller
+                )]
+                pub fn compare_exchange(
+                    &self,
+                    current: $int_type,
+                    new: $int_type,
+                    success: Ordering,
+                    failure: Ordering,
+                ) -> Result<$int_type, $int_type> {
+                    self.inner.compare_exchange(current, new, success, failure)
+                }
+            }
+
+            doc_comment! {
+                concat!("Stores a value into the atomic integer if the current value is the same as
+the `current` value.
+Unlike [`compare_exchange`](Self::compare_exchange)
+this function is allowed to spuriously fail even
+when the comparison succeeds, which can result in more efficient code on some
+platforms. The return value is a result indicating whether the new value was
+written and containing the previous value.
+
+`compare_exchange_weak` takes two [`Ordering`] arguments to describe the memory
+ordering of this operation. `success` describes the required ordering for the
+read-modify-write operation that takes place if the comparison with `current` succeeds.
+`failure` describes the required ordering for the load operation that takes place when
+the comparison fails. Using [`Acquire`] as success ordering makes the store part
+of this operation [`Relaxed`], and using [`Release`] makes the successful load
+[`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+
+# Panics
+
+Panics if `failure` is [`Release`] or [`AcqRel`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let val = ", stringify!($atomic_type), "::new(4);
+
+let mut old = val.load(Ordering::Relaxed);
+loop {
+    let new = old * 2;
+    match val.compare_exchange_weak(old, new, Ordering::SeqCst, Ordering::Relaxed) {
+        Ok(_) => break,
+        Err(x) => old = x,
+    }
+}
+```"),
+                #[inline]
+                #[cfg_attr(portable_atomic_doc_cfg, doc(alias = "compare_and_swap"))]
+                #[cfg_attr(
+                    any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                    track_caller
+                )]
+                pub fn compare_exchange_weak(
+                    &self,
+                    current: $int_type,
+                    new: $int_type,
+                    success: Ordering,
+                    failure: Ordering,
+                ) -> Result<$int_type, $int_type> {
+                    self.inner.compare_exchange_weak(current, new, success, failure)
+                }
+            }
+
+            doc_comment! {
+                concat!("Adds to the current value, returning the previous value.
+
+This operation wraps around on overflow.
+
+`fetch_add` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0);
+assert_eq!(foo.fetch_add(10, Ordering::SeqCst), 0);
+assert_eq!(foo.load(Ordering::SeqCst), 10);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type {
+                    self.inner.fetch_add(val, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Adds to the current value.
+
+This operation wraps around on overflow.
+
+Unlike `fetch_add`, this does not return the previous value.
+
+`add` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_add` on some platforms.
+
+- MSP430: `add` instead of disabling interrupts ({8,16}-bit atomics)
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0);
+foo.add(10, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 10);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn add(&self, val: $int_type, order: Ordering) {
+                    self.inner.add(val, order);
+                }
+            }
+
+            doc_comment! {
+                concat!("Subtracts from the current value, returning the previous value.
+
+This operation wraps around on overflow.
+
+`fetch_sub` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(20);
+assert_eq!(foo.fetch_sub(10, Ordering::SeqCst), 20);
+assert_eq!(foo.load(Ordering::SeqCst), 10);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type {
+                    self.inner.fetch_sub(val, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Subtracts from the current value.
+
+This operation wraps around on overflow.
+
+Unlike `fetch_sub`, this does not return the previous value.
+
+`sub` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_sub` on some platforms.
+
+- MSP430: `sub` instead of disabling interrupts ({8,16}-bit atomics)
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(20);
+foo.sub(10, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 10);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn sub(&self, val: $int_type, order: Ordering) {
+                    self.inner.sub(val, order);
+                }
+            }
+
+            doc_comment! {
+                concat!("Bitwise \"and\" with the current value.
+
+Performs a bitwise \"and\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Returns the previous value.
+
+`fetch_and` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+assert_eq!(foo.fetch_and(0b110011, Ordering::SeqCst), 0b101101);
+assert_eq!(foo.load(Ordering::SeqCst), 0b100001);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type {
+                    self.inner.fetch_and(val, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Bitwise \"and\" with the current value.
+
+Performs a bitwise \"and\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Unlike `fetch_and`, this does not return the previous value.
+
+`and` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_and` on some platforms.
+
+- x86/x86_64: `lock and` instead of `cmpxchg` loop ({8,16,32}-bit atomics on x86, but additionally 64-bit atomics on x86_64)
+- MSP430: `and` instead of disabling interrupts ({8,16}-bit atomics)
+
+Note: On x86/x86_64, the use of either function should not usually
+affect the generated code, because LLVM can properly optimize the case
+where the result is unused.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+foo.and(0b110011, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 0b100001);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn and(&self, val: $int_type, order: Ordering) {
+                    self.inner.and(val, order);
+                }
+            }
+
+            doc_comment! {
+                concat!("Bitwise \"nand\" with the current value.
+
+Performs a bitwise \"nand\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Returns the previous value.
+
+`fetch_nand` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0x13);
+assert_eq!(foo.fetch_nand(0x31, Ordering::SeqCst), 0x13);
+assert_eq!(foo.load(Ordering::SeqCst), !(0x13 & 0x31));
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn fetch_nand(&self, val: $int_type, order: Ordering) -> $int_type {
+                    self.inner.fetch_nand(val, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Bitwise \"or\" with the current value.
+
+Performs a bitwise \"or\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Returns the previous value.
+
+`fetch_or` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+assert_eq!(foo.fetch_or(0b110011, Ordering::SeqCst), 0b101101);
+assert_eq!(foo.load(Ordering::SeqCst), 0b111111);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type {
+                    self.inner.fetch_or(val, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Bitwise \"or\" with the current value.
+
+Performs a bitwise \"or\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Unlike `fetch_or`, this does not return the previous value.
+
+`or` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_or` on some platforms.
+
+- x86/x86_64: `lock or` instead of `cmpxchg` loop ({8,16,32}-bit atomics on x86, but additionally 64-bit atomics on x86_64)
+- MSP430: `or` instead of disabling interrupts ({8,16}-bit atomics)
+
+Note: On x86/x86_64, the use of either function should not usually
+affect the generated code, because LLVM can properly optimize the case
+where the result is unused.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+foo.or(0b110011, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 0b111111);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn or(&self, val: $int_type, order: Ordering) {
+                    self.inner.or(val, order);
+                }
+            }
+
+            doc_comment! {
+                concat!("Bitwise \"xor\" with the current value.
+
+Performs a bitwise \"xor\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Returns the previous value.
+
+`fetch_xor` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+assert_eq!(foo.fetch_xor(0b110011, Ordering::SeqCst), 0b101101);
+assert_eq!(foo.load(Ordering::SeqCst), 0b011110);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type {
+                    self.inner.fetch_xor(val, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Bitwise \"xor\" with the current value.
+
+Performs a bitwise \"xor\" operation on the current value and the argument `val`, and
+sets the new value to the result.
+
+Unlike `fetch_xor`, this does not return the previous value.
+
+`xor` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_xor` on some platforms.
+
+- x86/x86_64: `lock xor` instead of `cmpxchg` loop ({8,16,32}-bit atomics on x86, but additionally 64-bit atomics on x86_64)
+- MSP430: `xor` instead of disabling interrupts ({8,16}-bit atomics)
+
+Note: On x86/x86_64, the use of either function should not usually
+affect the generated code, because LLVM can properly optimize the case
+where the result is unused.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b101101);
+foo.xor(0b110011, Ordering::SeqCst);
+assert_eq!(foo.load(Ordering::SeqCst), 0b011110);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn xor(&self, val: $int_type, order: Ordering) {
+                    self.inner.xor(val, order);
+                }
+            }
+
+            doc_comment! {
+                concat!("Fetches the value, and applies a function to it that returns an optional
+new value. Returns a `Result` of `Ok(previous_value)` if the function returned `Some(_)`, else
+`Err(previous_value)`.
+
+Note: This may call the function multiple times if the value has been changed from other threads in
+the meantime, as long as the function returns `Some(_)`, but the function will have been applied
+only once to the stored value.
+
+`fetch_update` takes two [`Ordering`] arguments to describe the memory ordering of this operation.
+The first describes the required ordering for when the operation finally succeeds while the second
+describes the required ordering for loads. These correspond to the success and failure orderings of
+[`compare_exchange`](Self::compare_exchange) respectively.
+
+Using [`Acquire`] as success ordering makes the store part
+of this operation [`Relaxed`], and using [`Release`] makes the final successful load
+[`Relaxed`]. The (failed) load ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+
+# Panics
+
+Panics if `fetch_order` is [`Release`] or [`AcqRel`].
+
+# Considerations
+
+This method is not magic; it is not provided by the hardware.
+It is implemented in terms of [`compare_exchange_weak`](Self::compare_exchange_weak),
+and suffers from the same drawbacks.
+In particular, this method will not circumvent the [ABA Problem].
+
+[ABA Problem]: https://en.wikipedia.org/wiki/ABA_problem
+
+# Examples
+
+```rust
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let x = ", stringify!($atomic_type), "::new(7);
+assert_eq!(x.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |_| None), Err(7));
+assert_eq!(x.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |x| Some(x + 1)), Ok(7));
+assert_eq!(x.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |x| Some(x + 1)), Ok(8));
+assert_eq!(x.load(Ordering::SeqCst), 9);
+```"),
+                #[inline]
+                #[cfg_attr(
+                    any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                    track_caller
+                )]
+                pub fn fetch_update<F>(
+                    &self,
+                    set_order: Ordering,
+                    fetch_order: Ordering,
+                    mut f: F,
+                ) -> Result<$int_type, $int_type>
+                where
+                    F: FnMut($int_type) -> Option<$int_type>,
+                {
+                    let mut prev = self.load(fetch_order);
+                    while let Some(next) = f(prev) {
+                        match self.compare_exchange_weak(prev, next, set_order, fetch_order) {
+                            x @ Ok(_) => return x,
+                            Err(next_prev) => prev = next_prev,
+                        }
+                    }
+                    Err(prev)
+                }
+            }
+
+            doc_comment! {
+                concat!("Maximum with the current value.
+
+Finds the maximum of the current value and the argument `val`, and
+sets the new value to the result.
+
+Returns the previous value.
+
+`fetch_max` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(23);
+assert_eq!(foo.fetch_max(42, Ordering::SeqCst), 23);
+assert_eq!(foo.load(Ordering::SeqCst), 42);
+```
+
+If you want to obtain the maximum value in one step, you can use the following:
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(23);
+let bar = 42;
+let max_foo = foo.fetch_max(bar, Ordering::SeqCst).max(bar);
+assert!(max_foo == 42);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type {
+                    self.inner.fetch_max(val, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Minimum with the current value.
+
+Finds the minimum of the current value and the argument `val`, and
+sets the new value to the result.
+
+Returns the previous value.
+
+`fetch_min` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(23);
+assert_eq!(foo.fetch_min(42, Ordering::Relaxed), 23);
+assert_eq!(foo.load(Ordering::Relaxed), 23);
+assert_eq!(foo.fetch_min(22, Ordering::Relaxed), 23);
+assert_eq!(foo.load(Ordering::Relaxed), 22);
+```
+
+If you want to obtain the minimum value in one step, you can use the following:
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(23);
+let bar = 12;
+let min_foo = foo.fetch_min(bar, Ordering::SeqCst).min(bar);
+assert_eq!(min_foo, 12);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type {
+                    self.inner.fetch_min(val, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Sets the bit at the specified bit-position to 1.
+
+Returns `true` if the specified bit was previously set to 1.
+
+`bit_set` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This corresponds to x86's `lock bts`, and the implementation uses that instruction on x86/x86_64.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b0000);
+assert!(!foo.bit_set(0, Ordering::Relaxed));
+assert_eq!(foo.load(Ordering::Relaxed), 0b0001);
+assert!(foo.bit_set(0, Ordering::Relaxed));
+assert_eq!(foo.load(Ordering::Relaxed), 0b0001);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn bit_set(&self, bit: u32, order: Ordering) -> bool {
+                    self.inner.bit_set(bit, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Clears the bit at the specified bit-position to 0.
+
+Returns `true` if the specified bit was previously set to 1.
+
+`bit_clear` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This corresponds to x86's `lock btr`, and the implementation uses that instruction on x86/x86_64.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b0001);
+assert!(foo.bit_clear(0, Ordering::Relaxed));
+assert_eq!(foo.load(Ordering::Relaxed), 0b0000);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn bit_clear(&self, bit: u32, order: Ordering) -> bool {
+                    self.inner.bit_clear(bit, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Toggles the bit at the specified bit-position.
+
+Returns `true` if the specified bit was previously set to 1.
+
+`bit_toggle` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This corresponds to x86's `lock btc`, and the implementation uses that instruction on x86/x86_64.
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0b0000);
+assert!(!foo.bit_toggle(0, Ordering::Relaxed));
+assert_eq!(foo.load(Ordering::Relaxed), 0b0001);
+assert!(foo.bit_toggle(0, Ordering::Relaxed));
+assert_eq!(foo.load(Ordering::Relaxed), 0b0000);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn bit_toggle(&self, bit: u32, order: Ordering) -> bool {
+                    self.inner.bit_toggle(bit, order)
+                }
+            }
+
+            doc_comment! {
+                concat!("Bitwise negates the current value, and sets the new value to the result.
+
+Returns the previous value.
+
+`fetch_not` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0);
+assert_eq!(foo.fetch_not(Ordering::Relaxed), 0);
+assert_eq!(foo.load(Ordering::Relaxed), !0);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn fetch_not(&self, order: Ordering) -> $int_type {
+                    self.inner.fetch_not(order)
+                }
+
+                doc_comment! {
+                    concat!("Bitwise negates the current value, and sets the new value to the result.
+
+Unlike `fetch_not`, this does not return the previous value.
+
+`not` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_not` on some platforms.
+
+- x86/x86_64: `lock not` instead of `cmpxchg` loop ({8,16,32}-bit atomics on x86, but additionally 64-bit atomics on x86_64)
+- MSP430: `inv` instead of disabling interrupts ({8,16}-bit atomics)
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(0);
+foo.not(Ordering::Relaxed);
+assert_eq!(foo.load(Ordering::Relaxed), !0);
+```"),
+                    #[inline]
+                    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                    pub fn not(&self, order: Ordering) {
+                        self.inner.not(order);
+                    }
+                }
+            }
+
+            doc_comment! {
+                concat!("Negates the current value, and sets the new value to the result.
+
+Returns the previous value.
+
+`fetch_neg` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(5);
+assert_eq!(foo.fetch_neg(Ordering::Relaxed), 5);
+assert_eq!(foo.load(Ordering::Relaxed), 5_", stringify!($int_type), ".wrapping_neg());
+assert_eq!(foo.fetch_neg(Ordering::Relaxed), 5_", stringify!($int_type), ".wrapping_neg());
+assert_eq!(foo.load(Ordering::Relaxed), 5);
+```"),
+                #[inline]
+                #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                pub fn fetch_neg(&self, order: Ordering) -> $int_type {
+                    self.inner.fetch_neg(order)
+                }
+
+                doc_comment! {
+                    concat!("Negates the current value, and sets the new value to the result.
+
+Unlike `fetch_neg`, this does not return the previous value.
+
+`neg` takes an [`Ordering`] argument which describes the memory ordering
+of this operation. All ordering modes are possible. Note that using
+[`Acquire`] makes the store part of this operation [`Relaxed`], and
+using [`Release`] makes the load part [`Relaxed`].
+
+This function may generate more efficient code than `fetch_neg` on some platforms.
+
+- x86/x86_64: `lock neg` instead of `cmpxchg` loop ({8,16,32}-bit atomics on x86, but additionally 64-bit atomics on x86_64)
+
+# Examples
+
+```
+use portable_atomic::{", stringify!($atomic_type), ", Ordering};
+
+let foo = ", stringify!($atomic_type), "::new(5);
+foo.neg(Ordering::Relaxed);
+assert_eq!(foo.load(Ordering::Relaxed), 5_", stringify!($int_type), ".wrapping_neg());
+foo.neg(Ordering::Relaxed);
+assert_eq!(foo.load(Ordering::Relaxed), 5);
+```"),
+                    #[inline]
+                    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+                    pub fn neg(&self, order: Ordering) {
+                        self.inner.neg(order);
+                    }
+                }
+            }
+            } // cfg_has_atomic_cas!
+
+            const_fn! {
+                const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))];
+                /// Returns a mutable pointer to the underlying integer.
+                ///
+                /// Returning an `*mut` pointer from a shared reference to this atomic is
+                /// safe because the atomic types work with interior mutability. Any use of
+                /// the returned raw pointer requires an `unsafe` block and has to uphold
+                /// the safety requirements. If there is concurrent access, note the following
+                /// additional safety requirements:
+                ///
+                /// - If this atomic type is [lock-free](Self::is_lock_free), any concurrent
+                ///   operations on it must be atomic.
+                /// - Otherwise, any concurrent operations on it must be compatible with
+                ///   operations performed by this atomic type.
+                ///
+                /// This is `const fn` on Rust 1.58+.
+                #[inline]
+                pub const fn as_ptr(&self) -> *mut $int_type {
+                    self.inner.as_ptr()
+                }
+            }
+        }
+    };
+
+    // AtomicF* impls
+    (float,
+        $atomic_type:ident,
+        $float_type:ident,
+        $atomic_int_type:ident,
+        $int_type:ident,
+        $align:literal
+    ) => {
+        doc_comment! {
+            concat!("A floating point type which can be safely shared between threads.
+
+This type has the same in-memory representation as the underlying floating point type,
+[`", stringify!($float_type), "`].
+"
+            ),
+            #[cfg_attr(portable_atomic_doc_cfg, doc(cfg(feature = "float")))]
+            // We can use #[repr(transparent)] here, but #[repr(C, align(N))]
+            // will show clearer docs.
+            #[repr(C, align($align))]
+            pub struct $atomic_type {
+                inner: imp::float::$atomic_type,
+            }
+        }
+
+        impl Default for $atomic_type {
+            #[inline]
+            fn default() -> Self {
+                Self::new($float_type::default())
+            }
+        }
+
+        impl From<$float_type> for $atomic_type {
+            #[inline]
+            fn from(v: $float_type) -> Self {
+                Self::new(v)
+            }
+        }
+
+        // UnwindSafe is implicitly implemented.
+        #[cfg(not(portable_atomic_no_core_unwind_safe))]
+        impl core::panic::RefUnwindSafe for $atomic_type {}
+        #[cfg(all(portable_atomic_no_core_unwind_safe, feature = "std"))]
+        impl std::panic::RefUnwindSafe for $atomic_type {}
+
+        impl_debug_and_serde!($atomic_type);
+
+        impl $atomic_type {
+            /// Creates a new atomic float.
+            #[inline]
+            #[must_use]
+            pub const fn new(v: $float_type) -> Self {
+                static_assert_layout!($atomic_type, $float_type);
+                Self { inner: imp::float::$atomic_type::new(v) }
+            }
+
+            doc_comment! {
+                concat!("Creates a new reference to an atomic float from a pointer.
+
+# Safety
+
+* `ptr` must be aligned to `align_of::<", stringify!($atomic_type), ">()` (note that on some platforms this
+  can be bigger than `align_of::<", stringify!($float_type), ">()`).
+* `ptr` must be [valid] for both reads and writes for the whole lifetime `'a`.
+* If this atomic type is [lock-free](Self::is_lock_free), non-atomic accesses to the value
+  behind `ptr` must have a happens-before relationship with atomic accesses via
+  the returned value (or vice-versa).
+  * In other words, time periods where the value is accessed atomically may not
+    overlap with periods where the value is accessed non-atomically.
+  * This requirement is trivially satisfied if `ptr` is never used non-atomically
+    for the duration of lifetime `'a`. Most use cases should be able to follow
+    this guideline.
+  * This requirement is also trivially satisfied if all accesses (atomic or not) are
+    done from the same thread.
+* If this atomic type is *not* lock-free:
+  * Any accesses to the value behind `ptr` must have a happens-before relationship
+    with accesses via the returned value (or vice-versa).
+  * Any concurrent accesses to the value behind `ptr` for the duration of lifetime `'a` must
+    be compatible with operations performed by this atomic type.
+* This method must not be used to create overlapping or mixed-size atomic
+  accesses, as these are not supported by the memory model.
+
+[valid]: core::ptr#safety"),
+                #[inline]
+                #[must_use]
+                pub unsafe fn from_ptr<'a>(ptr: *mut $float_type) -> &'a Self {
+                    #[allow(clippy::cast_ptr_alignment)]
+                    // SAFETY: guaranteed by the caller
+                    unsafe { &*(ptr as *mut Self) }
+                }
+            }
+
+            /// Returns `true` if operations on values of this type are lock-free.
+            ///
+            /// If the compiler or the platform doesn't support the necessary
+            /// atomic instructions, global locks for every potentially
+            /// concurrent atomic operation will be used.
+            #[inline]
+            #[must_use]
+            pub fn is_lock_free() -> bool {
+                <imp::float::$atomic_type>::is_lock_free()
+            }
+
+            /// Returns `true` if operations on values of this type are lock-free.
+            ///
+            /// If the compiler or the platform doesn't support the necessary
+            /// atomic instructions, global locks for every potentially
+            /// concurrent atomic operation will be used.
+            ///
+            /// **Note:** If the atomic operation relies on dynamic CPU feature detection,
+            /// this type may be lock-free even if the function returns false.
+            #[inline]
+            #[must_use]
+            pub const fn is_always_lock_free() -> bool {
+                <imp::float::$atomic_type>::is_always_lock_free()
+            }
+
+            /// Returns a mutable reference to the underlying float.
+            ///
+            /// This is safe because the mutable reference guarantees that no other threads are
+            /// concurrently accessing the atomic data.
+            #[inline]
+            pub fn get_mut(&mut self) -> &mut $float_type {
+                self.inner.get_mut()
+            }
+
+            // TODO: Add from_mut/get_mut_slice/from_mut_slice once it is stable on std atomic types.
+            // https://github.com/rust-lang/rust/issues/76314
+
+            /// Consumes the atomic and returns the contained value.
+            ///
+            /// This is safe because passing `self` by value guarantees that no other threads are
+            /// concurrently accessing the atomic data.
+            #[inline]
+            pub fn into_inner(self) -> $float_type {
+                self.inner.into_inner()
+            }
+
+            /// Loads a value from the atomic float.
+            ///
+            /// `load` takes an [`Ordering`] argument which describes the memory ordering of this operation.
+            /// Possible values are [`SeqCst`], [`Acquire`] and [`Relaxed`].
+            ///
+            /// # Panics
+            ///
+            /// Panics if `order` is [`Release`] or [`AcqRel`].
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub fn load(&self, order: Ordering) -> $float_type {
+                self.inner.load(order)
+            }
+
+            /// Stores a value into the atomic float.
+            ///
+            /// `store` takes an [`Ordering`] argument which describes the memory ordering of this operation.
+            /// Possible values are [`SeqCst`], [`Release`] and [`Relaxed`].
+            ///
+            /// # Panics
+            ///
+            /// Panics if `order` is [`Acquire`] or [`AcqRel`].
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub fn store(&self, val: $float_type, order: Ordering) {
+                self.inner.store(val, order)
+            }
+
+            cfg_has_atomic_cas! {
+            /// Stores a value into the atomic float, returning the previous value.
+            ///
+            /// `swap` takes an [`Ordering`] argument which describes the memory ordering
+            /// of this operation. All ordering modes are possible. Note that using
+            /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+            /// using [`Release`] makes the load part [`Relaxed`].
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub fn swap(&self, val: $float_type, order: Ordering) -> $float_type {
+                self.inner.swap(val, order)
+            }
+
+            /// Stores a value into the atomic float if the current value is the same as
+            /// the `current` value.
+            ///
+            /// The return value is a result indicating whether the new value was written and
+            /// containing the previous value. On success this value is guaranteed to be equal to
+            /// `current`.
+            ///
+            /// `compare_exchange` takes two [`Ordering`] arguments to describe the memory
+            /// ordering of this operation. `success` describes the required ordering for the
+            /// read-modify-write operation that takes place if the comparison with `current` succeeds.
+            /// `failure` describes the required ordering for the load operation that takes place when
+            /// the comparison fails. Using [`Acquire`] as success ordering makes the store part
+            /// of this operation [`Relaxed`], and using [`Release`] makes the successful load
+            /// [`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+            ///
+            /// # Panics
+            ///
+            /// Panics if `failure` is [`Release`] or [`AcqRel`].
+            #[inline]
+            #[cfg_attr(portable_atomic_doc_cfg, doc(alias = "compare_and_swap"))]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub fn compare_exchange(
+                &self,
+                current: $float_type,
+                new: $float_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$float_type, $float_type> {
+                self.inner.compare_exchange(current, new, success, failure)
+            }
+
+            /// Stores a value into the atomic float if the current value is the same as
+            /// the `current` value.
+            /// Unlike [`compare_exchange`](Self::compare_exchange)
+            /// this function is allowed to spuriously fail even
+            /// when the comparison succeeds, which can result in more efficient code on some
+            /// platforms. The return value is a result indicating whether the new value was
+            /// written and containing the previous value.
+            ///
+            /// `compare_exchange_weak` takes two [`Ordering`] arguments to describe the memory
+            /// ordering of this operation. `success` describes the required ordering for the
+            /// read-modify-write operation that takes place if the comparison with `current` succeeds.
+            /// `failure` describes the required ordering for the load operation that takes place when
+            /// the comparison fails. Using [`Acquire`] as success ordering makes the store part
+            /// of this operation [`Relaxed`], and using [`Release`] makes the successful load
+            /// [`Relaxed`]. The failure ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+            ///
+            /// # Panics
+            ///
+            /// Panics if `failure` is [`Release`] or [`AcqRel`].
+            #[inline]
+            #[cfg_attr(portable_atomic_doc_cfg, doc(alias = "compare_and_swap"))]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub fn compare_exchange_weak(
+                &self,
+                current: $float_type,
+                new: $float_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$float_type, $float_type> {
+                self.inner.compare_exchange_weak(current, new, success, failure)
+            }
+
+            /// Adds to the current value, returning the previous value.
+            ///
+            /// Unlike the integer atomics, this is floating-point addition and does not wrap on overflow.
+            ///
+            /// `fetch_add` takes an [`Ordering`] argument which describes the memory ordering
+            /// of this operation. All ordering modes are possible. Note that using
+            /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+            /// using [`Release`] makes the load part [`Relaxed`].
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub fn fetch_add(&self, val: $float_type, order: Ordering) -> $float_type {
+                self.inner.fetch_add(val, order)
+            }
+
+            /// Subtracts from the current value, returning the previous value.
+            ///
+            /// Unlike the integer atomics, this is floating-point subtraction and does not wrap on overflow.
+            ///
+            /// `fetch_sub` takes an [`Ordering`] argument which describes the memory ordering
+            /// of this operation. All ordering modes are possible. Note that using
+            /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+            /// using [`Release`] makes the load part [`Relaxed`].
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub fn fetch_sub(&self, val: $float_type, order: Ordering) -> $float_type {
+                self.inner.fetch_sub(val, order)
+            }
+
+            /// Fetches the value, and applies a function to it that returns an optional
+            /// new value. Returns a `Result` of `Ok(previous_value)` if the function returned `Some(_)`, else
+            /// `Err(previous_value)`.
+            ///
+            /// Note: This may call the function multiple times if the value has been changed from other threads in
+            /// the meantime, as long as the function returns `Some(_)`, but the function will have been applied
+            /// only once to the stored value.
+            ///
+            /// `fetch_update` takes two [`Ordering`] arguments to describe the memory ordering of this operation.
+            /// The first describes the required ordering for when the operation finally succeeds while the second
+            /// describes the required ordering for loads. These correspond to the success and failure orderings of
+            /// [`compare_exchange`](Self::compare_exchange) respectively.
+            ///
+            /// Using [`Acquire`] as success ordering makes the store part
+            /// of this operation [`Relaxed`], and using [`Release`] makes the final successful load
+            /// [`Relaxed`]. The (failed) load ordering can only be [`SeqCst`], [`Acquire`] or [`Relaxed`].
+            ///
+            /// # Panics
+            ///
+            /// Panics if `fetch_order` is [`Release`] or [`AcqRel`].
+            ///
+            /// # Considerations
+            ///
+            /// This method is not magic; it is not provided by the hardware.
+            /// It is implemented in terms of [`compare_exchange_weak`](Self::compare_exchange_weak),
+            /// and suffers from the same drawbacks.
+            /// In particular, this method will not circumvent the [ABA Problem].
+            ///
+            /// [ABA Problem]: https://en.wikipedia.org/wiki/ABA_problem
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub fn fetch_update<F>(
+                &self,
+                set_order: Ordering,
+                fetch_order: Ordering,
+                mut f: F,
+            ) -> Result<$float_type, $float_type>
+            where
+                F: FnMut($float_type) -> Option<$float_type>,
+            {
+                let mut prev = self.load(fetch_order);
+                while let Some(next) = f(prev) {
+                    match self.compare_exchange_weak(prev, next, set_order, fetch_order) {
+                        x @ Ok(_) => return x,
+                        Err(next_prev) => prev = next_prev,
+                    }
+                }
+                Err(prev)
+            }
+
+            /// Stores the maximum of the current value and `val`.
+            ///
+            /// Finds the maximum of the current value and the argument `val`, and
+            /// sets the new value to the result.
+            ///
+            /// Returns the previous value, i.e. the one held before this operation.
+            ///
+            /// `fetch_max` takes an [`Ordering`] argument which describes the memory ordering
+            /// of this operation. All ordering modes are possible. Note that using
+            /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+            /// using [`Release`] makes the load part [`Relaxed`].
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub fn fetch_max(&self, val: $float_type, order: Ordering) -> $float_type {
+                self.inner.fetch_max(val, order) // thin wrapper: the work is done by `inner`
+            }
+
+            /// Stores the minimum of the current value and `val`.
+            ///
+            /// Finds the minimum of the current value and the argument `val`, and
+            /// sets the new value to the result.
+            ///
+            /// Returns the previous value, i.e. the one held before this operation.
+            ///
+            /// `fetch_min` takes an [`Ordering`] argument which describes the memory ordering
+            /// of this operation. All ordering modes are possible. Note that using
+            /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+            /// using [`Release`] makes the load part [`Relaxed`].
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub fn fetch_min(&self, val: $float_type, order: Ordering) -> $float_type {
+                self.inner.fetch_min(val, order) // thin wrapper: the work is done by `inner`
+            }
+
+            /// Negates the current value, and sets the new value to the result.
+            ///
+            /// Returns the previous value, i.e. the one held before the negation.
+            ///
+            /// `fetch_neg` takes an [`Ordering`] argument which describes the memory ordering
+            /// of this operation. All ordering modes are possible. Note that using
+            /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+            /// using [`Release`] makes the load part [`Relaxed`].
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub fn fetch_neg(&self, order: Ordering) -> $float_type {
+                self.inner.fetch_neg(order) // thin wrapper: the work is done by `inner`
+            }
+
+            /// Computes the absolute value of the current value, and sets the
+            /// new value to the result.
+            ///
+            /// Returns the previous value, i.e. the one held before this operation.
+            ///
+            /// `fetch_abs` takes an [`Ordering`] argument which describes the memory ordering
+            /// of this operation. All ordering modes are possible. Note that using
+            /// [`Acquire`] makes the store part of this operation [`Relaxed`], and
+            /// using [`Release`] makes the load part [`Relaxed`].
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub fn fetch_abs(&self, order: Ordering) -> $float_type {
+                self.inner.fetch_abs(order) // thin wrapper: the work is done by `inner`
+            }
+            } // cfg_has_atomic_cas!
+
+            #[cfg(not(portable_atomic_no_const_raw_ptr_deref))] // `const fn` flavor of `as_bits`
+            doc_comment! {
+                concat!("Raw transmutation to `&", stringify!($atomic_int_type), "`.
+
+See [`", stringify!($float_type) ,"::from_bits`] for some discussion of the
+portability of this operation (there are almost no issues).
+
+This is `const fn` on Rust 1.58+."),
+                #[inline]
+                pub const fn as_bits(&self) -> &$atomic_int_type {
+                    self.inner.as_bits() // forwards to `inner`; nothing else happens here
+                }
+            }
+            #[cfg(portable_atomic_no_const_raw_ptr_deref)] // non-`const` flavor of `as_bits`
+            doc_comment! {
+                concat!("Raw transmutation to `&", stringify!($atomic_int_type), "`.
+
+See [`", stringify!($float_type) ,"::from_bits`] for some discussion of the
+portability of this operation (there are almost no issues).
+
+This is `const fn` on Rust 1.58+."),
+                #[inline]
+                pub fn as_bits(&self) -> &$atomic_int_type {
+                    self.inner.as_bits() // forwards to `inner`; nothing else happens here
+                }
+            }
+
+            const_fn! {
+                // NOTE(review): `const_if` presumably gates `const`; confirm in `const_fn!`.
+                const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))];
+                /// Returns a mutable pointer to the underlying float.
+                ///
+                /// Returning an `*mut` pointer from a shared reference to this atomic is
+                /// safe because the atomic types work with interior mutability. Any use of
+                /// the returned raw pointer requires an `unsafe` block and has to uphold
+                /// the safety requirements. If there is concurrent access, note the following
+                /// additional safety requirements:
+                ///
+                /// - If this atomic type is [lock-free](Self::is_lock_free), any concurrent
+                ///   operations on it must be atomic.
+                /// - Otherwise, any concurrent operations on it must be compatible with
+                ///   operations performed by this atomic type.
+                /// This is `const fn` on Rust 1.58+.
+                #[inline]
+                pub const fn as_ptr(&self) -> *mut $float_type {
+                    self.inner.as_ptr() // the pointer comes straight from `inner`
+                }
+            }
+        }
+    };
+}
+
+cfg_has_atomic_ptr! { // pointer-sized atomics: one isize/usize pair per `target_pointer_width`
+    #[cfg(target_pointer_width = "16")]
+    atomic_int!(AtomicIsize, isize, 2); // third argument = pointer width in bytes (16 / 8)
+    #[cfg(target_pointer_width = "16")]
+    atomic_int!(AtomicUsize, usize, 2);
+    #[cfg(target_pointer_width = "32")]
+    atomic_int!(AtomicIsize, isize, 4);
+    #[cfg(target_pointer_width = "32")]
+    atomic_int!(AtomicUsize, usize, 4);
+    #[cfg(target_pointer_width = "64")]
+    atomic_int!(AtomicIsize, isize, 8);
+    #[cfg(target_pointer_width = "64")]
+    atomic_int!(AtomicUsize, usize, 8);
+    #[cfg(target_pointer_width = "128")]
+    atomic_int!(AtomicIsize, isize, 16);
+    #[cfg(target_pointer_width = "128")]
+    atomic_int!(AtomicUsize, usize, 16);
+}
+
+cfg_has_atomic_8! { // fixed-width atomics: the third argument equals the integer's size in bytes
+    atomic_int!(AtomicI8, i8, 1); // NOTE(review): 3rd arg is presumably size/alignment — confirm
+    atomic_int!(AtomicU8, u8, 1);
+}
+cfg_has_atomic_16! {
+    atomic_int!(AtomicI16, i16, 2);
+    atomic_int!(AtomicU16, u16, 2);
+}
+cfg_has_atomic_32! {
+    atomic_int!(AtomicI32, i32, 4);
+    atomic_int!(AtomicU32, u32, 4);
+}
+cfg_has_atomic_64! {
+    atomic_int!(AtomicI64, i64, 8);
+    atomic_int!(AtomicU64, u64, 8);
+}
+cfg_has_atomic_128! {
+    atomic_int!(AtomicI128, i128, 16);
+    atomic_int!(AtomicU128, u128, 16);
+}
diff --git a/vendor/portable-atomic/src/tests/helper.rs b/vendor/portable-atomic/src/tests/helper.rs
new file mode 100644
index 0000000..65831c6
--- /dev/null
+++ b/vendor/portable-atomic/src/tests/helper.rs
@@ -0,0 +1,2420 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+#![allow(unused_macros)]
+
+use core::sync::atomic::Ordering;
+
+macro_rules! __test_atomic_common {
+    ($atomic_type:ty, $value_type:ty) => {
+        #[test]
+        fn assert_auto_traits() {
+            fn _assert<T: Send + Sync + Unpin + std::panic::UnwindSafe>() {}
+            _assert::<$atomic_type>();
+        }
+        #[test]
+        fn alignment() {
+            // https://github.com/rust-lang/rust/blob/1.70.0/library/core/tests/atomic.rs#L250
+            let atomic_size = core::mem::size_of::<$atomic_type>();
+            assert_eq!(core::mem::align_of::<$atomic_type>(), atomic_size);
+            assert_eq!(atomic_size, core::mem::size_of::<$value_type>());
+        }
+        #[test]
+        fn is_lock_free() {
+            const IS_ALWAYS_LOCK_FREE: bool = <$atomic_type>::is_always_lock_free();
+            assert_eq!(IS_ALWAYS_LOCK_FREE, <$atomic_type>::is_always_lock_free());
+            let is_lock_free = <$atomic_type>::is_lock_free();
+            if IS_ALWAYS_LOCK_FREE {
+                assert!(is_lock_free); // always-lock-free implies lock-free at run time
+            }
+        }
+    };
+}
+macro_rules! __test_atomic_pub_common {
+    ($atomic_type:ty, $value_type:ty) => { // `$value_type` is accepted but unused in this body
+        #[test]
+        fn assert_ref_unwind_safe() { // the two cfg predicates are complementary: one assertion runs
+            #[cfg(not(all(portable_atomic_no_core_unwind_safe, not(feature = "std"))))]
+            static_assertions::assert_impl_all!($atomic_type: std::panic::RefUnwindSafe);
+            #[cfg(all(portable_atomic_no_core_unwind_safe, not(feature = "std")))]
+            static_assertions::assert_not_impl_all!($atomic_type: std::panic::RefUnwindSafe);
+        }
+    };
+}
+
+macro_rules! __test_atomic_int_load_store {
+    ($atomic_type:ty, $int_type:ident, single_thread) => {
+        __test_atomic_common!($atomic_type, $int_type);
+        use crate::tests::helper::*;
+        #[test]
+        fn accessor() {
+            let mut atomic = <$atomic_type>::new(10);
+            assert_eq!(*atomic.get_mut(), 10);
+            *atomic.get_mut() = 5;
+            assert_eq!(atomic.as_ptr() as *const (), &atomic as *const _ as *const ());
+            assert_eq!(atomic.into_inner(), 5);
+        }
+        // Only ever loads from a static: https://bugs.llvm.org/show_bug.cgi?id=37061
+        #[test]
+        fn static_load_only() {
+            static VAR: $atomic_type = <$atomic_type>::new(10);
+            for order in test_helper::LOAD_ORDERINGS.iter().copied() {
+                assert_eq!(VAR.load(order), 10);
+            }
+        }
+        #[test]
+        fn load_store() {
+            static VAR: $atomic_type = <$atomic_type>::new(10);
+            test_load_ordering(|order| VAR.load(order));
+            test_store_ordering(|order| VAR.store(10, order));
+            for (&load_order, &store_order) in
+                test_helper::LOAD_ORDERINGS.iter().zip(test_helper::STORE_ORDERINGS.iter())
+            {
+                assert_eq!(VAR.load(load_order), 10);
+                VAR.store(5, store_order);
+                assert_eq!(VAR.load(load_order), 5);
+                VAR.store(10, store_order); // put the static back for the next iteration
+                let local = <$atomic_type>::new(1);
+                assert_eq!(local.load(load_order), 1);
+                local.store(2, store_order);
+                assert_eq!(local.load(load_order), 2);
+            }
+        }
+    };
+    ($atomic_type:ty, $int_type:ident) => {
+        __test_atomic_int_load_store!($atomic_type, $int_type, single_thread);
+        use crossbeam_utils::thread;
+        use std::{collections::BTreeSet, vec, vec::Vec};
+        #[test]
+        fn stress_load_store() {
+            let (iterations, threads) = stress_test_config();
+            let data1: Vec<_> = (0..iterations).map(|_| fastrand::$int_type(..)).collect();
+            let set: BTreeSet<_> = data1.iter().copied().collect();
+            let a = <$atomic_type>::new(data1[fastrand::usize(0..iterations)]);
+            let now = &std::time::Instant::now();
+            thread::scope(|s| {
+                for _ in 0..threads {
+                    s.spawn(|_| {
+                        let now = *now;
+                        for &value in &data1 {
+                            a.store(value, rand_store_ordering());
+                        }
+                        std::eprintln!("store end={:?}", now.elapsed());
+                    });
+                    s.spawn(|_| {
+                        let now = *now;
+                        let mut v = vec![0; iterations];
+                        for slot in v.iter_mut() {
+                            *slot = a.load(rand_load_ordering());
+                        }
+                        std::eprintln!("load end={:?}", now.elapsed());
+                        for v in v {
+                            assert!(set.contains(&v), "v={}", v);
+                        }
+                    });
+                }
+            })
+            .unwrap();
+        }
+    };
+}
+macro_rules! __test_atomic_float_load_store {
+    ($atomic_type:ty, $float_type:ident, single_thread) => {
+        __test_atomic_common!($atomic_type, $float_type);
+        use crate::tests::helper::*;
+        #[test]
+        fn accessor() {
+            let mut atomic = <$atomic_type>::new(10.0);
+            assert_eq!(*atomic.get_mut(), 10.0);
+            *atomic.get_mut() = 5.0;
+            assert_eq!(atomic.as_ptr() as *const (), &atomic as *const _ as *const ());
+            assert_eq!(atomic.into_inner(), 5.0);
+        }
+        // Only ever loads from a static: https://bugs.llvm.org/show_bug.cgi?id=37061
+        #[test]
+        fn static_load_only() {
+            static VAR: $atomic_type = <$atomic_type>::new(10.0);
+            for order in test_helper::LOAD_ORDERINGS.iter().copied() {
+                assert_eq!(VAR.load(order), 10.0);
+            }
+        }
+        #[test]
+        fn load_store() {
+            static VAR: $atomic_type = <$atomic_type>::new(10.0);
+            test_load_ordering(|order| VAR.load(order));
+            test_store_ordering(|order| VAR.store(10.0, order));
+            for (&load_order, &store_order) in
+                test_helper::LOAD_ORDERINGS.iter().zip(test_helper::STORE_ORDERINGS.iter())
+            {
+                assert_eq!(VAR.load(load_order), 10.0);
+                VAR.store(5.0, store_order);
+                assert_eq!(VAR.load(load_order), 5.0);
+                VAR.store(10.0, store_order); // put the static back for the next iteration
+                let local = <$atomic_type>::new(1.0);
+                assert_eq!(local.load(load_order), 1.0);
+                local.store(2.0, store_order);
+                assert_eq!(local.load(load_order), 2.0);
+            }
+        }
+    };
+    ($atomic_type:ty, $float_type:ident) => {
+        __test_atomic_float_load_store!($atomic_type, $float_type, single_thread);
+        // TODO: add a multi-threaded stress test
+    };
+}
+macro_rules! __test_atomic_bool_load_store {
+    ($atomic_type:ty, single_thread) => {
+        __test_atomic_common!($atomic_type, bool);
+        use crate::tests::helper::*;
+        #[test]
+        fn accessor() {
+            let mut atomic = <$atomic_type>::new(false);
+            assert_eq!(*atomic.get_mut(), false);
+            *atomic.get_mut() = true;
+            assert_eq!(atomic.as_ptr() as *const (), &atomic as *const _ as *const ());
+            assert_eq!(atomic.into_inner(), true);
+        }
+        // Only ever loads from a static: https://bugs.llvm.org/show_bug.cgi?id=37061
+        #[test]
+        fn static_load_only() {
+            static VAR: $atomic_type = <$atomic_type>::new(false);
+            for order in test_helper::LOAD_ORDERINGS.iter().copied() {
+                assert_eq!(VAR.load(order), false);
+            }
+        }
+        #[test]
+        fn load_store() {
+            static VAR: $atomic_type = <$atomic_type>::new(false);
+            test_load_ordering(|order| VAR.load(order));
+            test_store_ordering(|order| VAR.store(false, order));
+            for (&load_order, &store_order) in
+                test_helper::LOAD_ORDERINGS.iter().zip(test_helper::STORE_ORDERINGS.iter())
+            {
+                assert_eq!(VAR.load(load_order), false);
+                VAR.store(true, store_order);
+                assert_eq!(VAR.load(load_order), true);
+                VAR.store(false, store_order); // put the static back for the next iteration
+                let local = <$atomic_type>::new(true);
+                assert_eq!(local.load(load_order), true);
+                local.store(false, store_order);
+                assert_eq!(local.load(load_order), false);
+            }
+        }
+    };
+    ($atomic_type:ty) => {
+        __test_atomic_bool_load_store!($atomic_type, single_thread);
+        // TODO: add a multi-threaded stress test
+    };
+}
+macro_rules! __test_atomic_ptr_load_store {
+    ($atomic_type:ty, single_thread) => {
+        __test_atomic_common!($atomic_type, *mut u8);
+        use crate::tests::helper::*;
+        use std::ptr;
+        #[test]
+        fn accessor() {
+            let mut pointee = 1;
+            let mut a = <$atomic_type>::new(ptr::null_mut());
+            assert!(a.get_mut().is_null());
+            *a.get_mut() = &mut pointee;
+            assert_eq!(a.as_ptr() as *const (), &a as *const _ as *const ());
+            assert!(!a.into_inner().is_null());
+        }
+        // Only ever loads from a static: https://bugs.llvm.org/show_bug.cgi?id=37061
+        #[test]
+        fn static_load_only() {
+            static VAR: $atomic_type = <$atomic_type>::new(ptr::null_mut());
+            for order in test_helper::LOAD_ORDERINGS.iter().copied() {
+                assert_eq!(VAR.load(order), ptr::null_mut());
+            }
+        }
+        #[test]
+        fn load_store() {
+            static VAR: $atomic_type = <$atomic_type>::new(ptr::null_mut());
+            test_load_ordering(|order| VAR.load(order));
+            test_store_ordering(|order| VAR.store(ptr::null_mut(), order));
+            let mut pointee = 1_u8;
+            let target: *mut u8 = &mut pointee;
+            for (&load_order, &store_order) in
+                test_helper::LOAD_ORDERINGS.iter().zip(test_helper::STORE_ORDERINGS.iter())
+            {
+                assert_eq!(VAR.load(load_order), ptr::null_mut());
+                VAR.store(target, store_order);
+                assert_eq!(VAR.load(load_order), target);
+                VAR.store(ptr::null_mut(), store_order); // reset the static for the next iteration
+                let local = <$atomic_type>::new(target);
+                assert_eq!(local.load(load_order), target);
+                local.store(ptr::null_mut(), store_order);
+                assert_eq!(local.load(load_order), ptr::null_mut());
+            }
+        }
+    };
+    ($atomic_type:ty) => {
+        __test_atomic_ptr_load_store!($atomic_type, single_thread);
+        // TODO: add a multi-threaded stress test
+    };
+}
+
+macro_rules! __test_atomic_int {
+    ($atomic_type:ty, $int_type:ident, single_thread) => {
+        use core::$int_type;
+        #[test]
+        fn swap() {
+            let a = <$atomic_type>::new(5);
+            test_swap_ordering(|order| a.swap(5, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                assert_eq!(a.swap(10, order), 5);
+                assert_eq!(a.swap(5, order), 10);
+            }
+        }
+        #[test]
+        fn compare_exchange() {
+            let a = <$atomic_type>::new(5);
+            test_compare_exchange_ordering(|success, failure| {
+                a.compare_exchange(5, 5, success, failure)
+            });
+            for &(success, failure) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let a = <$atomic_type>::new(5);
+                assert_eq!(a.compare_exchange(5, 10, success, failure), Ok(5));
+                assert_eq!(a.load(Ordering::Relaxed), 10);
+                assert_eq!(a.compare_exchange(6, 12, success, failure), Err(10));
+                assert_eq!(a.load(Ordering::Relaxed), 10);
+            }
+        }
+        #[test]
+        fn compare_exchange_weak() {
+            let a = <$atomic_type>::new(4);
+            test_compare_exchange_ordering(|success, failure| {
+                a.compare_exchange_weak(4, 4, success, failure)
+            });
+            for &(success, failure) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let a = <$atomic_type>::new(4);
+                assert_eq!(a.compare_exchange_weak(6, 8, success, failure), Err(4));
+                let mut old = a.load(Ordering::Relaxed);
+                loop {
+                    let new = old * 2;
+                    match a.compare_exchange_weak(old, new, success, failure) {
+                        Ok(_) => break,
+                        Err(x) => old = x,
+                    }
+                }
+                assert_eq!(a.load(Ordering::Relaxed), 8);
+            }
+        }
+        #[test]
+        fn fetch_add() {
+            let a = <$atomic_type>::new(0);
+            test_swap_ordering(|order| a.fetch_add(0, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0);
+                assert_eq!(a.fetch_add(10, order), 0);
+                assert_eq!(a.load(Ordering::Relaxed), 10);
+                let a = <$atomic_type>::new($int_type::MAX);
+                assert_eq!(a.fetch_add(1, order), $int_type::MAX);
+                assert_eq!(a.load(Ordering::Relaxed), $int_type::MAX.wrapping_add(1));
+            }
+        }
+        #[test]
+        fn add() {
+            let a = <$atomic_type>::new(0);
+            test_swap_ordering(|order| a.add(0, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0);
+                a.add(10, order);
+                assert_eq!(a.load(Ordering::Relaxed), 10);
+                let a = <$atomic_type>::new($int_type::MAX);
+                a.add(1, order);
+                assert_eq!(a.load(Ordering::Relaxed), $int_type::MAX.wrapping_add(1));
+            }
+        }
+        #[test]
+        fn fetch_sub() {
+            let a = <$atomic_type>::new(20);
+            test_swap_ordering(|order| a.fetch_sub(0, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(20);
+                assert_eq!(a.fetch_sub(10, order), 20);
+                assert_eq!(a.load(Ordering::Relaxed), 10);
+                let a = <$atomic_type>::new($int_type::MIN);
+                assert_eq!(a.fetch_sub(1, order), $int_type::MIN);
+                assert_eq!(a.load(Ordering::Relaxed), $int_type::MIN.wrapping_sub(1));
+            }
+        }
+        #[test]
+        fn sub() {
+            let a = <$atomic_type>::new(20);
+            test_swap_ordering(|order| a.sub(0, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(20);
+                a.sub(10, order);
+                assert_eq!(a.load(Ordering::Relaxed), 10);
+                let a = <$atomic_type>::new($int_type::MIN);
+                a.sub(1, order);
+                assert_eq!(a.load(Ordering::Relaxed), $int_type::MIN.wrapping_sub(1));
+            }
+        }
+        #[test]
+        fn fetch_and() {
+            let a = <$atomic_type>::new(0b101101);
+            test_swap_ordering(|order| a.fetch_and(0b101101, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0b101101);
+                assert_eq!(a.fetch_and(0b110011, order), 0b101101);
+                assert_eq!(a.load(Ordering::Relaxed), 0b100001);
+            }
+        }
+        #[test]
+        fn and() {
+            let a = <$atomic_type>::new(0b101101);
+            test_swap_ordering(|order| a.and(0b101101, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0b101101);
+                a.and(0b110011, order);
+                assert_eq!(a.load(Ordering::Relaxed), 0b100001);
+            }
+        }
+        #[test]
+        fn fetch_nand() {
+            let a = <$atomic_type>::new(0x13);
+            test_swap_ordering(|order| a.fetch_nand(0x31, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0x13);
+                assert_eq!(a.fetch_nand(0x31, order), 0x13);
+                assert_eq!(a.load(Ordering::Relaxed), !(0x13 & 0x31));
+            }
+        }
+        #[test]
+        fn fetch_or() {
+            let a = <$atomic_type>::new(0b101101);
+            test_swap_ordering(|order| a.fetch_or(0, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0b101101);
+                assert_eq!(a.fetch_or(0b110011, order), 0b101101);
+                assert_eq!(a.load(Ordering::Relaxed), 0b111111);
+            }
+        }
+        #[test]
+        fn or() {
+            let a = <$atomic_type>::new(0b101101);
+            test_swap_ordering(|order| a.or(0, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0b101101);
+                a.or(0b110011, order);
+                assert_eq!(a.load(Ordering::Relaxed), 0b111111);
+            }
+        }
+        #[test]
+        fn fetch_xor() {
+            let a = <$atomic_type>::new(0b101101);
+            test_swap_ordering(|order| a.fetch_xor(0, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0b101101);
+                assert_eq!(a.fetch_xor(0b110011, order), 0b101101);
+                assert_eq!(a.load(Ordering::Relaxed), 0b011110);
+            }
+        }
+        #[test]
+        fn xor() {
+            let a = <$atomic_type>::new(0b101101);
+            test_swap_ordering(|order| a.xor(0, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0b101101);
+                a.xor(0b110011, order);
+                assert_eq!(a.load(Ordering::Relaxed), 0b011110);
+            }
+        }
+        #[test]
+        fn fetch_max() {
+            let a = <$atomic_type>::new(23);
+            test_swap_ordering(|order| a.fetch_max(23, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(23);
+                assert_eq!(a.fetch_max(22, order), 23);
+                assert_eq!(a.load(Ordering::Relaxed), 23);
+                assert_eq!(a.fetch_max(24, order), 23);
+                assert_eq!(a.load(Ordering::Relaxed), 24);
+                let a = <$atomic_type>::new(0);
+                assert_eq!(a.fetch_max(1, order), 0);
+                assert_eq!(a.load(Ordering::Relaxed), 1);
+                assert_eq!(a.fetch_max(0, order), 1);
+                assert_eq!(a.load(Ordering::Relaxed), 1);
+                let a = <$atomic_type>::new(!0);
+                assert_eq!(a.fetch_max(0, order), !0);
+                assert_eq!(a.load(Ordering::Relaxed), core::cmp::max(!0, 0));
+            }
+        }
+        #[test]
+        fn fetch_min() {
+            let a = <$atomic_type>::new(23);
+            test_swap_ordering(|order| a.fetch_min(23, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(23);
+                assert_eq!(a.fetch_min(24, order), 23);
+                assert_eq!(a.load(Ordering::Relaxed), 23);
+                assert_eq!(a.fetch_min(22, order), 23);
+                assert_eq!(a.load(Ordering::Relaxed), 22);
+                let a = <$atomic_type>::new(1);
+                assert_eq!(a.fetch_min(0, order), 1);
+                assert_eq!(a.load(Ordering::Relaxed), 0);
+                assert_eq!(a.fetch_min(1, order), 0);
+                assert_eq!(a.load(Ordering::Relaxed), 0);
+                let a = <$atomic_type>::new(!0);
+                assert_eq!(a.fetch_min(0, order), !0);
+                assert_eq!(a.load(Ordering::Relaxed), core::cmp::min(!0, 0));
+            }
+        }
+        #[test]
+        fn fetch_not() {
+            let a = <$atomic_type>::new(1);
+            test_swap_ordering(|order| a.fetch_not(order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(1);
+                assert_eq!(a.fetch_not(order), 1);
+                assert_eq!(a.load(Ordering::Relaxed), !1);
+            }
+        }
+        #[test]
+        fn not() {
+            let a = <$atomic_type>::new(1);
+            test_swap_ordering(|order| a.not(order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(1);
+                a.not(order);
+                assert_eq!(a.load(Ordering::Relaxed), !1);
+            }
+        }
+        #[test]
+        fn fetch_neg() {
+            let a = <$atomic_type>::new(5);
+            test_swap_ordering(|order| a.fetch_neg(order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(5);
+                assert_eq!(a.fetch_neg(order), 5);
+                assert_eq!(a.load(Ordering::Relaxed), <$int_type>::wrapping_neg(5));
+                assert_eq!(a.fetch_neg(order), <$int_type>::wrapping_neg(5));
+                assert_eq!(a.load(Ordering::Relaxed), 5);
+                let a = <$atomic_type>::new(<$int_type>::MIN);
+                assert_eq!(a.fetch_neg(order), <$int_type>::MIN);
+                assert_eq!(a.load(Ordering::Relaxed), <$int_type>::MIN.wrapping_neg());
+                assert_eq!(a.fetch_neg(order), <$int_type>::MIN.wrapping_neg());
+                assert_eq!(a.load(Ordering::Relaxed), <$int_type>::MIN);
+            }
+        }
+        #[test]
+        fn neg() {
+            let a = <$atomic_type>::new(5);
+            test_swap_ordering(|order| a.neg(order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(5);
+                a.neg(order);
+                assert_eq!(a.load(Ordering::Relaxed), <$int_type>::wrapping_neg(5));
+                a.neg(order);
+                assert_eq!(a.load(Ordering::Relaxed), 5);
+                let a = <$atomic_type>::new(<$int_type>::MIN);
+                a.neg(order);
+                assert_eq!(a.load(Ordering::Relaxed), <$int_type>::MIN.wrapping_neg());
+                a.neg(order);
+                assert_eq!(a.load(Ordering::Relaxed), <$int_type>::MIN);
+            }
+        }
+        #[test]
+        fn bit_set() {
+            let a = <$atomic_type>::new(0b0001);
+            test_swap_ordering(|order| assert!(a.bit_set(0, order)));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0b0000);
+                assert!(!a.bit_set(0, order));
+                assert_eq!(a.load(Ordering::Relaxed), 0b0001);
+                assert!(a.bit_set(0, order));
+                assert_eq!(a.load(Ordering::Relaxed), 0b0001);
+            }
+        }
+        #[test]
+        fn bit_clear() {
+            let a = <$atomic_type>::new(0b0000);
+            test_swap_ordering(|order| assert!(!a.bit_clear(0, order)));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0b0001);
+                assert!(a.bit_clear(0, order));
+                assert_eq!(a.load(Ordering::Relaxed), 0b0000);
+                assert!(!a.bit_clear(0, order));
+                assert_eq!(a.load(Ordering::Relaxed), 0b0000);
+            }
+        }
+        #[test]
+        fn bit_toggle() {
+            let a = <$atomic_type>::new(0b0000);
+            test_swap_ordering(|order| a.bit_toggle(0, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(0b0000);
+                assert!(!a.bit_toggle(0, order));
+                assert_eq!(a.load(Ordering::Relaxed), 0b0001);
+                assert!(a.bit_toggle(0, order));
+                assert_eq!(a.load(Ordering::Relaxed), 0b0000);
+            }
+        }
+        ::quickcheck::quickcheck! { // property tests: each fn is run against generated operands
+            fn quickcheck_swap(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    assert_eq!(atom.swap(y, ord), x);
+                    assert_eq!(atom.swap(x, ord), y);
+                }
+                true // mismatches panic inside assert_eq!, so reaching here means success
+            }
+            fn quickcheck_compare_exchange(x: $int_type, y: $int_type) -> bool {
+                #[cfg(all(
+                    target_arch = "arm",
+                    not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+                ))]
+                {
+                    // TODO: LLVM bug:
+                    // https://github.com/llvm/llvm-project/issues/61880
+                    // https://github.com/taiki-e/portable-atomic/issues/2
+                    if core::mem::size_of::<$int_type>() <= 2 {
+                        return true;
+                    }
+                }
+                let other = loop { // draw random values until one differs from `y`
+                    let candidate = fastrand::$int_type(..);
+                    if candidate != y {
+                        break candidate;
+                    }
+                };
+                for &(ok_ord, err_ord) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    assert_eq!(atom.compare_exchange(x, y, ok_ord, err_ord).unwrap(), x);
+                    assert_eq!(atom.load(Ordering::Relaxed), y);
+                    assert_eq!(atom.compare_exchange(other, x, ok_ord, err_ord).unwrap_err(), y);
+                    assert_eq!(atom.load(Ordering::Relaxed), y); // failed CAS leaves `y` in place
+                }
+                true
+            }
+            fn quickcheck_fetch_add(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_eq!(fwd.fetch_add(y, ord), x);
+                    assert_eq!(fwd.load(Ordering::Relaxed), x.wrapping_add(y));
+                    let rev = <$atomic_type>::new(y); // same check with the operands exchanged
+                    assert_eq!(rev.fetch_add(x, ord), y);
+                    assert_eq!(rev.load(Ordering::Relaxed), y.wrapping_add(x));
+                }
+                true
+            }
+            fn quickcheck_add(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    fwd.add(y, ord);
+                    assert_eq!(fwd.load(Ordering::Relaxed), x.wrapping_add(y));
+                    let rev = <$atomic_type>::new(y);
+                    rev.add(x, ord);
+                    assert_eq!(rev.load(Ordering::Relaxed), y.wrapping_add(x));
+                }
+                true
+            }
+            fn quickcheck_fetch_sub(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_eq!(fwd.fetch_sub(y, ord), x);
+                    assert_eq!(fwd.load(Ordering::Relaxed), x.wrapping_sub(y));
+                    let rev = <$atomic_type>::new(y);
+                    assert_eq!(rev.fetch_sub(x, ord), y);
+                    assert_eq!(rev.load(Ordering::Relaxed), y.wrapping_sub(x));
+                }
+                true
+            }
+            fn quickcheck_sub(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    fwd.sub(y, ord);
+                    assert_eq!(fwd.load(Ordering::Relaxed), x.wrapping_sub(y));
+                    let rev = <$atomic_type>::new(y);
+                    rev.sub(x, ord);
+                    assert_eq!(rev.load(Ordering::Relaxed), y.wrapping_sub(x));
+                }
+                true
+            }
+            fn quickcheck_fetch_and(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_eq!(fwd.fetch_and(y, ord), x);
+                    assert_eq!(fwd.load(Ordering::Relaxed), x & y);
+                    let rev = <$atomic_type>::new(y);
+                    assert_eq!(rev.fetch_and(x, ord), y);
+                    assert_eq!(rev.load(Ordering::Relaxed), y & x);
+                }
+                true
+            }
+            fn quickcheck_and(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    fwd.and(y, ord);
+                    assert_eq!(fwd.load(Ordering::Relaxed), x & y);
+                    let rev = <$atomic_type>::new(y);
+                    rev.and(x, ord);
+                    assert_eq!(rev.load(Ordering::Relaxed), y & x);
+                }
+                true
+            }
+            fn quickcheck_fetch_nand(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_eq!(fwd.fetch_nand(y, ord), x);
+                    assert_eq!(fwd.load(Ordering::Relaxed), !(x & y));
+                    let rev = <$atomic_type>::new(y);
+                    assert_eq!(rev.fetch_nand(x, ord), y);
+                    assert_eq!(rev.load(Ordering::Relaxed), !(y & x));
+                }
+                true
+            }
+            fn quickcheck_fetch_or(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_eq!(fwd.fetch_or(y, ord), x);
+                    assert_eq!(fwd.load(Ordering::Relaxed), x | y);
+                    let rev = <$atomic_type>::new(y);
+                    assert_eq!(rev.fetch_or(x, ord), y);
+                    assert_eq!(rev.load(Ordering::Relaxed), y | x);
+                }
+                true
+            }
+            fn quickcheck_or(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    fwd.or(y, ord);
+                    assert_eq!(fwd.load(Ordering::Relaxed), x | y);
+                    let rev = <$atomic_type>::new(y);
+                    rev.or(x, ord);
+                    assert_eq!(rev.load(Ordering::Relaxed), y | x);
+                }
+                true
+            }
+            fn quickcheck_fetch_xor(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_eq!(fwd.fetch_xor(y, ord), x);
+                    assert_eq!(fwd.load(Ordering::Relaxed), x ^ y);
+                    let rev = <$atomic_type>::new(y);
+                    assert_eq!(rev.fetch_xor(x, ord), y);
+                    assert_eq!(rev.load(Ordering::Relaxed), y ^ x);
+                }
+                true
+            }
+            fn quickcheck_xor(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    fwd.xor(y, ord);
+                    assert_eq!(fwd.load(Ordering::Relaxed), x ^ y);
+                    let rev = <$atomic_type>::new(y);
+                    rev.xor(x, ord);
+                    assert_eq!(rev.load(Ordering::Relaxed), y ^ x);
+                }
+                true
+            }
+            fn quickcheck_fetch_max(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_eq!(fwd.fetch_max(y, ord), x);
+                    assert_eq!(fwd.load(Ordering::Relaxed), core::cmp::max(x, y));
+                    let rev = <$atomic_type>::new(y);
+                    assert_eq!(rev.fetch_max(x, ord), y);
+                    assert_eq!(rev.load(Ordering::Relaxed), core::cmp::max(y, x));
+                }
+                true
+            }
+            fn quickcheck_fetch_min(x: $int_type, y: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_eq!(fwd.fetch_min(y, ord), x);
+                    assert_eq!(fwd.load(Ordering::Relaxed), core::cmp::min(x, y));
+                    let rev = <$atomic_type>::new(y);
+                    assert_eq!(rev.fetch_min(x, ord), y);
+                    assert_eq!(rev.load(Ordering::Relaxed), core::cmp::min(y, x));
+                }
+                true
+            }
+            fn quickcheck_fetch_not(x: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    assert_eq!(atom.fetch_not(ord), x);
+                    assert_eq!(atom.load(Ordering::Relaxed), !x);
+                    assert_eq!(atom.fetch_not(ord), !x);
+                    assert_eq!(atom.load(Ordering::Relaxed), x); // applying it twice restores `x`
+                }
+                true
+            }
+            fn quickcheck_not(x: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    atom.not(ord);
+                    assert_eq!(atom.load(Ordering::Relaxed), !x);
+                    atom.not(ord);
+                    assert_eq!(atom.load(Ordering::Relaxed), x);
+                }
+                true
+            }
+            fn quickcheck_fetch_neg(x: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    assert_eq!(atom.fetch_neg(ord), x);
+                    assert_eq!(atom.load(Ordering::Relaxed), x.wrapping_neg());
+                    assert_eq!(atom.fetch_neg(ord), x.wrapping_neg());
+                    assert_eq!(atom.load(Ordering::Relaxed), x); // applying it twice restores `x`
+                }
+                true
+            }
+            fn quickcheck_neg(x: $int_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    atom.neg(ord);
+                    assert_eq!(atom.load(Ordering::Relaxed), x.wrapping_neg());
+                    atom.neg(ord);
+                    assert_eq!(atom.load(Ordering::Relaxed), x);
+                }
+                true
+            }
+            fn quickcheck_bit_set(x: $int_type, bit: u32) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    let bit_mask = <$int_type>::wrapping_shl(1, bit);
+                    let was_set = atom.bit_set(bit, ord);
+                    assert_eq!(atom.load(Ordering::Relaxed), x | bit_mask);
+                    assert_eq!(was_set, (x & bit_mask) != 0);
+                }
+                true
+            }
+            fn quickcheck_bit_clear(x: $int_type, bit: u32) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    let bit_mask = <$int_type>::wrapping_shl(1, bit);
+                    let was_set = atom.bit_clear(bit, ord);
+                    assert_eq!(atom.load(Ordering::Relaxed), x & !bit_mask);
+                    assert_eq!(was_set, (x & bit_mask) != 0);
+                }
+                true
+            }
+            fn quickcheck_bit_toggle(x: $int_type, bit: u32) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    let bit_mask = <$int_type>::wrapping_shl(1, bit);
+                    let was_set = atom.bit_toggle(bit, ord);
+                    assert_eq!(atom.load(Ordering::Relaxed), x ^ bit_mask);
+                    assert_eq!(was_set, (x & bit_mask) != 0);
+                }
+                true
+            }
+        }
+    };
+    ($atomic_type:ty, $int_type:ident) => {
+        __test_atomic_int!($atomic_type, $int_type, single_thread);
+
+        #[test] // Stress test: concurrent store/load/swap may only ever observe generated values.
+        fn stress_swap() {
+            let (iterations, threads) = stress_test_config();
+            let data1 = &(0..threads) // values fed to store(): `iterations` random ints per index
+                .map(|_| (0..iterations).map(|_| fastrand::$int_type(..)).collect::<Vec<_>>())
+                .collect::<Vec<_>>();
+            let data2 = &(0..threads) // values fed to swap(), same layout as data1
+                .map(|_| (0..iterations).map(|_| fastrand::$int_type(..)).collect::<Vec<_>>())
+                .collect::<Vec<_>>();
+            let set = &data1 // every value of data1 and data2, i.e. all values ever written
+                .iter()
+                .flat_map(|v| v.iter().copied())
+                .chain(data2.iter().flat_map(|v| v.iter().copied()))
+                .collect::<BTreeSet<_>>();
+            let a = &<$atomic_type>::new(data2[0][fastrand::usize(0..iterations)]); // initial value is in `set` too
+            let now = &std::time::Instant::now(); // only used for the elapsed-time log lines
+            thread::scope(|s| { // NOTE(review): presumably crossbeam-style scoped threads — confirm
+                for thread in 0..threads {
+                    if thread % 2 == 0 { // even indices spawn a storer, odd indices a loader
+                        s.spawn(move |_| {
+                            let now = *now;
+                            for i in 0..iterations {
+                                a.store(data1[thread][i], rand_store_ordering());
+                            }
+                            std::eprintln!("store end={:?}", now.elapsed());
+                        });
+                    } else {
+                        s.spawn(|_| {
+                            let now = *now;
+                            let mut v = vec![0; iterations]; // every slot is overwritten by the loop below
+                            for i in 0..iterations {
+                                v[i] = a.load(rand_load_ordering());
+                            }
+                            std::eprintln!("load end={:?}", now.elapsed());
+                            for v in v { // checked after the loop, outside the timed section
+                                assert!(set.contains(&v), "v={}", v);
+                            }
+                        });
+                    }
+                    s.spawn(move |_| { // every index additionally spawns a swapper fed from data2[thread]
+                        let now = *now;
+                        let mut v = vec![0; iterations]; // every slot is overwritten by the loop below
+                        for i in 0..iterations {
+                            v[i] = a.swap(data2[thread][i], rand_swap_ordering());
+                        }
+                        std::eprintln!("swap end={:?}", now.elapsed());
+                        for v in v { // each value returned by swap must be one that was written
+                            assert!(set.contains(&v), "v={}", v);
+                        }
+                    });
+                }
+            })
+            .unwrap(); // presumably Err when a spawned thread panicked — confirm against the scope API
+        }
+        #[test] // Stress test: concurrent store/load/compare_exchange may only observe generated values.
+        fn stress_compare_exchange() {
+            let (iterations, threads) = stress_test_config();
+            let data1 = &(0..threads) // values fed to store(): `iterations` random ints per index
+                .map(|_| (0..iterations).map(|_| fastrand::$int_type(..)).collect::<Vec<_>>())
+                .collect::<Vec<_>>();
+            let data2 = &(0..threads) // replacement values for compare_exchange, same layout
+                .map(|_| (0..iterations).map(|_| fastrand::$int_type(..)).collect::<Vec<_>>())
+                .collect::<Vec<_>>();
+            let set = &data1 // every value of data1 and data2, i.e. all values ever written
+                .iter()
+                .flat_map(|v| v.iter().copied())
+                .chain(data2.iter().flat_map(|v| v.iter().copied()))
+                .collect::<BTreeSet<_>>();
+            let a = &<$atomic_type>::new(data2[0][fastrand::usize(0..iterations)]); // initial value is in `set` too
+            let now = &std::time::Instant::now(); // only used for the elapsed-time log lines
+            thread::scope(|s| { // NOTE(review): presumably crossbeam-style scoped threads — confirm
+                for thread in 0..threads { // each index spawns three threads: storer, loader, CAS
+                    s.spawn(move |_| {
+                        let now = *now;
+                        for i in 0..iterations {
+                            a.store(data1[thread][i], rand_store_ordering());
+                        }
+                        std::eprintln!("store end={:?}", now.elapsed());
+                    });
+                    s.spawn(|_| {
+                        let now = *now;
+                        let mut v = vec![data2[0][0]; iterations]; // every slot is overwritten by the loop below
+                        for i in 0..iterations {
+                            v[i] = a.load(rand_load_ordering());
+                        }
+                        std::eprintln!("load end={:?}", now.elapsed());
+                        for v in v {
+                            assert!(set.contains(&v), "v={}", v);
+                        }
+                    });
+                    s.spawn(move |_| {
+                        let now = *now;
+                        let mut v = vec![data2[0][0]; iterations]; // in-set filler: slots change only when a CAS fails
+                        for i in 0..iterations {
+                            let old = if i % 2 == 0 { // alternate: random expected value / freshly loaded one
+                                fastrand::$int_type(..)
+                            } else {
+                                a.load(Ordering::Relaxed)
+                            };
+                            let new = data2[thread][i];
+                            let o = rand_compare_exchange_ordering(); // (success, failure) ordering pair
+                            match a.compare_exchange(old, new, o.0, o.1) {
+                                Ok(r) => assert_eq!(old, r), // on success the returned value must equal `old`
+                                Err(r) => v[i] = r, // on failure keep the observed value for the check below
+                            }
+                        }
+                        std::eprintln!("compare_exchange end={:?}", now.elapsed());
+                        for v in v {
+                            assert!(set.contains(&v), "v={}", v);
+                        }
+                    });
+                }
+            })
+            .unwrap(); // presumably Err when a spawned thread panicked — confirm against the scope API
+        }
+    };
+}
+macro_rules! __test_atomic_float { // expands to the #[test] fns for one atomic float type
+    ($atomic_type:ty, $float_type:ident, single_thread) => {
+        use core::$float_type;
+        #[test] // swap returns the value it replaced
+        fn swap() {
+            let atom = <$atomic_type>::new(5.0);
+            test_swap_ordering(|ord| atom.swap(5.0, ord));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                assert_eq!(atom.swap(10.0, ord), 5.0);
+                assert_eq!(atom.swap(5.0, ord), 10.0);
+            }
+        }
+        #[test] // Ok(previous) on a matching expected value, Err(current) otherwise
+        fn compare_exchange() {
+            let probe = <$atomic_type>::new(5.0);
+            test_compare_exchange_ordering(|ok_ord, err_ord| {
+                probe.compare_exchange(5.0, 5.0, ok_ord, err_ord)
+            });
+            for &(ok_ord, err_ord) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let atom = <$atomic_type>::new(5.0);
+                assert_eq!(atom.compare_exchange(5.0, 10.0, ok_ord, err_ord), Ok(5.0));
+                assert_eq!(atom.load(Ordering::Relaxed), 10.0);
+                assert_eq!(atom.compare_exchange(6.0, 12.0, ok_ord, err_ord), Err(10.0));
+                assert_eq!(atom.load(Ordering::Relaxed), 10.0);
+            }
+        }
+        #[test] // the weak variant is retried in a loop until it reports success
+        fn compare_exchange_weak() {
+            let probe = <$atomic_type>::new(4.0);
+            test_compare_exchange_ordering(|ok_ord, err_ord| {
+                probe.compare_exchange_weak(4.0, 4.0, ok_ord, err_ord)
+            });
+            for &(ok_ord, err_ord) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let atom = <$atomic_type>::new(4.0);
+                assert_eq!(atom.compare_exchange_weak(6.0, 8.0, ok_ord, err_ord), Err(4.0));
+                let mut current = atom.load(Ordering::Relaxed);
+                loop {
+                    let doubled = current * 2.0;
+                    match atom.compare_exchange_weak(current, doubled, ok_ord, err_ord) {
+                        Ok(_) => break,
+                        Err(seen) => current = seen,
+                    }
+                }
+                assert_eq!(atom.load(Ordering::Relaxed), 8.0);
+            }
+        }
+        #[test] // result is compared against plain float addition, including at MAX
+        fn fetch_add() {
+            let probe = <$atomic_type>::new(0.0);
+            test_swap_ordering(|ord| probe.fetch_add(0.0, ord));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                let atom = <$atomic_type>::new(0.0);
+                assert_eq!(atom.fetch_add(10.0, ord), 0.0);
+                assert_eq!(atom.load(Ordering::Relaxed), 10.0);
+                let at_max = <$atomic_type>::new($float_type::MAX);
+                assert_eq!(at_max.fetch_add(1.0, ord), $float_type::MAX);
+                assert_eq!(at_max.load(Ordering::Relaxed), $float_type::MAX + 1.0);
+            }
+        }
+        #[test] // result is compared against plain float subtraction, including at MIN
+        fn fetch_sub() {
+            let probe = <$atomic_type>::new(20.0);
+            test_swap_ordering(|ord| probe.fetch_sub(0.0, ord));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                let atom = <$atomic_type>::new(20.0);
+                assert_eq!(atom.fetch_sub(10.0, ord), 20.0);
+                assert_eq!(atom.load(Ordering::Relaxed), 10.0);
+                let at_min = <$atomic_type>::new($float_type::MIN);
+                assert_eq!(at_min.fetch_sub(1.0, ord), $float_type::MIN);
+                assert_eq!(at_min.load(Ordering::Relaxed), $float_type::MIN - 1.0);
+            }
+        }
+        #[test] // the stored value only changes when the operand is larger
+        fn fetch_max() {
+            let probe = <$atomic_type>::new(23.0);
+            test_swap_ordering(|ord| probe.fetch_max(23.0, ord));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                let atom = <$atomic_type>::new(23.0);
+                assert_eq!(atom.fetch_max(22.0, ord), 23.0);
+                assert_eq!(atom.load(Ordering::Relaxed), 23.0);
+                assert_eq!(atom.fetch_max(24.0, ord), 23.0);
+                assert_eq!(atom.load(Ordering::Relaxed), 24.0);
+            }
+        }
+        #[test] // the stored value only changes when the operand is smaller
+        fn fetch_min() {
+            let probe = <$atomic_type>::new(23.0);
+            test_swap_ordering(|ord| probe.fetch_min(23.0, ord));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                let atom = <$atomic_type>::new(23.0);
+                assert_eq!(atom.fetch_min(24.0, ord), 23.0);
+                assert_eq!(atom.load(Ordering::Relaxed), 23.0);
+                assert_eq!(atom.fetch_min(22.0, ord), 23.0);
+                assert_eq!(atom.load(Ordering::Relaxed), 22.0);
+            }
+        }
+        #[test] // negating twice restores the starting value
+        fn fetch_neg() {
+            let probe = <$atomic_type>::new(5.0);
+            test_swap_ordering(|ord| probe.fetch_neg(ord));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                let atom = <$atomic_type>::new(5.0);
+                assert_eq!(atom.fetch_neg(ord), 5.0);
+                assert_eq!(atom.load(Ordering::Relaxed), -5.0);
+                assert_eq!(atom.fetch_neg(ord), -5.0);
+                assert_eq!(atom.load(Ordering::Relaxed), 5.0);
+            }
+        }
+        #[test] // a second fetch_abs leaves the already-positive value unchanged
+        fn fetch_abs() {
+            let probe = <$atomic_type>::new(23.0);
+            test_swap_ordering(|ord| probe.fetch_abs(ord));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                let atom = <$atomic_type>::new(-23.0);
+                assert_eq!(atom.fetch_abs(ord), -23.0);
+                assert_eq!(atom.load(Ordering::Relaxed), 23.0);
+                assert_eq!(atom.fetch_abs(ord), 23.0);
+                assert_eq!(atom.load(Ordering::Relaxed), 23.0);
+            }
+        }
+        ::quickcheck::quickcheck! { // property tests: each fn is run against generated operands
+            fn quickcheck_swap(x: $float_type, y: $float_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    assert_float_op_eq!(atom.swap(y, ord), x);
+                    assert_float_op_eq!(atom.swap(x, ord), y);
+                }
+                true // mismatches panic inside the assert macro, so reaching here means success
+            }
+            fn quickcheck_compare_exchange(x: $float_type, y: $float_type) -> bool {
+                let other = loop { // draw random values until one compares unequal to `y`
+                    let candidate = fastrand::$float_type();
+                    if candidate != y {
+                        break candidate;
+                    }
+                };
+                for &(ok_ord, err_ord) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    assert_float_op_eq!(atom.compare_exchange(x, y, ok_ord, err_ord).unwrap(), x);
+                    assert_float_op_eq!(atom.load(Ordering::Relaxed), y);
+                    assert_float_op_eq!(
+                        atom.compare_exchange(other, x, ok_ord, err_ord).unwrap_err(),
+                        y,
+                    );
+                    assert_float_op_eq!(atom.load(Ordering::Relaxed), y);
+                }
+                true
+            }
+            fn quickcheck_fetch_add(x: $float_type, y: $float_type) -> bool {
+                if cfg!(all(not(debug_assertions), target_arch = "x86", not(target_feature = "sse2"))) {
+                    // TODO: rustc bug:
+                    // https://github.com/rust-lang/rust/issues/72327
+                    // https://github.com/rust-lang/rust/issues/73288
+                    return true;
+                }
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_float_op_eq!(fwd.fetch_add(y, ord), x);
+                    assert_float_op_eq!(fwd.load(Ordering::Relaxed), x + y);
+                    let rev = <$atomic_type>::new(y); // same check with the operands exchanged
+                    assert_float_op_eq!(rev.fetch_add(x, ord), y);
+                    assert_float_op_eq!(rev.load(Ordering::Relaxed), y + x);
+                }
+                true
+            }
+            fn quickcheck_fetch_sub(x: $float_type, y: $float_type) -> bool {
+                if cfg!(all(not(debug_assertions), target_arch = "x86", not(target_feature = "sse2"))) {
+                    // TODO: rustc bug:
+                    // https://github.com/rust-lang/rust/issues/72327
+                    // https://github.com/rust-lang/rust/issues/73288
+                    return true;
+                }
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_float_op_eq!(fwd.fetch_sub(y, ord), x);
+                    assert_float_op_eq!(fwd.load(Ordering::Relaxed), x - y);
+                    let rev = <$atomic_type>::new(y);
+                    assert_float_op_eq!(rev.fetch_sub(x, ord), y);
+                    assert_float_op_eq!(rev.load(Ordering::Relaxed), y - x);
+                }
+                true
+            }
+            fn quickcheck_fetch_max(x: $float_type, y: $float_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_float_op_eq!(fwd.fetch_max(y, ord), x);
+                    assert_float_op_eq!(fwd.load(Ordering::Relaxed), x.max(y));
+                    let rev = <$atomic_type>::new(y);
+                    assert_float_op_eq!(rev.fetch_max(x, ord), y);
+                    assert_float_op_eq!(rev.load(Ordering::Relaxed), y.max(x));
+                }
+                true
+            }
+            fn quickcheck_fetch_min(x: $float_type, y: $float_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let fwd = <$atomic_type>::new(x);
+                    assert_float_op_eq!(fwd.fetch_min(y, ord), x);
+                    assert_float_op_eq!(fwd.load(Ordering::Relaxed), x.min(y));
+                    let rev = <$atomic_type>::new(y);
+                    assert_float_op_eq!(rev.fetch_min(x, ord), y);
+                    assert_float_op_eq!(rev.load(Ordering::Relaxed), y.min(x));
+                }
+                true
+            }
+            fn quickcheck_fetch_neg(x: $float_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    assert_float_op_eq!(atom.fetch_neg(ord), x);
+                    assert_float_op_eq!(atom.load(Ordering::Relaxed), -x);
+                    assert_float_op_eq!(atom.fetch_neg(ord), -x);
+                    assert_float_op_eq!(atom.load(Ordering::Relaxed), x);
+                }
+                true
+            }
+            fn quickcheck_fetch_abs(x: $float_type) -> bool {
+                for &ord in &test_helper::SWAP_ORDERINGS {
+                    let atom = <$atomic_type>::new(x);
+                    assert_float_op_eq!(atom.fetch_abs(ord), x);
+                    assert_float_op_eq!(atom.fetch_abs(ord), x.abs());
+                    assert_float_op_eq!(atom.load(Ordering::Relaxed), x.abs());
+                }
+                true
+            }
+        }
+    };
+    ($atomic_type:ty, $float_type:ident) => {
+        __test_atomic_float!($atomic_type, $float_type, single_thread);
+        // TODO: multi-threaded tests for float atomics are not implemented yet.
+    };
+}
+macro_rules! __test_atomic_bool {
+    ($atomic_type:ty, single_thread) => {
+        #[test] // swap stores the new value and hands back the one it replaced
+        fn swap() {
+            let flag = <$atomic_type>::new(true);
+            test_swap_ordering(|ord| flag.swap(true, ord));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                assert_eq!(flag.swap(true, ord), true);
+                assert_eq!(flag.swap(false, ord), true);
+                assert_eq!(flag.swap(false, ord), false);
+                assert_eq!(flag.swap(true, ord), false); // leaves `true` for the next ordering
+            }
+        }
+        #[test] // Ok(previous) on a matching expected value, Err(current) otherwise
+        fn compare_exchange() {
+            let probe = <$atomic_type>::new(true);
+            test_compare_exchange_ordering(|ok_ord, err_ord| {
+                probe.compare_exchange(true, true, ok_ord, err_ord)
+            });
+            for &(ok_ord, err_ord) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let flag = <$atomic_type>::new(true);
+                assert_eq!(flag.compare_exchange(true, false, ok_ord, err_ord), Ok(true));
+                assert_eq!(flag.load(Ordering::Relaxed), false);
+                assert_eq!(flag.compare_exchange(true, true, ok_ord, err_ord), Err(false));
+                assert_eq!(flag.load(Ordering::Relaxed), false); // failed exchange changes nothing
+            }
+        }
+        #[test] // the weak variant is retried in a loop until it reports success
+        fn compare_exchange_weak() {
+            let probe = <$atomic_type>::new(false);
+            test_compare_exchange_ordering(|ok_ord, err_ord| {
+                probe.compare_exchange_weak(false, false, ok_ord, err_ord)
+            });
+            for &(ok_ord, err_ord) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let flag = <$atomic_type>::new(false);
+                assert_eq!(flag.compare_exchange_weak(true, true, ok_ord, err_ord), Err(false));
+                let mut expected = flag.load(Ordering::Relaxed);
+                let desired = true;
+                loop {
+                    match flag.compare_exchange_weak(expected, desired, ok_ord, err_ord) {
+                        Ok(_) => break,
+                        Err(seen) => expected = seen, // retry with the value actually observed
+                    }
+                }
+                assert_eq!(flag.load(Ordering::Relaxed), true);
+            }
+        }
+        #[test] // full truth table for AND; fetch_and returns the previous value
+        fn fetch_and() {
+            let probe = <$atomic_type>::new(true);
+            test_swap_ordering(|ord| assert_eq!(probe.fetch_and(true, ord), true));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                let flag = <$atomic_type>::new(true);
+                assert_eq!(flag.fetch_and(false, ord), true);
+                assert_eq!(flag.load(Ordering::Relaxed), false); // true & false
+                let flag = <$atomic_type>::new(true);
+                assert_eq!(flag.fetch_and(true, ord), true);
+                assert_eq!(flag.load(Ordering::Relaxed), true); // true & true
+                let flag = <$atomic_type>::new(false);
+                assert_eq!(flag.fetch_and(false, ord), false);
+                assert_eq!(flag.load(Ordering::Relaxed), false); // false & false
+                let flag = <$atomic_type>::new(false);
+                assert_eq!(flag.fetch_and(true, ord), false);
+                assert_eq!(flag.load(Ordering::Relaxed), false); // false & true
+            }
+        }
+        #[test] // full truth table for AND via the non-returning `and`
+        fn and() {
+            let probe = <$atomic_type>::new(true);
+            test_swap_ordering(|ord| probe.and(true, ord));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                let flag = <$atomic_type>::new(true);
+                flag.and(false, ord);
+                assert_eq!(flag.load(Ordering::Relaxed), false); // true & false
+                let flag = <$atomic_type>::new(true);
+                flag.and(true, ord);
+                assert_eq!(flag.load(Ordering::Relaxed), true); // true & true
+                let flag = <$atomic_type>::new(false);
+                flag.and(false, ord);
+                assert_eq!(flag.load(Ordering::Relaxed), false); // false & false
+                let flag = <$atomic_type>::new(false);
+                flag.and(true, ord);
+                assert_eq!(flag.load(Ordering::Relaxed), false); // false & true
+            }
+        }
+        #[test] // full truth table for OR; fetch_or returns the previous value
+        fn fetch_or() {
+            let probe = <$atomic_type>::new(true);
+            test_swap_ordering(|ord| assert_eq!(probe.fetch_or(false, ord), true));
+            for &ord in &test_helper::SWAP_ORDERINGS {
+                let flag = <$atomic_type>::new(true);
+                assert_eq!(flag.fetch_or(false, ord), true);
+                assert_eq!(flag.load(Ordering::Relaxed), true); // true | false
+                let flag = <$atomic_type>::new(true);
+                assert_eq!(flag.fetch_or(true, ord), true);
+                assert_eq!(flag.load(Ordering::Relaxed), true); // true | true
+                let flag = <$atomic_type>::new(false);
+                assert_eq!(flag.fetch_or(false, ord), false);
+                assert_eq!(flag.load(Ordering::Relaxed), false); // false | false
+                let flag = <$atomic_type>::new(false);
+                assert_eq!(flag.fetch_or(true, ord), false);
+                assert_eq!(flag.load(Ordering::Relaxed), true); // false | true
+            }
+        }
+        #[test]
+        fn or() {
+            let a = <$atomic_type>::new(true);
+            test_swap_ordering(|order| a.or(false, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(true);
+                a.or(false, order);
+                assert_eq!(a.load(Ordering::Relaxed), true);
+                let a = <$atomic_type>::new(true);
+                a.or(true, order);
+                assert_eq!(a.load(Ordering::Relaxed), true);
+                let a = <$atomic_type>::new(false);
+                a.or(false, order);
+                assert_eq!(a.load(Ordering::Relaxed), false);
+                let a = <$atomic_type>::new(false);
+                a.or(true, order);
+                assert_eq!(a.load(Ordering::Relaxed), true);
+            }
+        }
+        #[test]
+        fn fetch_xor() {
+            let a = <$atomic_type>::new(true);
+            test_swap_ordering(|order| assert_eq!(a.fetch_xor(false, order), true));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(true);
+                assert_eq!(a.fetch_xor(false, order), true);
+                assert_eq!(a.load(Ordering::Relaxed), true);
+                let a = <$atomic_type>::new(true);
+                assert_eq!(a.fetch_xor(true, order), true);
+                assert_eq!(a.load(Ordering::Relaxed), false);
+                let a = <$atomic_type>::new(false);
+                assert_eq!(a.fetch_xor(false, order), false);
+                assert_eq!(a.load(Ordering::Relaxed), false);
+                let a = <$atomic_type>::new(false);
+                assert_eq!(a.fetch_xor(true, order), false);
+                assert_eq!(a.load(Ordering::Relaxed), true);
+            }
+        }
+        #[test]
+        fn xor() {
+            let a = <$atomic_type>::new(true);
+            test_swap_ordering(|order| a.xor(false, order));
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(true);
+                a.xor(false, order);
+                assert_eq!(a.load(Ordering::Relaxed), true);
+                let a = <$atomic_type>::new(true);
+                a.xor(true, order);
+                assert_eq!(a.load(Ordering::Relaxed), false);
+                let a = <$atomic_type>::new(false);
+                a.xor(false, order);
+                assert_eq!(a.load(Ordering::Relaxed), false);
+                let a = <$atomic_type>::new(false);
+                a.xor(true, order);
+                assert_eq!(a.load(Ordering::Relaxed), true);
+            }
+        }
+        ::quickcheck::quickcheck! {
+            fn quickcheck_compare_exchange(x: bool, y: bool) -> bool {
+                let z = !y;
+                for &(success, failure) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                    let a = <$atomic_type>::new(x);
+                    assert_eq!(a.compare_exchange(x, y, success, failure).unwrap(), x);
+                    assert_eq!(a.load(Ordering::Relaxed), y);
+                    assert_eq!(a.compare_exchange(z, x, success, failure).unwrap_err(), y);
+                    assert_eq!(a.load(Ordering::Relaxed), y);
+                }
+                true
+            }
+        }
+    };
+    ($atomic_type:ty) => {
+        __test_atomic_bool!($atomic_type, single_thread);
+        // TODO: multi thread
+    };
+}
+macro_rules! __test_atomic_ptr { // swap / compare_exchange(_weak) tests for a pointer atomic type
+    ($atomic_type:ty, single_thread) => { // arm that emits only single-threaded tests
+        #[test] // swap returns the previously stored pointer
+        fn swap() {
+            let a = <$atomic_type>::new(ptr::null_mut());
+            test_swap_ordering(|order| a.swap(ptr::null_mut(), order)); // run swap once per swap ordering
+            let x = &mut 1;
+            for &order in &test_helper::SWAP_ORDERINGS {
+                assert_eq!(a.swap(x, order), ptr::null_mut()); // old value is null, `x` is now stored
+                assert_eq!(a.swap(ptr::null_mut(), order), x as _); // put null back for the next iteration
+            }
+        }
+        #[test] // compare_exchange: Ok(old) on a match, Err(actual) on a mismatch
+        fn compare_exchange() {
+            let a = <$atomic_type>::new(ptr::null_mut());
+            test_compare_exchange_ordering(|success, failure| { // sweep the (success, failure) ordering pairs
+                a.compare_exchange(ptr::null_mut(), ptr::null_mut(), success, failure)
+            });
+            for &(success, failure) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let a = <$atomic_type>::new(ptr::null_mut());
+                let x = &mut 1;
+                assert_eq!(
+                    a.compare_exchange(ptr::null_mut(), x, success, failure), // `current` matches: exchange happens
+                    Ok(ptr::null_mut()),
+                );
+                assert_eq!(a.load(Ordering::Relaxed), x as _);
+                assert_eq!(
+                    a.compare_exchange(ptr::null_mut(), ptr::null_mut(), success, failure), // `current` no longer matches
+                    Err(x as _),
+                );
+                assert_eq!(a.load(Ordering::Relaxed), x as _); // a failed exchange leaves the value untouched
+            }
+        }
+        #[test] // compare_exchange_weak: mismatch yields Err(actual); success is reached via a retry loop
+        fn compare_exchange_weak() {
+            let a = <$atomic_type>::new(ptr::null_mut());
+            test_compare_exchange_ordering(|success, failure| { // sweep the (success, failure) ordering pairs
+                a.compare_exchange_weak(ptr::null_mut(), ptr::null_mut(), success, failure)
+            });
+            for &(success, failure) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let a = <$atomic_type>::new(ptr::null_mut());
+                let x = &mut 1;
+                assert_eq!(a.compare_exchange_weak(x, x, success, failure), Err(ptr::null_mut())); // stored null != `x`
+                let mut old = a.load(Ordering::Relaxed);
+                loop { // retry until the exchange succeeds (the weak variant is documented as allowed to fail spuriously)
+                    match a.compare_exchange_weak(old, x, success, failure) {
+                        Ok(_) => break,
+                        Err(x) => old = x, // this `x` shadows the outer one: it is the value actually observed
+                    }
+                }
+                assert_eq!(a.load(Ordering::Relaxed), x as _);
+            }
+        }
+    };
+    ($atomic_type:ty) => { // default arm: currently the same tests as `single_thread`
+        __test_atomic_ptr!($atomic_type, single_thread);
+        // TODO: multi thread
+    };
+}
+
+macro_rules! __test_atomic_int_load_store_pub { // public-API tests for an integer atomic that use only load and raw access
+    ($atomic_type:ty, $int_type:ident) => {
+        __test_atomic_pub_common!($atomic_type, $int_type); // shared tests; this macro is defined outside this chunk
+        use std::{boxed::Box, mem};
+        #[test] // Default / From / Debug impls plus a from_ptr -> as_ptr round trip
+        fn impls() {
+            let a = <$atomic_type>::default();
+            let b = <$atomic_type>::from(0);
+            assert_eq!(a.load(Ordering::SeqCst), b.load(Ordering::SeqCst)); // default() holds the same value as from(0)
+            assert_eq!(std::format!("{:?}", a), std::format!("{:?}", a.load(Ordering::SeqCst))); // Debug output equals that of the loaded value
+
+            unsafe {
+                let ptr: *mut Align16<$int_type> = Box::into_raw(Box::new(Align16(0))); // NOTE(review): Align16 is declared elsewhere; presumably a 16-byte-aligned wrapper
+                assert!(ptr as usize % mem::align_of::<$atomic_type>() == 0); // allocation must be aligned for the atomic type
+                {
+                    let a = <$atomic_type>::from_ptr(ptr.cast::<$int_type>());
+                    *a.as_ptr() = 1; // plain write through the pointer returned by as_ptr
+                }
+                assert_eq!((*ptr).0, 1); // the write landed in the original allocation
+                drop(Box::from_raw(ptr)); // free the Box released by into_raw
+            }
+        }
+    };
+}
+macro_rules! __test_atomic_int_pub { // fetch_update tests (fixed values + quickcheck) for an integer atomic
+    ($atomic_type:ty, $int_type:ident) => {
+        #[test] // fetch_update: Err(current) when the closure returns None, Ok(previous) otherwise
+        fn fetch_update() {
+            let a = <$atomic_type>::new(7);
+            test_compare_exchange_ordering(|set, fetch| a.fetch_update(set, fetch, |x| Some(x))); // ordering sweep with an identity update
+            for &(success, failure) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let a = <$atomic_type>::new(7);
+                assert_eq!(a.fetch_update(success, failure, |_| None), Err(7)); // None: nothing stored, current value reported
+                assert_eq!(a.fetch_update(success, failure, |x| Some(x + 1)), Ok(7)); // 7 -> 8
+                assert_eq!(a.fetch_update(success, failure, |x| Some(x + 1)), Ok(8)); // 8 -> 9
+                assert_eq!(a.load(Ordering::SeqCst), 9);
+            }
+        }
+        ::quickcheck::quickcheck! {
+            fn quickcheck_fetch_update(x: $int_type, y: $int_type) -> bool {
+                let z = loop { // draw a random value that is guaranteed to differ from `y`
+                    let z = fastrand::$int_type(..);
+                    if z != y {
+                        break z;
+                    }
+                };
+                for &(success, failure) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                    let a = <$atomic_type>::new(x);
+                    assert_eq!(
+                        a.fetch_update(success, failure, |_| Some(y)) // x -> y, returns the previous value x
+                        .unwrap(),
+                        x
+                    );
+                    assert_eq!(
+                        a.fetch_update(success, failure, |_| Some(z)) // y -> z, returns the previous value y
+                        .unwrap(),
+                        y
+                    );
+                    assert_eq!(a.load(Ordering::Relaxed), z);
+                    assert_eq!(
+                        a.fetch_update(success, failure, |z| if z == y { Some(z) } else { None }) // closure's `z` is the current value, which != y, so it returns None
+                        .unwrap_err(),
+                        z
+                    );
+                    assert_eq!(a.load(Ordering::Relaxed), z); // rejected update leaves the value unchanged
+                }
+                true // all checks are asserts; reaching here means the property held
+            }
+        }
+    };
+}
+macro_rules! __test_atomic_float_pub { // public-API tests (fetch_update, trait impls, raw access) for a float atomic
+    ($atomic_type:ty, $float_type:ident) => {
+        __test_atomic_pub_common!($atomic_type, $float_type); // shared tests; this macro is defined outside this chunk
+        use std::{boxed::Box, mem};
+        #[test] // fetch_update: Err(current) when the closure returns None, Ok(previous) otherwise
+        fn fetch_update() {
+            let a = <$atomic_type>::new(7.0);
+            test_compare_exchange_ordering(|set, fetch| a.fetch_update(set, fetch, |x| Some(x))); // ordering sweep with an identity update
+            for &(success, failure) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let a = <$atomic_type>::new(7.0);
+                assert_eq!(a.fetch_update(success, failure, |_| None), Err(7.0)); // None: nothing stored
+                assert_eq!(a.fetch_update(success, failure, |x| Some(x + 1.0)), Ok(7.0)); // 7.0 -> 8.0
+                assert_eq!(a.fetch_update(success, failure, |x| Some(x + 1.0)), Ok(8.0)); // 8.0 -> 9.0
+                assert_eq!(a.load(Ordering::SeqCst), 9.0);
+            }
+        }
+        #[test] // Default / From / Debug impls plus a from_ptr -> as_ptr round trip
+        fn impls() {
+            let a = <$atomic_type>::default();
+            let b = <$atomic_type>::from(0.0);
+            assert_eq!(a.load(Ordering::SeqCst), b.load(Ordering::SeqCst)); // default() holds the same value as from(0.0)
+            assert_eq!(std::format!("{:?}", a), std::format!("{:?}", a.load(Ordering::SeqCst))); // Debug output equals that of the loaded value
+
+            unsafe {
+                let ptr: *mut Align16<$float_type> = Box::into_raw(Box::new(Align16(0.0))); // NOTE(review): Align16 is declared elsewhere; presumably a 16-byte-aligned wrapper
+                assert!(ptr as usize % mem::align_of::<$atomic_type>() == 0); // allocation must be aligned for the atomic type
+                {
+                    let a = <$atomic_type>::from_ptr(ptr.cast::<$float_type>());
+                    *a.as_ptr() = 1.0; // plain write through the pointer returned by as_ptr
+                }
+                assert_eq!((*ptr).0, 1.0); // the write landed in the original allocation
+                drop(Box::from_raw(ptr)); // free the Box released by into_raw
+            }
+        }
+    };
+}
+macro_rules! __test_atomic_bool_pub { // public-API tests (nand / not / fetch_update / trait impls) for a bool atomic
+    ($atomic_type:ty) => {
+        __test_atomic_pub_common!($atomic_type, bool); // shared tests; this macro is defined outside this chunk
+        use std::{boxed::Box, mem};
+        #[test] // fetch_nand over the full truth table; the return value is the previous value
+        fn fetch_nand() {
+            let a = <$atomic_type>::new(true);
+            test_swap_ordering(|order| assert_eq!(a.fetch_nand(false, order), true)); // !(true & false) == true, so `a` stays true
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(true);
+                assert_eq!(a.fetch_nand(false, order), true);
+                assert_eq!(a.load(Ordering::Relaxed), true); // !(true & false)
+                let a = <$atomic_type>::new(true);
+                assert_eq!(a.fetch_nand(true, order), true);
+                assert_eq!(a.load(Ordering::Relaxed) as usize, 0); // the stored false must also be numerically 0
+                assert_eq!(a.load(Ordering::Relaxed), false); // !(true & true)
+                let a = <$atomic_type>::new(false);
+                assert_eq!(a.fetch_nand(false, order), false);
+                assert_eq!(a.load(Ordering::Relaxed), true); // !(false & false)
+                let a = <$atomic_type>::new(false);
+                assert_eq!(a.fetch_nand(true, order), false);
+                assert_eq!(a.load(Ordering::Relaxed), true); // !(false & true)
+            }
+        }
+        #[test] // fetch_not flips the value and returns the previous one
+        fn fetch_not() {
+            let a = <$atomic_type>::new(true);
+            test_swap_ordering(|order| a.fetch_not(order)); // run fetch_not once per swap ordering
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(true);
+                assert_eq!(a.fetch_not(order), true);
+                assert_eq!(a.load(Ordering::Relaxed), false);
+                let a = <$atomic_type>::new(false);
+                assert_eq!(a.fetch_not(order), false);
+                assert_eq!(a.load(Ordering::Relaxed), true);
+            }
+        }
+        #[test] // not flips the value; its result is not inspected here
+        fn not() {
+            let a = <$atomic_type>::new(true);
+            test_swap_ordering(|order| a.not(order)); // sweep `not` itself (was fetch_not), matching the and/or/xor tests
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let a = <$atomic_type>::new(true);
+                a.not(order);
+                assert_eq!(a.load(Ordering::Relaxed), false);
+                let a = <$atomic_type>::new(false);
+                a.not(order);
+                assert_eq!(a.load(Ordering::Relaxed), true);
+            }
+        }
+        #[test] // fetch_update: Err(current) when the closure returns None, Ok(previous) otherwise
+        fn fetch_update() {
+            let a = <$atomic_type>::new(false);
+            test_compare_exchange_ordering(|set, fetch| a.fetch_update(set, fetch, |x| Some(x))); // ordering sweep with an identity update
+            for &(success, failure) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let a = <$atomic_type>::new(false);
+                assert_eq!(a.fetch_update(success, failure, |_| None), Err(false)); // None: nothing stored
+                assert_eq!(a.fetch_update(success, failure, |x| Some(!x)), Ok(false)); // false -> true
+                assert_eq!(a.fetch_update(success, failure, |x| Some(!x)), Ok(true)); // true -> false
+                assert_eq!(a.load(Ordering::SeqCst), false);
+            }
+        }
+        #[test] // Default / From / Debug impls plus a from_ptr -> as_ptr round trip
+        fn impls() {
+            let a = <$atomic_type>::default();
+            let b = <$atomic_type>::from(false);
+            assert_eq!(a.load(Ordering::SeqCst), b.load(Ordering::SeqCst)); // default() holds the same value as from(false)
+            assert_eq!(std::format!("{:?}", a), std::format!("{:?}", a.load(Ordering::SeqCst))); // Debug output equals that of the loaded value
+
+            unsafe {
+                let ptr: *mut bool = Box::into_raw(Box::new(false));
+                assert!(ptr as usize % mem::align_of::<$atomic_type>() == 0); // allocation must be aligned for the atomic type
+                {
+                    let a = <$atomic_type>::from_ptr(ptr);
+                    *a.as_ptr() = true; // plain write through the pointer returned by as_ptr
+                }
+                assert_eq!((*ptr), true); // the write landed in the original allocation
+                drop(Box::from_raw(ptr)); // free the Box released by into_raw
+            }
+        }
+    };
+}
+macro_rules! __test_atomic_ptr_pub { // public-API tests for a pointer atomic: fetch_update, impls, pointer arithmetic, bit ops
+    ($atomic_type:ty) => {
+        __test_atomic_pub_common!($atomic_type, *mut u8); // shared tests; this macro is defined outside this chunk
+        use sptr::Strict; // NOTE(review): presumably what supplies `addr` / `map_addr` on raw pointers below
+        use std::{boxed::Box, mem};
+        #[test] // fetch_update: Err(current) when the closure returns None, Ok(previous) otherwise
+        fn fetch_update() {
+            let a = <$atomic_type>::new(ptr::null_mut());
+            test_compare_exchange_ordering(|set, fetch| a.fetch_update(set, fetch, |x| Some(x))); // ordering sweep with an identity update
+            for &(success, failure) in &test_helper::COMPARE_EXCHANGE_ORDERINGS {
+                let a = <$atomic_type>::new(ptr::null_mut());
+                assert_eq!(a.fetch_update(success, failure, |_| None), Err(ptr::null_mut())); // None: nothing stored
+                assert_eq!(
+                    a.fetch_update(success, failure, |_| Some(&a as *const _ as *mut _)), // store the atomic's own address as a non-null value
+                    Ok(ptr::null_mut())
+                );
+                assert_eq!(a.load(Ordering::SeqCst), &a as *const _ as *mut _);
+            }
+        }
+        #[test] // Default / From / Debug / Pointer impls plus a from_ptr -> as_ptr round trip
+        fn impls() {
+            let a = <$atomic_type>::default();
+            let b = <$atomic_type>::from(ptr::null_mut());
+            assert_eq!(a.load(Ordering::SeqCst), b.load(Ordering::SeqCst)); // default() holds the same value as from(null)
+            assert_eq!(std::format!("{:?}", a), std::format!("{:?}", a.load(Ordering::SeqCst))); // Debug output equals that of the loaded pointer
+            assert_eq!(std::format!("{:p}", a), std::format!("{:p}", a.load(Ordering::SeqCst))); // `{:p}` output equals that of the loaded pointer
+
+            unsafe {
+                let ptr: *mut Align16<*mut u8> = Box::into_raw(Box::new(Align16(ptr::null_mut()))); // NOTE(review): Align16 is declared elsewhere; presumably a 16-byte-aligned wrapper
+                assert!(ptr as usize % mem::align_of::<$atomic_type>() == 0); // allocation must be aligned for the atomic type
+                {
+                    let a = <$atomic_type>::from_ptr(ptr.cast::<*mut u8>());
+                    *a.as_ptr() = ptr::null_mut::<u8>().wrapping_add(1); // plain write of a recognizable non-null pointer
+                }
+                assert_eq!((*ptr).0, ptr::null_mut::<u8>().wrapping_add(1)); // the write landed in the original allocation
+                drop(Box::from_raw(ptr)); // free the Box released by into_raw
+            }
+        }
+        // https://github.com/rust-lang/rust/blob/1.70.0/library/core/tests/atomic.rs#L130-L213
+        #[test] // ptr_* steps move by size_of::<i64>() == 8 bytes, byte_* steps by single bytes
+        fn ptr_add_null() {
+            let atom = AtomicPtr::<i64>::new(core::ptr::null_mut()); // fixed to AtomicPtr<i64>, not `$atomic_type`
+            assert_eq!(atom.fetch_ptr_add(1, Ordering::SeqCst).addr(), 0);
+            assert_eq!(atom.load(Ordering::SeqCst).addr(), 8); // 0 + one i64
+
+            assert_eq!(atom.fetch_byte_add(1, Ordering::SeqCst).addr(), 8);
+            assert_eq!(atom.load(Ordering::SeqCst).addr(), 9); // 8 + one byte
+
+            assert_eq!(atom.fetch_ptr_sub(1, Ordering::SeqCst).addr(), 9);
+            assert_eq!(atom.load(Ordering::SeqCst).addr(), 1); // 9 - one i64
+
+            assert_eq!(atom.fetch_byte_sub(1, Ordering::SeqCst).addr(), 1);
+            assert_eq!(atom.load(Ordering::SeqCst).addr(), 0); // 1 - one byte
+        }
+        #[test] // same arithmetic as ptr_add_null, starting from the address of a real i64
+        fn ptr_add_data() {
+            let num = 0i64;
+            let n = &num as *const i64 as *mut _;
+            let atom = AtomicPtr::<i64>::new(n);
+            assert_eq!(atom.fetch_ptr_add(1, Ordering::SeqCst), n);
+            assert_eq!(atom.load(Ordering::SeqCst), n.wrapping_add(1)); // one element past `n`
+
+            assert_eq!(atom.fetch_ptr_sub(1, Ordering::SeqCst), n.wrapping_add(1));
+            assert_eq!(atom.load(Ordering::SeqCst), n);
+            let bytes_from_n = |b| n.cast::<u8>().wrapping_add(b).cast::<i64>(); // `n` advanced by `b` bytes
+
+            assert_eq!(atom.fetch_byte_add(1, Ordering::SeqCst), n);
+            assert_eq!(atom.load(Ordering::SeqCst), bytes_from_n(1));
+
+            assert_eq!(atom.fetch_byte_add(5, Ordering::SeqCst), bytes_from_n(1));
+            assert_eq!(atom.load(Ordering::SeqCst), bytes_from_n(6)); // 1 + 5
+
+            assert_eq!(atom.fetch_byte_sub(1, Ordering::SeqCst), bytes_from_n(6));
+            assert_eq!(atom.load(Ordering::SeqCst), bytes_from_n(5)); // 6 - 1
+
+            assert_eq!(atom.fetch_byte_sub(5, Ordering::SeqCst), bytes_from_n(5));
+            assert_eq!(atom.load(Ordering::SeqCst), n); // back at the start
+        }
+        #[test] // fetch_or / fetch_and / fetch_xor operate on the pointer's address bits
+        fn ptr_bitops() {
+            let atom = AtomicPtr::<i64>::new(core::ptr::null_mut());
+            assert_eq!(atom.fetch_or(0b0111, Ordering::SeqCst).addr(), 0);
+            assert_eq!(atom.load(Ordering::SeqCst).addr(), 0b0111); // 0 | 0b0111
+
+            assert_eq!(atom.fetch_and(0b1101, Ordering::SeqCst).addr(), 0b0111);
+            assert_eq!(atom.load(Ordering::SeqCst).addr(), 0b0101); // 0b0111 & 0b1101
+
+            assert_eq!(atom.fetch_xor(0b1111, Ordering::SeqCst).addr(), 0b0101);
+            assert_eq!(atom.load(Ordering::SeqCst).addr(), 0b1010); // 0b0101 ^ 0b1111
+        }
+        #[test] // uses the low 4 bits of a 16-byte-aligned pointer as a tag
+        fn ptr_bitops_tagging() {
+            const MASK_TAG: usize = 0b1111; // tag bits
+            const MASK_PTR: usize = !MASK_TAG; // everything except the tag bits
+
+            #[repr(align(16))]
+            struct Tagme(u128);
+
+            let tagme = Tagme(1000);
+            let ptr = &tagme as *const Tagme as *mut Tagme;
+            let atom: AtomicPtr<Tagme> = AtomicPtr::new(ptr);
+
+            assert_eq!(ptr.addr() & MASK_TAG, 0); // align(16) leaves the low 4 bits clear
+
+            assert_eq!(atom.fetch_or(0b0111, Ordering::SeqCst), ptr);
+            assert_eq!(atom.load(Ordering::SeqCst), ptr.map_addr(|a| a | 0b111)); // tag = 0b0111
+
+            assert_eq!(
+                atom.fetch_and(MASK_PTR | 0b0010, Ordering::SeqCst), // keep the address bits and tag bit 1 only
+                ptr.map_addr(|a| a | 0b111)
+            );
+            assert_eq!(atom.load(Ordering::SeqCst), ptr.map_addr(|a| a | 0b0010)); // tag = 0b0010
+
+            assert_eq!(atom.fetch_xor(0b1011, Ordering::SeqCst), ptr.map_addr(|a| a | 0b0010));
+            assert_eq!(atom.load(Ordering::SeqCst), ptr.map_addr(|a| a | 0b1001)); // 0b0010 ^ 0b1011
+
+            assert_eq!(atom.fetch_and(MASK_PTR, Ordering::SeqCst), ptr.map_addr(|a| a | 0b1001));
+            assert_eq!(atom.load(Ordering::SeqCst), ptr); // tag cleared, original pointer recovered
+        }
+        #[test] // bit_set sets the bit and returns whether it was set before
+        fn bit_set() {
+            let a = <$atomic_type>::new(ptr::null_mut::<u64>().cast::<u8>().map_addr(|a| a | 1)); // bit 0 already set
+            test_swap_ordering(|order| assert!(a.bit_set(0, order))); // so every call reports true
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let pointer = &mut 1u64 as *mut u64 as *mut u8;
+                let atom = <$atomic_type>::new(pointer);
+                // Tag the bottom bit of the pointer.
+                assert!(!atom.bit_set(0, order)); // the bit was clear beforehand
+                // Extract and untag.
+                let tagged = atom.load(Ordering::Relaxed);
+                assert_eq!(tagged.addr() & 1, 1);
+                assert_eq!(tagged.map_addr(|p| p & !1), pointer); // clearing the tag gives back the original pointer
+            }
+        }
+        #[test] // bit_clear clears the bit and returns whether it was set before
+        fn bit_clear() {
+            let a = <$atomic_type>::new(ptr::null_mut::<u64>().cast::<u8>()); // bit 0 clear
+            test_swap_ordering(|order| assert!(!a.bit_clear(0, order))); // so every call reports false
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let pointer = &mut 1u64 as *mut u64 as *mut u8;
+                // A tagged pointer
+                let atom = <$atomic_type>::new(pointer.map_addr(|a| a | 1));
+                assert!(atom.bit_set(0, order)); // already tagged, so bit_set reports true
+                // Untag
+                assert!(atom.bit_clear(0, order)); // the bit was set beforehand
+            }
+        }
+        #[test] // bit_toggle flips the bit; only the stored result is checked
+        fn bit_toggle() {
+            let a = <$atomic_type>::new(ptr::null_mut::<u64>().cast::<u8>());
+            test_swap_ordering(|order| a.bit_toggle(0, order)); // run bit_toggle once per swap ordering
+            for &order in &test_helper::SWAP_ORDERINGS {
+                let pointer = &mut 1u64 as *mut u64 as *mut u8;
+                let atom = <$atomic_type>::new(pointer);
+                // Toggle a tag bit on the pointer.
+                atom.bit_toggle(0, order);
+                assert_eq!(atom.load(Ordering::Relaxed).addr() & 1, 1); // bit 0 went from clear to set
+            }
+        }
+    };
+}
+
+macro_rules! test_atomic_int_load_store { // test module with only the load/store tests for `Atomic<Int>`
+    ($int_type:ident) => {
+        paste::paste! { // paste assembles the `test_atomic_<int>` and `Atomic<Int>` identifiers below
+            #[allow(
+                clippy::alloc_instead_of_core,
+                clippy::std_instead_of_alloc,
+                clippy::std_instead_of_core,
+                clippy::undocumented_unsafe_blocks
+            )]
+            mod [<test_atomic_ $int_type>] { // same module name as the other test_atomic_int* wrappers
+                use super::*; // the generated tests use names from the invoking module
+                __test_atomic_int_load_store!([<Atomic $int_type:camel>], $int_type);
+            }
+        }
+    };
+}
+macro_rules! test_atomic_bool_load_store { // test module with only the load/store tests for `AtomicBool`
+    () => {
+        #[allow(
+            clippy::alloc_instead_of_core,
+            clippy::std_instead_of_alloc,
+            clippy::std_instead_of_core,
+            clippy::undocumented_unsafe_blocks
+        )]
+        mod test_atomic_bool { // same module name as the other test_atomic_bool* wrappers
+            use super::*; // the generated tests use names from the invoking module
+            __test_atomic_bool_load_store!(AtomicBool);
+        }
+    };
+}
+macro_rules! test_atomic_ptr_load_store { // test module with only the load/store tests for `AtomicPtr<u8>`
+    () => {
+        #[allow(
+            clippy::alloc_instead_of_core,
+            clippy::std_instead_of_alloc,
+            clippy::std_instead_of_core,
+            clippy::undocumented_unsafe_blocks
+        )]
+        mod test_atomic_ptr { // same module name as the other test_atomic_ptr* wrappers
+            use super::*; // the generated tests use names from the invoking module
+            __test_atomic_ptr_load_store!(AtomicPtr<u8>);
+        }
+    };
+}
+
+macro_rules! test_atomic_int_single_thread { // load/store + RMW tests for `Atomic<Int>`, using the `single_thread` arms
+    ($int_type:ident) => {
+        paste::paste! { // paste assembles the `test_atomic_<int>` and `Atomic<Int>` identifiers below
+            #[allow(
+                clippy::alloc_instead_of_core,
+                clippy::std_instead_of_alloc,
+                clippy::std_instead_of_core,
+                clippy::undocumented_unsafe_blocks
+            )]
+            mod [<test_atomic_ $int_type>] { // same module name as the other test_atomic_int* wrappers
+                use super::*; // the generated tests use names from the invoking module
+                __test_atomic_int_load_store!([<Atomic $int_type:camel>], $int_type, single_thread);
+                __test_atomic_int!([<Atomic $int_type:camel>], $int_type, single_thread);
+            }
+        }
+    };
+}
+macro_rules! test_atomic_bool_single_thread { // load/store + RMW tests for `AtomicBool`, using the `single_thread` arms
+    () => {
+        #[allow(
+            clippy::alloc_instead_of_core,
+            clippy::std_instead_of_alloc,
+            clippy::std_instead_of_core,
+            clippy::undocumented_unsafe_blocks
+        )]
+        mod test_atomic_bool { // same module name as the other test_atomic_bool* wrappers
+            use super::*; // the generated tests use names from the invoking module
+            __test_atomic_bool_load_store!(AtomicBool, single_thread);
+            __test_atomic_bool!(AtomicBool, single_thread);
+        }
+    };
+}
+macro_rules! test_atomic_ptr_single_thread { // load/store + RMW tests for `AtomicPtr<u8>`, using the `single_thread` arms
+    () => {
+        #[allow(
+            clippy::alloc_instead_of_core,
+            clippy::std_instead_of_alloc,
+            clippy::std_instead_of_core,
+            clippy::undocumented_unsafe_blocks
+        )]
+        mod test_atomic_ptr { // same module name as the other test_atomic_ptr* wrappers
+            use super::*; // the generated tests use names from the invoking module
+            __test_atomic_ptr_load_store!(AtomicPtr<u8>, single_thread);
+            __test_atomic_ptr!(AtomicPtr<u8>, single_thread);
+        }
+    };
+}
+
+macro_rules! test_atomic_int { // load/store + RMW tests for `Atomic<Int>`, using the default macro arms
+    ($int_type:ident) => {
+        paste::paste! { // paste assembles the `test_atomic_<int>` and `Atomic<Int>` identifiers below
+            #[allow(
+                clippy::alloc_instead_of_core,
+                clippy::std_instead_of_alloc,
+                clippy::std_instead_of_core,
+                clippy::undocumented_unsafe_blocks
+            )]
+            mod [<test_atomic_ $int_type>] { // same module name as the other test_atomic_int* wrappers
+                use super::*; // the generated tests use names from the invoking module
+                __test_atomic_int_load_store!([<Atomic $int_type:camel>], $int_type);
+                __test_atomic_int!([<Atomic $int_type:camel>], $int_type);
+            }
+        }
+    };
+}
+macro_rules! test_atomic_bool { // load/store + RMW tests for `AtomicBool`, using the default macro arms
+    () => {
+        #[allow(
+            clippy::alloc_instead_of_core,
+            clippy::std_instead_of_alloc,
+            clippy::std_instead_of_core,
+            clippy::undocumented_unsafe_blocks
+        )]
+        mod test_atomic_bool { // same module name as the other test_atomic_bool* wrappers
+            use super::*; // the generated tests use names from the invoking module
+            __test_atomic_bool_load_store!(AtomicBool);
+            __test_atomic_bool!(AtomicBool);
+        }
+    };
+}
+macro_rules! test_atomic_ptr { // load/store + RMW tests for `AtomicPtr<u8>`, using the default macro arms
+    () => {
+        #[allow(
+            clippy::alloc_instead_of_core,
+            clippy::std_instead_of_alloc,
+            clippy::std_instead_of_core,
+            clippy::undocumented_unsafe_blocks
+        )]
+        #[allow(unstable_name_collisions)] // for sptr crate
+        mod test_atomic_ptr { // same module name as the other test_atomic_ptr* wrappers
+            use super::*; // the generated tests use names from the invoking module
+            __test_atomic_ptr_load_store!(AtomicPtr<u8>);
+            __test_atomic_ptr!(AtomicPtr<u8>);
+        }
+    };
+}
+
+macro_rules! test_atomic_int_pub { // full set for `Atomic<Int>`: load/store, RMW, and both public-API test groups
+    ($int_type:ident) => {
+        paste::paste! { // paste assembles the `test_atomic_<int>` and `Atomic<Int>` identifiers below
+            #[allow(
+                clippy::alloc_instead_of_core,
+                clippy::std_instead_of_alloc,
+                clippy::std_instead_of_core,
+                clippy::undocumented_unsafe_blocks
+            )]
+            mod [<test_atomic_ $int_type>] { // same module name as the other test_atomic_int* wrappers
+                use super::*; // the generated tests use names from the invoking module
+                __test_atomic_int_load_store!([<Atomic $int_type:camel>], $int_type);
+                __test_atomic_int!([<Atomic $int_type:camel>], $int_type);
+                __test_atomic_int_load_store_pub!([<Atomic $int_type:camel>], $int_type); // adds `impls` and the common pub tests
+                __test_atomic_int_pub!([<Atomic $int_type:camel>], $int_type); // adds the fetch_update tests
+            }
+        }
+    };
+}
+macro_rules! test_atomic_int_load_store_pub { // load/store tests plus the load/store-level public-API tests for `Atomic<Int>`
+    ($int_type:ident) => {
+        paste::paste! { // paste assembles the `test_atomic_<int>` and `Atomic<Int>` identifiers below
+            #[allow(
+                clippy::alloc_instead_of_core,
+                clippy::std_instead_of_alloc,
+                clippy::std_instead_of_core,
+                clippy::undocumented_unsafe_blocks
+            )]
+            mod [<test_atomic_ $int_type>] { // same module name as the other test_atomic_int* wrappers
+                use super::*; // the generated tests use names from the invoking module
+                __test_atomic_int_load_store!([<Atomic $int_type:camel>], $int_type);
+                __test_atomic_int_load_store_pub!([<Atomic $int_type:camel>], $int_type); // adds `impls` and the common pub tests
+            }
+        }
+    };
+}
+#[cfg(feature = "float")] // only defined when the `float` feature is enabled
+macro_rules! test_atomic_float_pub { // full set for `Atomic<Float>`: load/store, float ops, and public-API tests
+    ($float_type:ident) => {
+        paste::paste! { // paste assembles the `test_atomic_<float>` and `Atomic<Float>` identifiers below
+            #[allow(
+                clippy::alloc_instead_of_core,
+                clippy::std_instead_of_alloc,
+                clippy::std_instead_of_core,
+                clippy::undocumented_unsafe_blocks
+            )]
+            mod [<test_atomic_ $float_type>] {
+                use super::*; // the generated tests use names from the invoking module
+                __test_atomic_float_load_store!([<Atomic $float_type:camel>], $float_type);
+                __test_atomic_float!([<Atomic $float_type:camel>], $float_type);
+                __test_atomic_float_pub!([<Atomic $float_type:camel>], $float_type); // adds fetch_update / impls tests
+            }
+        }
+    };
+}
+macro_rules! test_atomic_bool_pub { // full set for `AtomicBool`: load/store, RMW, and public-API tests
+    () => {
+        #[allow(
+            clippy::alloc_instead_of_core,
+            clippy::std_instead_of_alloc,
+            clippy::std_instead_of_core,
+            clippy::undocumented_unsafe_blocks
+        )]
+        mod test_atomic_bool { // same module name as the other test_atomic_bool* wrappers
+            use super::*; // the generated tests use names from the invoking module
+            __test_atomic_bool_load_store!(AtomicBool);
+            __test_atomic_bool!(AtomicBool);
+            __test_atomic_bool_pub!(AtomicBool); // adds nand / not / fetch_update / impls tests
+        }
+    };
+}
+macro_rules! test_atomic_ptr_pub { // full set for `AtomicPtr<u8>`: load/store, RMW, and public-API tests
+    () => {
+        #[allow(
+            clippy::alloc_instead_of_core,
+            clippy::std_instead_of_alloc,
+            clippy::std_instead_of_core,
+            clippy::undocumented_unsafe_blocks
+        )]
+        #[allow(unstable_name_collisions)] // for sptr crate
+        mod test_atomic_ptr { // same module name as the other test_atomic_ptr* wrappers
+            use super::*; // the generated tests use names from the invoking module
+            __test_atomic_ptr_load_store!(AtomicPtr<u8>);
+            __test_atomic_ptr!(AtomicPtr<u8>);
+            __test_atomic_ptr_pub!(AtomicPtr<u8>); // adds fetch_update / impls / pointer arithmetic / bit-op tests
+        }
+    };
+}
+
+// Asserts that float results `$a` and `$b` are equivalent: both NaN, same-signed infinities, or `==`.
+#[cfg(feature = "float")]
+macro_rules! assert_float_op_eq {
+    ($a:expr, $b:expr $(,)?) => {{ // a trailing comma is accepted
+        // See also:
+        // - https://github.com/rust-lang/unsafe-code-guidelines/issues/237.
+        // - https://github.com/rust-lang/portable-simd/issues/39.
+        let a = $a; // bind once so each operand expression is evaluated a single time
+        let b = $b;
+        if a.is_nan() && b.is_nan() // don't check sign of NaN: https://github.com/rust-lang/rust/issues/55131
+            || a.is_infinite() // `&&` binds tighter than `||`: this whole chain is the second alternative
+                && b.is_infinite()
+                && a.is_sign_positive() == b.is_sign_positive() // infinities must agree in sign
+                && a.is_sign_negative() == b.is_sign_negative()
+        {
+            // ok
+        } else {
+            assert_eq!(a, b); // every other pair must compare equal with `==`
+        }
+    }};
+}
+
+#[allow(clippy::disallowed_methods)] // set_var/remove_var is fine as we run tests with RUST_TEST_THREADS=1
+#[cfg_attr(not(portable_atomic_no_track_caller), track_caller)] // runs `f`, which must panic, and returns the panic message
+pub(crate) fn assert_panic<T: std::fmt::Debug>(f: impl FnOnce() -> T) -> std::string::String {
+    let backtrace = std::env::var_os("RUST_BACKTRACE"); // remember the current setting so it can be restored
+    let hook = std::panic::take_hook(); // remember the current panic hook so it can be restored
+    std::env::set_var("RUST_BACKTRACE", "0"); // Suppress backtrace
+    std::panic::set_hook(std::boxed::Box::new(|_| {})); // Suppress panic msg
+    let res = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)); // Err(payload) if `f` panicked
+    std::panic::set_hook(hook); // restore the hook before anything below can panic
+    match backtrace { // restore RUST_BACKTRACE to its previous state (set or unset)
+        Some(v) => std::env::set_var("RUST_BACKTRACE", v),
+        None => std::env::remove_var("RUST_BACKTRACE"),
+    }
+    let msg = res.unwrap_err(); // panics (printing the `T` value via Debug) if `f` returned normally
+    msg.downcast_ref::<std::string::String>() // the payload is expected to be a String ...
+        .cloned()
+        .unwrap_or_else(|| msg.downcast_ref::<&'static str>().copied().unwrap().into()) // ... or a &'static str; anything else panics here
+}
+pub(crate) fn rand_load_ordering() -> Ordering { // returns one entry of `test_helper::LOAD_ORDERINGS`, chosen at random
+    test_helper::LOAD_ORDERINGS[fastrand::usize(0..test_helper::LOAD_ORDERINGS.len())] // index drawn from 0..len
+}
+pub(crate) fn test_load_ordering<T: std::fmt::Debug>(f: impl Fn(Ordering) -> T) {
+    for &order in test_helper::LOAD_ORDERINGS.iter() {
+        f(order); // a panic for any listed load ordering fails the calling test
+    }
+    if skip_should_panic_test() {
+        return;
+    }
+    // `Release` and `AcqRel` must make `f` panic; the message is compared verbatim.
+    let rejected = [
+        (Ordering::Release, "there is no such thing as a release load"),
+        (Ordering::AcqRel, "there is no such thing as an acquire-release load"),
+    ];
+    for &(order, expected) in &rejected {
+        assert_eq!(assert_panic(|| f(order)), expected);
+    }
+}
+pub(crate) fn rand_store_ordering() -> Ordering { // returns one entry of `test_helper::STORE_ORDERINGS`, chosen at random
+    test_helper::STORE_ORDERINGS[fastrand::usize(0..test_helper::STORE_ORDERINGS.len())] // index drawn from 0..len
+}
+// Calls `f` with every entry of `test_helper::STORE_ORDERINGS`. Unless should-panic checks are
+// skipped, also verifies that `Acquire` and `AcqRel` make `f` panic with the expected messages.
+pub(crate) fn test_store_ordering<T: std::fmt::Debug>(f: impl Fn(Ordering) -> T) {
+    test_helper::STORE_ORDERINGS.iter().for_each(|&order| {
+        f(order);
+    });
+
+    if skip_should_panic_test() {
+        return;
+    }
+    let acquire_msg = assert_panic(|| f(Ordering::Acquire));
+    assert_eq!(acquire_msg, "there is no such thing as an acquire store");
+    let acq_rel_msg = assert_panic(|| f(Ordering::AcqRel));
+    assert_eq!(acq_rel_msg, "there is no such thing as an acquire-release store");
+}
+// Returns a randomly chosen `(success, failure)` pair from
+// `test_helper::COMPARE_EXCHANGE_ORDERINGS`.
+pub(crate) fn rand_compare_exchange_ordering() -> (Ordering, Ordering) {
+    let choices = &test_helper::COMPARE_EXCHANGE_ORDERINGS;
+    choices[fastrand::usize(0..choices.len())]
+}
+// Calls `f` with every `(success, failure)` pair of `test_helper::COMPARE_EXCHANGE_ORDERINGS`.
+// Unless should-panic checks are skipped, also verifies that, for every entry of
+// `test_helper::SWAP_ORDERINGS` used as the success ordering, the failure orderings `AcqRel`
+// and `Release` make `f` panic with one of the two accepted messages.
+pub(crate) fn test_compare_exchange_ordering<T: std::fmt::Debug>(
+    f: impl Fn(Ordering, Ordering) -> T,
+) {
+    test_helper::COMPARE_EXCHANGE_ORDERINGS.iter().for_each(|&(success, failure)| {
+        f(success, failure);
+    });
+
+    if skip_should_panic_test() {
+        return;
+    }
+    // (invalid failure ordering, "failure ordering" wording, "load" wording)
+    let invalid_failures = [
+        (
+            Ordering::AcqRel,
+            "there is no such thing as an acquire-release failure ordering",
+            "there is no such thing as an acquire-release load",
+        ),
+        (
+            Ordering::Release,
+            "there is no such thing as a release failure ordering",
+            "there is no such thing as a release load",
+        ),
+    ];
+    for &success in &test_helper::SWAP_ORDERINGS {
+        for &(failure, failure_wording, load_wording) in &invalid_failures {
+            let msg = assert_panic(|| f(success, failure));
+            assert!(msg == failure_wording || msg == load_wording, "{}", msg);
+        }
+    }
+}
+// Returns a randomly chosen entry of `test_helper::SWAP_ORDERINGS`.
+pub(crate) fn rand_swap_ordering() -> Ordering {
+    let choices = &test_helper::SWAP_ORDERINGS;
+    choices[fastrand::usize(0..choices.len())]
+}
+// Calls `f` with every entry of `test_helper::SWAP_ORDERINGS`; there is no should-panic check
+// here.
+pub(crate) fn test_swap_ordering<T: std::fmt::Debug>(f: impl Fn(Ordering) -> T) {
+    test_helper::SWAP_ORDERINGS.iter().for_each(|&order| {
+        f(order);
+    });
+}
+// for stress test generated by __test_atomic_* macros
+//
+// Returns `(iterations, threads)` and prints the chosen thread count to stderr.
+pub(crate) fn stress_test_config() -> (usize, usize) {
+    // Miri takes precedence over the debug/release distinction.
+    let iterations = match (cfg!(miri), cfg!(debug_assertions)) {
+        (true, _) => 50,
+        (false, true) => 5_000,
+        (false, false) => 25_000,
+    };
+    // Only builds without debug assertions randomize the thread count.
+    let threads = if cfg!(not(debug_assertions)) { fastrand::usize(2..=8) } else { 2 };
+    std::eprintln!("threads={}", threads);
+    (iterations, threads)
+}
+// Returns true when the should-panic parts of the ordering tests must be skipped.
+fn skip_should_panic_test() -> bool {
+    // Miri's panic handling is slow
+    if is_panic_abort() || cfg!(miri) {
+        return true;
+    }
+    // MSAN false positive: https://gist.github.com/taiki-e/dd6269a8ffec46284fdc764a4849f884
+    let fat_lto = option_env!("CARGO_PROFILE_RELEASE_LTO") == Some("fat");
+    fat_lto && build_context::SANITIZE.contains("memory")
+}
+
+// For -C panic=abort -Z panic_abort_tests: https://github.com/rust-lang/rust/issues/67650
+//
+// Returns true when `build_context::PANIC` contains "abort"; callers use this to avoid
+// running code that relies on catching an unwinding panic.
+fn is_panic_abort() -> bool {
+    build_context::PANIC.contains("abort")
+}
+
+// Wrapper that gives the contained value C layout with an alignment of 16 bytes.
+#[repr(C, align(16))]
+pub(crate) struct Align16<T>(pub(crate) T);
+
+// Test the cases that should not fail if the memory ordering is implemented correctly.
+// This is still not exhaustive and only tests a few cases.
+// This currently only supports 32-bit or more integers.
+//
+// Arms:
+// - `should_pass, ...`: emits a `#[test]` named `load_<load>_<write>_<store>` that runs the
+//   test body directly.
+// - `can_panic, ...`: emits the same test, but runs the body through a `can_panic` function
+//   that must be in scope at the expansion site; "a=" is the prefix of the body's assertion
+//   message.
+// - `$atomic_type, ...`: the test body itself.
+macro_rules! __stress_test_acquire_release {
+    (should_pass, $int_type:ident, $write:ident, $load_order:ident, $store_order:ident) => {
+        paste::paste! {
+            #[test]
+            fn [<load_ $load_order:lower _ $write _ $store_order:lower>]() {
+                __stress_test_acquire_release!([<Atomic $int_type:camel>],
+                    $int_type, $write, $load_order, $store_order);
+            }
+        }
+    };
+    (can_panic, $int_type:ident, $write:ident, $load_order:ident, $store_order:ident) => {
+        paste::paste! {
+            // Currently, to make this test work well enough outside of Miri, tens of thousands
+            // of iterations are needed, but this test is slow in some environments.
+            // So, ignore on non-Miri environments by default. See also catch_unwind_on_weak_memory_arch.
+            #[test]
+            #[cfg_attr(not(miri), ignore)]
+            fn [<load_ $load_order:lower _ $write _ $store_order:lower>]() {
+                can_panic("a=", || __stress_test_acquire_release!([<Atomic $int_type:camel>],
+                    $int_type, $write, $load_order, $store_order));
+            }
+        }
+    };
+    ($atomic_type:ident, $int_type:ident, $write:ident, $load_order:ident, $store_order:ident) => {{
+        use super::*;
+        use crossbeam_utils::thread;
+        use std::{
+            convert::TryFrom,
+            sync::atomic::{AtomicUsize, Ordering},
+        };
+        let mut n: usize = if cfg!(miri) { 10 } else { 50_000 };
+        // This test is relatively fast because it spawns only one thread, but
+        // the iterations are limited to a maximum value of integers.
+        if $int_type::try_from(n).is_err() {
+            n = $int_type::MAX as usize;
+        }
+        let a = &$atomic_type::new(0);
+        let b = &AtomicUsize::new(0);
+        thread::scope(|s| {
+            // Writer: publish `i` to `b` (Relaxed) first, then to `a` with the ordering under test.
+            s.spawn(|_| {
+                for i in 0..n {
+                    b.store(i, Ordering::Relaxed);
+                    a.$write(i as _, Ordering::$store_order);
+                }
+            });
+            // Reader: read `a` with the ordering under test, then `b`; the value seen in `a`
+            // must never be ahead of the value seen in `b`.
+            loop {
+                let a = a.load(Ordering::$load_order);
+                let b = b.load(Ordering::Relaxed);
+                assert!(a as usize <= b, "a={},b={}", a, b);
+                // The writer's last value is `n - 1`; stop once it has been observed.
+                if a as usize == n - 1 {
+                    break;
+                }
+            }
+        })
+        .unwrap();
+    }};
+}
+// Stress test for sequential consistency: two threads each write 1 to their own atomic and
+// then load the other thread's atomic; the body asserts that at most one of them observed 0.
+//
+// Arms:
+// - `should_pass, ...`: emits a `#[test]` named `load_<load>_<write>_<store>` that runs the
+//   test body directly.
+// - `can_panic, ...`: emits the same test, but runs the body through a `can_panic` function
+//   that must be in scope at the expansion site; "c=2" matches the assertion message produced
+//   when both threads observed 0.
+// - `$atomic_type, ...`: the test body itself.
+macro_rules! __stress_test_seqcst {
+    (should_pass, $int_type:ident, $write:ident, $load_order:ident, $store_order:ident) => {
+        paste::paste! {
+            // Currently, to make this test work well enough outside of Miri, tens of thousands
+            // of iterations are needed, but this test is very slow in some environments because
+            // it creates two threads for each iteration.
+            // So, ignore on QEMU by default.
+            #[test]
+            #[cfg_attr(qemu, ignore)]
+            fn [<load_ $load_order:lower _ $write _ $store_order:lower>]() {
+                __stress_test_seqcst!([<Atomic $int_type:camel>],
+                    $write, $load_order, $store_order);
+            }
+        }
+    };
+    (can_panic, $int_type:ident, $write:ident, $load_order:ident, $store_order:ident) => {
+        paste::paste! {
+            // Currently, to make this test work well enough outside of Miri, tens of thousands
+            // of iterations are needed, but this test is very slow in some environments because
+            // it creates two threads for each iteration.
+            // So, ignore on non-Miri environments by default. See also catch_unwind_on_non_seqcst_arch.
+            #[test]
+            #[cfg_attr(not(miri), ignore)]
+            fn [<load_ $load_order:lower _ $write _ $store_order:lower>]() {
+                can_panic("c=2", || __stress_test_seqcst!([<Atomic $int_type:camel>],
+                    $write, $load_order, $store_order));
+            }
+        }
+    };
+    ($atomic_type:ident, $write:ident, $load_order:ident, $store_order:ident) => {{
+        use super::*;
+        use crossbeam_utils::thread;
+        use std::sync::atomic::{AtomicUsize, Ordering};
+        // Iteration count, reduced for slow environments.
+        let n: usize = if cfg!(miri) {
+            8
+        } else if cfg!(valgrind)
+            || build_context::SANITIZE.contains("address")
+            || build_context::SANITIZE.contains("memory")
+        {
+            50
+        } else if option_env!("GITHUB_ACTIONS").is_some() && cfg!(not(target_os = "linux")) {
+            // GitHub Actions' macOS and Windows runners are slow.
+            5_000
+        } else {
+            50_000
+        };
+        let a = &$atomic_type::new(0);
+        let b = &$atomic_type::new(0);
+        // Number of threads that observed 0 in the other thread's atomic.
+        let c = &AtomicUsize::new(0);
+        // Start gate the spawned threads spin on.
+        // NOTE(review): `ready` is set to 1 in the first iteration and never reset to 0, so the
+        // spin-wait only delays the threads of the first iteration - confirm this is intended.
+        let ready = &AtomicUsize::new(0);
+        thread::scope(|s| {
+            for n in 0..n {
+                // Reset the shared state; both threads of the previous iteration were joined.
+                a.store(0, Ordering::Relaxed);
+                b.store(0, Ordering::Relaxed);
+                c.store(0, Ordering::Relaxed);
+                let h_a = s.spawn(|_| {
+                    while ready.load(Ordering::Relaxed) == 0 {}
+                    a.$write(1, Ordering::$store_order);
+                    if b.load(Ordering::$load_order) == 0 {
+                        c.fetch_add(1, Ordering::Relaxed);
+                    }
+                });
+                let h_b = s.spawn(|_| {
+                    while ready.load(Ordering::Relaxed) == 0 {}
+                    b.$write(1, Ordering::$store_order);
+                    if a.load(Ordering::$load_order) == 0 {
+                        c.fetch_add(1, Ordering::Relaxed);
+                    }
+                });
+                ready.store(1, Ordering::Relaxed);
+                h_a.join().unwrap();
+                h_b.join().unwrap();
+                let c = c.load(Ordering::Relaxed);
+                // `c == 2` would mean both threads missed the other thread's write.
+                assert!(c == 0 || c == 1, "c={},n={}", c, n);
+            }
+        })
+        .unwrap();
+    }};
+}
+// Catches unwinding panic on architectures with weak memory models.
+//
+// On targets with a strong memory model `f` is called directly, so any panic propagates.
+// Otherwise `f` runs under `catch_unwind` and a panic is tolerated as long as its message
+// contains `pat`. With panic=abort on such targets `f` is not called at all.
+#[allow(dead_code, clippy::used_underscore_binding)]
+pub(crate) fn catch_unwind_on_weak_memory_arch(pat: &str, f: impl Fn()) {
+    // With the x86 TSO, RISC-V TSO (optional, not default), SPARC TSO (optional, default),
+    // and IBM-370 memory models there should never be a panic here.
+    // Miri emulates weak memory models regardless of target architectures.
+    let strong_memory_model = cfg!(all(
+        any(
+            target_arch = "x86",
+            target_arch = "x86_64",
+            target_arch = "s390x",
+            target_arch = "sparc",
+            target_arch = "sparc64",
+        ),
+        not(miri),
+    ));
+    if strong_memory_model {
+        f();
+        return;
+    }
+    if is_panic_abort() {
+        return;
+    }
+    // This could be Err on architectures with weak memory models.
+    // However, this does not necessarily mean that it will always panic,
+    // and implementing it with stronger orderings is also okay.
+    if let Err(payload) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)) {
+        let msg: std::string::String = match payload.downcast_ref::<std::string::String>() {
+            Some(owned) => owned.clone(),
+            None => (*payload.downcast_ref::<&'static str>().unwrap()).into(),
+        };
+        assert!(msg.contains(pat), "{}", msg);
+    }
+}
+// Catches unwinding panic on architectures with non-sequentially consistent memory models.
+//
+// Runs `f` under `catch_unwind`; a panic is tolerated as long as its message contains `pat`.
+// With panic=abort `f` is not called at all.
+#[allow(dead_code, clippy::used_underscore_binding)]
+pub(crate) fn catch_unwind_on_non_seqcst_arch(pat: &str, f: impl Fn()) {
+    if is_panic_abort() {
+        return;
+    }
+    // This could be Err on architectures with non-sequentially consistent memory models.
+    // However, this does not necessarily mean that it will always panic,
+    // and implementing it with stronger orderings is also okay.
+    if let Err(payload) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)) {
+        let msg: std::string::String = match payload.downcast_ref::<std::string::String>() {
+            Some(owned) => owned.clone(),
+            None => (*payload.downcast_ref::<&'static str>().unwrap()).into(),
+        };
+        assert!(msg.contains(pat), "{}", msg);
+    }
+}
+// Generates the load/store stress-test modules for `$int_type`: one module built from
+// `__stress_test_acquire_release!` and one built from `__stress_test_seqcst!`, each covering
+// every (load ordering, store ordering) combination listed below.
+//
+// In the acquire/release module a combination is `should_pass` only when the load is
+// Acquire or SeqCst and the store is Release or SeqCst; in the SeqCst module only
+// SeqCst/SeqCst is `should_pass`. Every other combination is `can_panic`.
+macro_rules! stress_test_load_store {
+    ($int_type:ident) => {
+        // debug mode is slow.
+        #[cfg(any(not(debug_assertions), miri))]
+        paste::paste! {
+            #[allow(
+                clippy::alloc_instead_of_core,
+                clippy::std_instead_of_alloc,
+                clippy::std_instead_of_core,
+                clippy::undocumented_unsafe_blocks
+            )]
+            mod [<stress_acquire_release_load_store_ $int_type>] {
+                // `can_panic` is the name the `can_panic` arm of the test macro calls.
+                use crate::tests::helper::catch_unwind_on_weak_memory_arch as can_panic;
+                __stress_test_acquire_release!(can_panic, $int_type, store, Relaxed, Relaxed);
+                __stress_test_acquire_release!(can_panic, $int_type, store, Relaxed, Release);
+                __stress_test_acquire_release!(can_panic, $int_type, store, Relaxed, SeqCst);
+                __stress_test_acquire_release!(can_panic, $int_type, store, Acquire, Relaxed);
+                __stress_test_acquire_release!(should_pass, $int_type, store, Acquire, Release);
+                __stress_test_acquire_release!(should_pass, $int_type, store, Acquire, SeqCst);
+                __stress_test_acquire_release!(can_panic, $int_type, store, SeqCst, Relaxed);
+                __stress_test_acquire_release!(should_pass, $int_type, store, SeqCst, Release);
+                __stress_test_acquire_release!(should_pass, $int_type, store, SeqCst, SeqCst);
+            }
+            #[allow(
+                clippy::alloc_instead_of_core,
+                clippy::std_instead_of_alloc,
+                clippy::std_instead_of_core,
+                clippy::undocumented_unsafe_blocks
+            )]
+            mod [<stress_seqcst_load_store_ $int_type>] {
+                // `can_panic` is the name the `can_panic` arm of the test macro calls.
+                use crate::tests::helper::catch_unwind_on_non_seqcst_arch as can_panic;
+                __stress_test_seqcst!(can_panic, $int_type, store, Relaxed, Relaxed);
+                __stress_test_seqcst!(can_panic, $int_type, store, Relaxed, Release);
+                __stress_test_seqcst!(can_panic, $int_type, store, Relaxed, SeqCst);
+                __stress_test_seqcst!(can_panic, $int_type, store, Acquire, Relaxed);
+                __stress_test_seqcst!(can_panic, $int_type, store, Acquire, Release);
+                __stress_test_seqcst!(can_panic, $int_type, store, Acquire, SeqCst);
+                __stress_test_seqcst!(can_panic, $int_type, store, SeqCst, Relaxed);
+                __stress_test_seqcst!(can_panic, $int_type, store, SeqCst, Release);
+                __stress_test_seqcst!(should_pass, $int_type, store, SeqCst, SeqCst);
+            }
+        }
+    };
+}
+// Generates all stress-test modules for `$int_type`: the load/store modules from
+// `stress_test_load_store!` plus the load/swap modules defined here, which cover every
+// (load ordering, swap ordering) combination listed below.
+//
+// In the acquire/release module a combination is `should_pass` only when the load is
+// Acquire or SeqCst and the swap is Release, AcqRel, or SeqCst; in the SeqCst module only
+// SeqCst/SeqCst is `should_pass`. Every other combination is `can_panic`.
+macro_rules! stress_test {
+    ($int_type:ident) => {
+        stress_test_load_store!($int_type);
+        // debug mode is slow.
+        #[cfg(any(not(debug_assertions), miri))]
+        paste::paste! {
+            #[allow(
+                clippy::alloc_instead_of_core,
+                clippy::std_instead_of_alloc,
+                clippy::std_instead_of_core,
+                clippy::undocumented_unsafe_blocks
+            )]
+            mod [<stress_acquire_release_load_swap_ $int_type>] {
+                // `can_panic` is the name the `can_panic` arm of the test macro calls.
+                use crate::tests::helper::catch_unwind_on_weak_memory_arch as can_panic;
+                __stress_test_acquire_release!(can_panic, $int_type, swap, Relaxed, Relaxed);
+                __stress_test_acquire_release!(can_panic, $int_type, swap, Relaxed, Acquire);
+                __stress_test_acquire_release!(can_panic, $int_type, swap, Relaxed, Release);
+                __stress_test_acquire_release!(can_panic, $int_type, swap, Relaxed, AcqRel);
+                __stress_test_acquire_release!(can_panic, $int_type, swap, Relaxed, SeqCst);
+                __stress_test_acquire_release!(can_panic, $int_type, swap, Acquire, Relaxed);
+                __stress_test_acquire_release!(can_panic, $int_type, swap, Acquire, Acquire);
+                __stress_test_acquire_release!(should_pass, $int_type, swap, Acquire, Release);
+                __stress_test_acquire_release!(should_pass, $int_type, swap, Acquire, AcqRel);
+                __stress_test_acquire_release!(should_pass, $int_type, swap, Acquire, SeqCst);
+                __stress_test_acquire_release!(can_panic, $int_type, swap, SeqCst, Relaxed);
+                __stress_test_acquire_release!(can_panic, $int_type, swap, SeqCst, Acquire);
+                __stress_test_acquire_release!(should_pass, $int_type, swap, SeqCst, Release);
+                __stress_test_acquire_release!(should_pass, $int_type, swap, SeqCst, AcqRel);
+                __stress_test_acquire_release!(should_pass, $int_type, swap, SeqCst, SeqCst);
+            }
+            #[allow(
+                clippy::alloc_instead_of_core,
+                clippy::std_instead_of_alloc,
+                clippy::std_instead_of_core,
+                clippy::undocumented_unsafe_blocks
+            )]
+            mod [<stress_seqcst_load_swap_ $int_type>] {
+                // `can_panic` is the name the `can_panic` arm of the test macro calls.
+                use crate::tests::helper::catch_unwind_on_non_seqcst_arch as can_panic;
+                __stress_test_seqcst!(can_panic, $int_type, swap, Relaxed, Relaxed);
+                __stress_test_seqcst!(can_panic, $int_type, swap, Relaxed, Acquire);
+                __stress_test_seqcst!(can_panic, $int_type, swap, Relaxed, Release);
+                __stress_test_seqcst!(can_panic, $int_type, swap, Relaxed, AcqRel);
+                __stress_test_seqcst!(can_panic, $int_type, swap, Relaxed, SeqCst);
+                __stress_test_seqcst!(can_panic, $int_type, swap, Acquire, Relaxed);
+                __stress_test_seqcst!(can_panic, $int_type, swap, Acquire, Acquire);
+                __stress_test_seqcst!(can_panic, $int_type, swap, Acquire, Release);
+                __stress_test_seqcst!(can_panic, $int_type, swap, Acquire, AcqRel);
+                __stress_test_seqcst!(can_panic, $int_type, swap, Acquire, SeqCst);
+                __stress_test_seqcst!(can_panic, $int_type, swap, SeqCst, Relaxed);
+                __stress_test_seqcst!(can_panic, $int_type, swap, SeqCst, Acquire);
+                __stress_test_seqcst!(can_panic, $int_type, swap, SeqCst, Release);
+                __stress_test_seqcst!(can_panic, $int_type, swap, SeqCst, AcqRel);
+                __stress_test_seqcst!(should_pass, $int_type, swap, SeqCst, SeqCst);
+            }
+        }
+    };
+}
diff --git a/vendor/portable-atomic/src/tests/mod.rs b/vendor/portable-atomic/src/tests/mod.rs
new file mode 100644
index 0000000..63cdbbd
--- /dev/null
+++ b/vendor/portable-atomic/src/tests/mod.rs
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+#![allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+
+#[macro_use]
+pub(crate) mod helper;
+
+#[allow(dead_code)]
+#[path = "../../version.rs"]
+mod version;
+
+use super::*;
+
+// Instantiate the public-API test suites for the bool, pointer, and every integer atomic
+// type; the float suites are only built with the "float" feature.
+// NOTE(review): the `test_atomic_*_pub!` macros are not defined in this file - presumably they
+// come from the `helper` module imported with `#[macro_use]`; confirm.
+test_atomic_bool_pub!();
+test_atomic_ptr_pub!();
+
+test_atomic_int_pub!(isize);
+test_atomic_int_pub!(usize);
+test_atomic_int_pub!(i8);
+test_atomic_int_pub!(u8);
+test_atomic_int_pub!(i16);
+test_atomic_int_pub!(u16);
+test_atomic_int_pub!(i32);
+test_atomic_int_pub!(u32);
+test_atomic_int_pub!(i64);
+test_atomic_int_pub!(u64);
+test_atomic_int_pub!(i128);
+test_atomic_int_pub!(u128);
+
+#[cfg(feature = "float")]
+test_atomic_float_pub!(f32);
+#[cfg(feature = "float")]
+test_atomic_float_pub!(f64);
+
+// Compile-time FFI-safety check: with `improper_ctypes` denied, these foreign function
+// declarations fail to compile if any listed atomic type is not considered FFI-safe.
+// They are declarations only; nothing in this block defines or calls them.
+#[deny(improper_ctypes)]
+extern "C" {
+    fn _atomic_bool_ffi_safety(_: AtomicBool);
+    fn _atomic_ptr_ffi_safety(_: AtomicPtr<u8>);
+    fn _atomic_isize_ffi_safety(_: AtomicIsize);
+    fn _atomic_usize_ffi_safety(_: AtomicUsize);
+    fn _atomic_i8_ffi_safety(_: AtomicI8);
+    fn _atomic_u8_ffi_safety(_: AtomicU8);
+    fn _atomic_i16_ffi_safety(_: AtomicI16);
+    fn _atomic_u16_ffi_safety(_: AtomicU16);
+    fn _atomic_i32_ffi_safety(_: AtomicI32);
+    fn _atomic_u32_ffi_safety(_: AtomicU32);
+    fn _atomic_i64_ffi_safety(_: AtomicI64);
+    fn _atomic_u64_ffi_safety(_: AtomicU64);
+    // TODO: 128-bit integers are not FFI safe
+    // https://github.com/rust-lang/unsafe-code-guidelines/issues/119
+    // https://github.com/rust-lang/rust/issues/54341
+    // fn _atomic_i128_ffi_safety(_: AtomicI128);
+    // fn _atomic_u128_ffi_safety(_: AtomicU128);
+    #[cfg(feature = "float")]
+    fn _atomic_f32_ffi_safety(_: AtomicF32);
+    #[cfg(feature = "float")]
+    fn _atomic_f64_ffi_safety(_: AtomicF64);
+}
+
+// Checks `is_always_lock_free()` (static answer) and `is_lock_free()` (run-time answer) of the
+// integer atomics against what the current target/cfg combination is expected to provide.
+#[test]
+fn test_is_lock_free() {
+    // 8-, 16-, and 32-bit atomics are expected to be lock-free unconditionally.
+    assert!(AtomicI8::is_always_lock_free());
+    assert!(AtomicI8::is_lock_free());
+    assert!(AtomicU8::is_always_lock_free());
+    assert!(AtomicU8::is_lock_free());
+    assert!(AtomicI16::is_always_lock_free());
+    assert!(AtomicI16::is_lock_free());
+    assert!(AtomicU16::is_always_lock_free());
+    assert!(AtomicU16::is_lock_free());
+    assert!(AtomicI32::is_always_lock_free());
+    assert!(AtomicI32::is_lock_free());
+    assert!(AtomicU32::is_always_lock_free());
+    assert!(AtomicU32::is_lock_free());
+    // 64-bit atomics; only checked when `cfg(target_has_atomic)` is usable.
+    #[cfg(not(portable_atomic_no_cfg_target_has_atomic))]
+    {
+        if cfg!(all(
+            feature = "fallback",
+            target_arch = "arm",
+            not(any(miri, portable_atomic_sanitize_thread)),
+            not(portable_atomic_no_asm),
+            any(target_os = "linux", target_os = "android"),
+            not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+            not(portable_atomic_no_outline_atomics),
+            not(target_has_atomic = "64"),
+            not(portable_atomic_test_outline_atomics_detect_false),
+        )) {
+            // Arm Linux/Android without the v6 target feature and without native 64-bit
+            // atomics, with outline atomics enabled: lock-free at run time, not statically.
+            assert!(!AtomicI64::is_always_lock_free());
+            assert!(AtomicI64::is_lock_free());
+            assert!(!AtomicU64::is_always_lock_free());
+            assert!(AtomicU64::is_lock_free());
+        } else if cfg!(target_has_atomic = "64") {
+            // Native 64-bit atomics.
+            assert!(AtomicI64::is_always_lock_free());
+            assert!(AtomicI64::is_lock_free());
+            assert!(AtomicU64::is_always_lock_free());
+            assert!(AtomicU64::is_lock_free());
+        } else {
+            // No native 64-bit atomics: never lock-free.
+            assert!(!AtomicI64::is_always_lock_free());
+            assert!(!AtomicI64::is_lock_free());
+            assert!(!AtomicU64::is_always_lock_free());
+            assert!(!AtomicU64::is_lock_free());
+        }
+    }
+    // 128-bit atomics.
+    if cfg!(portable_atomic_no_asm) && cfg!(not(portable_atomic_unstable_asm)) {
+        // No inline assembly available at all: never lock-free.
+        assert!(!AtomicI128::is_always_lock_free());
+        assert!(!AtomicI128::is_lock_free());
+        assert!(!AtomicU128::is_always_lock_free());
+        assert!(!AtomicU128::is_lock_free());
+    } else if cfg!(any(
+        target_arch = "aarch64",
+        all(
+            target_arch = "x86_64",
+            any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+        ),
+        all(
+            target_arch = "powerpc64",
+            portable_atomic_unstable_asm_experimental_arch,
+            any(
+                target_feature = "quadword-atomics",
+                portable_atomic_target_feature = "quadword-atomics",
+            ),
+        ),
+        all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch),
+    )) {
+        // Targets listed above are expected to be lock-free statically.
+        assert!(AtomicI128::is_always_lock_free());
+        assert!(AtomicI128::is_lock_free());
+        assert!(AtomicU128::is_always_lock_free());
+        assert!(AtomicU128::is_lock_free());
+    } else {
+        // Not statically lock-free; the run-time answer depends on the architecture.
+        assert!(!AtomicI128::is_always_lock_free());
+        assert!(!AtomicU128::is_always_lock_free());
+        #[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64")))]
+        {
+            assert!(!AtomicI128::is_lock_free());
+            assert!(!AtomicU128::is_lock_free());
+        }
+        #[cfg(target_arch = "x86_64")]
+        {
+            // On x86_64 the run-time answer must match cmpxchg16b detection, provided the
+            // cfg combination below allows run-time detection to be used.
+            let has_cmpxchg16b = cfg!(all(
+                feature = "fallback",
+                not(portable_atomic_no_cmpxchg16b_target_feature),
+                not(portable_atomic_no_outline_atomics),
+                not(any(target_env = "sgx", miri)),
+                not(portable_atomic_test_outline_atomics_detect_false),
+            )) && std::is_x86_feature_detected!("cmpxchg16b");
+            assert_eq!(AtomicI128::is_lock_free(), has_cmpxchg16b);
+            assert_eq!(AtomicU128::is_lock_free(), has_cmpxchg16b);
+        }
+        #[cfg(target_arch = "powerpc64")]
+        {
+            // TODO(powerpc64): is_powerpc_feature_detected is unstable
+        }
+    }
+}
+
+// test version parsing code used in the build script.
+//
+// Each case feeds a captured `rustc -vV`-style output to `Version::parse` and checks the
+// parsed minor version, nightly flag, LLVM major version, and (for nightly/dev builds) the
+// commit date.
+#[test]
+fn test_rustc_version() {
+    use version::Version;
+
+    // rustc 1.34 (rustup)
+    let v = Version::parse(
+        "rustc 1.34.2 (6c2484dc3 2019-05-13)
+binary: rustc
+commit-hash: 6c2484dc3c532c052f159264e970278d8b77cdc9
+commit-date: 2019-05-13
+host: x86_64-apple-darwin
+release: 1.34.2
+LLVM version: 8.0",
+    )
+    .unwrap();
+    assert_eq!(v, Version::stable(34, 8));
+
+    // rustc 1.67 (rustup)
+    let v = Version::parse(
+        "rustc 1.67.0 (fc594f156 2023-01-24)
+binary: rustc
+commit-hash: fc594f15669680fa70d255faec3ca3fb507c3405
+commit-date: 2023-01-24
+host: aarch64-apple-darwin
+release: 1.67.0
+LLVM version: 15.0.6",
+    )
+    .unwrap();
+    assert_eq!(v, Version::stable(67, 15));
+
+    // rustc 1.68-beta (rustup)
+    let v = Version::parse(
+        "rustc 1.68.0-beta.2 (10b73bf73 2023-02-01)
+binary: rustc
+commit-hash: 10b73bf73a6b770cd92ad8ff538173bc3298411c
+commit-date: 2023-02-01
+host: aarch64-apple-darwin
+release: 1.68.0-beta.2
+LLVM version: 15.0.6",
+    )
+    .unwrap();
+    // We do not distinguish between stable and beta because we are only
+    // interested in whether unstable features are potentially available.
+    assert_eq!(v, Version::stable(68, 15));
+
+    // rustc nightly-2019-01-27 (rustup)
+    let v = Version::parse(
+        "rustc 1.33.0-nightly (20c2cba61 2019-01-26)
+binary: rustc
+commit-hash: 20c2cba61dc83e612d25ed496025171caa3db30f
+commit-date: 2019-01-26
+host: x86_64-apple-darwin
+release: 1.33.0-nightly
+LLVM version: 8.0",
+    )
+    .unwrap();
+    assert_eq!(v.minor, 33);
+    assert!(v.nightly);
+    assert_eq!(v.llvm, 8);
+    assert_eq!(v.commit_date().year, 2019);
+    assert_eq!(v.commit_date().month, 1);
+    assert_eq!(v.commit_date().day, 26);
+
+    // rustc 1.69-nightly (rustup)
+    let v = Version::parse(
+        "rustc 1.69.0-nightly (bd39bbb4b 2023-02-07)
+binary: rustc
+commit-hash: bd39bbb4bb92df439bf6d85470e296cc6a47ffbd
+commit-date: 2023-02-07
+host: aarch64-apple-darwin
+release: 1.69.0-nightly
+LLVM version: 15.0.7",
+    )
+    .unwrap();
+    assert_eq!(v.minor, 69);
+    assert!(v.nightly);
+    assert_eq!(v.llvm, 15);
+    assert_eq!(v.commit_date().year, 2023);
+    assert_eq!(v.commit_date().month, 2);
+    assert_eq!(v.commit_date().day, 7);
+
+    // clippy-driver 1.69-nightly (rustup)
+    let v = Version::parse(
+        "rustc 1.69.0-nightly (bd39bbb4b 2023-02-07)
+binary: rustc
+commit-hash: bd39bbb4bb92df439bf6d85470e296cc6a47ffbd
+commit-date: 2023-02-07
+host: aarch64-apple-darwin
+release: 1.69.0-nightly
+LLVM version: 15.0.7",
+    )
+    .unwrap();
+    assert_eq!(v.minor, 69);
+    assert!(v.nightly);
+    assert_eq!(v.llvm, 15);
+    assert_eq!(v.commit_date().year, 2023);
+    assert_eq!(v.commit_date().month, 2);
+    assert_eq!(v.commit_date().day, 7);
+
+    // rustc 1.69-dev (from source: ./x.py build)
+    // A `-dev` build is expected to parse as nightly, with an all-zero commit date because
+    // the commit date is reported as "unknown".
+    let v = Version::parse(
+        "rustc 1.69.0-dev
+binary: rustc
+commit-hash: unknown
+commit-date: unknown
+host: aarch64-unknown-linux-gnu
+release: 1.69.0-dev
+LLVM version: 16.0.0",
+    )
+    .unwrap();
+    assert_eq!(v.minor, 69);
+    assert!(v.nightly);
+    assert_eq!(v.llvm, 16);
+    assert_eq!(v.commit_date().year, 0);
+    assert_eq!(v.commit_date().month, 0);
+    assert_eq!(v.commit_date().day, 0);
+
+    // rustc 1.48 (debian 11: apt-get install cargo)
+    let v = Version::parse(
+        "rustc 1.48.0
+binary: rustc
+commit-hash: unknown
+commit-date: unknown
+host: aarch64-unknown-linux-gnu
+release: 1.48.0
+LLVM version: 11.0",
+    )
+    .unwrap();
+    assert_eq!(v, Version::stable(48, 11));
+
+    // rustc 1.67 (fedora: dnf install cargo)
+    let v = Version::parse(
+        "rustc 1.67.0 (fc594f156 2023-01-24) (Fedora 1.67.0-2.fc37)
+binary: rustc
+commit-hash: fc594f15669680fa70d255faec3ca3fb507c3405
+commit-date: 2023-01-24
+host: aarch64-unknown-linux-gnu
+release: 1.67.0
+LLVM version: 15.0.7",
+    )
+    .unwrap();
+    assert_eq!(v, Version::stable(67, 15));
+
+    // rustc 1.64 (alpine: apk add cargo)
+    let v = Version::parse(
+        "rustc 1.64.0
+binary: rustc
+commit-hash: unknown
+commit-date: unknown
+host: aarch64-alpine-linux-musl
+release: 1.64.0
+LLVM version: 15.0.3",
+    )
+    .unwrap();
+    assert_eq!(v, Version::stable(64, 15));
+}
+
+// Checks the serde token representation of the atomic types (only built with the "serde"
+// feature): each atomic holding a given value must correspond to the single token for that
+// value.
+#[cfg(feature = "serde")]
+#[test]
+fn test_serde() {
+    use test_helper::serde::{assert_tokens, DebugPartialEq, Token};
+
+    // t!(AtomicType, value_type, TokenVariant): checks the MAX and MIN values of `value_type`
+    // and reports progress on stderr.
+    macro_rules! t {
+        ($atomic_type:ty, $value_type:ident, $token_type:ident) => {
+            std::eprint!("test_serde {} ... ", stringify!($value_type));
+            assert_tokens(&DebugPartialEq(<$atomic_type>::new($value_type::MAX)), &[
+                Token::$token_type($value_type::MAX as _),
+            ]);
+            assert_tokens(&DebugPartialEq(<$atomic_type>::new($value_type::MIN)), &[
+                Token::$token_type($value_type::MIN as _),
+            ]);
+            std::eprintln!("ok");
+        };
+    }
+
+    assert_tokens(&DebugPartialEq(AtomicBool::new(true)), &[Token::Bool(true)]);
+    assert_tokens(&DebugPartialEq(AtomicBool::new(false)), &[Token::Bool(false)]);
+    // Pointer-sized integers are checked against the 64-bit tokens.
+    t!(AtomicIsize, isize, I64);
+    t!(AtomicUsize, usize, U64);
+    t!(AtomicI8, i8, I8);
+    t!(AtomicU8, u8, U8);
+    t!(AtomicI16, i16, I16);
+    t!(AtomicU16, u16, U16);
+    t!(AtomicI32, i32, I32);
+    t!(AtomicU32, u32, U32);
+    t!(AtomicI64, i64, I64);
+    t!(AtomicU64, u64, U64);
+    // TODO: serde_test doesn't support Token::{I128,U128}: https://github.com/serde-rs/test/pull/6
+    // t!(AtomicI128, i128, I128);
+    // t!(AtomicU128, u128, U128);
+    #[cfg(feature = "float")]
+    t!(AtomicF32, f32, F32);
+    #[cfg(feature = "float")]
+    #[cfg(not(target_arch = "mips"))] // LLVM 17 (nightly-2023-08-09) bug: assertion failed at core/src/num/diy_float.rs:78:9
+    t!(AtomicF64, f64, F64);
+}
diff --git a/vendor/portable-atomic/src/utils.rs b/vendor/portable-atomic/src/utils.rs
new file mode 100644
index 0000000..f73e794
--- /dev/null
+++ b/vendor/portable-atomic/src/utils.rs
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+#![cfg_attr(not(all(test, feature = "float")), allow(dead_code, unused_macros))]
+
+#[macro_use]
+#[path = "gen/utils.rs"]
+mod gen;
+
+use core::sync::atomic::Ordering;
+
+macro_rules! static_assert { // compile-time assertion; `$cond` is used in an array length, so it must be a constant `bool`
+    ($cond:expr $(,)?) => {{
+        let [] = [(); true as usize - $crate::utils::_assert_is_bool($cond) as usize]; // length is 1 - cond: 0 matches `[]`, 1 fails to compile
+    }};
+}
+pub(crate) const fn _assert_is_bool(v: bool) -> bool { // identity on `bool`; `static_assert!` routes its condition through this so a non-`bool` is a type error
+    v
+}
+
+macro_rules! static_assert_layout { // compile-time layout checks for `$atomic_type` against `$value_type`
+    ($atomic_type:ty, $value_type:ty) => {
+        static_assert!( // the atomic type's alignment must equal its own size
+            core::mem::align_of::<$atomic_type>() == core::mem::size_of::<$atomic_type>()
+        );
+        static_assert!(core::mem::size_of::<$atomic_type>() == core::mem::size_of::<$value_type>()); // and its size must equal the value type's size
+    };
+}
+
+// Attaches the expression `$doc` as the doc attribute of the following tokens; writing #[doc = concat!(...)] directly requires Rust 1.54.
+macro_rules! doc_comment {
+    ($doc:expr, $($tt:tt)*) => {
+        #[doc = $doc]
+        $($tt)*
+    };
+}
+
+// Calls through a function pointer cached in a static; the first call runs $detect_body to choose it. Adapted from https://github.com/BurntSushi/memchr/blob/2.4.1/src/memchr/x86/mod.rs#L9-L71.
+/// # Safety
+///
+/// - the caller must uphold the safety contract for the function returned by $detect_body.
+/// - the memory pointed to by the function pointer returned by $detect_body must be visible from any thread.
+///
+/// The second requirement is always met if the function pointer points to a function definition.
+/// (Currently, all uses of this macro in our code fall under this case.)
+#[allow(unused_macros)]
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm",
+    target_arch = "powerpc64",
+    all(target_arch = "x86_64", not(any(target_env = "sgx", miri))),
+))]
+macro_rules! ifunc {
+    (unsafe fn($($arg_pat:ident: $arg_ty:ty),*) $(-> $ret_ty:ty)? { $($detect_body:tt)* }) => {{
+        type FnTy = unsafe fn($($arg_ty),*) $(-> $ret_ty)?;
+        static FUNC: core::sync::atomic::AtomicPtr<()> // initially points at `detect`, so the first call performs detection
+            = core::sync::atomic::AtomicPtr::new(detect as *mut ());
+        #[cold]
+        unsafe fn detect($($arg_pat: $arg_ty),*) $(-> $ret_ty)? {
+            let func: FnTy = { $($detect_body)* };
+            FUNC.store(func as *mut (), core::sync::atomic::Ordering::Relaxed); // cache the selected function for later calls
+            // SAFETY: the caller must uphold the safety contract for the function returned by $detect_body.
+            unsafe { func($($arg_pat),*) }
+        }
+        // SAFETY: `FnTy` is a function pointer, which is always safe to transmute with a `*mut ()`.
+        // (To force the caller to use unsafe block for this macro, do not use
+        // unsafe block here.)
+        let func = {
+            core::mem::transmute::<*mut (), FnTy>(FUNC.load(core::sync::atomic::Ordering::Relaxed))
+        };
+        // SAFETY: the caller must uphold the safety contract for the function returned by $detect_body.
+        // (To force the caller to use unsafe block for this macro, do not use
+        // unsafe block here.)
+        func($($arg_pat),*)
+    }};
+}
+
+#[allow(unused_macros)]
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "arm",
+    target_arch = "powerpc64",
+    all(target_arch = "x86_64", not(any(target_env = "sgx", miri))),
+))]
+macro_rules! fn_alias { // for one `unsafe fn` signature, defines each `new = from(extra);` entry as a wrapper function around `from`
+    (
+        $(#[$($fn_attr:tt)*])*
+        $vis:vis unsafe fn($($arg_pat:ident: $arg_ty:ty),*) $(-> $ret_ty:ty)?;
+        $(#[$($alias_attr:tt)*])*
+        $new:ident = $from:ident($($last_args:tt)*);
+        $($rest:tt)*
+    ) => {
+        $(#[$($fn_attr)*])* // attributes shared by every generated wrapper
+        $(#[$($alias_attr)*])* // attributes given on this entry only
+        $vis unsafe fn $new($($arg_pat: $arg_ty),*) $(-> $ret_ty)? {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { $from($($arg_pat,)* $($last_args)*) } // forwards the signature's arguments, then the entry's extra arguments
+        }
+        fn_alias! { // recurse on the remaining entries with the same signature and shared attributes
+            $(#[$($fn_attr)*])*
+            $vis unsafe fn($($arg_pat: $arg_ty),*) $(-> $ret_ty)?;
+            $($rest)*
+        }
+    };
+    (
+        $(#[$($attr:tt)*])*
+        $vis:vis unsafe fn($($arg_pat:ident: $arg_ty:ty),*) $(-> $ret_ty:ty)?;
+    ) => {} // base case: only the signature is left, nothing to generate
+}
+
+/// Emits the given function as `const fn` when the `const_if` cfg holds, and as a plain `fn` otherwise.
+macro_rules! const_fn {
+    (
+        const_if: #[cfg($($cfg:tt)+)];
+        $(#[$($attr:tt)*])*
+        $vis:vis const fn $($rest:tt)*
+    ) => {
+        #[cfg($($cfg)+)] // cfg holds: keep `const`
+        $(#[$($attr)*])*
+        $vis const fn $($rest)*
+        #[cfg(not($($cfg)+))] // cfg does not hold: same attributes and body, without `const`
+        $(#[$($attr)*])*
+        $vis fn $($rest)*
+    };
+}
+
+/// Implements `core::fmt::Debug` and, when the `serde` feature is enabled, `serde::{Serialize, Deserialize}`
+/// for an atomic bool, integer, or float; values are read with `Ordering::Relaxed` and rebuilt with `Self::new`.
+macro_rules! impl_debug_and_serde {
+    ($atomic_type:ident) => {
+        impl fmt::Debug for $atomic_type {
+            #[allow(clippy::missing_inline_in_public_items)] // fmt is not hot path
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                // std atomic types use Relaxed in Debug::fmt: https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs#L2024
+                fmt::Debug::fmt(&self.load(Ordering::Relaxed), f)
+            }
+        }
+        #[cfg(feature = "serde")]
+        #[cfg_attr(portable_atomic_doc_cfg, doc(cfg(feature = "serde")))]
+        impl serde::ser::Serialize for $atomic_type {
+            #[allow(clippy::missing_inline_in_public_items)] // serde doesn't use inline on std atomic's Serialize/Deserialize impl
+            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+            where
+                S: serde::ser::Serializer,
+            {
+                // https://github.com/serde-rs/serde/blob/v1.0.152/serde/src/ser/impls.rs#L958-L959
+                self.load(Ordering::Relaxed).serialize(serializer)
+            }
+        }
+        #[cfg(feature = "serde")]
+        #[cfg_attr(portable_atomic_doc_cfg, doc(cfg(feature = "serde")))]
+        impl<'de> serde::de::Deserialize<'de> for $atomic_type {
+            #[allow(clippy::missing_inline_in_public_items)] // serde doesn't use inline on std atomic's Serialize/Deserialize impl
+            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+            where
+                D: serde::de::Deserializer<'de>,
+            {
+                serde::de::Deserialize::deserialize(deserializer).map(Self::new) // deserialize the plain value, then wrap it in the atomic
+            }
+        }
+    };
+}
+
+// We do not provide `nand` because it cannot be optimized on either x86 or MSP430.
+// https://godbolt.org/z/7TzjKqYvE
+macro_rules! impl_default_no_fetch_ops { // defines value-discarding `and`/`or`/`xor` (plus `add`/`sub` for integers) by calling the `fetch_*` methods
+    ($atomic_type:ident, bool) => {
+        impl $atomic_type {
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn and(&self, val: bool, order: Ordering) {
+                self.fetch_and(val, order);
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn or(&self, val: bool, order: Ordering) {
+                self.fetch_or(val, order);
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn xor(&self, val: bool, order: Ordering) {
+                self.fetch_xor(val, order);
+            }
+        }
+    };
+    ($atomic_type:ident, $int_type:ident) => {
+        impl $atomic_type {
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn add(&self, val: $int_type, order: Ordering) {
+                self.fetch_add(val, order);
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn sub(&self, val: $int_type, order: Ordering) {
+                self.fetch_sub(val, order);
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn and(&self, val: $int_type, order: Ordering) {
+                self.fetch_and(val, order);
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn or(&self, val: $int_type, order: Ordering) {
+                self.fetch_or(val, order);
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn xor(&self, val: $int_type, order: Ordering) {
+                self.fetch_xor(val, order);
+            }
+        }
+    };
+}
+macro_rules! impl_default_bit_opts { // defines `bit_set`/`bit_clear`/`bit_toggle` via `fetch_or`/`fetch_and`/`fetch_xor`
+    ($atomic_type:ident, $int_type:ident) => {
+        impl $atomic_type {
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn bit_set(&self, bit: u32, order: Ordering) -> bool {
+                let mask = <$int_type>::wrapping_shl(1, bit); // wrapping_shl reduces `bit` modulo the type's bit width instead of overflowing
+                self.fetch_or(mask, order) & mask != 0 // true if the bit was set in the value returned by `fetch_or`
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn bit_clear(&self, bit: u32, order: Ordering) -> bool {
+                let mask = <$int_type>::wrapping_shl(1, bit);
+                self.fetch_and(!mask, order) & mask != 0 // AND with the inverted mask clears only this bit
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn bit_toggle(&self, bit: u32, order: Ordering) -> bool {
+                let mask = <$int_type>::wrapping_shl(1, bit);
+                self.fetch_xor(mask, order) & mask != 0 // XOR with the mask flips only this bit
+            }
+        }
+    };
+}
+
+// Expands to its input unchanged; putting a cfg attribute on the invocation lets one cfg cover several items, like an item-level block.
+macro_rules! items {
+    ($($tt:tt)*) => {
+        $($tt)*
+    };
+}
+
+// Panics when `order` is not valid for a load. Same check as https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs#L3155
+#[inline]
+#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+pub(crate) fn assert_load_ordering(order: Ordering) {
+    match order {
+        Ordering::AcqRel => panic!("there is no such thing as an acquire-release load"),
+        Ordering::Release => panic!("there is no such thing as a release load"),
+        Ordering::Relaxed | Ordering::Acquire | Ordering::SeqCst => {}
+        _ => unreachable!("{:?}", order),
+    }
+}
+
+// Panics when `order` is not valid for a store. Same check as https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs#L3140
+#[inline]
+#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+pub(crate) fn assert_store_ordering(order: Ordering) {
+    match order {
+        Ordering::AcqRel => panic!("there is no such thing as an acquire-release store"),
+        Ordering::Acquire => panic!("there is no such thing as an acquire store"),
+        Ordering::Relaxed | Ordering::Release | Ordering::SeqCst => {}
+        _ => unreachable!("{:?}", order),
+    }
+}
+
+// Panics when `failure` is Release or AcqRel. Same check as https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs#L3221
+#[inline]
+#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
+pub(crate) fn assert_compare_exchange_ordering(success: Ordering, failure: Ordering) {
+    match success {
+        Ordering::Relaxed
+        | Ordering::Acquire
+        | Ordering::Release
+        | Ordering::AcqRel
+        | Ordering::SeqCst => {}
+        _ => unreachable!("{:?}, {:?}", success, failure),
+    }
+    match failure {
+        Ordering::AcqRel => panic!("there is no such thing as an acquire-release failure ordering"),
+        Ordering::Release => panic!("there is no such thing as a release failure ordering"),
+        Ordering::Relaxed | Ordering::Acquire | Ordering::SeqCst => {}
+        _ => unreachable!("{:?}, {:?}", success, failure),
+    }
+}
+
+// Strengthens `success` so it is at least as strong as `failure`; see https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0418r2.html
+// and https://github.com/rust-lang/rust/pull/98383
+#[allow(dead_code)]
+#[inline]
+pub(crate) fn upgrade_success_ordering(success: Ordering, failure: Ordering) -> Ordering {
+    match failure {
+        Ordering::SeqCst => Ordering::SeqCst,
+        Ordering::Acquire if success == Ordering::Relaxed => Ordering::Acquire,
+        Ordering::Acquire if success == Ordering::Release => Ordering::AcqRel,
+        _ => success,
+    }
+}
+
+/// Zero-extends the given 32-bit pointer to `MaybeUninit<u64>`.
+/// This is used for the 32-bit ABI of a 64-bit architecture (e.g., AArch64 ILP32 ABI).
+/// See ptr_reg! macro in src/gen/utils.rs for details.
+#[cfg(not(portable_atomic_no_asm_maybe_uninit))]
+#[cfg(target_pointer_width = "32")]
+#[allow(dead_code)]
+#[inline]
+pub(crate) fn zero_extend64_ptr(v: *mut ()) -> core::mem::MaybeUninit<u64> {
+    #[repr(C)]
+    struct ZeroExtended { // exactly one `pad` field exists per target; cfg picks whether it comes before or after `v`
+        #[cfg(target_endian = "big")]
+        pad: *mut (), // big-endian: padding first, so `v` lands in the low half of the 64-bit value
+        v: *mut (),
+        #[cfg(target_endian = "little")]
+        pad: *mut (), // little-endian: padding last, so `v` lands in the low half of the 64-bit value
+    }
+    // SAFETY: we can safely transmute any 64-bit value to MaybeUninit<u64>.
+    unsafe { core::mem::transmute(ZeroExtended { v, pad: core::ptr::null_mut() }) } // `pad` is a null pointer, which fills the high half
+}
+
+#[allow(dead_code)]
+#[cfg(any(
+    target_arch = "aarch64",
+    target_arch = "powerpc64",
+    target_arch = "s390x",
+    target_arch = "x86_64",
+))]
+/// A 128-bit value represented as a pair of 64-bit values.
+///
+/// This type is `#[repr(C)]`, and both fields have the same in-memory representation and are plain
+/// old data types, so reading either field is always sound (an `unsafe` block is still required).
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub(crate) union U128 {
+    pub(crate) whole: u128, // the value as a single 128-bit integer
+    pub(crate) pair: Pair<u64>, // the same bytes viewed as `lo`/`hi` 64-bit halves
+}
+#[allow(dead_code)]
+#[cfg(target_arch = "arm")]
+/// A 64-bit value represented as a pair of 32-bit values.
+///
+/// This type is `#[repr(C)]`, and both fields have the same in-memory representation and are plain
+/// old data types, so reading either field is always sound (an `unsafe` block is still required).
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub(crate) union U64 {
+    pub(crate) whole: u64, // the value as a single 64-bit integer
+    pub(crate) pair: Pair<u32>, // the same bytes viewed as `lo`/`hi` 32-bit halves
+}
+#[allow(dead_code)]
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub(crate) struct Pair<T: Copy> { // two `T` halves; exactly one of the two `lo` declarations below is compiled in
+    // little endian order (also selected on aarch64 and arm regardless of target endianness): `lo` first
+    #[cfg(any(target_endian = "little", target_arch = "aarch64", target_arch = "arm"))]
+    pub(crate) lo: T,
+    pub(crate) hi: T,
+    // big endian order (all other targets): `hi` first, then `lo`
+    #[cfg(not(any(target_endian = "little", target_arch = "aarch64", target_arch = "arm")))]
+    pub(crate) lo: T,
+}
+
+#[allow(dead_code)]
+type MinWord = u32; // the word type that sub-word pointers are aligned down to
+#[cfg(target_arch = "riscv32")]
+type RegSize = u32;
+#[cfg(target_arch = "riscv64")]
+type RegSize = u64;
+// Adapted from https://github.com/taiki-e/atomic-maybe-uninit/blob/v0.3.0/src/utils.rs#L210.
+// Helper for implementing sub-word atomic operations using word-sized LL/SC loop or CAS loop.
+//
+// Refs: https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/CodeGen/AtomicExpandPass.cpp#L699
+// Returns (aligned_ptr, shift, mask): `ptr` rounded down to `MinWord` alignment, a bit shift derived from `ptr`, and a mask of `size_of::<T>() * 8` one bits.
+#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
+#[allow(dead_code)]
+#[inline]
+pub(crate) fn create_sub_word_mask_values<T>(ptr: *mut T) -> (*mut MinWord, RegSize, RegSize) {
+    use core::mem;
+    const SHIFT_MASK: bool = !cfg!(any( // false on every arch listed here, which includes both targets this function is compiled for
+        target_arch = "riscv32",
+        target_arch = "riscv64",
+        target_arch = "loongarch64",
+        target_arch = "s390x",
+    ));
+    let ptr_mask = mem::size_of::<MinWord>() - 1;
+    let aligned_ptr = strict::with_addr(ptr, ptr as usize & !ptr_mask) as *mut MinWord; // clear the low address bits, keeping provenance
+    let ptr_lsb = if SHIFT_MASK {
+        ptr as usize & ptr_mask
+    } else {
+        // We use 32-bit wrapping shift instructions in asm on these platforms.
+        ptr as usize
+    };
+    let shift = if cfg!(any(target_endian = "little", target_arch = "s390x")) {
+        ptr_lsb.wrapping_mul(8) // byte offset to bit offset
+    } else {
+        (ptr_lsb ^ (mem::size_of::<MinWord>() - mem::size_of::<T>())).wrapping_mul(8)
+    };
+    let mut mask: RegSize = (1 << (mem::size_of::<T>() * 8)) - 1; // !(0 as T) as RegSize
+    if SHIFT_MASK {
+        mask <<= shift; // only when SHIFT_MASK is true is the mask returned pre-shifted
+    }
+    (aligned_ptr, shift as RegSize, mask)
+}
+
+/// Emulate strict provenance.
+///
+/// Once strict_provenance is stable, migrate to the standard library's APIs.
+#[cfg(any(miri, target_arch = "riscv32", target_arch = "riscv64"))]
+#[allow(dead_code)]
+pub(crate) mod strict {
+    /// Replace the address portion of this pointer with a new address.
+    #[inline]
+    #[must_use]
+    pub(crate) fn with_addr<T>(ptr: *mut T, addr: usize) -> *mut T {
+        // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
+        //
+        // In the meantime, this operation is defined to be "as if" it was
+        // a wrapping_add, so we can emulate it as such. This should properly
+        // restore pointer provenance even under today's compiler.
+        let offset = addr.wrapping_sub(ptr as usize); // byte distance from the current address to `addr`, modulo usize
+
+        // This is the canonical desugaring of this operation.
+        (ptr as *mut u8).wrapping_add(offset) as *mut T
+    }
+
+    /// Apply `f` to the pointer's address and return the pointer moved to the resulting address via `with_addr`.
+    #[inline]
+    #[must_use]
+    pub(crate) fn map_addr<T>(ptr: *mut T, f: impl FnOnce(usize) -> usize) -> *mut T {
+        with_addr(ptr, f(ptr as usize))
+    }
+}