14 files changed, 7045 insertions, 0 deletions
diff --git a/vendor/portable-atomic/src/imp/atomic128/README.md b/vendor/portable-atomic/src/imp/atomic128/README.md
new file mode 100644
index 0000000..799a49b
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/README.md
@@ -0,0 +1,59 @@
+# Implementation of 128-bit atomics
+
+## 128-bit atomics instructions
+
+Here is the table of targets that support 128-bit atomics and the instructions used:
+
+| target_arch | load | store | CAS | RMW | note |
+| ----------- | ---- | ----- | --- | --- | ---- |
+| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel or AMD CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ when cmpxchg16b target feature is enabled at compile-time, otherwise requires rustc 1.69+ |
+| aarch64 | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, ldiapp/stilp requires lse2 and rcpc3 target features, swpp/ldclrp/ldsetp requires lse128 target feature. <br> Both compile-time and run-time detection are supported for lse and lse2. Others are currently compile-time detection only. <br> Requires rustc 1.59+ |
+| powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Requires target-cpu pwr8+ (powerpc64le is pwr8 by default). Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires nightly |
+| s390x | lpq | stpq | cdsg | cdsg | Requires nightly |
+
+On compiler versions or platforms where these are not supported, the fallback implementation is used.
+
+See [aarch64.rs](aarch64.rs) module-level comments for more details on the instructions used on aarch64.
+
+## Comparison with core::intrinsics::atomic_\* (core::sync::atomic::Atomic{I,U}128)
+
+This directory has target-specific implementations with inline assembly ([aarch64.rs](aarch64.rs), [x86_64.rs](x86_64.rs), [powerpc64.rs](powerpc64.rs), [s390x.rs](s390x.rs)) and an implementation without inline assembly ([intrinsics.rs](intrinsics.rs)). The latter currently always needs nightly compilers and is only used for Miri and ThreadSanitizer, which do not support inline assembly.
+
+Implementations with inline assembly generate assemblies almost equivalent to the `core::intrinsics::atomic_*` (used in `core::sync::atomic::Atomic{I,U}128`) for many operations, but some operations may or may not generate more efficient code. For example:
+
+- On x86_64, implementation with inline assembly contains additional optimizations (e.g., [#16](https://github.com/taiki-e/portable-atomic/pull/16)) and is much faster for some operations.
+- On aarch64, implementation with inline assembly supports outline-atomics on more operating systems, and may be faster in environments where outline-atomics can improve performance.
+- On powerpc64 and s390x, LLVM does not support generating some 128-bit atomic operations (see [intrinsics.rs](intrinsics.rs) module-level comments), and we use CAS loop to implement them, so implementation with inline assembly may be faster for those operations.
+- In implementations without inline assembly, the compiler may reuse condition flags that have changed as a result of the operation, or use immediate values instead of registers, depending on the situation.
+
+As 128-bit atomics-related APIs stabilize in the standard library, implementations with inline assembly are planned to be updated to get the benefits of both.
+
+## Run-time feature detection
+
+[detect](detect) module has run-time feature detection implementations.
+
+Here is the table of targets that support run-time feature detection and the instruction or API used:
+
+| target_arch | target_os/target_env | instruction/API | features | note |
+| ----------- | -------------------- | --------------- | -------- | ---- |
+| x86_64      | all (except for sgx) | cpuid           | all      | Enabled by default |
+| aarch64     | linux                | getauxval       | all      | Only enabled by default on `*-linux-gnu*`, and `*-linux-musl*` (default is static linking)/`*-linux-ohos*` (default is dynamic linking) with dynamic linking enabled. |
+| aarch64     | android              | getauxval       | all      | Enabled by default |
+| aarch64     | freebsd              | elf_aux_info    | lse, lse2 | Enabled by default |
+| aarch64     | netbsd               | sysctl          | all      | Enabled by default |
+| aarch64     | openbsd              | sysctl          | lse      | Enabled by default |
+| aarch64     | macos                | sysctl          | all      | Currently only used in tests because FEAT_LSE and FEAT_LSE2 are always available at compile-time. |
+| aarch64     | windows              | IsProcessorFeaturePresent | lse | Enabled by default |
+| aarch64     | fuchsia              | zx_system_get_features | lse | Enabled by default |
+| powerpc64   | linux                | getauxval       | all      | Disabled by default |
+| powerpc64   | freebsd              | elf_aux_info    | all      | Disabled by default |
+
+Run-time detection is enabled by default on most targets and can be disabled with `--cfg portable_atomic_no_outline_atomics`.
+
+On some targets, run-time detection is disabled by default mainly for compatibility with older versions of operating systems or incomplete build environments, and can be enabled by `--cfg portable_atomic_outline_atomics`. (When both cfg are enabled, `*_no_*` cfg is preferred.)
+
+For targets not included in the above table, run-time detection is always disabled and works the same as when `--cfg portable_atomic_no_outline_atomics` is set.
+
+See [detect/auxv.rs](detect/auxv.rs) module-level comments for more details on Linux/Android/FreeBSD.
+
+See also [docs on `portable_atomic_no_outline_atomics`](https://github.com/taiki-e/portable-atomic/blob/HEAD/README.md#optional-cfg-no-outline-atomics) in the top-level readme.
diff --git a/vendor/portable-atomic/src/imp/atomic128/aarch64.rs b/vendor/portable-atomic/src/imp/atomic128/aarch64.rs
new file mode 100644
index 0000000..32528a7
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/aarch64.rs
@@ -0,0 +1,1708 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on AArch64.
+//
+// There are a few ways to implement 128-bit atomic operations in AArch64.
+//
+// - LDXP/STXP loop (DW LL/SC)
+// - CASP (DWCAS) added as FEAT_LSE (mandatory from armv8.1-a)
+// - LDP/STP (DW load/store) if FEAT_LSE2 (optional from armv8.2-a, mandatory from armv8.4-a) is available
+// - LDIAPP/STILP (DW acquire-load/release-store) added as FEAT_LRCPC3 (optional from armv8.9-a/armv9.4-a) (if FEAT_LSE2 is also available)
+// - LDCLRP/LDSETP/SWPP (DW RMW) added as FEAT_LSE128 (optional from armv9.4-a)
+//
+// If outline-atomics is not enabled and FEAT_LSE is not available at
+// compile-time, we use LDXP/STXP loop.
+// If outline-atomics is enabled and FEAT_LSE is not available at
+// compile-time, we use CASP for CAS if FEAT_LSE is available
+// at run-time, otherwise, use LDXP/STXP loop.
+// If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW.
+// However, when portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP
+// loop for RMW (by default, it is set on Apple hardware; see build script for details).
+// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
+// If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap/{release,seqcst}-store.
+// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store.
+//
+// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
+//
+// Note that we do not separate LL and SC into separate functions, but handle
+// them within a single asm block. This is because it is theoretically possible
+// for the compiler to insert operations that might clear the reservation between
+// LL and SC. Considering the type of operations we are providing and the fact
+// that [progress64](https://github.com/ARM-software/progress64) uses such code,
+// this is probably not a problem for aarch64, but it seems that aarch64 doesn't
+// guarantee it and hexagon is the only architecture with hardware guarantees
+// that such code works. See also:
+//
+// - https://yarchive.net/comp/linux/cmpxchg_ll_sc_portability.html
+// - https://lists.llvm.org/pipermail/llvm-dev/2016-May/099490.html
+// - https://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html
+//
+// Also, even when using a CAS loop to implement atomic RMW, include the loop itself
+// in the asm block because it is more efficient for some codegen backends.
+// https://github.com/rust-lang/compiler-builtins/issues/339#issuecomment-1191260474
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - ARM Compiler armasm User Guide
+//   https://developer.arm.com/documentation/dui0801/latest
+// - Arm A-profile A64 Instruction Set Architecture
+//   https://developer.arm.com/documentation/ddi0602/latest
+// - Arm Architecture Reference Manual for A-profile architecture
+//   https://developer.arm.com/documentation/ddi0487/latest
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - aarch64 https://godbolt.org/z/5Mz1E33vz
+// - aarch64 msvc https://godbolt.org/z/P53d1MsGY
+// - aarch64 (+lse) https://godbolt.org/z/qvaE8n79K
+// - aarch64 msvc (+lse) https://godbolt.org/z/dj4aYerfr
+// - aarch64 (+lse,+lse2) https://godbolt.org/z/1E15jjxah
+// - aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/YreM4n84o
+// - aarch64 (+lse2,+lse128) https://godbolt.org/z/Kfeqs54ox
+// - aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/n6zhjE77s
+
+include!("macros.rs");
+
+// On musl with static linking, it seems that getauxval is not always available.
+// See detect/auxv.rs for more.
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(
+    test,
+    not(all(
+        any(target_feature = "lse2", portable_atomic_target_feature = "lse2"),
+        any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+    )),
+))]
+#[cfg(any(
+    all(
+        target_os = "linux",
+        any(
+            target_env = "gnu",
+            all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
+            portable_atomic_outline_atomics,
+        ),
+    ),
+    target_os = "android",
+    target_os = "freebsd",
+))]
+#[path = "detect/auxv.rs"]
+mod detect;
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg_attr(
+    target_os = "netbsd",
+    cfg(any(
+        test,
+        not(all(
+            any(target_feature = "lse2", portable_atomic_target_feature = "lse2"),
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+        )),
+    ))
+)]
+#[cfg_attr(
+    target_os = "openbsd",
+    cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))
+)]
+#[cfg(any(target_os = "netbsd", target_os = "openbsd"))]
+#[path = "detect/aarch64_aa64reg.rs"]
+mod detect;
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
+#[cfg(target_os = "fuchsia")]
+#[path = "detect/aarch64_fuchsia.rs"]
+mod detect;
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
+#[cfg(target_os = "windows")]
+#[path = "detect/aarch64_windows.rs"]
+mod detect;
+
+// test only
+#[cfg(test)]
+#[cfg(not(qemu))]
+#[cfg(not(valgrind))]
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))]
+#[path = "detect/aarch64_aa64reg.rs"]
+mod detect_aa64reg;
+#[cfg(test)]
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(target_os = "macos")]
+#[path = "detect/aarch64_macos.rs"]
+mod detect_macos;
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+use core::sync::atomic::Ordering;
+
+use crate::utils::{Pair, U128};
+
+#[cfg(any(
+    target_feature = "lse",
+    portable_atomic_target_feature = "lse",
+    not(portable_atomic_no_outline_atomics),
+))]
+macro_rules! debug_assert_lse { // Debug-asserts that run-time detection reports FEAT_LSE.
+    () => {
+        #[cfg(all(
+            not(portable_atomic_no_outline_atomics),
+            any(
+                all(
+                    target_os = "linux",
+                    any(
+                        target_env = "gnu",
+                        all(
+                            any(target_env = "musl", target_env = "ohos"),
+                            not(target_feature = "crt-static"),
+                        ),
+                        portable_atomic_outline_atomics,
+                    ),
+                ),
+                target_os = "android",
+                target_os = "freebsd",
+                target_os = "netbsd",
+                target_os = "openbsd",
+                target_os = "fuchsia",
+                target_os = "windows",
+            ),
+        ))]
+        #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
+        { // Expands to nothing when FEAT_LSE is already enabled at compile time.
+            debug_assert!(detect::detect().has_lse());
+        }
+    };
+}
+#[rustfmt::skip]
+#[cfg(any(
+    target_feature = "lse2",
+    portable_atomic_target_feature = "lse2",
+    not(portable_atomic_no_outline_atomics),
+))]
+macro_rules! debug_assert_lse2 { // Debug-asserts that run-time detection reports FEAT_LSE2.
+    () => {
+        #[cfg(all(
+            not(portable_atomic_no_outline_atomics),
+            any(
+                all(
+                    target_os = "linux",
+                    any(
+                        target_env = "gnu",
+                        all(
+                            any(target_env = "musl", target_env = "ohos"),
+                            not(target_feature = "crt-static"),
+                        ),
+                        portable_atomic_outline_atomics,
+                    ),
+                ),
+                target_os = "android",
+                target_os = "freebsd",
+                target_os = "netbsd",
+                // These don't support detection of FEAT_LSE2.
+                // target_os = "openbsd",
+                // target_os = "fuchsia",
+                // target_os = "windows",
+            ),
+        ))]
+        #[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
+        { // Expands to nothing when FEAT_LSE2 is already enabled at compile time.
+            debug_assert!(detect::detect().has_lse2());
+        }
+    };
+}
+
+// Refs: https://developer.arm.com/documentation/100067/0612/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en
+//
+// This is similar to #[target_feature(enable = "lse")], except that there are
+// no compiler guarantees regarding (un)inlining, and the scope is within an asm
+// block rather than a function. We use this directive to support outline-atomics
+// on pre-1.61 rustc (aarch64_target_feature stabilized in Rust 1.61).
+//
+// The .arch_extension directive is effective until the end of the assembly block and
+// is not propagated to subsequent code, so the end_lse macro is unneeded.
+// https://godbolt.org/z/4oMEW8vWc
+// https://github.com/torvalds/linux/commit/e0d5896bd356cd577f9710a02d7a474cdf58426b
+// https://github.com/torvalds/linux/commit/dd1f6308b28edf0452dd5dc7877992903ec61e69
+// (It seems GCC effectively ignores this directive and always allows FEAT_LSE instructions: https://godbolt.org/z/W9W6rensG)
+//
+// The .arch directive has a similar effect, but we don't use it due to the following issue:
+// https://github.com/torvalds/linux/commit/dd1f6308b28edf0452dd5dc7877992903ec61e69
+//
+// This is also needed for compatibility with rustc_codegen_cranelift:
+// https://github.com/rust-lang/rustc_codegen_cranelift/issues/1400#issuecomment-1774599775
+//
+// Note: If FEAT_LSE is not available at compile-time, we must guarantee that
+// the function that uses it is not inlined into a function where it is not
+// clear whether FEAT_LSE is available. Otherwise, (even if we checked whether
+// FEAT_LSE is available at run-time) optimizations that reorder its
+// instructions across the if condition might introduce undefined behavior.
+// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
+// However, our code uses the ifunc helper macro that works with function pointers,
+// so we don't have to worry about this unless calling without the helper macro.
+#[cfg(any(
+    target_feature = "lse",
+    portable_atomic_target_feature = "lse",
+    not(portable_atomic_no_outline_atomics),
+))]
+macro_rules! start_lse {
+    () => {
+        ".arch_extension lse"
+    };
+}
+
+#[cfg(target_endian = "little")]
+macro_rules! select_le_or_be { // Little-endian build: expands to the first argument.
+    ($le:expr, $be:expr) => {
+        $le
+    };
+}
+#[cfg(target_endian = "big")]
+macro_rules! select_le_or_be { // Big-endian build: expands to the second argument.
+    ($le:expr, $be:expr) => {
+        $be
+    };
+}
+
+macro_rules! atomic_rmw {
+    ($op:ident, $order:ident) => {
+        atomic_rmw!($op, $order, write = $order) // write ordering defaults to $order
+    };
+    ($op:ident, $order:ident, write = $write:ident) => {
+        match $order {
+            Ordering::Relaxed => $op!("", "", ""),
+            Ordering::Acquire => $op!("a", "", ""),
+            Ordering::Release => $op!("", "l", ""),
+            Ordering::AcqRel => $op!("a", "l", ""),
+            // In MSVC environments, SeqCst stores/writes need fences after writes.
+            // https://reviews.llvm.org/D141748
+            #[cfg(target_env = "msvc")]
+            Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"),
+            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
+            Ordering::SeqCst => $op!("a", "l", ""),
+            _ => unreachable!("{:?}", $order),
+        }
+    };
+}
+
+// cfg guarantee that the CPU supports FEAT_LSE2.
+#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))]
+use _atomic_load_ldp as atomic_load;
+#[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
+#[inline]
+unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
+    #[inline]
+    unsafe fn atomic_load_no_lse2(src: *mut u128, order: Ordering) -> u128 {
+        #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+        // SAFETY: the caller must uphold the safety contract.
+        // cfg guarantee that the CPU supports FEAT_LSE.
+        unsafe {
+            _atomic_load_casp(src, order)
+        }
+        #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe {
+            _atomic_load_ldxp_stxp(src, order)
+        }
+    }
+    #[cfg(not(all(
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            // These don't support detection of FEAT_LSE2.
+            // target_os = "openbsd",
+            // target_os = "fuchsia",
+            // target_os = "windows",
+        ),
+    )))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        atomic_load_no_lse2(src, order)
+    }
+    #[cfg(all(
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            // These don't support detection of FEAT_LSE2.
+            // target_os = "openbsd",
+            // target_os = "fuchsia",
+            // target_os = "windows",
+        ),
+    ))]
+    { // Run-time dispatch: one ifunc per ordering, chosen by detect::detect().has_lse2().
+        fn_alias! {
+            // inline(never) is just a hint and also not strictly necessary
+            // because we use ifunc helper macro, but used for clarity.
+            #[inline(never)]
+            unsafe fn(src: *mut u128) -> u128;
+            atomic_load_lse2_relaxed = _atomic_load_ldp(Ordering::Relaxed);
+            atomic_load_lse2_acquire = _atomic_load_ldp(Ordering::Acquire);
+            atomic_load_lse2_seqcst = _atomic_load_ldp(Ordering::SeqCst);
+        }
+        fn_alias! {
+            unsafe fn(src: *mut u128) -> u128;
+            atomic_load_no_lse2_relaxed = atomic_load_no_lse2(Ordering::Relaxed);
+            atomic_load_no_lse2_acquire = atomic_load_no_lse2(Ordering::Acquire);
+            atomic_load_no_lse2_seqcst = atomic_load_no_lse2(Ordering::SeqCst);
+        }
+        // SAFETY: the caller must uphold the safety contract,
+        // and each ifunc selects the LDP-based function only if has_lse2() reports FEAT_LSE2.
+        unsafe {
+            match order {
+                Ordering::Relaxed => {
+                    ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_load_lse2_relaxed
+                        } else {
+                            atomic_load_no_lse2_relaxed
+                        }
+                    })
+                }
+                Ordering::Acquire => {
+                    ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_load_lse2_acquire
+                        } else {
+                            atomic_load_no_lse2_acquire
+                        }
+                    })
+                }
+                Ordering::SeqCst => {
+                    ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_load_lse2_seqcst
+                        } else {
+                            atomic_load_no_lse2_seqcst
+                        }
+                    })
+                }
+                _ => unreachable!("{:?}", order),
+            }
+        }
+    }
+}
+// If the CPU supports FEAT_LSE2, LDP/LDIAPP performs a single-copy atomic read of the pair;
+// otherwise it performs two separate single-copy atomic reads.
+// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile
+#[cfg(any(
+    target_feature = "lse2",
+    portable_atomic_target_feature = "lse2",
+    not(portable_atomic_no_outline_atomics),
+))]
+#[inline]
+unsafe fn _atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 {
+    debug_assert!(src as usize % 16 == 0);
+    debug_assert_lse2!();
+
+    // SAFETY: the caller must guarantee that `src` is valid for reads,
+    // 16-byte aligned, and that there are no concurrent non-atomic operations.
+    //
+    // Refs:
+    // - LDP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDP--A64-
+    unsafe {
+        let (out_lo, out_hi);
+        macro_rules! atomic_load_relaxed {
+            ($acquire:tt $(, $readonly:tt)?) => {
+                asm!(
+                    "ldp {out_lo}, {out_hi}, [{src}]",
+                    $acquire, // "" or a trailing barrier such as "dmb ishld"
+                    src = in(reg) ptr_reg!(src),
+                    out_hi = lateout(reg) out_hi,
+                    out_lo = lateout(reg) out_lo,
+                    options(nostack, preserves_flags $(, $readonly)?),
+                )
+            };
+        }
+        match order {
+            Ordering::Relaxed => atomic_load_relaxed!("", readonly),
+            #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
+            Ordering::Acquire => {
+                // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3.
+                // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers-
+                asm!(
+                    "ldiapp {out_lo}, {out_hi}, [{src}]",
+                    src = in(reg) ptr_reg!(src),
+                    out_hi = lateout(reg) out_hi,
+                    out_lo = lateout(reg) out_lo,
+                    options(nostack, preserves_flags),
+                );
+            }
+            #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
+            Ordering::Acquire => atomic_load_relaxed!("dmb ishld"),
+            Ordering::SeqCst => {
+                asm!(
+                    // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp.
+                    // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891 for details.
+                    "ldar {tmp}, [{src}]",
+                    "ldp {out_lo}, {out_hi}, [{src}]",
+                    "dmb ishld",
+                    src = in(reg) ptr_reg!(src),
+                    out_hi = lateout(reg) out_hi,
+                    out_lo = lateout(reg) out_lo,
+                    tmp = out(reg) _,
+                    options(nostack, preserves_flags),
+                );
+            }
+            _ => unreachable!("{:?}", order),
+        }
+        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+    }
+}
+// Do not use _atomic_compare_exchange_casp because it needs an extra MOV to implement load.
+#[cfg(any(test, not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))))]
+#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+#[inline]
+unsafe fn _atomic_load_casp(src: *mut u128, order: Ordering) -> u128 {
+    debug_assert!(src as usize % 16 == 0);
+    debug_assert_lse!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantee that the CPU supports FEAT_LSE.
+    unsafe {
+        let (out_lo, out_hi);
+        macro_rules! atomic_load {
+            ($acquire:tt, $release:tt) => {
+                asm!(
+                    start_lse!(),
+                    concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"),
+                    src = in(reg) ptr_reg!(src),
+                    // compare/new values both 0; must be allocated to even/odd register pair
+                    inout("x2") 0_u64 => out_lo,
+                    inout("x3") 0_u64 => out_hi,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        match order {
+            Ordering::Relaxed => atomic_load!("", ""),
+            Ordering::Acquire => atomic_load!("a", ""),
+            Ordering::SeqCst => atomic_load!("a", "l"),
+            _ => unreachable!("{:?}", order),
+        }
+        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+    }
+}
+#[cfg(any(
+    test,
+    all(
+        not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")),
+        not(any(target_feature = "lse", portable_atomic_target_feature = "lse")),
+    ),
+))]
+#[inline]
+unsafe fn _atomic_load_ldxp_stxp(src: *mut u128, order: Ordering) -> u128 {
+    debug_assert!(src as usize % 16 == 0);
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let (mut out_lo, mut out_hi);
+        macro_rules! atomic_load {
+            ($acquire:tt, $release:tt) => {
+                asm!(
+                    "2:",
+                        concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{src}]"),
+                        concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{src}]"),
+                        // r: 0 if the store was successful, 1 if no store was performed (retry)
+                        "cbnz {r:w}, 2b",
+                    src = in(reg) ptr_reg!(src),
+                    out_lo = out(reg) out_lo,
+                    out_hi = out(reg) out_hi,
+                    r = out(reg) _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        match order {
+            Ordering::Relaxed => atomic_load!("", ""),
+            Ordering::Acquire => atomic_load!("a", ""),
+            Ordering::SeqCst => atomic_load!("a", "l"),
+            _ => unreachable!("{:?}", order),
+        }
+        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+    }
+}
+
+// cfg guarantee that the CPU supports FEAT_LSE2.
+#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))]
+use _atomic_store_stp as atomic_store;
+#[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+    #[inline]
+    unsafe fn atomic_store_no_lse2(dst: *mut u128, val: u128, order: Ordering) {
+        // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+        // we use CAS-based atomic RMW.
+        #[cfg(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        ))]
+        // SAFETY: the caller must uphold the safety contract.
+        // cfg guarantee that the CPU supports FEAT_LSE.
+        unsafe {
+            _atomic_swap_casp(dst, val, order);
+        }
+        #[cfg(not(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        )))]
+        // SAFETY: the caller must uphold the safety contract.
+        unsafe {
+            _atomic_store_ldxp_stxp(dst, val, order);
+        }
+    }
+    #[cfg(not(all(
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            // These don't support detection of FEAT_LSE2.
+            // target_os = "openbsd",
+            // target_os = "fuchsia",
+            // target_os = "windows",
+        ),
+    )))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        atomic_store_no_lse2(dst, val, order);
+    }
+    #[cfg(all(
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            // These don't support detection of FEAT_LSE2.
+            // target_os = "openbsd",
+            // target_os = "fuchsia",
+            // target_os = "windows",
+        ),
+    ))]
+    {
+        fn_alias! {
+            // inline(never) is just a hint and also not strictly necessary
+            // because we use ifunc helper macro, but used for clarity.
+            #[inline(never)]
+            unsafe fn(dst: *mut u128, val: u128);
+            atomic_store_lse2_relaxed = _atomic_store_stp(Ordering::Relaxed);
+            atomic_store_lse2_release = _atomic_store_stp(Ordering::Release);
+            atomic_store_lse2_seqcst = _atomic_store_stp(Ordering::SeqCst);
+        }
+        fn_alias! {
+            unsafe fn(dst: *mut u128, val: u128);
+            atomic_store_no_lse2_relaxed = atomic_store_no_lse2(Ordering::Relaxed);
+            atomic_store_no_lse2_release = atomic_store_no_lse2(Ordering::Release);
+            atomic_store_no_lse2_seqcst = atomic_store_no_lse2(Ordering::SeqCst);
+        }
+        // SAFETY: the caller must uphold the safety contract.
+        // and we've checked if FEAT_LSE2 is available.
+        unsafe {
+            match order {
+                Ordering::Relaxed => {
+                    ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_store_lse2_relaxed
+                        } else {
+                            atomic_store_no_lse2_relaxed
+                        }
+                    });
+                }
+                Ordering::Release => {
+                    ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_store_lse2_release
+                        } else {
+                            atomic_store_no_lse2_release
+                        }
+                    });
+                }
+                Ordering::SeqCst => {
+                    ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                        let cpuinfo = detect::detect();
+                        if cpuinfo.has_lse2() {
+                            atomic_store_lse2_seqcst
+                        } else {
+                            atomic_store_no_lse2_seqcst
+                        }
+                    });
+                }
+                _ => unreachable!("{:?}", order),
+            }
+        }
+    }
+}
+// 128-bit atomic store using STP (or STILP/SWPP when the corresponding cfgs are enabled).
+// If the CPU supports FEAT_LSE2, STP/STILP is a single-copy atomic write; otherwise it is two single-copy atomic writes.
+// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile
+#[cfg(any(
+    target_feature = "lse2",
+    portable_atomic_target_feature = "lse2",
+    not(portable_atomic_no_outline_atomics),
+))]
+#[inline]
+unsafe fn _atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) {
+    debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+    debug_assert_lse2!(); // NOTE(review): presumably a debug-only FEAT_LSE2 check; macro is defined outside this view
+
+    // SAFETY: the caller must guarantee that `dst` is valid for writes,
+    // 16-byte aligned, and that there are no concurrent non-atomic operations.
+    //
+    // Refs:
+    // - STP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STP--A64-
+    unsafe {
+        #[rustfmt::skip]
+        macro_rules! atomic_store {
+            ($acquire:tt, $release:tt) => {{ // `$release` is emitted before the STP, `$acquire` after it
+                let val = U128 { whole: val }; // split into the lo/hi halves read via `.pair` below
+                asm!(
+                    $release,
+                    "stp {val_lo}, {val_hi}, [{dst}]",
+                    $acquire,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = in(reg) val.pair.lo,
+                    val_hi = in(reg) val.pair.hi,
+                    options(nostack, preserves_flags),
+                );
+            }};
+        }
+        match order {
+            Ordering::Relaxed => atomic_store!("", ""), // plain STP, no barriers
+            #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
+            Ordering::Release => { // rcpc3 enabled at compile time: use the store-release pair instruction STILP
+                let val = U128 { whole: val };
+                // SAFETY: cfg guarantees that the CPU supports FEAT_LRCPC3.
+                // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers-
+                asm!(
+                    "stilp {val_lo}, {val_hi}, [{dst}]",
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = in(reg) val.pair.lo,
+                    val_hi = in(reg) val.pair.hi,
+                    options(nostack, preserves_flags),
+                );
+            }
+            #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
+            #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+            Ordering::Release => {
+                // Use swpp if stp requires fences.
+                // https://reviews.llvm.org/D143506
+                // SAFETY: cfg guarantees that the CPU supports FEAT_LSE128.
+                _atomic_swap_swpp(dst, val, order); // the previous value returned by the swap is discarded
+            }
+            #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
+            #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+            Ordering::Release => atomic_store!("", "dmb ish"), // `dmb ish` before the STP only
+            #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+            Ordering::SeqCst => {
+                // Use swpp if stp requires fences.
+                // https://reviews.llvm.org/D143506
+                // SAFETY: cfg guarantees that the CPU supports FEAT_LSE128.
+                _atomic_swap_swpp(dst, val, order); // the previous value returned by the swap is discarded
+            }
+            #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+            Ordering::SeqCst => atomic_store!("dmb ish", "dmb ish"), // `dmb ish` both before and after the STP
+            _ => unreachable!("{:?}", order), // only Relaxed/Release/SeqCst are handled for stores here
+        }
+    }
+}
+// 128-bit atomic store via an LDXP/STXP loop. Do not use _atomic_swap_ldxp_stxp because it needs extra registers to implement store.
+#[cfg(any(
+    test,
+    not(all(
+        any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+        not(portable_atomic_ll_sc_rmw),
+    ))
+))]
+#[inline]
+unsafe fn _atomic_store_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) {
+    debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let val = U128 { whole: val }; // split into the lo/hi halves read via `.pair` below
+        macro_rules! store {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    "2:",
+                        concat!("ld", $acquire, "xp xzr, {tmp}, [{dst}]"), // load-exclusive; the loaded value is not used (tmp is overwritten by the status below)
+                        concat!("st", $release, "xp {tmp:w}, {val_lo}, {val_hi}, [{dst}]"), // store-exclusive of `val`; status goes to tmp
+                        // 0 if the store was successful, 1 if no store was performed
+                        "cbnz {tmp:w}, 2b", // retry the whole load/store pair on failure
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = in(reg) val.pair.lo,
+                    val_hi = in(reg) val.pair.hi,
+                    tmp = out(reg) _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(store, order); // NOTE(review): presumably invokes `store!` with the acquire/release/fence strings for `order`; macro defined outside this view
+    }
+}
+
+#[inline] // 128-bit compare-and-exchange on `*dst`: returns `Ok(prev)` if the previous value equals `old`, else `Err(prev)`.
+unsafe fn atomic_compare_exchange(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> Result<u128, u128> {
+    #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] // FEAT_LSE enabled at compile time: always use CASP
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that the CPU supports FEAT_LSE.
+    let prev = unsafe { _atomic_compare_exchange_casp(dst, old, new, success, failure) };
+    #[cfg(not(all( // targets not covered by the run-time detection list below (or with outline atomics disabled)...
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            target_os = "openbsd",
+            target_os = "fuchsia",
+            target_os = "windows",
+        ),
+    )))]
+    #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] // ...and no compile-time FEAT_LSE: always use the LDXP/STXP loop
+    // SAFETY: the caller must uphold the safety contract.
+    let prev = unsafe { _atomic_compare_exchange_ldxp_stxp(dst, old, new, success, failure) };
+    #[cfg(all( // targets in the run-time detection list...
+        not(portable_atomic_no_outline_atomics),
+        any(
+            all(
+                target_os = "linux",
+                any(
+                    target_env = "gnu",
+                    all(
+                        any(target_env = "musl", target_env = "ohos"),
+                        not(target_feature = "crt-static"),
+                    ),
+                    portable_atomic_outline_atomics,
+                ),
+            ),
+            target_os = "android",
+            target_os = "freebsd",
+            target_os = "netbsd",
+            target_os = "openbsd",
+            target_os = "fuchsia",
+            target_os = "windows",
+        ),
+    ))]
+    #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] // ...and no compile-time FEAT_LSE: choose CASP or LDXP/STXP at run time
+    let prev = {
+        fn_alias! { // NOTE(review): presumably defines the named wrappers with the listed orderings fixed; macro defined outside this view
+            // inline(never) is just a hint and also not strictly necessary
+            // because we use ifunc helper macro, but used for clarity.
+            #[inline(never)]
+            unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128;
+            atomic_compare_exchange_casp_relaxed
+                = _atomic_compare_exchange_casp(Ordering::Relaxed, Ordering::Relaxed);
+            atomic_compare_exchange_casp_acquire
+                = _atomic_compare_exchange_casp(Ordering::Acquire, Ordering::Acquire);
+            atomic_compare_exchange_casp_release
+                = _atomic_compare_exchange_casp(Ordering::Release, Ordering::Relaxed);
+            atomic_compare_exchange_casp_acqrel
+                = _atomic_compare_exchange_casp(Ordering::AcqRel, Ordering::Acquire);
+            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments, so the SeqCst alias is only defined for MSVC.
+            #[cfg(target_env = "msvc")]
+            atomic_compare_exchange_casp_seqcst
+                = _atomic_compare_exchange_casp(Ordering::SeqCst, Ordering::SeqCst);
+        }
+        fn_alias! { // same set of aliases for the LDXP/STXP implementation
+            unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128;
+            atomic_compare_exchange_ldxp_stxp_relaxed
+                = _atomic_compare_exchange_ldxp_stxp(Ordering::Relaxed, Ordering::Relaxed);
+            atomic_compare_exchange_ldxp_stxp_acquire
+                = _atomic_compare_exchange_ldxp_stxp(Ordering::Acquire, Ordering::Acquire);
+            atomic_compare_exchange_ldxp_stxp_release
+                = _atomic_compare_exchange_ldxp_stxp(Ordering::Release, Ordering::Relaxed);
+            atomic_compare_exchange_ldxp_stxp_acqrel
+                = _atomic_compare_exchange_ldxp_stxp(Ordering::AcqRel, Ordering::Acquire);
+            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments, so the SeqCst alias is only defined for MSVC.
+            #[cfg(target_env = "msvc")]
+            atomic_compare_exchange_ldxp_stxp_seqcst
+                = _atomic_compare_exchange_ldxp_stxp(Ordering::SeqCst, Ordering::SeqCst);
+        }
+        // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+        // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+        // and we've checked if FEAT_LSE is available.
+        unsafe {
+            let success = crate::utils::upgrade_success_ordering(success, failure); // single ordering to dispatch on; NOTE(review): presumably strengthens `success` to cover `failure` — confirm in crate::utils
+            match success {
+                Ordering::Relaxed => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { // NOTE(review): presumably resolves the fn chosen by this body and calls it with (dst, old, new) — confirm against ifunc!
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_relaxed
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_relaxed
+                        }
+                    })
+                }
+                Ordering::Acquire => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_acquire
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_acquire
+                        }
+                    })
+                }
+                Ordering::Release => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_release
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_release
+                        }
+                    })
+                }
+                // AcqRel and SeqCst RMWs are equivalent in both implementations in non-MSVC environments.
+                #[cfg(not(target_env = "msvc"))]
+                Ordering::AcqRel | Ordering::SeqCst => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_acqrel
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_acqrel
+                        }
+                    })
+                }
+                #[cfg(target_env = "msvc")]
+                Ordering::AcqRel => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_acqrel
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_acqrel
+                        }
+                    })
+                }
+                #[cfg(target_env = "msvc")]
+                Ordering::SeqCst => {
+                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
+                        if detect::detect().has_lse() {
+                            atomic_compare_exchange_casp_seqcst
+                        } else {
+                            atomic_compare_exchange_ldxp_stxp_seqcst
+                        }
+                    })
+                }
+                _ => unreachable!("{:?}", success),
+            }
+        }
+    };
+    if prev == old { // whichever implementation ran, it yields the previous value; equality with `old` is reported as success
+        Ok(prev)
+    } else {
+        Err(prev)
+    }
+}
+#[cfg(any( // 128-bit compare-and-exchange using a single CASP instruction; returns the previous value
+    target_feature = "lse",
+    portable_atomic_target_feature = "lse",
+    not(portable_atomic_no_outline_atomics),
+))]
+#[inline]
+unsafe fn _atomic_compare_exchange_casp(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> u128 {
+    debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+    debug_assert_lse!(); // NOTE(review): presumably a debug-only FEAT_LSE check; macro is defined outside this view
+    let order = crate::utils::upgrade_success_ordering(success, failure); // single ordering used to pick the instruction variant below
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+    // and the CPU supports FEAT_LSE.
+    //
+    // Refs:
+    // - https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL--A64-
+    // - https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-Swap-Pair-of-words-or-doublewords-in-memory-
+    unsafe {
+        let old = U128 { whole: old }; // split into the lo/hi halves read via `.pair` below
+        let new = U128 { whole: new };
+        let (prev_lo, prev_hi);
+        macro_rules! cmpxchg {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    start_lse!(), // NOTE(review): presumably enables LSE instructions for the assembler; macro defined outside this view
+                    concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"), // x6/x7 = expected `old` in, previous value out; x4/x5 = `new`
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    // must be allocated to even/odd register pair
+                    inout("x6") old.pair.lo => prev_lo,
+                    inout("x7") old.pair.hi => prev_hi,
+                    // must be allocated to even/odd register pair
+                    in("x4") new.pair.lo,
+                    in("x5") new.pair.hi,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(cmpxchg, order, write = success); // NOTE(review): presumably invokes `cmpxchg!` with the strings for `order`; macro defined outside this view
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // reassemble the two halves into the returned u128
+    }
+}
+#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] // 128-bit compare-and-exchange via an LDXP/STXP loop; returns the previous value
+#[inline]
+unsafe fn _atomic_compare_exchange_ldxp_stxp(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> u128 {
+    debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+    let order = crate::utils::upgrade_success_ordering(success, failure); // single ordering used to pick the instruction variants below
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+    //
+    // Refs:
+    // - LDXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDXP--A64-
+    // - LDAXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDAXP--A64-
+    // - STXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STXP--A64-
+    // - STLXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STLXP--A64-
+    //
+    // Note: Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic
+    // operation (even load/store), a corresponding Store-Exclusive pair must succeed.
+    // See Arm Architecture Reference Manual for A-profile architecture
+    // Section B2.2.1 "Requirements for single-copy atomicity", and
+    // Section B2.9 "Synchronization and semaphores" for more.
+    unsafe {
+        let old = U128 { whole: old }; // split into the lo/hi halves read via `.pair` below
+        let new = U128 { whole: new };
+        let (mut prev_lo, mut prev_hi);
+        macro_rules! cmpxchg {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    "2:",
+                        concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // load-exclusive the current value
+                        "cmp {prev_lo}, {old_lo}",
+                        "cset {r:w}, ne", // r = 1 if the low halves differ, else 0
+                        "cmp {prev_hi}, {old_hi}",
+                        "cinc {r:w}, {r:w}, ne", // r += 1 if the high halves differ
+                        "cbz {r:w}, 3f", // r == 0: both halves matched `old`, go store `new`
+                        concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{dst}]"), // mismatch: write back the value just loaded (see the note above on why a store is still needed)
+                        // 0 if the store was successful, 1 if no store was performed
+                        "cbnz {r:w}, 2b",
+                        "b 4f",
+                    "3:",
+                        concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"), // match: store-exclusive `new`
+                        // 0 if the store was successful, 1 if no store was performed
+                        "cbnz {r:w}, 2b",
+                    "4:",
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    old_lo = in(reg) old.pair.lo,
+                    old_hi = in(reg) old.pair.hi,
+                    new_lo = in(reg) new.pair.lo,
+                    new_hi = in(reg) new.pair.hi,
+                    prev_lo = out(reg) prev_lo,
+                    prev_hi = out(reg) prev_hi,
+                    r = out(reg) _,
+                    // Do not use `preserves_flags` because CMP modifies the condition flags.
+                    options(nostack),
+                )
+            };
+        }
+        atomic_rmw!(cmpxchg, order, write = success); // NOTE(review): presumably invokes `cmpxchg!` with the strings for `order`; macro defined outside this view
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // reassemble the two halves into the returned u128
+    }
+}
+
+// casp is always strong, and ldxp requires a corresponding successful stxp for
+// its atomicity (see the code comment in _atomic_compare_exchange_ldxp_stxp).
+// (i.e., aarch64 doesn't have a 128-bit weak CAS)
+use self::atomic_compare_exchange as atomic_compare_exchange_weak;
+
+// If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+// we use CAS-based atomic RMW.
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+#[cfg(all(
+    any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+    not(portable_atomic_ll_sc_rmw),
+))]
+use _atomic_swap_casp as atomic_swap;
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+#[cfg(not(all(
+    any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+    not(portable_atomic_ll_sc_rmw),
+)))]
+use _atomic_swap_ldxp_stxp as atomic_swap;
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+use _atomic_swap_swpp as atomic_swap;
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] // 128-bit atomic swap using a single SWPP instruction; returns the previous value
+#[inline]
+unsafe fn _atomic_swap_swpp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+    // and the CPU supports FEAT_LSE128.
+    //
+    // Refs:
+    // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/SWPP--SWPPA--SWPPAL--SWPPL--Swap-quadword-in-memory-?lang=en
+    unsafe {
+        let val = U128 { whole: val }; // split into the lo/hi halves read via `.pair` below
+        let (prev_lo, prev_hi);
+        macro_rules! swap {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), // the same register pair carries `val` in and the previous value out
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = inout(reg) val.pair.lo => prev_lo,
+                    val_hi = inout(reg) val.pair.hi => prev_hi,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(swap, order); // NOTE(review): presumably invokes `swap!` with the strings for `order`; macro defined outside this view
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // reassemble the two halves into the returned u128
+    }
+}
+// 128-bit atomic swap via a CASP retry loop; returns the previous value. Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap.
+#[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
+#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+#[inline]
+unsafe fn _atomic_swap_casp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+    debug_assert_lse!(); // NOTE(review): presumably a debug-only FEAT_LSE check; macro is defined outside this view
+
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that the CPU supports FEAT_LSE.
+    unsafe {
+        let val = U128 { whole: val }; // split into the lo/hi halves read via `.pair` below
+        let (mut prev_lo, mut prev_hi);
+        macro_rules! swap {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    start_lse!(), // NOTE(review): presumably enables LSE instructions for the assembler; macro defined outside this view
+                    // If FEAT_LSE2 is not supported, this works like byte-wise atomic.
+                    // This is not a single-copy atomic read, but this is ok because the subsequent
+                    // CAS will check for consistency.
+                    "ldp x4, x5, [{dst}]", // initial guess of the current value
+                    "2:",
+                        // casp writes the current value to the first register pair,
+                        // so copy the `out`'s value for later comparison.
+                        "mov {tmp_lo}, x4",
+                        "mov {tmp_hi}, x5",
+                        concat!("casp", $acquire, $release, " x4, x5, x2, x3, [{dst}]"), // x4/x5 = expected in, observed out; x2/x3 = `val`
+                        "cmp {tmp_hi}, x5",
+                        "ccmp {tmp_lo}, x4, #0, eq",
+                        "b.ne 2b", // observed value differs from the expected copy: retry with the observed value
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    tmp_lo = out(reg) _,
+                    tmp_hi = out(reg) _,
+                    // must be allocated to even/odd register pair
+                    out("x4") prev_lo,
+                    out("x5") prev_hi,
+                    // must be allocated to even/odd register pair
+                    in("x2") val.pair.lo,
+                    in("x3") val.pair.hi,
+                    // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
+                    options(nostack),
+                )
+            };
+        }
+        atomic_rmw!(swap, order); // NOTE(review): presumably invokes `swap!` with the strings for `order`; macro defined outside this view
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // reassemble the two halves into the returned u128
+    }
+}
+// 128-bit atomic swap via an LDXP/STXP loop; returns the previous value. Do not use atomic_rmw_ll_sc_3 because it needs extra MOV to implement swap.
+#[cfg(any(
+    test,
+    not(all(
+        any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+        not(portable_atomic_ll_sc_rmw),
+    ))
+))]
+#[inline]
+unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let val = U128 { whole: val }; // split into the lo/hi halves read via `.pair` below
+        let (mut prev_lo, mut prev_hi);
+        macro_rules! swap {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    "2:",
+                        concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // load-exclusive the current value
+                        concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"), // store-exclusive `val`; status goes to r
+                        // 0 if the store was successful, 1 if no store was performed
+                        "cbnz {r:w}, 2b", // retry the whole load/store pair on failure
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = in(reg) val.pair.lo,
+                    val_hi = in(reg) val.pair.hi,
+                    prev_lo = out(reg) prev_lo,
+                    prev_hi = out(reg) prev_hi,
+                    r = out(reg) _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(swap, order); // NOTE(review): presumably invokes `swap!` with the strings for `order`; macro defined outside this view
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // reassemble the two halves into the returned u128
+    }
+}
+
+/// Atomic RMW by LL/SC loop (3 arguments): generates `$name` and, when LL/SC RMW is selected by cfg, aliases it as `$reexport_name`.
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;` (the generated function returns the previous value)
+///
+/// `$op` can use the following registers:
+/// - val_lo/val_hi pair: val argument (read-only for `$op`)
+/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`)
+/// - new_lo/new_hi pair: new value that will be stored by sc
+macro_rules! atomic_rmw_ll_sc_3 {
+    ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => { // the optional parenthesized token is forwarded into the asm! `options(...)` below
+        // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+        // we use CAS-based atomic RMW generated by atomic_rmw_cas_3! macro instead.
+        #[cfg(not(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        )))]
+        use $name as $reexport_name;
+        #[cfg(any( // same condition as the alias above, plus `test` so the function also exists in test builds
+            test,
+            not(all(
+                any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+                not(portable_atomic_ll_sc_rmw),
+            ))
+        ))]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                let val = U128 { whole: val }; // split into the lo/hi halves read via `.pair` below
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt, $fence:tt) => {
+                        asm!(
+                            "2:",
+                                concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), // load-exclusive the current value
+                                $($op)* // caller-supplied instructions that compute new_lo/new_hi
+                                concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"), // store-exclusive the new value; status goes to r
+                                // 0 if the store was successful, 1 if no store was performed
+                                "cbnz {r:w}, 2b", // retry the whole loop on failure
+                            $fence,
+                            dst = in(reg) ptr_reg!(dst),
+                            val_lo = in(reg) val.pair.lo,
+                            val_hi = in(reg) val.pair.hi,
+                            prev_lo = out(reg) prev_lo,
+                            prev_hi = out(reg) prev_hi,
+                            new_lo = out(reg) _,
+                            new_hi = out(reg) _,
+                            r = out(reg) _,
+                            options(nostack $(, $preserves_flags)?),
+                        )
+                    };
+                }
+                atomic_rmw!(op, order); // NOTE(review): presumably invokes `op!` with the strings for `order`; macro defined outside this view
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // reassemble the two halves into the returned u128
+            }
+        }
+    };
+}
+/// Atomic RMW by CAS loop (3 arguments): generates `$name` and, when CAS-based RMW is selected by cfg, aliases it as `$reexport_name`.
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;` (the generated function returns the previous value)
+///
+/// `$op` can use the following registers:
+/// - val_lo/val_hi pair: val argument (read-only for `$op`)
+/// - x6/x7 pair: previous value loaded (read-only for `$op`)
+/// - x4/x5 pair: new value that will be stored
+macro_rules! atomic_rmw_cas_3 {
+    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
+        // If FEAT_LSE is not available at compile-time or portable_atomic_ll_sc_rmw cfg is set,
+        // we use LL/SC-based atomic RMW generated by atomic_rmw_ll_sc_3! macro instead.
+        #[cfg(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        ))]
+        use $name as $reexport_name;
+        #[cfg(any(test, not(portable_atomic_ll_sc_rmw)))] // also compiled in test builds even when the alias above is not
+        #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+            debug_assert_lse!(); // NOTE(review): presumably a debug-only FEAT_LSE check; macro is defined outside this view
+            // SAFETY: the caller must uphold the safety contract.
+            // cfg guarantees that the CPU supports FEAT_LSE.
+            unsafe {
+                let val = U128 { whole: val }; // split into the lo/hi halves read via `.pair` below
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt, $fence:tt) => {
+                        asm!(
+                            start_lse!(), // NOTE(review): presumably enables LSE instructions for the assembler; macro defined outside this view
+                            // If FEAT_LSE2 is not supported, this works like byte-wise atomic.
+                            // This is not a single-copy atomic read, but this is ok because the subsequent
+                            // CAS will check for consistency.
+                            "ldp x6, x7, [{dst}]", // initial guess of the current value
+                            "2:",
+                                // casp writes the current value to the first register pair,
+                                // so copy the `out`'s value for later comparison.
+                                "mov {tmp_lo}, x6",
+                                "mov {tmp_hi}, x7",
+                                $($op)* // caller-supplied instructions that compute the new value into x4/x5
+                                concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"), // x6/x7 = expected in, observed out; x4/x5 = new value
+                                "cmp {tmp_hi}, x7",
+                                "ccmp {tmp_lo}, x6, #0, eq",
+                                "b.ne 2b", // observed value differs from the expected copy: retry with the observed value
+                            $fence,
+                            dst = in(reg) ptr_reg!(dst),
+                            val_lo = in(reg) val.pair.lo,
+                            val_hi = in(reg) val.pair.hi,
+                            tmp_lo = out(reg) _,
+                            tmp_hi = out(reg) _,
+                            // must be allocated to even/odd register pair
+                            out("x6") prev_lo,
+                            out("x7") prev_hi,
+                            // must be allocated to even/odd register pair
+                            out("x4") _,
+                            out("x5") _,
+                            // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
+                            options(nostack),
+                        )
+                    };
+                }
+                atomic_rmw!(op, order); // NOTE(review): presumably invokes `op!` with the strings for `order`; macro defined outside this view
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // reassemble the two halves into the returned u128
+            }
+        }
+    };
+}
+
+/// Atomic RMW by LL/SC loop (2 arguments)
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+///
+/// `$op` can use the following registers:
+/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`)
+/// - new_lo/new_hi pair: new value that will be stored by sc
+macro_rules! atomic_rmw_ll_sc_2 {
+    ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => {
+        // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set,
+        // we use CAS-based atomic RMW generated by atomic_rmw_cas_2! macro instead.
+        #[cfg(not(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        )))]
+        use $name as $reexport_name;
+        #[cfg(any(
+            test,
+            not(all(
+                any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+                not(portable_atomic_ll_sc_rmw),
+            ))
+        ))]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt, $fence:tt) => {
+                        asm!(
+                            "2:",
+                                concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),
+                                $($op)*
+                                concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"),
+                                // 0 if the store was successful, 1 if no store was performed
+                                "cbnz {r:w}, 2b",
+                            $fence,
+                            dst = in(reg) ptr_reg!(dst),
+                            prev_lo = out(reg) prev_lo,
+                            prev_hi = out(reg) prev_hi,
+                            new_lo = out(reg) _,
+                            new_hi = out(reg) _,
+                            r = out(reg) _,
+                            options(nostack $(, $preserves_flags)?),
+                        )
+                    };
+                }
+                atomic_rmw!(op, order);
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+            }
+        }
+    };
+}
+/// Atomic RMW by CAS loop (2 arguments)
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+///
+/// `$op` can use the following registers:
+/// - x6/x7 pair: previous value loaded (read-only for `$op`)
+/// - x4/x5 pair: new value that will be stored
+macro_rules! atomic_rmw_cas_2 {
+    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
+        // If FEAT_LSE is not available at compile-time or portable_atomic_ll_sc_rmw cfg is set,
+        // we use LL/SC-based atomic RMW generated by atomic_rmw_ll_sc_2! macro instead.
+        #[cfg(all(
+            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
+            not(portable_atomic_ll_sc_rmw),
+        ))]
+        use $name as $reexport_name;
+        #[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
+        #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            debug_assert_lse!();
+            // SAFETY: the caller must uphold the safety contract.
+            // cfg guarantee that the CPU supports FEAT_LSE.
+            unsafe {
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt, $fence:tt) => {
+                        asm!(
+                            start_lse!(),
+                            // If FEAT_LSE2 is not supported, this load behaves like a byte-wise atomic load.
+                            // It is not a single-copy atomic read, but this is ok because the
+                            // subsequent CAS will check for consistency.
+                            "ldp x6, x7, [{dst}]",
+                            "2:",
+                                // casp writes the current value to the first register pair,
+                                // so copy the `out`'s value for later comparison.
+                                "mov {tmp_lo}, x6",
+                                "mov {tmp_hi}, x7",
+                                $($op)*
+                                concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"),
+                                "cmp {tmp_hi}, x7",
+                                "ccmp {tmp_lo}, x6, #0, eq",
+                                "b.ne 2b",
+                            $fence,
+                            dst = in(reg) ptr_reg!(dst),
+                            tmp_lo = out(reg) _,
+                            tmp_hi = out(reg) _,
+                            // must be allocated to even/odd register pair
+                            out("x6") prev_lo,
+                            out("x7") prev_hi,
+                            // must be allocated to even/odd register pair
+                            out("x4") _,
+                            out("x5") _,
+                            // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
+                            options(nostack),
+                        )
+                    };
+                }
+                atomic_rmw!(op, order);
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+            }
+        }
+    };
+}
+
+// Do not use `preserves_flags` because ADDS modifies the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_add_ldxp_stxp as atomic_add,
+    select_le_or_be!("adds {new_lo}, {prev_lo}, {val_lo}", "adds {new_hi}, {prev_hi}, {val_hi}"),
+    select_le_or_be!("adc {new_hi}, {prev_hi}, {val_hi}", "adc {new_lo}, {prev_lo}, {val_lo}"),
+}
+atomic_rmw_cas_3! {
+    _atomic_add_casp as atomic_add,
+    select_le_or_be!("adds x4, x6, {val_lo}", "adds x5, x7, {val_hi}"),
+    select_le_or_be!("adc x5, x7, {val_hi}", "adc x4, x6, {val_lo}"),
+}
+
+// Do not use `preserves_flags` because SUBS modifies the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_sub_ldxp_stxp as atomic_sub,
+    select_le_or_be!("subs {new_lo}, {prev_lo}, {val_lo}", "subs {new_hi}, {prev_hi}, {val_hi}"),
+    select_le_or_be!("sbc {new_hi}, {prev_hi}, {val_hi}", "sbc {new_lo}, {prev_lo}, {val_lo}"),
+}
+atomic_rmw_cas_3! {
+    _atomic_sub_casp as atomic_sub,
+    select_le_or_be!("subs x4, x6, {val_lo}", "subs x5, x7, {val_hi}"),
+    select_le_or_be!("sbc x5, x7, {val_hi}", "sbc x4, x6, {val_lo}"),
+}
+
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+atomic_rmw_ll_sc_3! {
+    _atomic_and_ldxp_stxp as atomic_and (preserves_flags),
+    "and {new_lo}, {prev_lo}, {val_lo}",
+    "and {new_hi}, {prev_hi}, {val_hi}",
+}
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+atomic_rmw_cas_3! {
+    _atomic_and_casp as atomic_and,
+    "and x4, x6, {val_lo}",
+    "and x5, x7, {val_hi}",
+}
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+#[inline]
+unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+    // and the CPU supports FEAT_LSE128.
+    //
+    // Refs:
+    // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDCLRP--LDCLRPA--LDCLRPAL--LDCLRPL--Atomic-bit-clear-on-quadword-in-memory-?lang=en
+    unsafe {
+        let val = U128 { whole: !val };
+        let (prev_lo, prev_hi);
+        macro_rules! and {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    concat!("ldclrp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"),
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = inout(reg) val.pair.lo => prev_lo,
+                    val_hi = inout(reg) val.pair.hi => prev_hi,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(and, order);
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+    }
+}
+
+atomic_rmw_ll_sc_3! {
+    _atomic_nand_ldxp_stxp as atomic_nand (preserves_flags),
+    "and {new_lo}, {prev_lo}, {val_lo}",
+    "mvn {new_lo}, {new_lo}",
+    "and {new_hi}, {prev_hi}, {val_hi}",
+    "mvn {new_hi}, {new_hi}",
+}
+atomic_rmw_cas_3! {
+    _atomic_nand_casp as atomic_nand,
+    "and x4, x6, {val_lo}",
+    "mvn x4, x4",
+    "and x5, x7, {val_hi}",
+    "mvn x5, x5",
+}
+
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+atomic_rmw_ll_sc_3! {
+    _atomic_or_ldxp_stxp as atomic_or (preserves_flags),
+    "orr {new_lo}, {prev_lo}, {val_lo}",
+    "orr {new_hi}, {prev_hi}, {val_hi}",
+}
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
+atomic_rmw_cas_3! {
+    _atomic_or_casp as atomic_or,
+    "orr x4, x6, {val_lo}",
+    "orr x5, x7, {val_hi}",
+}
+#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
+#[inline]
+unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+    // and the CPU supports FEAT_LSE128.
+    //
+    // Refs:
+    // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDSETP--LDSETPA--LDSETPAL--LDSETPL--Atomic-bit-set-on-quadword-in-memory-?lang=en
+    unsafe {
+        let val = U128 { whole: val };
+        let (prev_lo, prev_hi);
+        macro_rules! or {
+            ($acquire:tt, $release:tt, $fence:tt) => {
+                asm!(
+                    concat!("ldsetp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"),
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    val_lo = inout(reg) val.pair.lo => prev_lo,
+                    val_hi = inout(reg) val.pair.hi => prev_hi,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(or, order);
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+    }
+}
+
+atomic_rmw_ll_sc_3! {
+    _atomic_xor_ldxp_stxp as atomic_xor (preserves_flags),
+    "eor {new_lo}, {prev_lo}, {val_lo}",
+    "eor {new_hi}, {prev_hi}, {val_hi}",
+}
+atomic_rmw_cas_3! {
+    _atomic_xor_casp as atomic_xor,
+    "eor x4, x6, {val_lo}",
+    "eor x5, x7, {val_hi}",
+}
+
+atomic_rmw_ll_sc_2! {
+    _atomic_not_ldxp_stxp as atomic_not (preserves_flags),
+    "mvn {new_lo}, {prev_lo}",
+    "mvn {new_hi}, {prev_hi}",
+}
+atomic_rmw_cas_2! {
+    _atomic_not_casp as atomic_not,
+    "mvn x4, x6",
+    "mvn x5, x7",
+}
+
+// Do not use `preserves_flags` because NEGS modifies the condition flags.
+atomic_rmw_ll_sc_2! {
+    _atomic_neg_ldxp_stxp as atomic_neg,
+    select_le_or_be!("negs {new_lo}, {prev_lo}", "negs {new_hi}, {prev_hi}"),
+    select_le_or_be!("ngc {new_hi}, {prev_hi}", "ngc {new_lo}, {prev_lo}"),
+}
+atomic_rmw_cas_2! {
+    _atomic_neg_casp as atomic_neg,
+    select_le_or_be!("negs x4, x6", "negs x5, x7"),
+    select_le_or_be!("ngc x5, x7", "ngc x4, x6"),
+}
+
+// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_max_ldxp_stxp as atomic_max,
+    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
+    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+    "csel {new_hi}, {prev_hi}, {val_hi}, lt", // select hi 64-bit
+    "csel {new_lo}, {prev_lo}, {val_lo}, lt", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+    _atomic_max_casp as atomic_max,
+    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
+    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+    "csel x5, x7, {val_hi}, lt", // select hi 64-bit
+    "csel x4, x6, {val_lo}, lt", // select lo 64-bit
+}
+
+// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_umax_ldxp_stxp as atomic_umax,
+    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
+    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+    "csel {new_hi}, {prev_hi}, {val_hi}, lo", // select hi 64-bit
+    "csel {new_lo}, {prev_lo}, {val_lo}, lo", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+    _atomic_umax_casp as atomic_umax,
+    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
+    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+    "csel x5, x7, {val_hi}, lo", // select hi 64-bit
+    "csel x4, x6, {val_lo}, lo", // select lo 64-bit
+}
+
+// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_min_ldxp_stxp as atomic_min,
+    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
+    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+    "csel {new_hi}, {prev_hi}, {val_hi}, ge", // select hi 64-bit
+    "csel {new_lo}, {prev_lo}, {val_lo}, ge", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+    _atomic_min_casp as atomic_min,
+    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
+    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+    "csel x5, x7, {val_hi}, ge", // select hi 64-bit
+    "csel x4, x6, {val_lo}, ge", // select lo 64-bit
+}
+
+// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
+atomic_rmw_ll_sc_3! {
+    _atomic_umin_ldxp_stxp as atomic_umin,
+    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
+    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
+    "csel {new_hi}, {prev_hi}, {val_hi}, hs", // select hi 64-bit
+    "csel {new_lo}, {prev_lo}, {val_lo}, hs", // select lo 64-bit
+}
+atomic_rmw_cas_3! {
+    _atomic_umin_casp as atomic_umin,
+    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
+    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
+    "csel x5, x7, {val_hi}, hs", // select hi 64-bit
+    "csel x4, x6, {val_lo}, hs", // select lo 64-bit
+}
+
+#[inline]
+const fn is_lock_free() -> bool {
+    IS_ALWAYS_LOCK_FREE
+}
+const IS_ALWAYS_LOCK_FREE: bool = true;
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min);
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_atomic_int!(i128);
+    test_atomic_int!(u128);
+
+    // load/store/swap implementation is not affected by signedness, so it is
+    // enough to test only unsigned types.
+    stress_test!(u128);
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs
new file mode 100644
index 0000000..4cbdb51
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs
@@ -0,0 +1,628 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Run-time feature detection on aarch64 Linux/FreeBSD/NetBSD/OpenBSD by parsing system registers.
+//
+// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on NetBSD/OpenBSD.
+// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs
+// https://github.com/rust-lang/stdarch/pull/1374
+//
+// Refs:
+// - https://developer.arm.com/documentation/ddi0601/latest/AArch64-Registers
+// - https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
+// - https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/aarch64.rs
+//
+// Supported platforms:
+// - Linux 4.11+ (emulate mrs instruction)
+//   https://github.com/torvalds/linux/commit/77c97b4ee21290f5f083173d957843b615abbff2
+// - FreeBSD 12.0+ (emulate mrs instruction)
+//   https://github.com/freebsd/freebsd-src/commit/398810619cb32abf349f8de23f29510b2ee0839b
+// - NetBSD 9.0+ (through sysctl)
+//   https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb
+// - OpenBSD 7.1+ (through sysctl)
+//   https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
+//
+// For now, this module is only used on NetBSD/OpenBSD.
+// On Linux/FreeBSD, this module is test-only:
+// - On Linux, this approach requires a higher kernel version than Rust supports,
+//   and also does not work with qemu-user (as of QEMU 7.2) and Valgrind.
+//   (Looking into HWCAP_CPUID in auxvec, it appears that Valgrind is setting it
+//   to false correctly, but qemu-user is setting it to true.)
+// - On FreeBSD, this approach does not work on FreeBSD 12 on QEMU (confirmed on
+//   FreeBSD 12.{2,3,4}), and we got SIGILL (worked on FreeBSD 13 and 14).
+
+include!("common.rs");
+
+#[cfg_attr(test, derive(Debug, PartialEq))]
+struct AA64Reg {
+    aa64isar0: u64,
+    #[cfg(test)]
+    aa64isar1: u64,
+    // OpenBSD has an API to get this, but currently always returns 0.
+    // https://github.com/openbsd/src/blob/6a233889798dc3ecb18acc52dce1e57862af2957/sys/arch/arm64/arm64/machdep.c#L371-L377
+    #[cfg_attr(target_os = "openbsd", cfg(test))]
+    aa64mmfr2: u64,
+}
+
+#[cold]
+fn _detect(info: &mut CpuInfo) {
+    let AA64Reg {
+        aa64isar0,
+        #[cfg(test)]
+        aa64isar1,
+        #[cfg_attr(target_os = "openbsd", cfg(test))]
+        aa64mmfr2,
+    } = imp::aa64reg();
+
+    // ID_AA64ISAR0_EL1, Instruction Set Attribute Register 0
+    // https://developer.arm.com/documentation/ddi0601/2023-06/AArch64-Registers/ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0?lang=en
+    let atomic = extract(aa64isar0, 23, 20);
+    if atomic >= 2 {
+        info.set(CpuInfo::HAS_LSE);
+        // we currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics.
+        #[cfg(test)]
+        {
+            if atomic >= 3 {
+                info.set(CpuInfo::HAS_LSE128);
+            }
+        }
+    }
+    // we currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics.
+    #[cfg(test)]
+    {
+        // ID_AA64ISAR1_EL1, Instruction Set Attribute Register 1
+        // https://developer.arm.com/documentation/ddi0601/2023-06/AArch64-Registers/ID-AA64ISAR1-EL1--AArch64-Instruction-Set-Attribute-Register-1?lang=en
+        if extract(aa64isar1, 23, 20) >= 3 {
+            info.set(CpuInfo::HAS_RCPC3);
+        }
+    }
+    // OpenBSD has an API to get this, but currently always returns 0.
+    // https://github.com/openbsd/src/blob/6a233889798dc3ecb18acc52dce1e57862af2957/sys/arch/arm64/arm64/machdep.c#L371-L377
+    #[cfg_attr(target_os = "openbsd", cfg(test))]
+    {
+        // ID_AA64MMFR2_EL1, AArch64 Memory Model Feature Register 2
+        // https://developer.arm.com/documentation/ddi0601/2023-06/AArch64-Registers/ID-AA64MMFR2-EL1--AArch64-Memory-Model-Feature-Register-2?lang=en
+        if extract(aa64mmfr2, 35, 32) >= 1 {
+            info.set(CpuInfo::HAS_LSE2);
+        }
+    }
+}
+
+fn extract(x: u64, high: usize, low: usize) -> u64 {
+    (x >> low) & (!0_u64 >> (63 - (high - low))) // bits [high:low]; no `1 << 64` overflow at full width
+}
+
+#[cfg(not(any(target_os = "netbsd", target_os = "openbsd")))]
+mod imp {
+    // This module is test-only. See parent module docs for details.
+
+    #[cfg(not(portable_atomic_no_asm))]
+    use core::arch::asm;
+
+    use super::AA64Reg;
+
+    pub(super) fn aa64reg() -> AA64Reg {
+        // SAFETY: This is safe on FreeBSD 12.0+. FreeBSD 11 was EoL on 2021-09-30.
+        // Note that stdarch has been doing the same thing since before FreeBSD 11 was EoL.
+        // https://github.com/rust-lang/stdarch/pull/611
+        unsafe {
+            let aa64isar0: u64;
+            asm!(
+                "mrs {0}, ID_AA64ISAR0_EL1",
+                out(reg) aa64isar0,
+                options(pure, nomem, nostack, preserves_flags)
+            );
+            #[cfg(test)]
+            let aa64isar1: u64;
+            #[cfg(test)]
+            {
+                asm!(
+                    "mrs {0}, ID_AA64ISAR1_EL1",
+                    out(reg) aa64isar1,
+                    options(pure, nomem, nostack, preserves_flags)
+                );
+            }
+            let aa64mmfr2: u64;
+            asm!(
+                "mrs {0}, ID_AA64MMFR2_EL1",
+                out(reg) aa64mmfr2,
+                options(pure, nomem, nostack, preserves_flags)
+            );
+            AA64Reg {
+                aa64isar0,
+                #[cfg(test)]
+                aa64isar1,
+                aa64mmfr2,
+            }
+        }
+    }
+}
+#[cfg(target_os = "netbsd")]
+mod imp {
+    // NetBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl.
+    // https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb
+    // https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f
+
+    use core::ptr;
+
+    use super::AA64Reg;
+
+    // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+    #[allow(non_camel_case_types)]
+    pub(super) mod ffi {
+        pub(crate) use super::super::c_types::{c_char, c_int, c_size_t, c_void};
+
+        extern "C" {
+            // Defined in sys/sysctl.h.
+            // https://man.netbsd.org/sysctl.3
+            // https://github.com/NetBSD/src/blob/167403557cf60bed09a63fc84d941a1a4bd7d52e/sys/sys/sysctl.h
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/bsd/netbsdlike/netbsd/mod.rs#L2582
+            pub(crate) fn sysctlbyname(
+                name: *const c_char,
+                old_p: *mut c_void,
+                old_len_p: *mut c_size_t,
+                new_p: *const c_void,
+                new_len: c_size_t,
+            ) -> c_int;
+        }
+
+        // Defined in aarch64/armreg.h.
+        // https://github.com/NetBSD/src/blob/167403557cf60bed09a63fc84d941a1a4bd7d52e/sys/arch/aarch64/include/armreg.h#L1626
+        #[derive(Clone, Copy)]
+        #[repr(C)]
+        pub(crate) struct aarch64_sysctl_cpu_id {
+            // NetBSD 9.0+
+            // https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb
+            pub(crate) midr: u64,
+            pub(crate) revidr: u64,
+            pub(crate) mpidr: u64,
+            pub(crate) aa64dfr0: u64,
+            pub(crate) aa64dfr1: u64,
+            pub(crate) aa64isar0: u64,
+            pub(crate) aa64isar1: u64,
+            pub(crate) aa64mmfr0: u64,
+            pub(crate) aa64mmfr1: u64,
+            pub(crate) aa64mmfr2: u64,
+            pub(crate) aa64pfr0: u64,
+            pub(crate) aa64pfr1: u64,
+            pub(crate) aa64zfr0: u64,
+            pub(crate) mvfr0: u32,
+            pub(crate) mvfr1: u32,
+            pub(crate) mvfr2: u32,
+            // NetBSD 10.0+
+            // https://github.com/NetBSD/src/commit/0c7bdc13f0e332cccec56e307f023b4888638973
+            pub(crate) pad: u32,
+            pub(crate) clidr: u64,
+            pub(crate) ctr: u64,
+        }
+    }
+
+    pub(super) unsafe fn sysctl_cpu_id(name: &[u8]) -> Option<AA64Reg> {
+        const OUT_LEN: ffi::c_size_t =
+            core::mem::size_of::<ffi::aarch64_sysctl_cpu_id>() as ffi::c_size_t;
+
+        debug_assert_eq!(name.last(), Some(&0), "{:?}", name);
+        debug_assert_eq!(name.iter().filter(|&&v| v == 0).count(), 1, "{:?}", name);
+
+        // SAFETY: all fields of aarch64_sysctl_cpu_id are zero-able and we only use
+        // the result when the machdep.cpuN.cpu_id sysctl was successful.
+        let mut buf: ffi::aarch64_sysctl_cpu_id = unsafe { core::mem::zeroed() };
+        let mut out_len = OUT_LEN;
+        // SAFETY:
+        // - the caller must guarantee that `name` is `machdep.cpuN.cpu_id` in a C string.
+        // - `out_len` does not exceed the size of the value at `buf`.
+        // - `sysctlbyname` is thread-safe.
+        let res = unsafe {
+            ffi::sysctlbyname(
+                name.as_ptr().cast::<ffi::c_char>(),
+                (&mut buf as *mut ffi::aarch64_sysctl_cpu_id).cast::<ffi::c_void>(),
+                &mut out_len,
+                ptr::null_mut(),
+                0,
+            )
+        };
+        if res != 0 {
+            return None;
+        }
+        Some(AA64Reg {
+            aa64isar0: buf.aa64isar0,
+            #[cfg(test)]
+            aa64isar1: buf.aa64isar1,
+            aa64mmfr2: buf.aa64mmfr2,
+        })
+    }
+
+    pub(super) fn aa64reg() -> AA64Reg {
+        // Get system registers for cpu0.
+        // If failed, returns default because machdep.cpuN.cpu_id sysctl is not available.
+        // machdep.cpuN.cpu_id sysctl was added on NetBSD 9.0 so it is not available on older versions.
+        // SAFETY: we passed a valid name in a C string.
+        // It is ok to check only cpu0, even if there are more CPUs.
+        // https://github.com/NetBSD/src/commit/bd9707e06ea7d21b5c24df6dfc14cb37c2819416
+        // https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f
+        match unsafe { sysctl_cpu_id(b"machdep.cpu0.cpu_id\0") } {
+            Some(cpu_id) => cpu_id,
+            None => AA64Reg {
+                aa64isar0: 0,
+                #[cfg(test)]
+                aa64isar1: 0,
+                aa64mmfr2: 0,
+            },
+        }
+    }
+}
+#[cfg(target_os = "openbsd")]
+mod imp {
+    // OpenBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl.
+    // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
+    // https://github.com/golang/go/commit/cd54ef1f61945459486e9eea2f016d99ef1da925
+
+    use core::ptr;
+
+    use super::AA64Reg;
+
+    // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+    #[allow(non_camel_case_types)]
+    pub(super) mod ffi {
+        pub(crate) use super::super::c_types::{c_int, c_size_t, c_uint, c_void};
+
+        // Defined in sys/sysctl.h.
+        // https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/sys/sysctl.h#L82
+        pub(crate) const CTL_MACHDEP: c_int = 7;
+        // Defined in machine/cpu.h.
+        // https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/arch/arm64/include/cpu.h#L25-L40
+        pub(crate) const CPU_ID_AA64ISAR0: c_int = 2;
+        #[cfg(test)]
+        pub(crate) const CPU_ID_AA64ISAR1: c_int = 3;
+        // OpenBSD has an API to get this, but currently always returns 0.
+        // https://github.com/openbsd/src/blob/6a233889798dc3ecb18acc52dce1e57862af2957/sys/arch/arm64/arm64/machdep.c#L371-L377
+        #[cfg(test)]
+        pub(crate) const CPU_ID_AA64MMFR2: c_int = 7;
+
+        extern "C" {
+            // Defined in sys/sysctl.h.
+            // https://man.openbsd.org/sysctl.2
+            // https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/sys/sysctl.h
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/bsd/netbsdlike/openbsd/mod.rs#L1817-L1824
+            pub(crate) fn sysctl(
+                name: *const c_int,
+                name_len: c_uint,
+                old_p: *mut c_void,
+                old_len_p: *mut c_size_t,
+                new_p: *mut c_void,
+                new_len: c_size_t,
+            ) -> c_int;
+        }
+    }
+
+    // ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 are supported on OpenBSD 7.1+.
+    // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8
+    // Others are supported on OpenBSD 7.3+.
+    // https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c
+    // sysctl returns an unsupported error if operation is not supported,
+    // so we can safely use this function on older versions of OpenBSD.
+    pub(super) fn aa64reg() -> AA64Reg {
+        let aa64isar0 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR0]).unwrap_or(0);
+        #[cfg(test)]
+        let aa64isar1 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR1]).unwrap_or(0);
+        #[cfg(test)]
+        let aa64mmfr2 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64MMFR2]).unwrap_or(0);
+        AA64Reg {
+            aa64isar0,
+            #[cfg(test)]
+            aa64isar1,
+            #[cfg(test)]
+            aa64mmfr2,
+        }
+    }
+
+    fn sysctl64(mib: &[ffi::c_int]) -> Option<u64> {
+        const OUT_LEN: ffi::c_size_t = core::mem::size_of::<u64>() as ffi::c_size_t;
+        let mut out = 0_u64;
+        let mut out_len = OUT_LEN;
+        #[allow(clippy::cast_possible_truncation)]
+        // SAFETY:
+        // - `mib.len()` does not exceed the size of `mib`.
+        // - `out_len` does not exceed the size of `out`.
+        // - `sysctl` is thread-safe.
+        let res = unsafe {
+            ffi::sysctl(
+                mib.as_ptr(),
+                mib.len() as ffi::c_uint,
+                (&mut out as *mut u64).cast::<ffi::c_void>(),
+                &mut out_len,
+                ptr::null_mut(),
+                0,
+            )
+        };
+        if res == -1 {
+            return None;
+        }
+        debug_assert_eq!(out_len, OUT_LEN);
+        Some(out)
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use std::{
+        process::Command,
+        string::{String, ToString},
+    };
+
+    use super::*;
+
+    // Checks that the ID register values returned by `imp::aa64reg()` agree with the
+    // `sysctl` command output (OpenBSD only) and with the flags reported by `detect()`.
+    #[test]
+    fn test_aa64reg() {
+        // Print the raw register values to stderr.
+        let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg();
+        std::eprintln!("aa64isar0={}", aa64isar0);
+        std::eprintln!("aa64isar1={}", aa64isar1);
+        std::eprintln!("aa64mmfr2={}", aa64mmfr2);
+
+        if cfg!(target_os = "openbsd") {
+            // Run `sysctl machdep` and compare the printed values with ours.
+            let cmd = Command::new("sysctl").arg("machdep").output().unwrap();
+            assert!(cmd.status.success());
+            let text = String::from_utf8(cmd.stdout).unwrap();
+            // Value printed for `key`; a key that is not printed at all counts as "0".
+            let reported =
+                |key: &str| text.lines().find_map(|line| line.strip_prefix(key)).unwrap_or("0");
+            // OpenBSD 7.1+
+            assert_eq!(reported("machdep.id_aa64isar0="), aa64isar0.to_string());
+            assert_eq!(reported("machdep.id_aa64isar1="), aa64isar1.to_string());
+            // OpenBSD 7.3+
+            assert_eq!(reported("machdep.id_aa64mmfr2="), aa64mmfr2.to_string());
+        }
+
+        // Every detected feature must be backed by the matching ID register field.
+        if detect().test(CpuInfo::HAS_LSE) {
+            let atomic = extract(aa64isar0, 23, 20);
+            // The field reads 3 when FEAT_LSE128 is also detected, 2 otherwise.
+            let expected = if detect().test(CpuInfo::HAS_LSE128) { 3 } else { 2 };
+            assert_eq!(atomic, expected);
+        }
+        // HAS_LSE2 is checked against `aa64mmfr2`, HAS_RCPC3 against `aa64isar1`.
+        if detect().test(CpuInfo::HAS_LSE2) {
+            assert_eq!(extract(aa64mmfr2, 35, 32), 1);
+        }
+        if detect().test(CpuInfo::HAS_RCPC3) {
+            assert_eq!(extract(aa64isar1, 23, 20), 3);
+        }
+    }
+
+    #[allow(clippy::cast_possible_wrap)]
+    #[cfg(target_os = "netbsd")]
+    #[test]
+    fn test_netbsd() { // compares `imp::sysctl_cpu_id` with a raw-syscall reimplementation
+        use c_types::*;
+        use core::{arch::asm, mem, ptr};
+        use imp::ffi;
+        use test_helper::sys;
+
+        // Call syscall using asm instead of libc.
+        // Note that NetBSD does not guarantee the stability of raw syscall as
+        // much as Linux does (It may actually be stable enough, though: https://lists.llvm.org/pipermail/llvm-dev/2019-June/133393.html).
+        // `name` is the sysctl name already split into its dot-separated parts.
+        // This is currently used only for testing.
+        unsafe fn sysctl_cpu_id_asm_syscall(name: &[&[u8]]) -> Result<AA64Reg, c_int> {
+            // https://github.com/golang/go/blob/4badad8d477ffd7a6b762c35bc69aed82faface7/src/syscall/asm_netbsd_arm64.s
+            #[inline]
+            unsafe fn sysctl( // issues the `__sysctl` syscall directly via `svc`
+                name: *const c_int,
+                name_len: c_uint,
+                old_p: *mut c_void,
+                old_len_p: *mut c_size_t,
+                new_p: *const c_void,
+                new_len: c_size_t,
+            ) -> Result<c_int, c_int> {
+                #[allow(clippy::cast_possible_truncation)]
+                // SAFETY: the caller must uphold the safety contract.
+                unsafe {
+                    let mut n = sys::SYS___sysctl as u64; // x17: syscall number in, error value out
+                    let r: i64;
+                    asm!(
+                        "svc 0", // enter the kernel
+                        "b.cc 2f", // carry clear: skip the error path
+                        "mov x17, x0", // error path: keep the x0 value in x17 (read back as `n`)
+                        "mov x0, #-1", // error path: put -1 in x0 (read back as `r`)
+                        "2:",
+                        inout("x17") n,
+                        inout("x0") ptr_reg!(name) => r,
+                        inout("x1") name_len as u64 => _,
+                        in("x2") ptr_reg!(old_p),
+                        in("x3") ptr_reg!(old_len_p),
+                        in("x4") ptr_reg!(new_p),
+                        in("x5") new_len as u64,
+                        options(nostack),
+                    );
+                    if r as c_int == -1 {
+                        Err(n as c_int) // presumably the errno value — TODO confirm
+                    } else {
+                        Ok(r as c_int)
+                    }
+                }
+            }
+
+            // https://github.com/golang/sys/blob/4badad8d477ffd7a6b762c35bc69aed82faface7/cpu/cpu_netbsd_arm64.go
+            use std::{vec, vec::Vec};
+            fn sysctl_nodes(mib: &mut Vec<i32>) -> Result<Vec<sys::sysctlnode>, i32> {
+                mib.push(sys::CTL_QUERY); // removed again before the successful return
+                let mut q_node = sys::sysctlnode {
+                    sysctl_flags: sys::SYSCTL_VERS_1,
+                    ..unsafe { mem::zeroed() }
+                };
+                let qp = (&mut q_node as *mut sys::sysctlnode).cast::<ffi::c_void>();
+                let sz = mem::size_of::<sys::sysctlnode>();
+                let mut olen = 0; // written by the first call, then used to size `nodes`
+                #[allow(clippy::cast_possible_truncation)]
+                unsafe {
+                    sysctl(mib.as_ptr(), mib.len() as c_uint, ptr::null_mut(), &mut olen, qp, sz)?;
+                }
+
+                let mut nodes = Vec::<sys::sysctlnode>::with_capacity(olen / sz);
+                let np = nodes.as_mut_ptr().cast::<ffi::c_void>();
+                #[allow(clippy::cast_possible_truncation)]
+                unsafe {
+                    sysctl(mib.as_ptr(), mib.len() as c_uint, np, &mut olen, qp, sz)?;
+                    nodes.set_len(olen / sz); // length taken from the `olen` written by the call above
+                }
+
+                mib.pop(); // pop CTL_QUERY
+                Ok(nodes)
+            }
+            fn name_to_mib(parts: &[&[u8]]) -> Result<Vec<i32>, i32> {
+                let mut mib = vec![];
+                for (part_no, &part) in parts.iter().enumerate() {
+                    let nodes = sysctl_nodes(&mut mib)?;
+                    for node in nodes {
+                        let mut n = vec![];
+                        for b in node.sysctl_name { // copy the node name without its NUL bytes
+                            if b != 0 {
+                                n.push(b);
+                            }
+                        }
+                        if n == part {
+                            mib.push(node.sysctl_num);
+                            break;
+                        }
+                    }
+                    if mib.len() != part_no + 1 { // nothing was pushed: no node matched `part`
+                        return Err(0);
+                    }
+                }
+
+                Ok(mib)
+            }
+
+            const OUT_LEN: ffi::c_size_t =
+                core::mem::size_of::<ffi::aarch64_sysctl_cpu_id>() as ffi::c_size_t;
+
+            let mib = name_to_mib(name)?;
+
+            let mut buf: ffi::aarch64_sysctl_cpu_id = unsafe { core::mem::zeroed() };
+            let mut out_len = OUT_LEN;
+            #[allow(clippy::cast_possible_truncation)]
+            unsafe {
+                sysctl(
+                    mib.as_ptr(),
+                    mib.len() as c_uint,
+                    (&mut buf as *mut ffi::aarch64_sysctl_cpu_id).cast::<ffi::c_void>(),
+                    &mut out_len,
+                    ptr::null_mut(),
+                    0,
+                )?;
+            }
+            Ok(AA64Reg {
+                aa64isar0: buf.aa64isar0,
+                #[cfg(test)]
+                aa64isar1: buf.aa64isar1,
+                #[cfg(test)]
+                aa64mmfr2: buf.aa64mmfr2,
+            })
+        }
+
+        unsafe {
+            assert_eq!(
+                imp::sysctl_cpu_id(b"machdep.cpu0.cpu_id\0").unwrap(),
+                sysctl_cpu_id_asm_syscall(&[b"machdep", b"cpu0", b"cpu_id"]).unwrap()
+            );
+        }
+    }
+
+    // Static assertions for FFI bindings.
+    // This checks that FFI bindings defined in this crate, FFI bindings defined
+    // in libc, and FFI bindings generated for the platform's latest header file
+    // using bindgen have compatible signatures (or the same values if constants).
+    // Since this is static assertion, we can detect problems with
+    // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
+    // without actually running tests on these platforms.
+    // See also tools/codegen/src/ffi.rs.
+    // TODO(codegen): auto-generate this test
+    #[cfg(target_os = "netbsd")]
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::no_effect_underscore_binding,
+        clippy::used_underscore_binding
+    )]
+    const _: fn() = || {
+        use core::mem::size_of;
+        use imp::ffi;
+        use test_helper::{libc, sys};
+        // All three `sysctlbyname` declarations must coerce to one function pointer type.
+        type SysctlByName = unsafe extern "C" fn(
+            *const ffi::c_char,
+            *mut ffi::c_void,
+            *mut ffi::c_size_t,
+            *const ffi::c_void,
+            ffi::c_size_t,
+        ) -> ffi::c_int;
+        let _: [SysctlByName; 3] = [ffi::sysctlbyname, libc::sysctlbyname, sys::sysctlbyname];
+        // libc doesn't have `aarch64_sysctl_cpu_id`, so the struct can only be compared
+        // with the bindgen-generated definition: first by size ...
+        static_assert!(
+            size_of::<ffi::aarch64_sysctl_cpu_id>() == size_of::<sys::aarch64_sysctl_cpu_id>()
+        );
+        // ... and then field by field: each of our fields must have the type of its
+        // `ac_`-prefixed counterpart for this struct literal to type-check.
+        let ours: ffi::aarch64_sysctl_cpu_id = unsafe { core::mem::zeroed() };
+        let _ = sys::aarch64_sysctl_cpu_id {
+            ac_midr: ours.midr,
+            ac_revidr: ours.revidr,
+            ac_mpidr: ours.mpidr,
+            ac_aa64dfr0: ours.aa64dfr0,
+            ac_aa64dfr1: ours.aa64dfr1,
+            ac_aa64isar0: ours.aa64isar0,
+            ac_aa64isar1: ours.aa64isar1,
+            ac_aa64mmfr0: ours.aa64mmfr0,
+            ac_aa64mmfr1: ours.aa64mmfr1,
+            ac_aa64mmfr2: ours.aa64mmfr2,
+            ac_aa64pfr0: ours.aa64pfr0,
+            ac_aa64pfr1: ours.aa64pfr1,
+            ac_aa64zfr0: ours.aa64zfr0,
+            ac_mvfr0: ours.mvfr0,
+            ac_mvfr1: ours.mvfr1,
+            ac_mvfr2: ours.mvfr2,
+            ac_pad: ours.pad,
+            ac_clidr: ours.clidr,
+            ac_ctr: ours.ctr,
+        };
+    };
+    #[cfg(target_os = "openbsd")]
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::no_effect_underscore_binding
+    )]
+    const _: fn() = || {
+        use imp::ffi;
+        use test_helper::{libc, sys};
+        // All three `sysctl` declarations must coerce to one function pointer type.
+        type Sysctl = unsafe extern "C" fn(
+            *const ffi::c_int,
+            ffi::c_uint,
+            *mut ffi::c_void,
+            *mut ffi::c_size_t,
+            *mut ffi::c_void,
+            ffi::c_size_t,
+        ) -> ffi::c_int;
+        let _: [Sysctl; 3] = [ffi::sysctl, libc::sysctl, sys::sysctl];
+        // Constants that libc also defines are compared against libc ...
+        static_assert!(ffi::CTL_MACHDEP == libc::CTL_MACHDEP);
+        // ... and every constant is compared against the generated bindings (libc doesn't
+        // have the CPU_ID_* constants).
+        static_assert!(ffi::CTL_MACHDEP == sys::CTL_MACHDEP as ffi::c_int);
+        static_assert!(ffi::CPU_ID_AA64ISAR0 == sys::CPU_ID_AA64ISAR0 as ffi::c_int);
+        static_assert!(ffi::CPU_ID_AA64ISAR1 == sys::CPU_ID_AA64ISAR1 as ffi::c_int);
+        static_assert!(ffi::CPU_ID_AA64MMFR2 == sys::CPU_ID_AA64MMFR2 as ffi::c_int);
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs
new file mode 100644
index 0000000..978418c
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Run-time feature detection on aarch64 Fuchsia by using zx_system_get_features.
+//
+// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on Fuchsia.
+// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs
+//
+// Refs:
+// - https://fuchsia.dev/fuchsia-src/reference/syscalls/system_get_features
+// - https://github.com/llvm/llvm-project/commit/4e731abc55681751b5d736b613f7720e50eb1ad4
+
+include!("common.rs");
+
+#[allow(non_camel_case_types)]
+mod ffi { // hand-written bindings for the small part of the Zircon API used in this file
+    // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/types.h
+    pub(crate) type zx_status_t = i32;
+
+    // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/errors.h
+    pub(crate) const ZX_OK: zx_status_t = 0; // the only status this file treats as success
+    // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/features.h
+    pub(crate) const ZX_FEATURE_KIND_CPU: u32 = 0; // `kind` passed by `_detect`
+    pub(crate) const ZX_ARM64_FEATURE_ISA_ATOMICS: u32 = 1 << 8; // bit `_detect` maps to HAS_LSE
+
+    #[link(name = "zircon")]
+    extern "C" {
+        // https://fuchsia.dev/fuchsia-src/reference/syscalls/system_get_features
+        pub(crate) fn zx_system_get_features(kind: u32, features: *mut u32) -> zx_status_t;
+    }
+}
+
+// Queries the feature bits for `kind` via the `zx_system_get_features` syscall.
+// Any status other than `ZX_OK` is reported as 0 (no features).
+fn zx_system_get_features(kind: u32) -> u32 {
+    let mut features = 0_u32;
+    // SAFETY: `&mut features` is derived from a live local, so the pointer is valid.
+    let status = unsafe { ffi::zx_system_get_features(kind, &mut features) };
+    // Discard whatever was written to `features` unless the call succeeded.
+    if status == ffi::ZX_OK { features } else { 0 }
+}
+
+#[cold]
+fn _detect(info: &mut CpuInfo) {
+    let isa_atomics = ffi::ZX_ARM64_FEATURE_ISA_ATOMICS;
+    if zx_system_get_features(ffi::ZX_FEATURE_KIND_CPU) & isa_atomics != 0 {
+        info.set(CpuInfo::HAS_LSE); // the atomics bit is what gets reported as FEAT_LSE
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_fuchsia() {
+        let cpu_features = zx_system_get_features(ffi::ZX_FEATURE_KIND_CPU);
+        assert_ne!(cpu_features, 0); // 0 is also what the wrapper returns on failure
+        std::eprintln!("features: {:b}", cpu_features);
+    }
+
+    // Static assertions for FFI bindings.
+    // This checks that FFI bindings defined in this crate and FFI bindings
+    // generated for the platform's latest header file using bindgen have
+    // compatible signatures (or the same values if constants).
+    // Since this is static assertion, we can detect problems with
+    // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
+    // without actually running tests on these platforms.
+    // See also tools/codegen/src/ffi.rs.
+    // TODO(codegen): auto-generate this test
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::cast_possible_truncation,
+        clippy::no_effect_underscore_binding
+    )]
+    const _: fn() = || {
+        use test_helper::sys;
+        // TODO(codegen): zx_system_get_features (its signature is not checked here yet)
+        static_assert!(ffi::ZX_ARM64_FEATURE_ISA_ATOMICS == sys::ZX_ARM64_FEATURE_ISA_ATOMICS);
+        static_assert!(ffi::ZX_FEATURE_KIND_CPU == sys::ZX_FEATURE_KIND_CPU);
+        static_assert!(ffi::ZX_OK == sys::ZX_OK as ffi::zx_status_t);
+        let _: ffi::zx_status_t = 0 as sys::zx_status_t;
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs
new file mode 100644
index 0000000..d6bf9d0
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Run-time feature detection on aarch64 macOS by using sysctl.
+//
+// This module is currently only enabled on tests because aarch64 macOS always supports FEAT_LSE and FEAT_LSE2.
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/include/llvm/TargetParser/AArch64TargetParser.h#L494
+//
+// If macOS supporting Armv9.4-a becomes popular in the future, this module will
+// be used to support outline-atomics for FEAT_LSE128/FEAT_LRCPC3.
+//
+// Refs: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
+//
+// Note that iOS doesn't support sysctl:
+// - https://developer.apple.com/forums/thread/9440
+// - https://nabla-c0d3.github.io/blog/2015/06/16/ios9-security-privacy
+
+include!("common.rs");
+
+use core::ptr;
+
+// core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+#[allow(non_camel_case_types)]
+mod ffi { // hand-written binding for the single libc function used in this file
+    pub(crate) use super::c_types::{c_char, c_int, c_size_t, c_void};
+
+    extern "C" {
+        // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname
+        // https://github.com/apple-oss-distributions/xnu/blob/5c2921b07a2480ab43ec66f5b9e41cb872bc554f/bsd/sys/sysctl.h
+        // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/bsd/apple/mod.rs#L5167-L5173
+        pub(crate) fn sysctlbyname(
+            name: *const c_char, // C string naming the entry to query
+            old_p: *mut c_void, // buffer that receives the current value
+            old_len_p: *mut c_size_t, // size of `old_p`; re-checked by the caller after the call
+            new_p: *mut c_void, // the caller in this file always passes null
+            new_len: c_size_t, // the caller in this file always passes 0
+        ) -> c_int; // the caller in this file treats any non-zero value as failure
+    }
+}
+
+// Reads the sysctl entry `name` as a `u32`; returns `None` if `sysctlbyname` fails.
+//
+// Safety: `name` must be a valid C string (the NUL terminator is only debug-asserted).
+unsafe fn sysctlbyname32(name: &[u8]) -> Option<u32> {
+    const VALUE_SIZE: ffi::c_size_t = core::mem::size_of::<u32>() as ffi::c_size_t;
+
+    // Exactly one NUL byte is expected, and it must be the last byte.
+    debug_assert_eq!(name.last(), Some(&0), "{:?}", name);
+    debug_assert_eq!(name.iter().filter(|&&v| v == 0).count(), 1, "{:?}", name);
+
+    let mut value = 0_u32;
+    let mut len = VALUE_SIZE;
+    let name_ptr = name.as_ptr().cast::<ffi::c_char>();
+    let value_ptr = (&mut value as *mut u32).cast::<ffi::c_void>();
+    // SAFETY:
+    // - the caller must guarantee that `name` is a valid C string.
+    // - `len` does not exceed the size of `value`.
+    // - `sysctlbyname` is thread-safe.
+    let status = unsafe { ffi::sysctlbyname(name_ptr, value_ptr, &mut len, ptr::null_mut(), 0) };
+    // No new value is supplied (null pointer, length 0): this is a read-only query.
+    if status != 0 {
+        return None;
+    }
+    // `len` is expected to still equal the size of a `u32` after a successful call.
+    debug_assert_eq!(len, VALUE_SIZE);
+    Some(value)
+}
+
+#[cold]
+fn _detect(info: &mut CpuInfo) {
+    // hw.optional.armv8_1_atomics exists on macOS 11+ (the first release with aarch64 support),
+    // while the hw.optional.arm.FEAT_* names only exist on macOS 12+.
+    // Both names are queried in case a future version of macOS removes the old one.
+    // https://github.com/golang/go/commit/c15593197453b8bf90fc3a9080ba2afeaf7934ea
+    // https://github.com/google/boringssl/commit/91e0b11eba517d83b910b20fe3740eeb39ecb37e
+    // SAFETY: both names are valid C strings.
+    if unsafe {
+        sysctlbyname32(b"hw.optional.arm.FEAT_LSE\0").map_or(false, |v| v != 0)
+            || sysctlbyname32(b"hw.optional.armv8_1_atomics\0").map_or(false, |v| v != 0)
+    } {
+        info.set(CpuInfo::HAS_LSE);
+    }
+    // SAFETY: the name is a valid C string.
+    if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE2\0") }.map_or(false, |v| v != 0) {
+        info.set(CpuInfo::HAS_LSE2);
+    }
+    // Only FEAT_LSE and FEAT_LSE2 are currently used in outline-atomics; the rest is test-only.
+    #[cfg(test)]
+    {
+        // SAFETY: the name is a valid C string.
+        if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE128\0") }.map_or(false, |v| v != 0) {
+            info.set(CpuInfo::HAS_LSE128);
+        }
+        // SAFETY: the name is a valid C string.
+        if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC3\0") }.map_or(false, |v| v != 0) {
+            info.set(CpuInfo::HAS_RCPC3);
+        }
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_macos() {
+        unsafe { // SAFETY: every name below is a NUL-terminated literal.
+            assert_eq!(sysctlbyname32(b"hw.optional.armv8_1_atomics\0"), Some(1));
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LSE\0"), Some(1));
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LSE2\0"), Some(1));
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LSE128\0"), None); // no such entry
+            assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::NotFound);
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC\0"), Some(1));
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC2\0"), Some(1));
+            assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC3\0"), None); // no such entry
+            assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::NotFound);
+        }
+    }
+
+    // Static assertions for FFI bindings.
+    // This checks that FFI bindings defined in this crate, FFI bindings defined
+    // in libc, and FFI bindings generated for the platform's latest header file
+    // using bindgen have compatible signatures (or the same values if constants).
+    // Since this is static assertion, we can detect problems with
+    // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
+    // without actually running tests on these platforms.
+    // See also tools/codegen/src/ffi.rs.
+    // TODO(codegen): auto-generate this test
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::no_effect_underscore_binding
+    )]
+    const _: fn() = || {
+        use test_helper::{libc, sys};
+        // All three `sysctlbyname` declarations must coerce to one function pointer type.
+        type SysctlByName = unsafe extern "C" fn(
+            *const ffi::c_char,
+            *mut ffi::c_void,
+            *mut ffi::c_size_t,
+            *mut ffi::c_void,
+            ffi::c_size_t,
+        ) -> ffi::c_int;
+        let _: [SysctlByName; 3] = [ffi::sysctlbyname, libc::sysctlbyname, sys::sysctlbyname];
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs
new file mode 100644
index 0000000..6ace866
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Run-time feature detection on aarch64 Windows by using IsProcessorFeaturePresent.
+//
+// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection of FEAT_LSE on Windows.
+// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/windows/aarch64.rs
+// https://github.com/rust-lang/stdarch/pull/1373
+//
+// Refs: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
+
+include!("common.rs");
+
+// windows-sys requires Rust 1.56
+#[allow(clippy::upper_case_acronyms)]
+mod ffi { // hand-written bindings; compared with windows-sys in the tests of this file
+    pub(crate) type DWORD = u32;
+    pub(crate) type BOOL = i32;
+
+    pub(crate) const FALSE: BOOL = 0; // `_detect` treats any other return value as "present"
+    // Defined in winnt.h of Windows SDK. `_detect` maps this feature to `CpuInfo::HAS_LSE`.
+    pub(crate) const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: DWORD = 34;
+
+    extern "system" {
+        // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
+        pub(crate) fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL;
+    }
+}
+
+#[cold]
+fn _detect(info: &mut CpuInfo) {
+    // SAFETY: calling IsProcessorFeaturePresent is safe, and FALSE is also
+    // returned if the HAL does not support detection of the specified feature.
+    let lse_present =
+        unsafe { ffi::IsProcessorFeaturePresent(ffi::PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) };
+    if lse_present != ffi::FALSE {
+        info.set(CpuInfo::HAS_LSE);
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Static assertions for FFI bindings.
+    // This checks that FFI bindings defined in this crate and FFI bindings defined
+    // in windows-sys have compatible signatures (or the same values if constants).
+    // Since this is static assertion, we can detect problems with
+    // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
+    // without actually running tests on these platforms.
+    // (Unlike libc, windows-sys programmatically generates bindings from Windows
+    // API metadata, so it should be enough to check compatibility with the
+    // windows-sys' signatures/values.)
+    // See also tools/codegen/src/ffi.rs.
+    // TODO(codegen): auto-generate this test
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::cast_possible_truncation,
+        clippy::no_effect_underscore_binding
+    )]
+    const _: fn() = || {
+        use test_helper::windows_sys::Win32::{Foundation, System::Threading};
+        // Type aliases first, then the function signature, then the constant values.
+        let _: ffi::DWORD = 0 as Threading::PROCESSOR_FEATURE_ID;
+        let _: ffi::BOOL = 0 as Foundation::BOOL;
+        let _: [unsafe extern "system" fn(ffi::DWORD) -> ffi::BOOL; 2] =
+            [ffi::IsProcessorFeaturePresent, Threading::IsProcessorFeaturePresent];
+        static_assert!(ffi::FALSE == Foundation::FALSE);
+        static_assert!(
+            ffi::PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
+                == Threading::PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
+        );
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs b/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs
new file mode 100644
index 0000000..1be3095
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Run-time feature detection on aarch64/powerpc64 Linux/Android/FreeBSD by parsing ELF auxiliary vectors.
+//
+// # Linux/Android
+//
+// As of nightly-2023-01-23, is_aarch64_feature_detected always uses dlsym by default
+// on aarch64 Linux/Android, but on the following platforms we can safely assume
+// getauxval is linked to the binary.
+//
+// - On glibc (*-linux-gnu*), [aarch64 support is available on glibc 2.17+](https://sourceware.org/legacy-ml/libc-announce/2012/msg00001.html)
+//   and is newer than [glibc 2.16 that added getauxval](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html).
+// - On musl (*-linux-musl*, *-linux-ohos*), [aarch64 support is available on musl 1.1.7+](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.7#n1422)
+//   and is newer than [musl 1.1.0 that added getauxval](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.0#n1197).
+//   https://github.com/rust-lang/rust/commit/9a04ae4997493e9260352064163285cddc43de3c
+// - On bionic (*-android*), [64-bit architecture support is available on Android 5.0+ (API level 21+)](https://android-developers.googleblog.com/2014/10/whats-new-in-android-50-lollipop.html)
+//   and is newer than [Android 4.3 (API level 18) that added getauxval](https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49).
+//
+// However, on musl with static linking, it seems that getauxval is not always available, independent of version requirements: https://github.com/rust-lang/rust/issues/89626
+// (That problem may have been fixed in https://github.com/rust-lang/rust/commit/9a04ae4997493e9260352064163285cddc43de3c,
+// but even in the version containing that patch, [there is a report](https://github.com/rust-lang/rust/issues/89626#issuecomment-1242636038)
+// of the same error.)
+//
+// On other Linux targets, we cannot assume that getauxval is always available, so we don't enable
+// outline-atomics by default (can be enabled by `--cfg portable_atomic_outline_atomics`).
+//
+// - On musl with static linking. See the above for more.
+//   Also, in this case, dlsym(getauxval) always returns null.
+// - On uClibc-ng (*-linux-uclibc*, *-l4re-uclibc*), [uClibc-ng 1.0.43 (released in 2023-04-05) added getauxval](https://github.com/wbx-github/uclibc-ng/commit/d869bb1600942c01a77539128f9ba5b5b55ad647).
+// - On Picolibc, [Picolibc 1.4.6 added getauxval stub](https://github.com/picolibc/picolibc#picolibc-version-146).
+//
+// See also https://github.com/rust-lang/stdarch/pull/1375
+//
+// See tests::test_linux_like and aarch64_aa64reg.rs for (test-only) alternative implementations.
+//
+// # FreeBSD
+//
+// As of nightly-2023-01-23, is_aarch64_feature_detected always uses mrs on
+// aarch64 FreeBSD. However, mrs does not work on FreeBSD 12 on QEMU (confirmed
+// on FreeBSD 12.{2,3,4}): we got SIGILL there (it worked on FreeBSD 13 and 14).
+//
+// So use elf_aux_info instead of mrs like compiler-rt does.
+// https://man.freebsd.org/elf_aux_info(3)
+// https://reviews.llvm.org/D109330
+//
+// elf_aux_info is available on FreeBSD 12.0+ and 11.4+:
+// https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470
+// https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h
+// On FreeBSD, [aarch64 support is available on FreeBSD 11.0+](https://www.freebsd.org/releases/11.0R/relnotes/#hardware-arm),
+// but FreeBSD 11 (11.4) was EoL on 2021-09-30, and FreeBSD 11.3 was EoL on 2020-09-30:
+// https://www.freebsd.org/security/unsupported
+// See also https://github.com/rust-lang/stdarch/pull/611#issuecomment-445464613
+//
+// See tests::test_freebsd and aarch64_aa64reg.rs for (test-only) alternative implementations.
+//
+// # PowerPC64
+//
+// On PowerPC64, outline-atomics is currently disabled by default mainly for
+// compatibility with older versions of operating systems
+// (can be enabled by `--cfg portable_atomic_outline_atomics`).
+
+include!("common.rs");
+
+use os::ffi;
+#[cfg(any(target_os = "linux", target_os = "android"))]
+mod os {
+    // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+    #[cfg_attr(test, allow(dead_code))]
+    pub(super) mod ffi {
+        pub(crate) use super::super::c_types::c_ulong;
+        #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+        pub(crate) use super::super::c_types::{c_char, c_int};
+
+        extern "C" {
+            // https://man7.org/linux/man-pages/man3/getauxval.3.html
+            // https://github.com/bminor/glibc/blob/801af9fafd4689337ebf27260aa115335a0cb2bc/misc/sys/auxv.h
+            // https://github.com/bminor/musl/blob/7d756e1c04de6eb3f2b3d3e1141a218bb329fcfb/include/sys/auxv.h
+            // https://github.com/wbx-github/uclibc-ng/blob/cdb07d2cd52af39feb425e6d36c02b30916b9f0a/include/sys/auxv.h
+            // https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h
+            // https://github.com/picolibc/picolibc/blob/7a8a58aeaa5946cb662577a518051091b691af3a/newlib/libc/picolib/getauxval.c
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/linux/gnu/mod.rs#L1201
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/linux/musl/mod.rs#L744
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/android/b64/mod.rs#L333
+            pub(crate) fn getauxval(type_: c_ulong) -> c_ulong;
+
+            // Defined in sys/system_properties.h.
+            // https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/system_properties.h
+            // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/android/mod.rs#L3471
+            #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+            pub(crate) fn __system_property_get(name: *const c_char, value: *mut c_char) -> c_int;
+        }
+
+        // https://github.com/torvalds/linux/blob/v6.1/include/uapi/linux/auxvec.h
+        #[cfg(any(test, target_arch = "aarch64"))]
+        pub(crate) const AT_HWCAP: c_ulong = 16;
+        #[cfg(any(test, target_arch = "powerpc64"))]
+        pub(crate) const AT_HWCAP2: c_ulong = 26;
+
+        // Defined in sys/system_properties.h.
+        // https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/system_properties.h
+        #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+        pub(crate) const PROP_VALUE_MAX: c_int = 92;
+    }
+
+    pub(super) fn getauxval(type_: ffi::c_ulong) -> ffi::c_ulong {
+        #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+        {
+            // Samsung Exynos 9810 has a bug that big and little cores have different
+            // ISAs. And on older Android (pre-9), the kernel incorrectly reports
+            // that features available only on some cores are available on all cores.
+            // https://reviews.llvm.org/D114523
+            let mut arch = [0_u8; ffi::PROP_VALUE_MAX as usize];
+            // SAFETY: the name is a nul-terminated C string; `arch` is PROP_VALUE_MAX bytes long.
+            let len = unsafe {
+                ffi::__system_property_get(
+                    b"ro.arch\0".as_ptr().cast::<ffi::c_char>(),
+                    arch.as_mut_ptr().cast::<ffi::c_char>(),
+                )
+            };
+            // On Exynos, ro.arch is not available on Android 12+, but it is fine
+            // because Android 9+ includes the fix.
+            if len > 0 && arch.starts_with(b"exynos9810") {
+                return 0; // report no hwcap bits: the kernel-reported value may be wrong here
+            }
+        }
+
+        // SAFETY: `getauxval` is thread-safe. See also the module level docs.
+        unsafe { ffi::getauxval(type_) }
+    }
+}
+#[cfg(target_os = "freebsd")]
+mod os {
+    // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+    #[cfg_attr(test, allow(dead_code))]
+    pub(super) mod ffi {
+        pub(crate) use super::super::c_types::{c_int, c_ulong, c_void};
+
+        extern "C" {
+            // Defined in sys/auxv.h.
+            // https://man.freebsd.org/elf_aux_info(3)
+            // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/sys/auxv.h
+            pub(crate) fn elf_aux_info(aux: c_int, buf: *mut c_void, buf_len: c_int) -> c_int;
+        }
+
+        // Defined in sys/elf_common.h.
+        // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/sys/elf_common.h
+        #[cfg(any(test, target_arch = "aarch64"))]
+        pub(crate) const AT_HWCAP: c_int = 25;
+        #[cfg(any(test, target_arch = "powerpc64"))]
+        pub(crate) const AT_HWCAP2: c_int = 26;
+    }
+
+    /// Returns the auxiliary vector entry `aux` as reported by `elf_aux_info`.
+    ///
+    /// The result is expected to be 0 (no hwcap bits set) when `elf_aux_info` fails.
+    // The unsafe block covers only the FFI call, like the Linux/Android `getauxval`.
+    pub(super) fn getauxval(aux: ffi::c_int) -> ffi::c_ulong {
+        #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
+        const OUT_LEN: ffi::c_int = core::mem::size_of::<ffi::c_ulong>() as ffi::c_int;
+        let mut out: ffi::c_ulong = 0;
+        // SAFETY:
+        // - the pointer is valid because we got it from a reference.
+        // - `OUT_LEN` is the same as the size of `out`.
+        // - `elf_aux_info` is thread-safe.
+        let res = unsafe {
+            ffi::elf_aux_info(aux, (&mut out as *mut ffi::c_ulong).cast::<ffi::c_void>(), OUT_LEN)
+        };
+        // If elf_aux_info fails, `out` will be left at zero (which is the proper default value).
+        debug_assert!(res == 0 || out == 0);
+        out
+    }
+}
+
+// Basically, Linux and FreeBSD use the same hwcap values.
+// FreeBSD supports a subset of the hwcap values supported by Linux.
+use arch::_detect;
+#[cfg(target_arch = "aarch64")]
+mod arch {
+    use super::{ffi, os, CpuInfo};
+
+    // Linux
+    // https://github.com/torvalds/linux/blob/1c41041124bd14dd6610da256a3da4e5b74ce6b1/arch/arm64/include/uapi/asm/hwcap.h
+    // FreeBSD
+    // Defined in machine/elf.h.
+    // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/arm64/include/elf.h
+    // available on FreeBSD 13.0+ and 12.2+
+    // https://github.com/freebsd/freebsd-src/blob/release/13.0.0/sys/arm64/include/elf.h
+    // https://github.com/freebsd/freebsd-src/blob/release/12.2.0/sys/arm64/include/elf.h
+    pub(super) const HWCAP_ATOMICS: ffi::c_ulong = 1 << 8; // AT_HWCAP bit mapped to HAS_LSE
+    pub(super) const HWCAP_USCAT: ffi::c_ulong = 1 << 25; // AT_HWCAP bit mapped to HAS_LSE2
+    #[cfg(any(target_os = "linux", target_os = "android"))]
+    #[cfg(target_pointer_width = "64")]
+    #[cfg(test)]
+    pub(super) const HWCAP2_LRCPC3: ffi::c_ulong = 1 << 46; // AT_HWCAP2 bit mapped to HAS_RCPC3
+    #[cfg(any(target_os = "linux", target_os = "android"))]
+    #[cfg(target_pointer_width = "64")]
+    #[cfg(test)]
+    pub(super) const HWCAP2_LSE128: ffi::c_ulong = 1 << 47; // AT_HWCAP2 bit mapped to HAS_LSE128
+
+    #[cold]
+    pub(super) fn _detect(info: &mut CpuInfo) {
+        let hwcap = os::getauxval(ffi::AT_HWCAP);
+
+        if hwcap & HWCAP_ATOMICS != 0 {
+            info.set(CpuInfo::HAS_LSE);
+        }
+        if hwcap & HWCAP_USCAT != 0 {
+            info.set(CpuInfo::HAS_LSE2);
+        }
+        #[cfg(any(target_os = "linux", target_os = "android"))]
+        #[cfg(target_pointer_width = "64")]
+        #[cfg(test)]
+        {
+            let hwcap2 = os::getauxval(ffi::AT_HWCAP2); // test builds only (see the cfgs above)
+            if hwcap2 & HWCAP2_LRCPC3 != 0 {
+                info.set(CpuInfo::HAS_RCPC3);
+            }
+            if hwcap2 & HWCAP2_LSE128 != 0 {
+                info.set(CpuInfo::HAS_LSE128);
+            }
+        }
+    }
+}
+#[cfg(target_arch = "powerpc64")]
+mod arch {
+    use super::{ffi, os, CpuInfo};
+
+    // Linux
+    // https://github.com/torvalds/linux/blob/v6.1/arch/powerpc/include/uapi/asm/cputable.h
+    // FreeBSD
+    // Defined in machine/cpu.h.
+    // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/powerpc/include/cpu.h
+    // available on FreeBSD 11.0+
+    // https://github.com/freebsd/freebsd-src/commit/b0bf7fcd298133457991b27625bbed766e612730
+    pub(super) const PPC_FEATURE2_ARCH_2_07: ffi::c_ulong = 0x80000000; // AT_HWCAP2 bit
+
+    #[cold]
+    pub(super) fn _detect(info: &mut CpuInfo) {
+        let hwcap2 = os::getauxval(ffi::AT_HWCAP2);
+
+        // power8 (the README lists pwr8+ as the requirement for the quadword instructions)
+        if hwcap2 & PPC_FEATURE2_ARCH_2_07 != 0 {
+            info.set(CpuInfo::HAS_QUADWORD_ATOMICS);
+        }
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[cfg(any(target_os = "linux", target_os = "android"))]
+    #[cfg(target_pointer_width = "64")]
+    #[test]
+    fn test_linux_like() {
+        use c_types::*;
+        use core::{arch::asm, mem};
+        use std::vec;
+        use test_helper::{libc, sys};
+
+        // Linux kernel 6.4 has added a way to read auxv without depending on either libc or mrs trap.
+        // https://github.com/torvalds/linux/commit/ddc65971bb677aa9f6a4c21f76d3133e106f88eb
+        //
+        // This is currently used only for testing.
+        fn getauxval_pr_get_auxv(type_: ffi::c_ulong) -> Result<ffi::c_ulong, c_int> {
+            #[cfg(target_arch = "aarch64")]
+            unsafe fn prctl_get_auxv(out: *mut c_void, len: usize) -> Result<usize, c_int> {
+                let r: i64;
+                unsafe {
+                    asm!(
+                        "svc 0",
+                        in("x8") sys::__NR_prctl as u64,
+                        inout("x0") sys::PR_GET_AUXV as u64 => r,
+                        in("x1") ptr_reg!(out),
+                        in("x2") len as u64,
+                        // arg4 and arg5 must be zero.
+                        in("x3") 0_u64,
+                        in("x4") 0_u64,
+                        options(nostack, preserves_flags)
+                    );
+                }
+                #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
+                if (r as c_int) < 0 {
+                    Err(r as c_int)
+                } else {
+                    Ok(r as usize)
+                }
+            }
+            #[cfg(target_arch = "powerpc64")]
+            unsafe fn prctl_get_auxv(out: *mut c_void, len: usize) -> Result<usize, c_int> {
+                let r: i64;
+                unsafe {
+                    asm!(
+                        "sc",
+                        "bns+ 2f",
+                        "neg %r3, %r3",
+                        "2:",
+                        inout("r0") sys::__NR_prctl as u64 => _,
+                        inout("r3") sys::PR_GET_AUXV as u64 => r,
+                        inout("r4") ptr_reg!(out) => _,
+                        inout("r5") len as u64 => _,
+                        // arg4 and arg5 must be zero.
+                        inout("r6") 0_u64 => _,
+                        inout("r7") 0_u64 => _,
+                        out("r8") _,
+                        out("r9") _,
+                        out("r10") _,
+                        out("r11") _,
+                        out("r12") _,
+                        out("cr0") _,
+                        options(nostack, preserves_flags)
+                    );
+                }
+                #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
+                if (r as c_int) < 0 {
+                    Err(r as c_int)
+                } else {
+                    Ok(r as usize)
+                }
+            }
+
+            let mut auxv = vec![unsafe { mem::zeroed::<sys::Elf64_auxv_t>() }; 38];
+
+            let old_len = auxv.len() * mem::size_of::<sys::Elf64_auxv_t>();
+
+            // SAFETY:
+            // - `old_len` does not exceed the size of `auxv`.
+            let _len = unsafe { prctl_get_auxv(auxv.as_mut_ptr().cast::<c_void>(), old_len)? };
+
+            for aux in &auxv {
+                if aux.a_type == type_ {
+                    // SAFETY: aux.a_un is #[repr(C)] union and all fields have
+                    // the same size and can be safely transmuted to integers.
+                    return Ok(unsafe { aux.a_un.a_val });
+                }
+            }
+            Err(0) // no entry of the requested type was found in the buffer
+        }
+
+        unsafe {
+            let mut u = mem::zeroed();
+            assert_eq!(libc::uname(&mut u), 0);
+            let release = std::ffi::CStr::from_ptr(u.release.as_ptr());
+            let release = core::str::from_utf8(release.to_bytes()).unwrap();
+            let mut digits = release.split('.');
+            let major = digits.next().unwrap().parse::<u32>().unwrap();
+            let minor = digits.next().unwrap().parse::<u32>().unwrap();
+            if (major, minor) < (6, 4) {
+                std::eprintln!("kernel version: {major}.{minor} (no pr_get_auxv)");
+                assert_eq!(getauxval_pr_get_auxv(ffi::AT_HWCAP).unwrap_err(), -22);
+                assert_eq!(getauxval_pr_get_auxv(ffi::AT_HWCAP2).unwrap_err(), -22);
+            } else {
+                std::eprintln!("kernel version: {major}.{minor} (has pr_get_auxv)");
+                assert_eq!(
+                    os::getauxval(ffi::AT_HWCAP),
+                    getauxval_pr_get_auxv(ffi::AT_HWCAP).unwrap()
+                );
+                assert_eq!(
+                    os::getauxval(ffi::AT_HWCAP2),
+                    getauxval_pr_get_auxv(ffi::AT_HWCAP2).unwrap()
+                );
+            }
+        }
+    }
+
+    #[allow(clippy::cast_sign_loss)]
+    #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+    #[test]
+    fn test_android() {
+        // The buffer starts out filled with a non-zero byte, so the raw dump below
+        // shows which bytes `__system_property_get` actually wrote.
+        let mut arch = [1_u8; ffi::PROP_VALUE_MAX as usize];
+        // SAFETY: the name is a nul-terminated C string; `arch` is PROP_VALUE_MAX bytes long.
+        let len = unsafe {
+            ffi::__system_property_get(
+                b"ro.arch\0".as_ptr().cast::<ffi::c_char>(),
+                arch.as_mut_ptr().cast::<ffi::c_char>(),
+            )
+        };
+        assert!(len >= 0);
+        std::eprintln!("len={}", len);
+        std::eprintln!("arch={:?}", arch);
+        // Bounds-checked slice: an out-of-range `len` panics instead of reading out of bounds.
+        std::eprintln!("arch={:?}", core::str::from_utf8(&arch[..len as usize]).unwrap());
+    }
+
+    #[allow(clippy::cast_possible_wrap)]
+    #[cfg(target_os = "freebsd")]
+    #[test]
+    fn test_freebsd() {
+        use c_types::*;
+        use core::{arch::asm, mem, ptr};
+        use test_helper::sys;
+
+        // This is almost equivalent to what elf_aux_info does.
+        // https://man.freebsd.org/elf_aux_info(3)
+        // On FreeBSD, [aarch64 support is available on FreeBSD 11.0+](https://www.freebsd.org/releases/11.0R/relnotes/#hardware-arm),
+        // but elf_aux_info is available on FreeBSD 12.0+ and 11.4+:
+        // https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470
+        // https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h
+        // so use sysctl instead of elf_aux_info.
+        // Note that FreeBSD 11 (11.4) was EoL on 2021-09-30, and FreeBSD 11.3 was EoL on 2020-09-30:
+        // https://www.freebsd.org/security/unsupported
+        //
+        // std_detect takes this approach, but it appears to be somewhat incorrect
+        // (the type of arg4 of sysctl, auxv is smaller than AT_COUNT, etc.).
+        // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/freebsd/auxvec.rs#L52
+        //
+        // This is currently used only for testing.
+        // If you want us to use this implementation for compatibility with the older FreeBSD
+        // version that came to EoL a few years ago, please open an issue.
+        fn getauxval_sysctl_libc(type_: ffi::c_int) -> ffi::c_ulong {
+            let mut auxv: [sys::Elf64_Auxinfo; sys::AT_COUNT as usize] = unsafe { mem::zeroed() };
+
+            let mut len = core::mem::size_of_val(&auxv) as c_size_t;
+
+            // SAFETY: calling getpid is safe.
+            let pid = unsafe { sys::getpid() };
+            let mib = [
+                sys::CTL_KERN as c_int,
+                sys::KERN_PROC as c_int,
+                sys::KERN_PROC_AUXV as c_int,
+                pid,
+            ];
+
+            #[allow(clippy::cast_possible_truncation)]
+            // SAFETY:
+            // - `mib.len()` does not exceed the size of `mib`.
+            // - `len` does not exceed the size of `auxv`.
+            // - `sysctl` is thread-safe.
+            let res = unsafe {
+                sys::sysctl(
+                    mib.as_ptr(),
+                    mib.len() as c_uint,
+                    auxv.as_mut_ptr().cast::<c_void>(),
+                    &mut len,
+                    ptr::null_mut(),
+                    0,
+                )
+            };
+
+            if res != -1 {
+                for aux in &auxv {
+                    if aux.a_type == type_ as c_long {
+                        // SAFETY: aux.a_un is #[repr(C)] union and all fields have
+                        // the same size and can be safely transmuted to integers.
+                        return unsafe { aux.a_un.a_val as c_ulong };
+                    }
+                }
+            }
+            0
+        }
+        // Similar to the above, but call syscall using asm instead of libc.
+        // Note that FreeBSD does not guarantee the stability of raw syscall as
+        // much as Linux does (It may actually be stable enough, though:
+        // https://lists.llvm.org/pipermail/llvm-dev/2019-June/133393.html,
+        // https://github.com/ziglang/zig/issues/16590).
+        //
+        // This is currently used only for testing.
+        fn getauxval_sysctl_asm_syscall(type_: ffi::c_int) -> Result<ffi::c_ulong, c_int> {
+            #[allow(non_camel_case_types)]
+            type pid_t = c_int;
+
+            // https://github.com/freebsd/freebsd-src/blob/9888a79adad22ba06b5aff17d05abac0029c537a/lib/libc/aarch64/SYS.h
+            // https://github.com/golang/go/blob/4badad8d477ffd7a6b762c35bc69aed82faface7/src/syscall/asm_freebsd_arm64.s
+            #[cfg(target_arch = "aarch64")]
+            #[inline]
+            fn getpid() -> pid_t {
+                #[allow(clippy::cast_possible_truncation)]
+                // SAFETY: calling getpid is safe.
+                unsafe {
+                    let r: i64;
+                    asm!(
+                        "svc 0",
+                        in("x8") sys::SYS_getpid as u64,
+                        out("x0") r,
+                        out("x1") _, // second return register; clobbered like in `sysctl` below
+                        options(nostack, readonly),
+                    );
+                    r as pid_t
+                }
+            }
+            #[cfg(target_arch = "aarch64")]
+            #[inline]
+            unsafe fn sysctl(
+                name: *const c_int,
+                name_len: c_uint,
+                old_p: *mut c_void,
+                old_len_p: *mut c_size_t,
+                new_p: *const c_void,
+                new_len: c_size_t,
+            ) -> Result<c_int, c_int> {
+                #[allow(clippy::cast_possible_truncation)]
+                // SAFETY: the caller must uphold the safety contract.
+                unsafe {
+                    let mut n = sys::SYS___sysctl as u64;
+                    let r: i64;
+                    asm!(
+                        "svc 0",
+                        "b.cc 2f",
+                        "mov x8, x0",
+                        "mov x0, #-1",
+                        "2:",
+                        inout("x8") n,
+                        inout("x0") ptr_reg!(name) => r,
+                        inout("x1") name_len as u64 => _,
+                        in("x2") ptr_reg!(old_p),
+                        in("x3") ptr_reg!(old_len_p),
+                        in("x4") ptr_reg!(new_p),
+                        in("x5") new_len as u64,
+                        options(nostack),
+                    );
+                    if r as c_int == -1 {
+                        Err(n as c_int)
+                    } else {
+                        Ok(r as c_int)
+                    }
+                }
+            }
+
+            // https://github.com/freebsd/freebsd-src/blob/9888a79adad22ba06b5aff17d05abac0029c537a/lib/libc/powerpc64/SYS.h
+            #[cfg(target_arch = "powerpc64")]
+            #[inline]
+            fn getpid() -> pid_t {
+                #[allow(clippy::cast_possible_truncation)]
+                // SAFETY: calling getpid is safe.
+                unsafe {
+                    let n = sys::SYS_getpid;
+                    let r: i64;
+                    asm!(
+                        "sc",
+                        inout("r0") n as u64 => _,
+                        out("r3") r,
+                        out("r4") _,
+                        out("r5") _,
+                        out("r6") _,
+                        out("r7") _,
+                        out("r8") _,
+                        out("r9") _,
+                        out("r10") _,
+                        out("r11") _,
+                        out("r12") _,
+                        out("cr0") _,
+                        options(nostack, preserves_flags, readonly),
+                    );
+                    r as pid_t
+                }
+            }
+            #[cfg(target_arch = "powerpc64")]
+            #[inline]
+            unsafe fn sysctl(
+                name: *const c_int,
+                name_len: c_uint,
+                old_p: *mut c_void,
+                old_len_p: *mut c_size_t,
+                new_p: *const c_void,
+                new_len: c_size_t,
+            ) -> Result<c_int, c_int> {
+                #[allow(clippy::cast_possible_truncation)]
+                // SAFETY: the caller must uphold the safety contract.
+                unsafe {
+                    let mut n = sys::SYS___sysctl as u64;
+                    let r: i64;
+                    asm!(
+                        "sc",
+                        "bns+ 2f",
+                        "mr %r0, %r3",
+                        "li %r3, -1",
+                        "2:",
+                        inout("r0") n,
+                        inout("r3") ptr_reg!(name) => r,
+                        inout("r4") name_len as u64 => _,
+                        inout("r5") ptr_reg!(old_p) => _,
+                        inout("r6") ptr_reg!(old_len_p) => _,
+                        inout("r7") ptr_reg!(new_p) => _,
+                        inout("r8") new_len as u64 => _,
+                        out("r9") _,
+                        out("r10") _,
+                        out("r11") _,
+                        out("r12") _,
+                        out("cr0") _,
+                        options(nostack, preserves_flags)
+                    );
+                    if r as c_int == -1 {
+                        Err(n as c_int)
+                    } else {
+                        Ok(r as c_int)
+                    }
+                }
+            }
+
+            let mut auxv: [sys::Elf64_Auxinfo; sys::AT_COUNT as usize] = unsafe { mem::zeroed() };
+
+            let mut len = core::mem::size_of_val(&auxv) as c_size_t;
+
+            let pid = getpid();
+            let mib = [
+                sys::CTL_KERN as c_int,
+                sys::KERN_PROC as c_int,
+                sys::KERN_PROC_AUXV as c_int,
+                pid,
+            ];
+
+            #[allow(clippy::cast_possible_truncation)]
+            // SAFETY:
+            // - `mib.len()` does not exceed the size of `mib`.
+            // - `len` does not exceed the size of `auxv`.
+            // - `sysctl` is thread-safe.
+            unsafe {
+                sysctl(
+                    mib.as_ptr(),
+                    mib.len() as c_uint,
+                    auxv.as_mut_ptr().cast::<c_void>(),
+                    &mut len,
+                    ptr::null_mut(),
+                    0,
+                )?;
+            }
+
+            for aux in &auxv {
+                if aux.a_type == type_ as c_long {
+                    // SAFETY: aux.a_un is #[repr(C)] union and all fields have
+                    // the same size and can be safely transmuted to integers.
+                    return Ok(unsafe { aux.a_un.a_val as c_ulong });
+                }
+            }
+            Err(0)
+        }
+
+        assert_eq!(os::getauxval(ffi::AT_HWCAP), getauxval_sysctl_libc(ffi::AT_HWCAP));
+        assert_eq!(os::getauxval(ffi::AT_HWCAP2), getauxval_sysctl_libc(ffi::AT_HWCAP2));
+        assert_eq!(
+            os::getauxval(ffi::AT_HWCAP),
+            getauxval_sysctl_asm_syscall(ffi::AT_HWCAP).unwrap()
+        );
+        assert_eq!(
+            os::getauxval(ffi::AT_HWCAP2),
+            // AT_HWCAP2 is only available on FreeBSD 13+, at least for aarch64.
+            getauxval_sysctl_asm_syscall(ffi::AT_HWCAP2).unwrap_or(0)
+        );
+    }
+
+    // Static assertions for FFI bindings.
+    // This checks that FFI bindings defined in this crate, FFI bindings defined
+    // in libc, and FFI bindings generated for the platform's latest header file
+    // using bindgen have compatible signatures (or the same values if constants).
+    // Since this is static assertion, we can detect problems with
+    // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh)
+    // without actually running tests on these platforms.
+    // See also tools/codegen/src/ffi.rs.
+    // TODO(codegen): auto-generate this test
+    #[allow(
+        clippy::cast_possible_wrap,
+        clippy::cast_sign_loss,
+        clippy::cast_possible_truncation,
+        clippy::no_effect_underscore_binding
+    )]
+    const _: fn() = || {
+        use test_helper::{libc, sys};
+        #[cfg(not(target_os = "freebsd"))]
+        type AtType = ffi::c_ulong;
+        #[cfg(target_os = "freebsd")]
+        type AtType = ffi::c_int;
+        #[cfg(any(target_os = "linux", target_os = "android"))]
+        {
+            let mut _getauxval: unsafe extern "C" fn(ffi::c_ulong) -> ffi::c_ulong = ffi::getauxval;
+            _getauxval = libc::getauxval;
+            _getauxval = sys::getauxval;
+        }
+        #[cfg(all(target_arch = "aarch64", target_os = "android"))]
+        {
+            let mut ___system_property_get: unsafe extern "C" fn(
+                *const ffi::c_char,
+                *mut ffi::c_char,
+            ) -> ffi::c_int = ffi::__system_property_get;
+            ___system_property_get = libc::__system_property_get;
+            ___system_property_get = sys::__system_property_get;
+            static_assert!(ffi::PROP_VALUE_MAX == libc::PROP_VALUE_MAX);
+            static_assert!(ffi::PROP_VALUE_MAX == sys::PROP_VALUE_MAX as ffi::c_int);
+        }
+        #[cfg(target_os = "freebsd")]
+        {
+            let mut _elf_aux_info: unsafe extern "C" fn(
+                ffi::c_int,
+                *mut ffi::c_void,
+                ffi::c_int,
+            ) -> ffi::c_int = ffi::elf_aux_info;
+            _elf_aux_info = libc::elf_aux_info;
+            _elf_aux_info = sys::elf_aux_info;
+        }
+        #[cfg(not(target_os = "freebsd"))] // libc doesn't have this on FreeBSD
+        static_assert!(ffi::AT_HWCAP == libc::AT_HWCAP);
+        static_assert!(ffi::AT_HWCAP == sys::AT_HWCAP as AtType);
+        #[cfg(not(target_os = "freebsd"))] // libc doesn't have this on FreeBSD
+        static_assert!(ffi::AT_HWCAP2 == libc::AT_HWCAP2);
+        static_assert!(ffi::AT_HWCAP2 == sys::AT_HWCAP2 as AtType);
+        #[cfg(target_arch = "aarch64")]
+        {
+            // static_assert!(arch::HWCAP_ATOMICS == libc::HWCAP_ATOMICS); // libc doesn't have this
+            static_assert!(arch::HWCAP_ATOMICS == sys::HWCAP_ATOMICS as ffi::c_ulong);
+            // static_assert!(arch::HWCAP_USCAT == libc::HWCAP_USCAT); // libc doesn't have this
+            static_assert!(arch::HWCAP_USCAT == sys::HWCAP_USCAT as ffi::c_ulong);
+            #[cfg(any(target_os = "linux", target_os = "android"))]
+            #[cfg(target_pointer_width = "64")]
+            {
+                // static_assert!(arch::HWCAP2_LRCPC3 == libc::HWCAP2_LRCPC3); // libc doesn't have this
+                static_assert!(arch::HWCAP2_LRCPC3 == sys::HWCAP2_LRCPC3 as ffi::c_ulong);
+                // static_assert!(arch::HWCAP2_LSE128 == libc::HWCAP2_LSE128); // libc doesn't have this
+                static_assert!(arch::HWCAP2_LSE128 == sys::HWCAP2_LSE128 as ffi::c_ulong);
+            }
+        }
+        #[cfg(target_arch = "powerpc64")]
+        {
+            // static_assert!(arch::PPC_FEATURE2_ARCH_2_07 == libc::PPC_FEATURE2_ARCH_2_07); // libc doesn't have this
+            static_assert!(
+                arch::PPC_FEATURE2_ARCH_2_07 == sys::PPC_FEATURE2_ARCH_2_07 as ffi::c_ulong
+            );
+        }
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/common.rs b/vendor/portable-atomic/src/imp/atomic128/detect/common.rs
new file mode 100644
index 0000000..b87caa3
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/common.rs
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+#[derive(Clone, Copy)]
+pub(crate) struct CpuInfo(u32); // bit flags; the associated constants are bit indices
+
+impl CpuInfo {
+    const INIT: u32 = 0; // bit 0: set by `detect` before the result is cached
+
+    #[inline]
+    fn set(&mut self, bit: u32) {
+        self.0 = set(self.0, bit);
+    }
+    #[inline]
+    fn test(self, bit: u32) -> bool {
+        test(self.0, bit)
+    }
+}
+
+#[inline]
+fn set(x: u32, bit: u32) -> u32 {
+    x | (1 << bit) // explicit grouping, matching `test`: shift first, then OR into `x`
+}
+#[inline]
+fn test(x: u32, bit: u32) -> bool {
+    x & (1 << bit) != 0
+}
+
+#[inline]
+pub(crate) fn detect() -> CpuInfo {
+    use core::sync::atomic::{AtomicU32, Ordering};
+
+    static CACHE: AtomicU32 = AtomicU32::new(0); // 0 = no result cached yet
+    let mut info = CpuInfo(CACHE.load(Ordering::Relaxed));
+    if info.0 != 0 {
+        return info; // cached: the INIT bit set below makes every stored value non-zero
+    }
+    info.set(CpuInfo::INIT);
+    // Note: detect_false cfg is intended to make it easy for portable-atomic developers to
+    // test cases such as has_cmpxchg16b == false, has_lse == false,
+    // __kuser_helper_version < 5, etc., and is not a public API.
+    if !cfg!(portable_atomic_test_outline_atomics_detect_false) {
+        _detect(&mut info);
+    }
+    CACHE.store(info.0, Ordering::Relaxed);
+    info
+}
+
+#[cfg(target_arch = "aarch64")]
+impl CpuInfo {
+    /// Whether FEAT_LSE is available
+    const HAS_LSE: u32 = 1;
+    /// Whether FEAT_LSE2 is available
+    #[cfg_attr(not(test), allow(dead_code))]
+    const HAS_LSE2: u32 = 2;
+    /// Whether FEAT_LSE128 is available
+    // This is currently only used in tests.
+    #[cfg(test)]
+    const HAS_LSE128: u32 = 3;
+    /// Whether FEAT_LRCPC3 is available
+    // This is currently only used in tests.
+    #[cfg(test)]
+    const HAS_RCPC3: u32 = 4;
+
+    #[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
+    #[inline]
+    pub(crate) fn has_lse(self) -> bool {
+        self.test(CpuInfo::HAS_LSE)
+    }
+    #[cfg_attr(not(test), allow(dead_code))]
+    #[cfg(any(test, not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))))]
+    #[inline]
+    pub(crate) fn has_lse2(self) -> bool {
+        self.test(CpuInfo::HAS_LSE2)
+    }
+    #[cfg(test)]
+    #[inline]
+    pub(crate) fn has_lse128(self) -> bool {
+        self.test(CpuInfo::HAS_LSE128)
+    }
+    #[cfg(test)]
+    #[inline]
+    pub(crate) fn has_rcpc3(self) -> bool {
+        self.test(CpuInfo::HAS_RCPC3)
+    }
+}
+
+#[cfg(target_arch = "x86_64")]
+impl CpuInfo {
+    /// Whether CMPXCHG16B is available
+    const HAS_CMPXCHG16B: u32 = 1;
+    /// Whether VMOVDQA is atomic
+    const HAS_VMOVDQA_ATOMIC: u32 = 2;
+
+    #[cfg(any(
+        test,
+        not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+    ))]
+    #[inline]
+    pub(crate) fn has_cmpxchg16b(self) -> bool {
+        self.test(CpuInfo::HAS_CMPXCHG16B)
+    }
+    #[inline]
+    pub(crate) fn has_vmovdqa_atomic(self) -> bool {
+        self.test(CpuInfo::HAS_VMOVDQA_ATOMIC)
+    }
+}
+
+#[cfg(target_arch = "powerpc64")]
+impl CpuInfo {
+    /// Whether lqarx and stqcx. instructions are available
+    const HAS_QUADWORD_ATOMICS: u32 = 1;
+
+    #[cfg(any(
+        test,
+        not(any(
+            target_feature = "quadword-atomics",
+            portable_atomic_target_feature = "quadword-atomics",
+        )),
+    ))]
+    #[inline]
+    pub(crate) fn has_quadword_atomics(self) -> bool {
+        self.test(CpuInfo::HAS_QUADWORD_ATOMICS)
+    }
+}
+
+// core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47
+#[cfg(any(target_arch = "aarch64", target_arch = "powerpc64"))]
+#[cfg(not(windows))]
+#[allow(dead_code, non_camel_case_types)]
+mod c_types {
+    pub(crate) type c_void = core::ffi::c_void;
+    // c_{,u}int is {i,u}32 on non-16-bit architectures
+    // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L160
+    // (16-bit architectures currently don't use this module)
+    pub(crate) type c_int = i32;
+    pub(crate) type c_uint = u32;
+    // c_{,u}long is {i,u}64 on non-Windows 64-bit targets, otherwise is {i,u}32
+    // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L176
+    // (Windows currently doesn't use this module - this module is cfg(not(windows)))
+    #[cfg(target_pointer_width = "64")]
+    pub(crate) type c_long = i64;
+    #[cfg(not(target_pointer_width = "64"))]
+    pub(crate) type c_long = i32;
+    #[cfg(target_pointer_width = "64")]
+    pub(crate) type c_ulong = u64;
+    #[cfg(not(target_pointer_width = "64"))]
+    pub(crate) type c_ulong = u32;
+    // c_size_t is currently always usize
+    // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L88
+    pub(crate) type c_size_t = usize;
+    // c_char is u8 by default on most non-Apple/non-Windows ARM/PowerPC/RISC-V/s390x/Hexagon targets
+    // (Linux/Android/FreeBSD/NetBSD/OpenBSD/VxWorks/Fuchsia/QNX Neutrino/Horizon/AIX/z/OS)
+    // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L104
+    // https://github.com/llvm/llvm-project/blob/9734b2256d89cb4c61a4dbf4a3c3f3f942fe9b8c/lldb/source/Utility/ArchSpec.cpp#L712
+    // RISC-V https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/HEAD/riscv-cc.adoc#cc-type-representations
+    // Hexagon https://lists.llvm.org/pipermail/llvm-dev/attachments/20190916/21516a52/attachment-0001.pdf
+    // AIX https://www.ibm.com/docs/en/xl-c-aix/13.1.2?topic=descriptions-qchars
+    // z/OS https://www.ibm.com/docs/en/zos/2.5.0?topic=specifiers-character-types
+    // (macOS is currently the only Apple target that uses this module, and Windows currently doesn't use this module)
+    #[cfg(not(target_os = "macos"))]
+    pub(crate) type c_char = u8;
+    // c_char is i8 on all Apple targets
+    #[cfg(target_os = "macos")]
+    pub(crate) type c_char = i8;
+
+    // Static assertions for C type definitions.
+    #[cfg(test)]
+    const _: fn() = || {
+        use test_helper::{libc, sys};
+        let _: c_int = 0 as std::os::raw::c_int;
+        let _: c_uint = 0 as std::os::raw::c_uint;
+        let _: c_long = 0 as std::os::raw::c_long;
+        let _: c_ulong = 0 as std::os::raw::c_ulong;
+        let _: c_size_t = 0 as libc::size_t; // std::os::raw::c_size_t is unstable
+        let _: c_char = 0 as std::os::raw::c_char;
+        let _: c_char = 0 as sys::c_char;
+    };
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests_common {
+    use super::*;
+
+    #[test]
+    fn test_bit_flags() {
+        let mut x = CpuInfo(0);
+        #[cfg(target_arch = "aarch64")]
+        {
+            assert!(!x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_LSE));
+            assert!(!x.test(CpuInfo::HAS_LSE2));
+            assert!(!x.test(CpuInfo::HAS_LSE128));
+            assert!(!x.test(CpuInfo::HAS_RCPC3));
+            x.set(CpuInfo::INIT);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_LSE));
+            assert!(!x.test(CpuInfo::HAS_LSE2));
+            assert!(!x.test(CpuInfo::HAS_LSE128));
+            assert!(!x.test(CpuInfo::HAS_RCPC3));
+            x.set(CpuInfo::HAS_LSE);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_LSE));
+            assert!(!x.test(CpuInfo::HAS_LSE2));
+            assert!(!x.test(CpuInfo::HAS_LSE128));
+            assert!(!x.test(CpuInfo::HAS_RCPC3));
+            x.set(CpuInfo::HAS_LSE2);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_LSE));
+            assert!(x.test(CpuInfo::HAS_LSE2));
+            assert!(!x.test(CpuInfo::HAS_LSE128));
+            assert!(!x.test(CpuInfo::HAS_RCPC3));
+            x.set(CpuInfo::HAS_LSE128);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_LSE));
+            assert!(x.test(CpuInfo::HAS_LSE2));
+            assert!(x.test(CpuInfo::HAS_LSE128));
+            assert!(!x.test(CpuInfo::HAS_RCPC3));
+            x.set(CpuInfo::HAS_RCPC3);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_LSE));
+            assert!(x.test(CpuInfo::HAS_LSE2));
+            assert!(x.test(CpuInfo::HAS_LSE128));
+            assert!(x.test(CpuInfo::HAS_RCPC3));
+        }
+        #[cfg(target_arch = "x86_64")]
+        {
+            assert!(!x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_CMPXCHG16B));
+            assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+            x.set(CpuInfo::INIT);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_CMPXCHG16B));
+            assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+            x.set(CpuInfo::HAS_CMPXCHG16B);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_CMPXCHG16B));
+            assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+            x.set(CpuInfo::HAS_VMOVDQA_ATOMIC);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_CMPXCHG16B));
+            assert!(x.test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+        }
+        #[cfg(target_arch = "powerpc64")]
+        {
+            assert!(!x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_QUADWORD_ATOMICS));
+            x.set(CpuInfo::INIT);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(!x.test(CpuInfo::HAS_QUADWORD_ATOMICS));
+            x.set(CpuInfo::HAS_QUADWORD_ATOMICS);
+            assert!(x.test(CpuInfo::INIT));
+            assert!(x.test(CpuInfo::HAS_QUADWORD_ATOMICS));
+        }
+    }
+
+    #[test]
+    fn print_features() {
+        use std::{fmt::Write as _, io::Write, string::String};
+
+        let mut features = String::new();
+        macro_rules! print_feature {
+            ($name:expr, $enabled:expr $(,)?) => {{
+                let _ = writeln!(features, "  {}: {}", $name, $enabled);
+            }};
+        }
+        #[cfg(target_arch = "aarch64")]
+        {
+            features.push_str("run-time:\n");
+            print_feature!("lse", detect().test(CpuInfo::HAS_LSE));
+            print_feature!("lse2", detect().test(CpuInfo::HAS_LSE2));
+            print_feature!("lse128", detect().test(CpuInfo::HAS_LSE128));
+            print_feature!("rcpc3", detect().test(CpuInfo::HAS_RCPC3));
+            features.push_str("compile-time:\n");
+            print_feature!(
+                "lse",
+                cfg!(any(target_feature = "lse", portable_atomic_target_feature = "lse")),
+            );
+            print_feature!(
+                "lse2",
+                cfg!(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")),
+            );
+        }
+        #[cfg(target_arch = "x86_64")]
+        {
+            features.push_str("run-time:\n");
+            print_feature!("cmpxchg16b", detect().test(CpuInfo::HAS_CMPXCHG16B));
+            print_feature!("vmovdqa-atomic", detect().test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+            features.push_str("compile-time:\n");
+            print_feature!(
+                "cmpxchg16b",
+                cfg!(any(
+                    target_feature = "cmpxchg16b",
+                    portable_atomic_target_feature = "cmpxchg16b",
+                )),
+            );
+        }
+        #[cfg(target_arch = "powerpc64")]
+        {
+            features.push_str("run-time:\n");
+            print_feature!("quadword-atomics", detect().test(CpuInfo::HAS_QUADWORD_ATOMICS));
+            features.push_str("compile-time:\n");
+            print_feature!(
+                "quadword-atomics",
+                cfg!(any(
+                    target_feature = "quadword-atomics",
+                    portable_atomic_target_feature = "quadword-atomics",
+                )),
+            );
+        }
+        let stderr = std::io::stderr(); // the report is written to stderr in one locked write
+        let mut stderr = stderr.lock();
+        let _ = stderr.write_all(features.as_bytes());
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    #[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)]
+    fn test_detect() {
+        if detect().has_cmpxchg16b() {
+            assert!(detect().test(CpuInfo::HAS_CMPXCHG16B));
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_CMPXCHG16B));
+        }
+        if detect().has_vmovdqa_atomic() {
+            assert!(detect().test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_VMOVDQA_ATOMIC));
+        }
+    }
+    #[cfg(target_arch = "aarch64")]
+    #[test]
+    #[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)]
+    fn test_detect() {
+        let proc_cpuinfo = test_helper::cpuinfo::ProcCpuinfo::new();
+        if detect().has_lse() {
+            assert!(detect().test(CpuInfo::HAS_LSE));
+            if let Ok(proc_cpuinfo) = proc_cpuinfo {
+                assert!(proc_cpuinfo.lse);
+            }
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_LSE));
+            if let Ok(proc_cpuinfo) = proc_cpuinfo {
+                assert!(!proc_cpuinfo.lse);
+            }
+        }
+        if detect().has_lse2() {
+            assert!(detect().test(CpuInfo::HAS_LSE));
+            assert!(detect().test(CpuInfo::HAS_LSE2));
+            if let Ok(test_helper::cpuinfo::ProcCpuinfo { lse2: Some(lse2), .. }) = proc_cpuinfo {
+                assert!(lse2);
+            }
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_LSE2));
+            if let Ok(test_helper::cpuinfo::ProcCpuinfo { lse2: Some(lse2), .. }) = proc_cpuinfo {
+                assert!(!lse2);
+            }
+        }
+        if detect().has_lse128() {
+            assert!(detect().test(CpuInfo::HAS_LSE));
+            assert!(detect().test(CpuInfo::HAS_LSE2));
+            assert!(detect().test(CpuInfo::HAS_LSE128));
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_LSE128));
+        }
+        if detect().has_rcpc3() {
+            assert!(detect().test(CpuInfo::HAS_RCPC3));
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_RCPC3));
+        }
+    }
+    #[cfg(target_arch = "powerpc64")]
+    #[test]
+    #[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)]
+    fn test_detect() {
+        let proc_cpuinfo = test_helper::cpuinfo::ProcCpuinfo::new();
+        if detect().has_quadword_atomics() {
+            assert!(detect().test(CpuInfo::HAS_QUADWORD_ATOMICS));
+            if let Ok(proc_cpuinfo) = proc_cpuinfo {
+                assert!(proc_cpuinfo.power8);
+            }
+        } else {
+            assert!(!detect().test(CpuInfo::HAS_QUADWORD_ATOMICS));
+            if let Ok(proc_cpuinfo) = proc_cpuinfo {
+                assert!(!proc_cpuinfo.power8);
+            }
+        }
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs b/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs
new file mode 100644
index 0000000..80eefed
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Adapted from https://github.com/rust-lang/stdarch.
+
+#![cfg_attr(any(not(target_feature = "sse"), portable_atomic_sanitize_thread), allow(dead_code))]
+
+// Miri doesn't support inline assembly used in __cpuid: https://github.com/rust-lang/miri/issues/932
+// SGX doesn't support CPUID: https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs#L102-L105
+#[cfg(any(target_env = "sgx", miri))]
+compile_error!("internal error: this module is not supported on this environment");
+
+include!("common.rs");
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+use core::arch::x86_64::{CpuidResult, _xgetbv};
+
+// Workaround for https://github.com/rust-lang/rust/issues/101346
+// It is not clear if our use cases are affected, but we implement this just in case.
+//
+// Refs:
+// - https://www.felixcloutier.com/x86/cpuid
+// - https://en.wikipedia.org/wiki/CPUID
+// - https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs
+unsafe fn __cpuid(leaf: u32) -> CpuidResult {
+    let eax;
+    let mut ebx;
+    let ecx;
+    let edx;
+    // SAFETY: the caller must guarantee that CPU supports `cpuid`.
+    unsafe {
+        asm!(
+            // rbx is reserved by LLVM
+            "mov {ebx_tmp:r}, rbx",
+            "cpuid",
+            "xchg {ebx_tmp:r}, rbx", // restore rbx
+            ebx_tmp = out(reg) ebx,
+            inout("eax") leaf => eax,
+            inout("ecx") 0 => ecx,
+            out("edx") edx,
+            options(nostack, preserves_flags),
+        );
+    }
+    CpuidResult { eax, ebx, ecx, edx }
+}
+
+// https://en.wikipedia.org/wiki/CPUID
+const VENDOR_ID_INTEL: [u8; 12] = *b"GenuineIntel";
+const VENDOR_ID_AMD: [u8; 12] = *b"AuthenticAMD";
+
+unsafe fn _vendor_id() -> [u8; 12] {
+    // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L40-L59
+    // SAFETY: the caller must guarantee that CPU supports `cpuid`.
+    let CpuidResult { ebx, ecx, edx, .. } = unsafe { __cpuid(0) };
+    let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()];
+    // SAFETY: transmute is safe because `[u8; 12]` and `[[u8; 4]; 3]` have the same layout.
+    unsafe { core::mem::transmute(vendor_id) }
+}
+
+#[cold]
+fn _detect(info: &mut CpuInfo) {
+    // SAFETY: Calling `_vendor_id` is safe because the CPU has `cpuid` support.
+    let vendor_id = unsafe { _vendor_id() };
+
+    // SAFETY: Calling `__cpuid` is safe because the CPU has `cpuid` support.
+    let proc_info_ecx = unsafe { __cpuid(0x0000_0001_u32).ecx };
+
+    // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L111
+    if test(proc_info_ecx, 13) {
+        info.set(CpuInfo::HAS_CMPXCHG16B);
+    }
+
+    // VMOVDQA is atomic on Intel and AMD CPUs with AVX.
+    // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details.
+    if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD {
+        // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L131-L224
+        let cpu_xsave = test(proc_info_ecx, 26);
+        if cpu_xsave {
+            let cpu_osxsave = test(proc_info_ecx, 27);
+            if cpu_osxsave {
+                // SAFETY: Calling `_xgetbv` is safe because the CPU has `xsave` support
+                // and OS has set `osxsave`.
+                let xcr0 = unsafe { _xgetbv(0) };
+                let os_avx_support = xcr0 & 6 == 6;
+                if os_avx_support && test(proc_info_ecx, 28) {
+                    info.set(CpuInfo::HAS_VMOVDQA_ATOMIC);
+                }
+            }
+        }
+    }
+}
+
+#[allow(
+    clippy::alloc_instead_of_core,
+    clippy::std_instead_of_alloc,
+    clippy::std_instead_of_core,
+    clippy::undocumented_unsafe_blocks,
+    clippy::wildcard_imports
+)]
+#[cfg(test)]
+mod tests {
+    #[cfg(not(portable_atomic_test_outline_atomics_detect_false))]
+    use super::*;
+
+    #[cfg(not(portable_atomic_test_outline_atomics_detect_false))]
+    #[test]
+    fn test_cpuid() {
+        assert_eq!(std::is_x86_feature_detected!("cmpxchg16b"), detect().has_cmpxchg16b());
+        let vendor_id = unsafe { _vendor_id() };
+        if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD {
+            assert_eq!(std::is_x86_feature_detected!("avx"), detect().has_vmovdqa_atomic());
+        } else {
+            assert!(!detect().has_vmovdqa_atomic());
+        }
+    }
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs b/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs
new file mode 100644
index 0000000..21b5be2
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation without inline assembly.
+//
+// Note: This module is currently only enabled on Miri and ThreadSanitizer which
+// do not support inline assembly.
+//
+// This uses `core::arch::x86_64::cmpxchg16b` on x86_64 and
+// `core::intrinsics::atomic_*` on aarch64, powerpc64, and s390x.
+//
+// See README.md of this directory for performance comparison with the
+// implementation with inline assembly.
+//
+// Note:
+// - This currently needs Rust 1.70 on x86_64, otherwise nightly compilers.
+// - On powerpc64, this requires LLVM 15+ and pwr8+ (quadword-atomics LLVM target feature):
+//   https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
+// - On aarch64 big-endian, LLVM (as of 17) generates broken code. (wrong result in stress test)
+//   (on cfg(miri)/cfg(sanitize) it may be fine though)
+// - On s390x, LLVM (as of 17) generates libcalls for operations other than load/store/cmpxchg:
+//   https://godbolt.org/z/5a5T4hxMh
+//   https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll
+//   https://reviews.llvm.org/D146425
+// - On powerpc64, LLVM (as of 17) doesn't support 128-bit atomic min/max:
+//   https://github.com/llvm/llvm-project/issues/68390
+// - On powerpc64le, LLVM (as of 17) generates broken code. (wrong result from fetch_add)
+//
+// Refs: https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs
+
+include!("macros.rs");
+
+#[allow(dead_code)] // we only use compare_exchange.
+#[cfg(target_arch = "x86_64")]
+#[cfg(not(target_feature = "cmpxchg16b"))]
+#[path = "../fallback/outline_atomics.rs"]
+mod fallback;
+
+#[cfg(target_arch = "x86_64")]
+#[cfg(not(target_feature = "cmpxchg16b"))]
+#[path = "detect/x86_64.rs"]
+mod detect;
+
+use core::sync::atomic::Ordering;
+#[cfg(not(target_arch = "x86_64"))]
+use core::{
+    intrinsics,
+    sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst},
+};
+
+// https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs#L3128
+#[cfg(target_arch = "x86_64")]
+#[inline]
+fn strongest_failure_ordering(order: Ordering) -> Ordering {
+    match order {
+        Ordering::Release | Ordering::Relaxed => Ordering::Relaxed,
+        Ordering::SeqCst => Ordering::SeqCst,
+        Ordering::Acquire | Ordering::AcqRel => Ordering::Acquire,
+        _ => unreachable!("{:?}", order),
+    }
+}
+
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
+    #[cfg(target_arch = "x86_64")]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let fail_order = strongest_failure_ordering(order);
+        match atomic_compare_exchange(src, 0, 0, order, fail_order) {
+            Ok(v) | Err(v) => v,
+        }
+    }
+    #[cfg(not(target_arch = "x86_64"))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order {
+            Acquire => intrinsics::atomic_load_acquire(src),
+            Relaxed => intrinsics::atomic_load_relaxed(src),
+            SeqCst => intrinsics::atomic_load_seqcst(src),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+    #[cfg(target_arch = "x86_64")]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        atomic_swap(dst, val, order);
+    }
+    #[cfg(not(target_arch = "x86_64"))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order {
+            Release => intrinsics::atomic_store_release(dst, val),
+            Relaxed => intrinsics::atomic_store_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_store_seqcst(dst, val),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_compare_exchange(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> Result<u128, u128> {
+    #[cfg(target_arch = "x86_64")]
+    let (val, ok) = {
+        #[cfg_attr(not(target_feature = "cmpxchg16b"), target_feature(enable = "cmpxchg16b"))]
+        #[cfg_attr(target_feature = "cmpxchg16b", inline)]
+        #[cfg_attr(not(target_feature = "cmpxchg16b"), inline(never))]
+        unsafe fn cmpxchg16b(
+            dst: *mut u128,
+            old: u128,
+            new: u128,
+            success: Ordering,
+            failure: Ordering,
+        ) -> (u128, bool) {
+            debug_assert!(dst as usize % 16 == 0);
+            #[cfg(not(target_feature = "cmpxchg16b"))]
+            {
+                debug_assert!(detect::detect().has_cmpxchg16b());
+            }
+            // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+            // reads, 16-byte aligned (required by CMPXCHG16B), that there are no
+            // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B.
+            let prev = unsafe { core::arch::x86_64::cmpxchg16b(dst, old, new, success, failure) };
+            (prev, prev == old)
+        }
+        // The stronger failure ordering in cmpxchg16b_intrinsic is actually supported
+        // before stabilization, but we do not have a specific cfg for it.
+        #[cfg(portable_atomic_unstable_cmpxchg16b_intrinsic)]
+        let success = crate::utils::upgrade_success_ordering(success, failure);
+        #[cfg(target_feature = "cmpxchg16b")]
+        // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+        // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+        // and cfg guarantees that CMPXCHG16B is available at compile-time.
+        unsafe {
+            cmpxchg16b(dst, old, new, success, failure)
+        }
+        #[cfg(not(target_feature = "cmpxchg16b"))]
+        // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+        // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses.
+        unsafe {
+            ifunc!(unsafe fn(
+                dst: *mut u128, old: u128, new: u128, success: Ordering, failure: Ordering
+            ) -> (u128, bool) {
+                if detect::detect().has_cmpxchg16b() {
+                    cmpxchg16b
+                } else {
+                    fallback::atomic_compare_exchange
+                }
+            })
+        }
+    };
+    #[cfg(not(target_arch = "x86_64"))]
+    // SAFETY: the caller must uphold the safety contract.
+    let (val, ok) = unsafe {
+        match (success, failure) {
+            (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed_relaxed(dst, old, new),
+            (Relaxed, Acquire) => intrinsics::atomic_cxchg_relaxed_acquire(dst, old, new),
+            (Relaxed, SeqCst) => intrinsics::atomic_cxchg_relaxed_seqcst(dst, old, new),
+            (Acquire, Relaxed) => intrinsics::atomic_cxchg_acquire_relaxed(dst, old, new),
+            (Acquire, Acquire) => intrinsics::atomic_cxchg_acquire_acquire(dst, old, new),
+            (Acquire, SeqCst) => intrinsics::atomic_cxchg_acquire_seqcst(dst, old, new),
+            (Release, Relaxed) => intrinsics::atomic_cxchg_release_relaxed(dst, old, new),
+            (Release, Acquire) => intrinsics::atomic_cxchg_release_acquire(dst, old, new),
+            (Release, SeqCst) => intrinsics::atomic_cxchg_release_seqcst(dst, old, new),
+            (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_relaxed(dst, old, new),
+            (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel_acquire(dst, old, new),
+            (AcqRel, SeqCst) => intrinsics::atomic_cxchg_acqrel_seqcst(dst, old, new),
+            (SeqCst, Relaxed) => intrinsics::atomic_cxchg_seqcst_relaxed(dst, old, new),
+            (SeqCst, Acquire) => intrinsics::atomic_cxchg_seqcst_acquire(dst, old, new),
+            (SeqCst, SeqCst) => intrinsics::atomic_cxchg_seqcst_seqcst(dst, old, new),
+            _ => unreachable!("{:?}, {:?}", success, failure),
+        }
+    };
+    if ok {
+        Ok(val)
+    } else {
+        Err(val)
+    }
+}
+
+#[cfg(target_arch = "x86_64")]
+use atomic_compare_exchange as atomic_compare_exchange_weak;
+#[cfg(not(target_arch = "x86_64"))]
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_compare_exchange_weak(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> Result<u128, u128> {
+    // SAFETY: the caller must uphold the safety contract.
+    let (val, ok) = unsafe {
+        match (success, failure) {
+            (Relaxed, Relaxed) => intrinsics::atomic_cxchgweak_relaxed_relaxed(dst, old, new),
+            (Relaxed, Acquire) => intrinsics::atomic_cxchgweak_relaxed_acquire(dst, old, new),
+            (Relaxed, SeqCst) => intrinsics::atomic_cxchgweak_relaxed_seqcst(dst, old, new),
+            (Acquire, Relaxed) => intrinsics::atomic_cxchgweak_acquire_relaxed(dst, old, new),
+            (Acquire, Acquire) => intrinsics::atomic_cxchgweak_acquire_acquire(dst, old, new),
+            (Acquire, SeqCst) => intrinsics::atomic_cxchgweak_acquire_seqcst(dst, old, new),
+            (Release, Relaxed) => intrinsics::atomic_cxchgweak_release_relaxed(dst, old, new),
+            (Release, Acquire) => intrinsics::atomic_cxchgweak_release_acquire(dst, old, new),
+            (Release, SeqCst) => intrinsics::atomic_cxchgweak_release_seqcst(dst, old, new),
+            (AcqRel, Relaxed) => intrinsics::atomic_cxchgweak_acqrel_relaxed(dst, old, new),
+            (AcqRel, Acquire) => intrinsics::atomic_cxchgweak_acqrel_acquire(dst, old, new),
+            (AcqRel, SeqCst) => intrinsics::atomic_cxchgweak_acqrel_seqcst(dst, old, new),
+            (SeqCst, Relaxed) => intrinsics::atomic_cxchgweak_seqcst_relaxed(dst, old, new),
+            (SeqCst, Acquire) => intrinsics::atomic_cxchgweak_seqcst_acquire(dst, old, new),
+            (SeqCst, SeqCst) => intrinsics::atomic_cxchgweak_seqcst_seqcst(dst, old, new),
+            _ => unreachable!("{:?}, {:?}", success, failure),
+        }
+    };
+    if ok {
+        Ok(val)
+    } else {
+        Err(val)
+    }
+}
+
+#[inline(always)]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128
+where
+    F: FnMut(u128) -> u128,
+{
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        // This is a private function and all instances of `f` only operate on the value
+        // loaded, so there is no need to synchronize the first load/failed CAS.
+        let mut prev = atomic_load(dst, Ordering::Relaxed);
+        loop {
+            let next = f(prev);
+            match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) {
+                Ok(x) => return x,
+                Err(x) => prev = x,
+            }
+        }
+    }
+}
+
+// On x86_64, we use core::arch::x86_64::cmpxchg16b instead of core::intrinsics.
+// On s390x, LLVM generates libcalls for operations other than load/store/cmpxchg (see also module-level comment).
+#[cfg(any(target_arch = "x86_64", target_arch = "s390x"))]
+atomic_rmw_by_atomic_update!();
+// On powerpc64, LLVM doesn't support 128-bit atomic min/max (see also module-level comment).
+#[cfg(target_arch = "powerpc64")]
+atomic_rmw_by_atomic_update!(cmp);
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] // those two targets get the CAS-loop version from `atomic_rmw_by_atomic_update!()`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { // backs `swap`: forwards to the `atomic_xchg_*` intrinsic matching `order`
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // one intrinsic per memory ordering, so dispatch on `order`
+            Acquire => intrinsics::atomic_xchg_acquire(dst, val),
+            Release => intrinsics::atomic_xchg_release(dst, val),
+            AcqRel => intrinsics::atomic_xchg_acqrel(dst, val),
+            Relaxed => intrinsics::atomic_xchg_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_xchg_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] // those two targets get the CAS-loop version from `atomic_rmw_by_atomic_update!()`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 { // backs `fetch_add`: forwards to the `atomic_xadd_*` intrinsic matching `order`
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // one intrinsic per memory ordering, so dispatch on `order`
+            Acquire => intrinsics::atomic_xadd_acquire(dst, val),
+            Release => intrinsics::atomic_xadd_release(dst, val),
+            AcqRel => intrinsics::atomic_xadd_acqrel(dst, val),
+            Relaxed => intrinsics::atomic_xadd_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_xadd_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] // those two targets get the CAS-loop version from `atomic_rmw_by_atomic_update!()`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 { // backs `fetch_sub`: forwards to the `atomic_xsub_*` intrinsic matching `order`
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // one intrinsic per memory ordering, so dispatch on `order`
+            Acquire => intrinsics::atomic_xsub_acquire(dst, val),
+            Release => intrinsics::atomic_xsub_release(dst, val),
+            AcqRel => intrinsics::atomic_xsub_acqrel(dst, val),
+            Relaxed => intrinsics::atomic_xsub_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_xsub_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] // those two targets get the CAS-loop version from `atomic_rmw_by_atomic_update!()`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 { // backs `fetch_and`: forwards to the `atomic_and_*` intrinsic matching `order`
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // one intrinsic per memory ordering, so dispatch on `order`
+            Acquire => intrinsics::atomic_and_acquire(dst, val),
+            Release => intrinsics::atomic_and_release(dst, val),
+            AcqRel => intrinsics::atomic_and_acqrel(dst, val),
+            Relaxed => intrinsics::atomic_and_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_and_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] // those two targets get the CAS-loop version from `atomic_rmw_by_atomic_update!()`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 { // backs `fetch_nand`: forwards to the `atomic_nand_*` intrinsic matching `order`
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // one intrinsic per memory ordering, so dispatch on `order`
+            Acquire => intrinsics::atomic_nand_acquire(dst, val),
+            Release => intrinsics::atomic_nand_release(dst, val),
+            AcqRel => intrinsics::atomic_nand_acqrel(dst, val),
+            Relaxed => intrinsics::atomic_nand_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_nand_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] // those two targets get the CAS-loop version from `atomic_rmw_by_atomic_update!()`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 { // backs `fetch_or`: forwards to the `atomic_or_*` intrinsic matching `order`
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // one intrinsic per memory ordering, so dispatch on `order`
+            Acquire => intrinsics::atomic_or_acquire(dst, val),
+            Release => intrinsics::atomic_or_release(dst, val),
+            AcqRel => intrinsics::atomic_or_acqrel(dst, val),
+            Relaxed => intrinsics::atomic_or_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_or_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] // those two targets get the CAS-loop version from `atomic_rmw_by_atomic_update!()`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 { // backs `fetch_xor` (and `atomic_not`): forwards to the `atomic_xor_*` intrinsic matching `order`
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // one intrinsic per memory ordering, so dispatch on `order`
+            Acquire => intrinsics::atomic_xor_acquire(dst, val),
+            Release => intrinsics::atomic_xor_release(dst, val),
+            AcqRel => intrinsics::atomic_xor_acqrel(dst, val),
+            Relaxed => intrinsics::atomic_xor_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_xor_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] // those three targets get the CAS-loop version from `atomic_rmw_by_atomic_update!`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> i128 { // signed max (passed as `$atomic_max` for AtomicI128); note the `i128` return, which callers cast with `as $int_type`
+    #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] // the `as i128` casts below deliberately reinterpret the bits
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // pointer and operand are both viewed as `i128` so the `atomic_max_*` intrinsic is instantiated for the signed type
+            Acquire => intrinsics::atomic_max_acquire(dst.cast::<i128>(), val as i128),
+            Release => intrinsics::atomic_max_release(dst.cast::<i128>(), val as i128),
+            AcqRel => intrinsics::atomic_max_acqrel(dst.cast::<i128>(), val as i128),
+            Relaxed => intrinsics::atomic_max_relaxed(dst.cast::<i128>(), val as i128),
+            SeqCst => intrinsics::atomic_max_seqcst(dst.cast::<i128>(), val as i128),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] // those three targets get the CAS-loop version from `atomic_rmw_by_atomic_update!`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_min(dst: *mut u128, val: u128, order: Ordering) -> i128 { // signed min (passed as `$atomic_min` for AtomicI128); note the `i128` return, which callers cast with `as $int_type`
+    #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] // the `as i128` casts below deliberately reinterpret the bits
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // pointer and operand are both viewed as `i128` so the `atomic_min_*` intrinsic is instantiated for the signed type
+            Acquire => intrinsics::atomic_min_acquire(dst.cast::<i128>(), val as i128),
+            Release => intrinsics::atomic_min_release(dst.cast::<i128>(), val as i128),
+            AcqRel => intrinsics::atomic_min_acqrel(dst.cast::<i128>(), val as i128),
+            Relaxed => intrinsics::atomic_min_relaxed(dst.cast::<i128>(), val as i128),
+            SeqCst => intrinsics::atomic_min_seqcst(dst.cast::<i128>(), val as i128),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] // those three targets get the CAS-loop version from `atomic_rmw_by_atomic_update!`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 { // unsigned max (passed as `$atomic_max` for AtomicU128): forwards to the `atomic_umax_*` intrinsic
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // one intrinsic per memory ordering, so dispatch on `order`
+            Acquire => intrinsics::atomic_umax_acquire(dst, val),
+            Release => intrinsics::atomic_umax_release(dst, val),
+            AcqRel => intrinsics::atomic_umax_acqrel(dst, val),
+            Relaxed => intrinsics::atomic_umax_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_umax_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] // those three targets get the CAS-loop version from `atomic_rmw_by_atomic_update!`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 { // unsigned min (passed as `$atomic_min` for AtomicU128): forwards to the `atomic_umin_*` intrinsic
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        match order { // one intrinsic per memory ordering, so dispatch on `order`
+            Acquire => intrinsics::atomic_umin_acquire(dst, val),
+            Release => intrinsics::atomic_umin_release(dst, val),
+            AcqRel => intrinsics::atomic_umin_acqrel(dst, val),
+            Relaxed => intrinsics::atomic_umin_relaxed(dst, val),
+            SeqCst => intrinsics::atomic_umin_seqcst(dst, val),
+            _ => unreachable!("{:?}", order), // catch-all for any ordering not listed above
+        }
+    }
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] // those two targets get the CAS-loop version from `atomic_rmw_by_atomic_update!()`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 { // backs `fetch_not`: bitwise NOT expressed through `atomic_xor`
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe { atomic_xor(dst, !0, order) } // `!0` is all ones, so the XOR flips every bit
+}
+
+#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] // those two targets get an equivalent definition from `atomic_rmw_by_atomic_update!()`
+#[inline]
+#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+unsafe fn atomic_neg(dst: *mut u128, order: Ordering) -> u128 { // backs `fetch_neg`: wrapping negation done with the `atomic_update` CAS loop
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe { atomic_update(dst, order, u128::wrapping_neg) } // no intrinsic is called directly here, unlike the helpers above
+}
+
+#[cfg(not(target_arch = "x86_64"))] // x86_64 has its own definition below
+#[inline]
+const fn is_lock_free() -> bool { // on these targets the answer is the compile-time constant
+    IS_ALWAYS_LOCK_FREE
+}
+#[cfg(not(target_arch = "x86_64"))]
+const IS_ALWAYS_LOCK_FREE: bool = true; // unconditionally true for every non-x86_64 target that compiles this module
+
+#[cfg(target_arch = "x86_64")]
+#[inline]
+fn is_lock_free() -> bool { // not `const` here: one branch calls into `detect`
+    #[cfg(target_feature = "cmpxchg16b")]
+    {
+        // CMPXCHG16B is available at compile-time.
+        true
+    }
+    #[cfg(not(target_feature = "cmpxchg16b"))]
+    {
+        detect::detect().has_cmpxchg16b() // feature not enabled at compile time, so query the `detect` module instead
+    }
+}
+#[cfg(target_arch = "x86_64")]
+const IS_ALWAYS_LOCK_FREE: bool = cfg!(target_feature = "cmpxchg16b"); // true only when cmpxchg16b is enabled at compile time
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min); // signed type: fetch_max/fetch_min use the signed helpers
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); // unsigned type: fetch_max/fetch_min use the unsigned helpers
+
+#[cfg(test)]
+mod tests { // instantiates test macros (defined outside this chunk) for the 128-bit integer types
+    use super::*;
+
+    test_atomic_int!(i128);
+    test_atomic_int!(u128);
+
+    // load/store/swap implementation is not affected by signedness, so it is
+    // enough to test only unsigned types.
+    stress_test!(u128);
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/macros.rs b/vendor/portable-atomic/src/imp/atomic128/macros.rs
new file mode 100644
index 0000000..d32217e
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/macros.rs
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+macro_rules! atomic128 { // generates a 128-bit atomic type whose methods forward to the free `atomic_*` functions in scope at the expansion site
+    ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => { // `$atomic_max`/`$atomic_min` name the min/max helpers to use for this integer type
+        #[repr(C, align(16))] // forces 16-byte alignment of the wrapper
+        pub(crate) struct $atomic_type {
+            v: core::cell::UnsafeCell<$int_type>, // interior mutability: all shared access goes through the raw pointer from `get()`
+        }
+
+        // Send is implicitly implemented.
+        // SAFETY: any data races are prevented by atomic intrinsics.
+        unsafe impl Sync for $atomic_type {}
+
+        impl_default_no_fetch_ops!($atomic_type, $int_type);
+        impl_default_bit_opts!($atomic_type, $int_type);
+        impl $atomic_type {
+            #[inline]
+            pub(crate) const fn new(v: $int_type) -> Self { // wraps an initial value; usable in const contexts
+                Self { v: core::cell::UnsafeCell::new(v) }
+            }
+
+            #[inline]
+            pub(crate) fn is_lock_free() -> bool { // delegates to the free `is_lock_free()` function at the expansion site
+                is_lock_free()
+            }
+            #[inline]
+            pub(crate) const fn is_always_lock_free() -> bool { // exposes the `IS_ALWAYS_LOCK_FREE` constant at the expansion site
+                IS_ALWAYS_LOCK_FREE
+            }
+
+            #[inline]
+            pub(crate) fn get_mut(&mut self) -> &mut $int_type { // plain (non-atomic) access, sound because `&mut self` is exclusive
+                // SAFETY: the mutable reference guarantees unique ownership.
+                // (UnsafeCell::get_mut requires Rust 1.50)
+                unsafe { &mut *self.v.get() }
+            }
+
+            #[inline]
+            pub(crate) fn into_inner(self) -> $int_type { // consumes the atomic and returns the wrapped integer
+                self.v.into_inner()
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn load(&self, order: Ordering) -> $int_type { // the ordering is validated first, then the value is read as `u128` and cast back
+                crate::utils::assert_load_ordering(order);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_load(self.v.get().cast::<u128>(), order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn store(&self, val: $int_type, order: Ordering) { // the ordering is validated first, then `val` is written as `u128`
+                crate::utils::assert_store_ordering(order);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_store(self.v.get().cast::<u128>(), val as u128, order) }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type { // forwards to `atomic_swap` on the `u128` view
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_swap(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn compare_exchange( // strong CAS; both orderings are validated before the operation
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    match atomic_compare_exchange(
+                        self.v.get().cast::<u128>(),
+                        current as u128,
+                        new as u128,
+                        success,
+                        failure,
+                    ) {
+                        Ok(v) => Ok(v as $int_type), // cast the `u128` payload back to the public integer type
+                        Err(v) => Err(v as $int_type),
+                    }
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn compare_exchange_weak( // same shape as `compare_exchange`, but forwards to the weak CAS helper
+                &self,
+                current: $int_type,
+                new: $int_type,
+                success: Ordering,
+                failure: Ordering,
+            ) -> Result<$int_type, $int_type> {
+                crate::utils::assert_compare_exchange_ordering(success, failure);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    match atomic_compare_exchange_weak(
+                        self.v.get().cast::<u128>(),
+                        current as u128,
+                        new as u128,
+                        success,
+                        failure,
+                    ) {
+                        Ok(v) => Ok(v as $int_type), // cast the `u128` payload back to the public integer type
+                        Err(v) => Err(v as $int_type),
+                    }
+                }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type { // forwards to `atomic_add` on the `u128` view
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_add(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type { // forwards to `atomic_sub` on the `u128` view
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_sub(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type { // forwards to `atomic_and` on the `u128` view
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_and(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_nand(&self, val: $int_type, order: Ordering) -> $int_type { // forwards to `atomic_nand` on the `u128` view
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_nand(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type { // forwards to `atomic_or` on the `u128` view
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_or(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type { // forwards to `atomic_xor` on the `u128` view
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_xor(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type { // uses the helper passed as `$atomic_max`, so signedness is chosen at expansion
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { $atomic_max(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type { // uses the helper passed as `$atomic_min`, so signedness is chosen at expansion
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { $atomic_min(self.v.get().cast::<u128>(), val as u128, order) as $int_type }
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type { // forwards to `atomic_not` on the `u128` view
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_not(self.v.get().cast::<u128>(), order) as $int_type }
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn not(&self, order: Ordering) { // same operation as `fetch_not`, with the result discarded
+                self.fetch_not(order);
+            }
+
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type { // forwards to `atomic_neg` on the `u128` view
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe { atomic_neg(self.v.get().cast::<u128>(), order) as $int_type }
+            }
+            #[inline]
+            #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+            pub(crate) fn neg(&self, order: Ordering) { // same operation as `fetch_neg`, with the result discarded
+                self.fetch_neg(order);
+            }
+
+            #[inline]
+            pub(crate) const fn as_ptr(&self) -> *mut $int_type { // raw pointer to the wrapped integer, straight from the `UnsafeCell`
+                self.v.get()
+            }
+        }
+    };
+}
+
+#[cfg(any(target_arch = "powerpc64", target_arch = "s390x", target_arch = "x86_64"))]
+#[allow(unused_macros)] // also used by intrinsics.rs
+macro_rules! atomic_rmw_by_atomic_update { // defines RMW helpers as CAS loops over an `atomic_update` function in scope at the expansion site
+    () => { // no-argument form: every RMW helper below, then the min/max helpers via the `cmp` arm
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |_| val) } // the closure ignores the old value: plain replacement
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |x| x.wrapping_add(val)) } // wrapping arithmetic, so overflow never panics
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |x| x.wrapping_sub(val)) } // wrapping arithmetic, so underflow never panics
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |x| x & val) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |x| !(x & val)) } // NAND = NOT of the AND
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |x| x | val) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |x| x ^ val) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |x| !x) }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_neg(dst: *mut u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, u128::wrapping_neg) }
+        }
+        atomic_rmw_by_atomic_update!(cmp); // recurse into the `cmp` arm to add the four min/max helpers
+    };
+    (cmp) => { // `cmp` form: only the min/max helpers, usable on its own
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] // the casts below deliberately reinterpret the bits
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                atomic_update(dst, order, |x| core::cmp::max(x as i128, val as i128) as u128) // signed comparison on the `i128` view, result stored back as `u128`
+            }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |x| core::cmp::max(x, val)) } // unsigned comparison directly on `u128`
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_min(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] // the casts below deliberately reinterpret the bits
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                atomic_update(dst, order, |x| core::cmp::min(x as i128, val as i128) as u128) // signed comparison on the `i128` view, result stored back as `u128`
+            }
+        }
+        #[inline]
+        #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+        unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe { atomic_update(dst, order, |x| core::cmp::min(x, val)) } // unsigned comparison directly on `u128`
+        }
+    };
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs b/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs
new file mode 100644
index 0000000..5edc147
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs
@@ -0,0 +1,947 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on PowerPC64.
+//
+// powerpc64 on pwr8+ supports 128-bit atomics:
+// https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+//
+// powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/PowerPC/PPC.td#L663
+// See also https://github.com/rust-lang/rust/issues/59932
+//
+// Note that we do not separate LL and SC into separate functions, but handle
+// them within a single asm block. This is because it is theoretically possible
+// for the compiler to insert operations that might clear the reservation between
+// LL and SC. See aarch64.rs for details.
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - Power ISA https://openpowerfoundation.org/specifications/isa
+// - AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - powerpc64 (pwr8) https://godbolt.org/z/nG5dGa38a
+// - powerpc64le https://godbolt.org/z/6c99s75e4
+
+include!("macros.rs");
+
+#[cfg(not(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+)))]
+#[path = "../fallback/outline_atomics.rs"]
+mod fallback;
+
+// On musl with static linking, it seems that getauxval is not always available.
+// See detect/auxv.rs for more.
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(any(test, portable_atomic_outline_atomics))] // TODO(powerpc64): currently disabled by default
+#[cfg(any(
+    test,
+    not(any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+    )),
+))]
+#[cfg(any(
+    all(
+        target_os = "linux",
+        any(
+            target_env = "gnu",
+            all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
+            portable_atomic_outline_atomics,
+        ),
+    ),
+    target_os = "android",
+    target_os = "freebsd",
+))]
+#[path = "detect/auxv.rs"]
+mod detect;
+
+use core::{arch::asm, sync::atomic::Ordering};
+
+use crate::utils::{Pair, U128};
+
+macro_rules! debug_assert_pwr8 {
+    () => { // expands to nothing when quadword-atomics is already enabled at compile time
+        #[cfg(not(any(
+            target_feature = "quadword-atomics",
+            portable_atomic_target_feature = "quadword-atomics",
+        )))]
+        {
+            debug_assert!(detect::detect().has_quadword_atomics()); // run-time detection result, checked by debug_assert!
+        }
+    };
+}
+
+// Refs: https://www.ibm.com/docs/en/aix/7.3?topic=ops-machine-pseudo-op
+//
+// This is similar to #[target_feature(enable = "quadword-atomics")], except that there are
+// no compiler guarantees regarding (un)inlining, and the scope is within an asm
+// block rather than a function. We use this directive because #[target_feature(enable = "quadword-atomics")]
+// is not supported as of Rust 1.70-nightly.
+//
+// start_pwr8 and end_pwr8 must be used in pairs.
+//
+// Note: If power8 instructions are not available at compile-time, we must guarantee that
+// the function that uses it is not inlined into a function where it is not
+// clear whether power8 instructions are available. Otherwise, (even if we checked whether
+// power8 instructions are available at run-time) optimizations that reorder its
+// instructions across the if condition might introduce undefined behavior.
+// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
+// However, our code uses the ifunc helper macro that works with function pointers,
+// so we don't have to worry about this unless the function is called without the helper macro.
+macro_rules! start_pwr8 {
+    () => {
+        ".machine push\n.machine power8" // first line of an asm block; must be paired with end_pwr8!() in the same block
+    };
+}
+macro_rules! end_pwr8 {
+    () => {
+        ".machine pop" // last line of an asm block; closes the scope opened by start_pwr8!()
+    };
+}
+
+macro_rules! atomic_rmw {
+    ($op:ident, $order:ident) => { // invokes $op!(<acquire barrier>, <release barrier>) chosen from $order
+        match $order {
+            Ordering::Relaxed => $op!("", ""), // no barrier on either side
+            Ordering::Acquire => $op!("lwsync", ""), // barrier only in the $acquire slot (emitted after the operation)
+            Ordering::Release => $op!("", "lwsync"), // barrier only in the $release slot (emitted before the operation)
+            Ordering::AcqRel => $op!("lwsync", "lwsync"),
+            Ordering::SeqCst => $op!("lwsync", "sync"), // "sync" in the $release slot, "lwsync" in the $acquire slot
+            _ => unreachable!("{:?}", $order),
+        }
+    };
+}
+
+// Returns true if the EQ bit of cr0 is set in `r`, a condition register value as read by `mfcr`.
+#[inline]
+fn extract_cr0(r: u64) -> bool {
+    r & 0x20000000 != 0
+}
+
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_load_pwr8 as atomic_load;
+#[cfg(not(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+)))]
+#[inline]
+unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
+    fn_alias! {
+        // inline(never) is just a hint and also not strictly necessary
+        // because we use ifunc helper macro, but used for clarity.
+        #[inline(never)]
+        unsafe fn(src: *mut u128) -> u128;
+        atomic_load_pwr8_relaxed = atomic_load_pwr8(Ordering::Relaxed);
+        atomic_load_pwr8_acquire = atomic_load_pwr8(Ordering::Acquire);
+        atomic_load_pwr8_seqcst = atomic_load_pwr8(Ordering::SeqCst);
+    }
+    // SAFETY: the caller must uphold the safety contract.
+    // We only call atomic_load_pwr8 if quadword-atomics is detected at run time; otherwise the fallback is used.
+    unsafe {
+        match order {
+            Ordering::Relaxed => {
+                ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_load_pwr8_relaxed
+                    } else {
+                        fallback::atomic_load_non_seqcst
+                    }
+                })
+            }
+            Ordering::Acquire => {
+                ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_load_pwr8_acquire
+                    } else {
+                        fallback::atomic_load_non_seqcst
+                    }
+                })
+            }
+            Ordering::SeqCst => {
+                ifunc!(unsafe fn(src: *mut u128) -> u128 {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_load_pwr8_seqcst
+                    } else {
+                        fallback::atomic_load_seqcst
+                    }
+                })
+            }
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+#[inline]
+unsafe fn atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128 {
+    debug_assert!(src as usize % 16 == 0);
+    debug_assert_pwr8!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
+    unsafe {
+        let (out_hi, out_lo);
+        macro_rules! atomic_load_acquire {
+            ($release:tt) => {
+                asm!(
+                    start_pwr8!(),
+                    $release,
+                    "lq %r4, 0({src})",
+                    // Lightweight acquire sync
+                    // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62
+                    "cmpd %cr7, %r4, %r4",
+                    "bne- %cr7, 2f",
+                    "2:",
+                    "isync",
+                    end_pwr8!(),
+                    src = in(reg_nonzero) ptr_reg!(src),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    out("r4") out_hi,
+                    out("r5") out_lo,
+                    out("cr7") _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        match order {
+            Ordering::Relaxed => {
+                asm!(
+                    start_pwr8!(),
+                    "lq %r4, 0({src})",
+                    end_pwr8!(),
+                    src = in(reg_nonzero) ptr_reg!(src),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    out("r4") out_hi,
+                    out("r5") out_lo,
+                    options(nostack, preserves_flags, readonly),
+                );
+            }
+            Ordering::Acquire => atomic_load_acquire!(""),
+            Ordering::SeqCst => atomic_load_acquire!("sync"),
+            _ => unreachable!("{:?}", order),
+        }
+        U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
+    }
+}
+
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_store_pwr8 as atomic_store;
+#[cfg(not(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+)))]
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+    fn_alias! {
+        // inline(never) is just a hint and also not strictly necessary
+        // because we use ifunc helper macro, but used for clarity.
+        #[inline(never)]
+        unsafe fn(dst: *mut u128, val: u128);
+        atomic_store_pwr8_relaxed = atomic_store_pwr8(Ordering::Relaxed);
+        atomic_store_pwr8_release = atomic_store_pwr8(Ordering::Release);
+        atomic_store_pwr8_seqcst = atomic_store_pwr8(Ordering::SeqCst);
+    }
+    // SAFETY: the caller must uphold the safety contract.
+    // We only call atomic_store_pwr8 if quadword-atomics is detected at run time; otherwise the fallback is used.
+    unsafe {
+        match order {
+            Ordering::Relaxed => {
+                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_store_pwr8_relaxed
+                    } else {
+                        fallback::atomic_store_non_seqcst
+                    }
+                });
+            }
+            Ordering::Release => {
+                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_store_pwr8_release
+                    } else {
+                        fallback::atomic_store_non_seqcst
+                    }
+                });
+            }
+            Ordering::SeqCst => {
+                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                    if detect::detect().has_quadword_atomics() {
+                        atomic_store_pwr8_seqcst
+                    } else {
+                        fallback::atomic_store_seqcst
+                    }
+                });
+            }
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+#[inline]
+unsafe fn atomic_store_pwr8(dst: *mut u128, val: u128, order: Ordering) {
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_pwr8!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA
+    unsafe {
+        let val = U128 { whole: val };
+        macro_rules! atomic_store {
+            ($release:tt) => {
+                asm!(
+                    start_pwr8!(),
+                    $release,
+                    "stq %r4, 0({dst})",
+                    end_pwr8!(),
+                    dst = in(reg_nonzero) ptr_reg!(dst),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    in("r4") val.pair.hi,
+                    in("r5") val.pair.lo,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        match order {
+            Ordering::Relaxed => atomic_store!(""),
+            Ordering::Release => atomic_store!("lwsync"),
+            Ordering::SeqCst => atomic_store!("sync"),
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[inline]
+unsafe fn atomic_compare_exchange(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> Result<u128, u128> {
+    let success = crate::utils::upgrade_success_ordering(success, failure); // NOTE(review): presumably makes `success` at least as strong as `failure` - confirm in utils
+
+    #[cfg(any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+    ))]
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that quadword atomics instructions are available at compile-time.
+    let (prev, ok) = unsafe { atomic_compare_exchange_pwr8(dst, old, new, success) };
+    #[cfg(not(any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+    )))]
+    // SAFETY: the caller must uphold the safety contract; the ifunc helper picks the pwr8 path only if quadword-atomics is detected at run time.
+    let (prev, ok) = unsafe { atomic_compare_exchange_ifunc(dst, old, new, success) };
+    if ok {
+        Ok(prev)
+    } else {
+        Err(prev)
+    }
+}
+#[inline]
+unsafe fn atomic_compare_exchange_pwr8(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    order: Ordering,
+) -> (u128, bool) {
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_pwr8!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
+    unsafe {
+        let old = U128 { whole: old };
+        let new = U128 { whole: new };
+        let (mut prev_hi, mut prev_lo);
+        let mut r;
+        macro_rules! cmpxchg {
+            ($acquire:tt, $release:tt) => {
+                asm!(
+                    start_pwr8!(),
+                    $release,
+                    "2:",
+                        "lqarx %r8, 0, {dst}",
+                        "xor {tmp_lo}, %r9, {old_lo}",
+                        "xor {tmp_hi}, %r8, {old_hi}",
+                        "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
+                        "bne %cr0, 3f", // jump if compare failed
+                        "stqcx. %r6, 0, {dst}",
+                        "bne %cr0, 2b", // continue loop if store failed
+                    "3:",
+                    // If the compare failed, the EQ bit of cr0 is cleared; if stqcx. succeeded, the EQ bit is set.
+                    "mfcr {tmp_lo}",
+                    $acquire,
+                    end_pwr8!(),
+                    dst = in(reg_nonzero) ptr_reg!(dst),
+                    old_hi = in(reg) old.pair.hi,
+                    old_lo = in(reg) old.pair.lo,
+                    tmp_hi = out(reg) _,
+                    tmp_lo = out(reg) r,
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    in("r6") new.pair.hi,
+                    in("r7") new.pair.lo,
+                    out("r8") prev_hi,
+                    out("r9") prev_lo,
+                    out("cr0") _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(cmpxchg, order);
+        (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
+    }
+}
+
+// Always use strong CAS for outline-atomics.
+#[cfg(not(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+)))]
+use atomic_compare_exchange as atomic_compare_exchange_weak;
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+#[inline]
+unsafe fn atomic_compare_exchange_weak(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    success: Ordering,
+    failure: Ordering,
+) -> Result<u128, u128> {
+    let success = crate::utils::upgrade_success_ordering(success, failure); // NOTE(review): presumably makes `success` at least as strong as `failure` - confirm in utils
+
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that quadword atomics instructions are available at compile-time.
+    let (prev, ok) = unsafe { atomic_compare_exchange_weak_pwr8(dst, old, new, success) };
+    if ok {
+        Ok(prev)
+    } else {
+        Err(prev)
+    }
+}
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+#[inline]
+unsafe fn atomic_compare_exchange_weak_pwr8(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    order: Ordering,
+) -> (u128, bool) {
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_pwr8!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA
+    unsafe {
+        let old = U128 { whole: old };
+        let new = U128 { whole: new };
+        let (mut prev_hi, mut prev_lo);
+        let mut r;
+        macro_rules! cmpxchg_weak {
+            ($acquire:tt, $release:tt) => {
+                asm!(
+                    start_pwr8!(),
+                    $release,
+                    "lqarx %r8, 0, {dst}",
+                    "xor {tmp_lo}, %r9, {old_lo}",
+                    "xor {tmp_hi}, %r8, {old_hi}",
+                    "or. {tmp_lo}, {tmp_lo}, {tmp_hi}",
+                    "bne %cr0, 3f", // jump if compare failed
+                    "stqcx. %r6, 0, {dst}",
+                    "3:",
+                    // If the compare or stqcx. failed, the EQ bit of cr0 is cleared; if stqcx. succeeded, the EQ bit is set.
+                    "mfcr {tmp_lo}",
+                    $acquire,
+                    end_pwr8!(),
+                    dst = in(reg_nonzero) ptr_reg!(dst),
+                    old_hi = in(reg) old.pair.hi,
+                    old_lo = in(reg) old.pair.lo,
+                    tmp_hi = out(reg) _,
+                    tmp_lo = out(reg) r,
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    in("r6") new.pair.hi,
+                    in("r7") new.pair.lo,
+                    out("r8") prev_hi,
+                    out("r9") prev_lo,
+                    out("cr0") _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(cmpxchg_weak, order);
+        (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r))
+    }
+}
+
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_swap_pwr8 as atomic_swap;
+// Do not use atomic_rmw_ll_sc_3 because it would need extra MR instructions to implement swap; here `val` is passed directly in r8/r9.
+#[inline]
+unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_pwr8!();
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let val = U128 { whole: val };
+        let (mut prev_hi, mut prev_lo);
+        macro_rules! swap {
+            ($acquire:tt, $release:tt) => {
+                asm!(
+                    start_pwr8!(),
+                    $release,
+                    "2:",
+                        "lqarx %r6, 0, {dst}",
+                        "stqcx. %r8, 0, {dst}",
+                        "bne %cr0, 2b",
+                    $acquire,
+                    end_pwr8!(),
+                    dst = in(reg_nonzero) ptr_reg!(dst),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                    out("r6") prev_hi,
+                    out("r7") prev_lo,
+                    in("r8") val.pair.hi,
+                    in("r9") val.pair.lo,
+                    out("cr0") _,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        atomic_rmw!(swap, order);
+        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+    }
+}
+
+/// Atomic RMW by LL/SC loop (3 arguments)
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+///
+/// $op can use the following registers:
+/// - val_hi/val_lo pair: val argument (read-only for `$op`)
+/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
+/// - r8/r9 pair: new value that will be stored by sc
+macro_rules! atomic_rmw_ll_sc_3 {
+    ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => {
+        #[cfg(any(
+            target_feature = "quadword-atomics",
+            portable_atomic_target_feature = "quadword-atomics",
+        ))]
+        use $name as $reexport_name;
+        #[inline]
+        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            debug_assert_pwr8!();
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                let val = U128 { whole: val };
+                let (mut prev_hi, mut prev_lo);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt) => {
+                        asm!(
+                            start_pwr8!(),
+                            $release,
+                            "2:",
+                                "lqarx %r6, 0, {dst}",
+                                $($op)*
+                                "stqcx. %r8, 0, {dst}",
+                                "bne %cr0, 2b",
+                            $acquire,
+                            end_pwr8!(),
+                            dst = in(reg_nonzero) ptr_reg!(dst),
+                            val_hi = in(reg) val.pair.hi,
+                            val_lo = in(reg) val.pair.lo,
+                            $($reg)*
+                            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                            // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                            out("r6") prev_hi,
+                            out("r7") prev_lo,
+                            out("r8") _, // new (hi)
+                            out("r9") _, // new (lo)
+                            out("cr0") _,
+                            options(nostack, preserves_flags),
+                        )
+                    };
+                }
+                atomic_rmw!(op, order);
+                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+            }
+        }
+    };
+}
+/// Atomic RMW by LL/SC loop (2 arguments)
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+///
+/// $op can use the following registers:
+/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`)
+/// - r8/r9 pair: new value that will be stored by sc
+macro_rules! atomic_rmw_ll_sc_2 {
+    ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => {
+        #[cfg(any(
+            target_feature = "quadword-atomics",
+            portable_atomic_target_feature = "quadword-atomics",
+        ))]
+        use $name as $reexport_name;
+        #[inline]
+        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            debug_assert_pwr8!();
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                let (mut prev_hi, mut prev_lo);
+                macro_rules! op {
+                    ($acquire:tt, $release:tt) => {
+                        asm!(
+                            start_pwr8!(),
+                            $release,
+                            "2:",
+                                "lqarx %r6, 0, {dst}",
+                                $($op)*
+                                "stqcx. %r8, 0, {dst}",
+                                "bne %cr0, 2b",
+                            $acquire,
+                            end_pwr8!(),
+                            dst = in(reg_nonzero) ptr_reg!(dst),
+                            $($reg)*
+                            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                            // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+                            out("r6") prev_hi,
+                            out("r7") prev_lo,
+                            out("r8") _, // new (hi)
+                            out("r9") _, // new (lo)
+                            out("cr0") _,
+                            options(nostack, preserves_flags),
+                        )
+                    };
+                }
+                atomic_rmw!(op, order);
+                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+            }
+        }
+    };
+}
+
+atomic_rmw_ll_sc_3! {
+    atomic_add_pwr8 as atomic_add, [out("xer") _,], // xer is clobbered because the carry bit is written
+    "addc %r9, {val_lo}, %r7", // lo 64-bit: new_lo = val_lo + prev_lo, recording the carry
+    "adde %r8, {val_hi}, %r6", // hi 64-bit: new_hi = val_hi + prev_hi + carry
+}
+atomic_rmw_ll_sc_3! {
+    atomic_sub_pwr8 as atomic_sub, [out("xer") _,], // xer is clobbered because the carry bit is written
+    "subc %r9, %r7, {val_lo}",  // lo 64-bit: new_lo = prev_lo - val_lo, recording the borrow in the carry bit
+    "subfe %r8, {val_hi}, %r6", // hi 64-bit: new_hi = prev_hi - val_hi - borrow
+}
+atomic_rmw_ll_sc_3! {
+    atomic_and_pwr8 as atomic_and, [],
+    "and %r9, {val_lo}, %r7", // lo 64-bit: new_lo = val_lo & prev_lo
+    "and %r8, {val_hi}, %r6", // hi 64-bit: new_hi = val_hi & prev_hi
+}
+atomic_rmw_ll_sc_3! {
+    atomic_nand_pwr8 as atomic_nand, [],
+    "nand %r9, {val_lo}, %r7", // lo 64-bit: new_lo = !(val_lo & prev_lo)
+    "nand %r8, {val_hi}, %r6", // hi 64-bit: new_hi = !(val_hi & prev_hi)
+}
+atomic_rmw_ll_sc_3! {
+    atomic_or_pwr8 as atomic_or, [],
+    "or %r9, {val_lo}, %r7", // lo 64-bit: new_lo = val_lo | prev_lo
+    "or %r8, {val_hi}, %r6", // hi 64-bit: new_hi = val_hi | prev_hi
+}
+atomic_rmw_ll_sc_3! {
+    atomic_xor_pwr8 as atomic_xor, [],
+    "xor %r9, {val_lo}, %r7", // lo 64-bit: new_lo = val_lo ^ prev_lo
+    "xor %r8, {val_hi}, %r6", // hi 64-bit: new_hi = val_hi ^ prev_hi
+}
+atomic_rmw_ll_sc_3! {
+    atomic_max_pwr8 as atomic_max, [out("cr1") _,],
+    "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
+    "iselgt %r9, %r7, {val_lo}",  // lo candidate for the "hi halves equal" case: larger lo, based on GT bit in cr0
+    "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
+    "isel %r8, %r7, {val_lo}, 5", // lo candidate for the "hi halves differ" case: lo of the side with the larger hi, based on GT bit in cr1
+    "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
+    "iseleq %r9, %r9, %r8",       // choose between the two lo candidates based on EQ bit in cr0
+    "isel %r8, %r6, {val_hi}, 5", // select hi 64-bit based on GT bit in cr1
+}
+atomic_rmw_ll_sc_3! {
+    atomic_umax_pwr8 as atomic_umax, [],
+    "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
+    "iselgt %r9, %r7, {val_lo}", // lo candidate for the "hi halves equal" case: larger lo, based on GT bit in cr0
+    "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
+    "iselgt %r8, %r7, {val_lo}", // lo candidate for the "hi halves differ" case: lo of the side with the larger hi, based on GT bit in cr0
+    "iseleq %r9, %r9, %r8",      // choose between the two lo candidates based on EQ bit in cr0
+    "iselgt %r8, %r6, {val_hi}", // select hi 64-bit based on GT bit in cr0
+}
+atomic_rmw_ll_sc_3! {
+    atomic_min_pwr8 as atomic_min, [out("cr1") _,],
+    "cmpld %r7, {val_lo}",        // (unsigned) compare lo 64-bit, store result to cr0
+    "isellt %r9, %r7, {val_lo}",  // lo candidate for the "hi halves equal" case: smaller lo, based on LT bit in cr0
+    "cmpd %cr1, %r6, {val_hi}",   // (signed) compare hi 64-bit, store result to cr1
+    "isel %r8, %r7, {val_lo}, 4", // lo candidate for the "hi halves differ" case: lo of the side with the smaller hi, based on LT bit in cr1
+    "cmpld %r6, {val_hi}",        // (unsigned) compare hi 64-bit, store result to cr0
+    "iseleq %r9, %r9, %r8",       // choose between the two lo candidates based on EQ bit in cr0
+    "isel %r8, %r6, {val_hi}, 4", // select hi 64-bit based on LT bit in cr1
+}
+atomic_rmw_ll_sc_3! {
+    atomic_umin_pwr8 as atomic_umin, [],
+    "cmpld %r7, {val_lo}",       // compare lo 64-bit, store result to cr0
+    "isellt %r9, %r7, {val_lo}", // lo candidate for the "hi halves equal" case: smaller lo, based on LT bit in cr0
+    "cmpld %r6, {val_hi}",       // compare hi 64-bit, store result to cr0
+    "isellt %r8, %r7, {val_lo}", // lo candidate for the "hi halves differ" case: lo of the side with the smaller hi, based on LT bit in cr0
+    "iseleq %r9, %r9, %r8",      // choose between the two lo candidates based on EQ bit in cr0
+    "isellt %r8, %r6, {val_hi}", // select hi 64-bit based on LT bit in cr0
+}
+
+#[cfg(any(
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+))]
+use atomic_not_pwr8 as atomic_not;
+#[inline]
+unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 {
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe { atomic_xor_pwr8(dst, !0, order) } // XOR with all ones flips every bit
+}
+
+#[cfg(portable_atomic_llvm_16)]
+atomic_rmw_ll_sc_2! {
+    atomic_neg_pwr8 as atomic_neg, [out("xer") _,], // xer is clobbered because the carry bit is written
+    "subfic %r9, %r7, 0", // lo 64-bit: new_lo = 0 - prev_lo, recording the borrow in the carry bit
+    "subfze %r8, %r6",    // hi 64-bit: new_hi = 0 - prev_hi - borrow
+}
+// LLVM 15 miscompiles subfic, so subtract from a register holding zero instead.
+#[cfg(not(portable_atomic_llvm_16))]
+atomic_rmw_ll_sc_2! {
+    atomic_neg_pwr8 as atomic_neg, [zero = in(reg) 0_u64, out("xer") _,],
+    "subc %r9, {zero}, %r7", // lo 64-bit: new_lo = 0 - prev_lo, recording the borrow in the carry bit
+    "subfze %r8, %r6",       // hi 64-bit: new_hi = 0 - prev_hi - borrow
+}
+
+macro_rules! atomic_rmw_with_ifunc {
+    (
+        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
+        pwr8 = $pwr8_fn:ident;
+        non_seqcst_fallback = $non_seqcst_fallback_fn:ident;
+        seqcst_fallback = $seqcst_fallback_fn:ident;
+    ) => {
+        #[cfg(not(any(
+            target_feature = "quadword-atomics",
+            portable_atomic_target_feature = "quadword-atomics",
+        )))]
+        #[inline]
+        unsafe fn $name($($arg)*, order: Ordering) $(-> $ret_ty)? {
+            fn_alias! {
+                // inline(never) is just a hint and also not strictly necessary
+                // because we use ifunc helper macro, but used for clarity.
+                #[inline(never)]
+                unsafe fn($($arg)*) $(-> $ret_ty)?;
+                pwr8_relaxed_fn = $pwr8_fn(Ordering::Relaxed);
+                pwr8_acquire_fn = $pwr8_fn(Ordering::Acquire);
+                pwr8_release_fn = $pwr8_fn(Ordering::Release);
+                pwr8_acqrel_fn = $pwr8_fn(Ordering::AcqRel);
+                pwr8_seqcst_fn = $pwr8_fn(Ordering::SeqCst);
+            }
+            // SAFETY: the caller must uphold the safety contract.
+            // We only call $pwr8_fn if quadword-atomics is detected at run time; otherwise the fallback is used.
+            unsafe {
+                match order {
+                    Ordering::Relaxed => {
+                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                            if detect::detect().has_quadword_atomics() {
+                                pwr8_relaxed_fn
+                            } else {
+                                fallback::$non_seqcst_fallback_fn
+                            }
+                        })
+                    }
+                    Ordering::Acquire => {
+                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                            if detect::detect().has_quadword_atomics() {
+                                pwr8_acquire_fn
+                            } else {
+                                fallback::$non_seqcst_fallback_fn
+                            }
+                        })
+                    }
+                    Ordering::Release => {
+                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                            if detect::detect().has_quadword_atomics() {
+                                pwr8_release_fn
+                            } else {
+                                fallback::$non_seqcst_fallback_fn
+                            }
+                        })
+                    }
+                    Ordering::AcqRel => {
+                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                            if detect::detect().has_quadword_atomics() {
+                                pwr8_acqrel_fn
+                            } else {
+                                fallback::$non_seqcst_fallback_fn
+                            }
+                        })
+                    }
+                    Ordering::SeqCst => {
+                        ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                            if detect::detect().has_quadword_atomics() {
+                                pwr8_seqcst_fn
+                            } else {
+                                fallback::$seqcst_fallback_fn
+                            }
+                        })
+                    }
+                    _ => unreachable!("{:?}", order),
+                }
+            }
+        }
+    };
+}
+
+atomic_rmw_with_ifunc! { // CAS; the generated fn also takes a trailing `order: Ordering` argument
+    unsafe fn atomic_compare_exchange_ifunc(dst: *mut u128, old: u128, new: u128) -> (u128, bool);
+    pwr8 = atomic_compare_exchange_pwr8; // selected when detect::detect().has_quadword_atomics()
+    non_seqcst_fallback = atomic_compare_exchange_non_seqcst; // otherwise, for Relaxed/Acquire/Release/AcqRel
+    seqcst_fallback = atomic_compare_exchange_seqcst; // otherwise, for SeqCst
+}
+atomic_rmw_with_ifunc! { // swap; same dispatch scheme as above
+    unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_swap_pwr8;
+    non_seqcst_fallback = atomic_swap_non_seqcst;
+    seqcst_fallback = atomic_swap_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-add
+    unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_add_pwr8;
+    non_seqcst_fallback = atomic_add_non_seqcst;
+    seqcst_fallback = atomic_add_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-sub
+    unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_sub_pwr8;
+    non_seqcst_fallback = atomic_sub_non_seqcst;
+    seqcst_fallback = atomic_sub_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-and
+    unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_and_pwr8;
+    non_seqcst_fallback = atomic_and_non_seqcst;
+    seqcst_fallback = atomic_and_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-nand
+    unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_nand_pwr8;
+    non_seqcst_fallback = atomic_nand_non_seqcst;
+    seqcst_fallback = atomic_nand_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-or
+    unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_or_pwr8;
+    non_seqcst_fallback = atomic_or_non_seqcst;
+    seqcst_fallback = atomic_or_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-xor
+    unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_xor_pwr8;
+    non_seqcst_fallback = atomic_xor_non_seqcst;
+    seqcst_fallback = atomic_xor_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-max; wired to AtomicI128 by the atomic128! call below
+    unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_max_pwr8;
+    non_seqcst_fallback = atomic_max_non_seqcst;
+    seqcst_fallback = atomic_max_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-max; wired to AtomicU128 by the atomic128! call below
+    unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_umax_pwr8;
+    non_seqcst_fallback = atomic_umax_non_seqcst;
+    seqcst_fallback = atomic_umax_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-min; wired to AtomicI128 by the atomic128! call below
+    unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_min_pwr8;
+    non_seqcst_fallback = atomic_min_non_seqcst;
+    seqcst_fallback = atomic_min_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-min; wired to AtomicU128 by the atomic128! call below
+    unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128;
+    pwr8 = atomic_umin_pwr8;
+    non_seqcst_fallback = atomic_umin_non_seqcst;
+    seqcst_fallback = atomic_umin_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-not; unary, so there is no `val` operand
+    unsafe fn atomic_not(dst: *mut u128) -> u128;
+    pwr8 = atomic_not_pwr8;
+    non_seqcst_fallback = atomic_not_non_seqcst;
+    seqcst_fallback = atomic_not_seqcst;
+}
+atomic_rmw_with_ifunc! { // fetch-neg; unary, so there is no `val` operand
+    unsafe fn atomic_neg(dst: *mut u128) -> u128;
+    pwr8 = atomic_neg_pwr8;
+    non_seqcst_fallback = atomic_neg_non_seqcst;
+    seqcst_fallback = atomic_neg_seqcst;
+}
+
+#[inline]
+fn is_lock_free() -> bool { // true when 128-bit operations use the quadword instructions
+    #[cfg(any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+    ))]
+    {
+        // lqarx and stqcx. instructions are statically available, so no run-time check is needed.
+        true
+    }
+    #[cfg(not(any(
+        target_feature = "quadword-atomics",
+        portable_atomic_target_feature = "quadword-atomics",
+    )))]
+    {
+        detect::detect().has_quadword_atomics() // feature not enabled at compile time: ask run-time detection
+    }
+}
+const IS_ALWAYS_LOCK_FREE: bool = cfg!(any( // compile-time answer: true only if the feature is statically enabled
+    target_feature = "quadword-atomics",
+    portable_atomic_target_feature = "quadword-atomics",
+));
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min); // i128 type is wired to atomic_max/atomic_min
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); // u128 type is wired to atomic_umax/atomic_umin
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_atomic_int!(i128); // shared integer test suite, instantiated for the signed type
+    test_atomic_int!(u128); // ... and for the unsigned type
+
+    // The load/store/swap implementation is not affected by signedness, so it is
+    // enough to stress-test only the unsigned type.
+    stress_test!(u128);
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/s390x.rs b/vendor/portable-atomic/src/imp/atomic128/s390x.rs
new file mode 100644
index 0000000..37c2063
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/s390x.rs
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on s390x.
+//
+// s390x supports 128-bit atomic load/store/cmpxchg:
+// https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc
+//
+// LLVM's minimal supported architecture level is z10:
+// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/SystemZ/SystemZProcessors.td
+// This does not appear to have changed since the current s390x backend was added in LLVM 3.3:
+// https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf
+// - z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - s390x https://godbolt.org/z/b11znnEh4
+// - s390x (z196) https://godbolt.org/z/s5n9PGcv6
+// - s390x (z15) https://godbolt.org/z/Wf49h7bPf
+
+include!("macros.rs");
+
+use core::{arch::asm, sync::atomic::Ordering};
+
+use crate::utils::{Pair, U128};
+
+// Builds the asm text for `$a0 = $a1 <op> $a2`: one three-operand `${op}k` on z196 or later, otherwise `lgr` followed by the two-operand `$op`.
+#[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))]
+macro_rules! distinct_op {
+    ($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
+        concat!($op, "k ", $a0, ", ", $a1, ", ", $a2) // expands to "<op>k a0, a1, a2"
+    };
+}
+#[cfg(not(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops")))]
+macro_rules! distinct_op {
+    ($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
+        concat!("lgr ", $a0, ", ", $a1, "\n", $op, " ", $a0, ", ", $a2) // "lgr a0, a1" then "<op> a0, a2"
+    };
+}
+
+// Use selgr$cond on z15 or later, otherwise split to lgr and locgr$cond. Not defined at all without load-store-on-cond.
+#[cfg(any(
+    target_feature = "miscellaneous-extensions-3",
+    portable_atomic_target_feature = "miscellaneous-extensions-3",
+))]
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+macro_rules! select_op {
+    ($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
+        concat!("selgr", $cond, " ", $a0, ", ", $a1, ", ", $a2) // expands to "selgr<cond> a0, a1, a2"
+    };
+}
+#[cfg(not(any(
+    target_feature = "miscellaneous-extensions-3",
+    portable_atomic_target_feature = "miscellaneous-extensions-3",
+)))]
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+macro_rules! select_op {
+    ($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
+        concat!("lgr ", $a0, ", ", $a2, "\n", "locgr", $cond, " ", $a0, ", ", $a1) // a0 = a2, then a0 = a1 if <cond> holds
+    };
+}
+
+#[inline]
+unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { // `_order` is unused: see the SeqCst note below
+    debug_assert!(src as usize % 16 == 0); // 16-byte alignment is only checked in debug builds
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        // atomic load is always SeqCst.
+        let (out_hi, out_lo);
+        asm!(
+            "lpq %r0, 0({src})", // load the quadword at `src` into the r0/r1 pair
+            src = in(reg) ptr_reg!(src),
+            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+            out("r0") out_hi,
+            out("r1") out_lo,
+            options(nostack, preserves_flags),
+        );
+        U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole // reassemble the two halves into one u128
+    }
+}
+
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { // panics on any order other than Relaxed/Release/SeqCst
+    debug_assert!(dst as usize % 16 == 0); // 16-byte alignment is only checked in debug builds
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let val = U128 { whole: val }; // gives access to the hi/lo halves via `val.pair`
+        macro_rules! atomic_store { // emits the store followed by the `$fence` asm line (may be empty)
+            ($fence:tt) => {
+                asm!(
+                    "stpq %r0, 0({dst})", // store the r0/r1 pair as one quadword at `dst`
+                    $fence,
+                    dst = in(reg) ptr_reg!(dst),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    in("r0") val.pair.hi,
+                    in("r1") val.pair.lo,
+                    options(nostack, preserves_flags),
+                )
+            };
+        }
+        match order {
+            // Relaxed and Release stores are equivalent: no trailing fence is emitted.
+            Ordering::Relaxed | Ordering::Release => atomic_store!(""),
+            // bcr 14,0 (fast-BCR-serialization) requires z196 or later.
+            #[cfg(any(
+                target_feature = "fast-serialization",
+                portable_atomic_target_feature = "fast-serialization",
+            ))]
+            Ordering::SeqCst => atomic_store!("bcr 14, 0"),
+            #[cfg(not(any(
+                target_feature = "fast-serialization",
+                portable_atomic_target_feature = "fast-serialization",
+            )))]
+            Ordering::SeqCst => atomic_store!("bcr 15, 0"), // serialization used when fast-serialization is unavailable
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[inline]
+unsafe fn atomic_compare_exchange( // returns Ok(previous) if the value at `dst` equalled `old`, else Err(previous)
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    _success: Ordering, // unused: see the SeqCst note below
+    _failure: Ordering, // unused: see the SeqCst note below
+) -> Result<u128, u128> {
+    debug_assert!(dst as usize % 16 == 0); // 16-byte alignment is only checked in debug builds
+
+    // SAFETY: the caller must uphold the safety contract.
+    let prev = unsafe {
+        // atomic CAS is always SeqCst.
+        let old = U128 { whole: old }; // shadows `old` only inside this block
+        let new = U128 { whole: new };
+        let (prev_hi, prev_lo);
+        asm!(
+            "cdsg %r0, %r12, 0({dst})", // expected value in r0/r1, replacement in r12/r13
+            dst = in(reg) ptr_reg!(dst),
+            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+            inout("r0") old.pair.hi => prev_hi,
+            inout("r1") old.pair.lo => prev_lo,
+            in("r12") new.pair.hi,
+            in("r13") new.pair.lo,
+            // Do not use `preserves_flags` because CDSG modifies the condition code.
+            options(nostack),
+        );
+        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+    };
+    if prev == old { // `old` is the original u128 parameter here; success is derived by comparing values
+        Ok(prev)
+    } else {
+        Err(prev)
+    }
+}
+
+// cdsg is always strong, so the weak variant is just an alias for the strong one.
+use atomic_compare_exchange as atomic_compare_exchange_weak;
+
+#[cfg(not(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+)))]
+#[inline(always)]
+unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128
+where
+    F: FnMut(u128) -> u128,
+{
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        // This function is private and every `f` passed to it only looks at the value it is
+        // given, so the initial load and any failed CAS need no synchronization (Relaxed).
+        let mut observed = atomic_load(dst, Ordering::Relaxed);
+        loop {
+            let desired = f(observed);
+            match atomic_compare_exchange_weak(dst, observed, desired, order, Ordering::Relaxed) {
+                Ok(previous) => break previous,
+                Err(actual) => observed = actual,
+            }
+        }
+    }
+}
+
+#[inline]
+unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 { // returns the previous value; `_order` is unused
+    debug_assert!(dst as usize % 16 == 0); // 16-byte alignment is only checked in debug builds
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // We could use atomic_update here, but using an inline assembly allows omitting
+    // the comparison of results and the storing/comparing of condition flags.
+    //
+    // Do not use atomic_rmw_cas_3 because it needs extra LGR to implement swap.
+    unsafe {
+        // atomic swap is always SeqCst.
+        let val = U128 { whole: val };
+        let (mut prev_hi, mut prev_lo);
+        asm!(
+            "lpq %r0, 0({dst})", // initial guess for the current value, in r0/r1
+            "2:",
+                "cdsg %r0, %r12, 0({dst})", // try to replace r0/r1 with r12/r13; on mismatch r0/r1 is refreshed
+                "jl 2b", // retry while the compare failed
+            dst = in(reg) ptr_reg!(dst),
+            // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+            out("r0") prev_hi,
+            out("r1") prev_lo,
+            in("r12") val.pair.hi,
+            in("r13") val.pair.lo,
+            // Do not use `preserves_flags` because CDSG modifies the condition code.
+            options(nostack),
+        );
+        U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+    }
+}
+
+/// Atomic RMW by CAS loop (3 arguments); the generated function returns the previous value.
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+///
+/// `$op` is re-run on every retry and can use the following registers:
+/// - val_hi/val_lo pair: val argument (read-only for `$op`)
+/// - r0/r1 pair: previous value loaded (read-only for `$op`)
+/// - r12/r13 pair: new value that will be stored
+// We could use atomic_update here, but using an inline assembly allows omitting
+// the comparison of results and the storing/comparing of condition flags.
+macro_rules! atomic_rmw_cas_3 {
+    ($name:ident, [$($reg:tt)*], $($op:tt)*) => { // `$reg`: extra asm operands (e.g. a scratch register) spliced in below
+        #[inline]
+        unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                // atomic RMW is always SeqCst.
+                let val = U128 { whole: val };
+                let (mut prev_hi, mut prev_lo);
+                asm!(
+                    "lpq %r0, 0({dst})", // initial value into r0/r1
+                    "2:",
+                        $($op)*
+                        "cdsg %r0, %r12, 0({dst})", // publish r12/r13 if memory still equals r0/r1
+                        "jl 2b", // otherwise recompute from the refreshed r0/r1
+                    dst = in(reg) ptr_reg!(dst),
+                    val_hi = in(reg) val.pair.hi,
+                    val_lo = in(reg) val.pair.lo,
+                    $($reg)*
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    out("r0") prev_hi,
+                    out("r1") prev_lo,
+                    out("r12") _, // written by `$op`; declared as an output so the compiler treats it as clobbered
+                    out("r13") _, // likewise
+                    // Do not use `preserves_flags` because CDSG modifies the condition code.
+                    options(nostack),
+                );
+                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+            }
+        }
+    };
+}
+/// Atomic RMW by CAS loop (2 arguments, i.e. unary operations); the generated function returns the previous value.
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+///
+/// `$op` is re-run on every retry and can use the following registers:
+/// - r0/r1 pair: previous value loaded (read-only for `$op`)
+/// - r12/r13 pair: new value that will be stored
+// We could use atomic_update here, but using an inline assembly allows omitting
+// the comparison of results and the storing/comparing of condition flags.
+macro_rules! atomic_rmw_cas_2 {
+    ($name:ident, $($op:tt)*) => {
+        #[inline]
+        unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0);
+            // SAFETY: the caller must uphold the safety contract.
+            unsafe {
+                // atomic RMW is always SeqCst.
+                let (mut prev_hi, mut prev_lo);
+                asm!(
+                    "lpq %r0, 0({dst})", // initial value into r0/r1
+                    "2:",
+                        $($op)*
+                        "cdsg %r0, %r12, 0({dst})", // publish r12/r13 if memory still equals r0/r1
+                        "jl 2b", // otherwise recompute from the refreshed r0/r1
+                    dst = in(reg) ptr_reg!(dst),
+                    // Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
+                    out("r0") prev_hi,
+                    out("r1") prev_lo,
+                    out("r12") _, // written by `$op`; declared as an output so the compiler treats it as clobbered
+                    out("r13") _, // likewise
+                    // Do not use `preserves_flags` because CDSG modifies the condition code.
+                    options(nostack),
+                );
+                U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
+            }
+        }
+    };
+}
+
+atomic_rmw_cas_3! {
+    atomic_add, [],
+    distinct_op!("algr", "%r13", "%r1", "{val_lo}"), // low half first: r13 = r1 + val_lo
+    "lgr %r12, %r0", // copy the previous high half
+    "alcgr %r12, {val_hi}", // r12 += val_hi, with the carry from the low-half add
+}
+atomic_rmw_cas_3! {
+    atomic_sub, [],
+    distinct_op!("slgr", "%r13", "%r1", "{val_lo}"), // low half first: r13 = r1 - val_lo
+    "lgr %r12, %r0", // copy the previous high half
+    "slbgr %r12, {val_hi}", // r12 -= val_hi, with the borrow from the low-half subtract
+}
+atomic_rmw_cas_3! {
+    atomic_and, [],
+    distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), // r13 = r1 & val_lo
+    distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), // r12 = r0 & val_hi
+}
+
+// Use nngrk on z15 or later; otherwise AND each half and then flip all 64 bits of it.
+#[cfg(any(
+    target_feature = "miscellaneous-extensions-3",
+    portable_atomic_target_feature = "miscellaneous-extensions-3",
+))]
+atomic_rmw_cas_3! {
+    atomic_nand, [],
+    "nngrk %r13, %r1, {val_lo}", // low half
+    "nngrk %r12, %r0, {val_hi}", // high half
+}
+#[cfg(not(any(
+    target_feature = "miscellaneous-extensions-3",
+    portable_atomic_target_feature = "miscellaneous-extensions-3",
+)))]
+atomic_rmw_cas_3! {
+    atomic_nand, [],
+    distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), // r13 = r1 & val_lo
+    "xihf %r13, 4294967295", // flip the upper 32 bits of r13 (4294967295 == 0xFFFF_FFFF)
+    "xilf %r13, 4294967295", // flip the lower 32 bits of r13
+    distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), // r12 = r0 & val_hi
+    "xihf %r12, 4294967295", // flip the upper 32 bits of r12
+    "xilf %r12, 4294967295", // flip the lower 32 bits of r12
+}
+
+atomic_rmw_cas_3! {
+    atomic_or, [],
+    distinct_op!("ogr", "%r13", "%r1", "{val_lo}"), // r13 = r1 | val_lo
+    distinct_op!("ogr", "%r12", "%r0", "{val_hi}"), // r12 = r0 | val_hi
+}
+atomic_rmw_cas_3! {
+    atomic_xor, [],
+    distinct_op!("xgr", "%r13", "%r1", "{val_lo}"), // r13 = r1 ^ val_lo
+    distinct_op!("xgr", "%r12", "%r0", "{val_hi}"), // r12 = r0 ^ val_hi
+}
+
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+    atomic_max, [], // used for AtomicI128: high halves are compared with cgr (signed)
+    "clgr %r1, {val_lo}", // unsigned compare of the low halves
+    select_op!("h", "%r12", "%r1", "{val_lo}"), // r12 = larger low half (scratch for the equal-high case)
+    "cgr %r0, {val_hi}", // signed compare of the high halves
+    select_op!("h", "%r13", "%r1", "{val_lo}"), // r13 = low half of the operand with the larger high half
+    "locgre %r13, %r12", // high halves equal: the low-half comparison decides
+    select_op!("h", "%r12", "%r0", "{val_hi}"), // r12 = larger high half
+}
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+    atomic_umax, [tmp = out(reg) _,], // used for AtomicU128; needs one extra scratch register
+    "clgr %r1, {val_lo}", // unsigned compare of the low halves
+    select_op!("h", "{tmp}", "%r1", "{val_lo}"), // tmp = larger low half
+    "clgr %r0, {val_hi}", // unsigned compare of the high halves
+    select_op!("h", "%r12", "%r0", "{val_hi}"), // r12 = larger high half
+    select_op!("h", "%r13", "%r1", "{val_lo}"), // r13 = low half of the operand with the larger high half
+    "cgr %r0, {val_hi}", // NOTE(review): only equality is consumed below; the clgr above may already provide it - confirm
+    "locgre %r13, {tmp}", // high halves equal: the low-half comparison decides
+}
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+    atomic_min, [], // used for AtomicI128: high halves are compared with cgr (signed)
+    "clgr %r1, {val_lo}", // unsigned compare of the low halves
+    select_op!("l", "%r12", "%r1", "{val_lo}"), // r12 = smaller low half (scratch for the equal-high case)
+    "cgr %r0, {val_hi}", // signed compare of the high halves
+    select_op!("l", "%r13", "%r1", "{val_lo}"), // r13 = low half of the operand with the smaller high half
+    "locgre %r13, %r12", // high halves equal: the low-half comparison decides
+    select_op!("l", "%r12", "%r0", "{val_hi}"), // r12 = smaller high half
+}
+#[cfg(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+))]
+atomic_rmw_cas_3! {
+    atomic_umin, [tmp = out(reg) _,], // used for AtomicU128; needs one extra scratch register
+    "clgr %r1, {val_lo}", // unsigned compare of the low halves
+    select_op!("l", "{tmp}", "%r1", "{val_lo}"), // tmp = smaller low half
+    "clgr %r0, {val_hi}", // unsigned compare of the high halves
+    select_op!("l", "%r12", "%r0", "{val_hi}"), // r12 = smaller high half
+    select_op!("l", "%r13", "%r1", "{val_lo}"), // r13 = low half of the operand with the smaller high half
+    "cgr %r0, {val_hi}", // NOTE(review): only equality is consumed below; the clgr above may already provide it - confirm
+    "locgre %r13, {tmp}", // high halves equal: the low-half comparison decides
+}
+// We use atomic_update for atomic min/max on pre-z196 because
+// z10 doesn't seem to have a good way to implement 128-bit min/max.
+// loc{,g}r requires z196 or later.
+// https://godbolt.org/z/j8KG9q5oq
+#[cfg(not(any(
+    target_feature = "load-store-on-cond",
+    portable_atomic_target_feature = "load-store-on-cond",
+)))]
+atomic_rmw_by_atomic_update!(cmp); // macro comes from the included macros.rs; presumably defines max/umax/min/umin via atomic_update
+
+atomic_rmw_cas_2! {
+    atomic_not,
+    "lgr %r13, %r1", // copy the previous low half
+    "xihf %r13, 4294967295", // flip its upper 32 bits (4294967295 == 0xFFFF_FFFF)
+    "xilf %r13, 4294967295", // flip its lower 32 bits
+    "lgr %r12, %r0", // copy the previous high half
+    "xihf %r12, 4294967295", // flip its upper 32 bits
+    "xilf %r12, 4294967295", // flip its lower 32 bits
+}
+atomic_rmw_cas_2! {
+    atomic_neg, // computes 0 - previous over the two halves
+    "lghi %r13, 0",
+    "slgr %r13, %r1", // low half: 0 - r1
+    "lghi %r12, 0", // NOTE(review): relies on lghi leaving the borrow from slgr intact - confirm
+    "slbgr %r12, %r0", // high half: 0 - r0 - borrow
+}
+
+#[inline]
+const fn is_lock_free() -> bool { // no run-time detection in this module: same answer as the constant
+    IS_ALWAYS_LOCK_FREE
+}
+const IS_ALWAYS_LOCK_FREE: bool = true; // unconditional: every operation above is implemented with lpq/stpq/cdsg
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min); // i128 type is wired to atomic_max/atomic_min
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); // u128 type is wired to atomic_umax/atomic_umin
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_atomic_int!(i128); // shared integer test suite, instantiated for the signed type
+    test_atomic_int!(u128); // ... and for the unsigned type
+
+    // The load/store/swap implementation is not affected by signedness, so it is
+    // enough to stress-test only the unsigned type.
+    stress_test!(u128);
+}
diff --git a/vendor/portable-atomic/src/imp/atomic128/x86_64.rs b/vendor/portable-atomic/src/imp/atomic128/x86_64.rs
new file mode 100644
index 0000000..3b9d141
--- /dev/null
+++ b/vendor/portable-atomic/src/imp/atomic128/x86_64.rs
@@ -0,0 +1,854 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Atomic{I,U}128 implementation on x86_64 using CMPXCHG16B (DWCAS).
+//
+// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
+// this module and use intrinsics.rs instead.
+//
+// Refs:
+// - x86 and amd64 instruction reference https://www.felixcloutier.com/x86
+// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
+//
+// Generated asm:
+// - x86_64 (+cmpxchg16b) https://godbolt.org/z/55n54WeKr
+
+include!("macros.rs");
+
+#[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
+#[path = "../fallback/outline_atomics.rs"]
+mod fallback;
+
+#[cfg(not(portable_atomic_no_outline_atomics))]
+#[cfg(not(target_env = "sgx"))]
+#[path = "detect/x86_64.rs"]
+mod detect;
+
+#[cfg(not(portable_atomic_no_asm))]
+use core::arch::asm;
+use core::sync::atomic::Ordering;
+
+use crate::utils::{Pair, U128};
+
+// Asserts (debug builds only) that the CPU supports CMPXCHG16B; expands to nothing when the feature is enabled at compile time.
+macro_rules! debug_assert_cmpxchg16b {
+    () => {
+        #[cfg(not(any(
+            target_feature = "cmpxchg16b",
+            portable_atomic_target_feature = "cmpxchg16b",
+        )))]
+        {
+            debug_assert!(detect::detect().has_cmpxchg16b()); // run-time detection result
+        }
+    };
+}
+#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] // same conditions under which `detect` is compiled
+#[cfg(target_feature = "sse")]
+macro_rules! debug_assert_vmovdqa_atomic { // debug-only check used by the vmovdqa load/store paths
+    () => {{
+        debug_assert_cmpxchg16b!();
+        debug_assert!(detect::detect().has_vmovdqa_atomic());
+    }};
+}
+
+#[allow(unused_macros)] // only the vmovdqa paths use this, and they are cfg-gated
+#[cfg(target_pointer_width = "32")]
+macro_rules! ptr_modifier { // spliced after a pointer operand name inside asm templates, e.g. "{src:e}"
+    () => {
+        ":e"
+    };
+}
+#[allow(unused_macros)] // only the vmovdqa paths use this, and they are cfg-gated
+#[cfg(target_pointer_width = "64")]
+macro_rules! ptr_modifier { // 64-bit pointers: no operand modifier is added
+    () => {
+        ""
+    };
+}
+
+#[cfg_attr(
+    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+    target_feature(enable = "cmpxchg16b")
+)]
+#[inline]
+unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) { // returns (previous value, whether the store happened)
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_cmpxchg16b!();
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned (required by CMPXCHG16B), that there are no
+    // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B.
+    //
+    // If the value at `dst` (destination operand) and rdx:rax are equal, the
+    // 128-bit value in rcx:rbx is stored in the `dst`, otherwise the value at
+    // `dst` is loaded to rdx:rax.
+    //
+    // The ZF flag is set if the value at `dst` and rdx:rax are equal,
+    // otherwise it is cleared. Other flags are unaffected.
+    //
+    // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b
+    unsafe {
+        // cmpxchg16b is always SeqCst.
+        let r: u8; // receives ZF via `sete`: nonzero means the exchange succeeded
+        let old = U128 { whole: old };
+        let new = U128 { whole: new };
+        let (prev_lo, prev_hi);
+        macro_rules! cmpxchg16b { // `$rdi` is the register name holding `dst`; it depends on the pointer width
+            ($rdi:tt) => {
+                asm!(
+                    // rbx is reserved by LLVM, so new.lo arrives in a scratch register and is swapped in.
+                    "xchg {rbx_tmp}, rbx",
+                    concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                    "sete r8b", // capture ZF before anything else can change it
+                    "mov rbx, {rbx_tmp}", // restore rbx
+                    rbx_tmp = inout(reg) new.pair.lo => _,
+                    in("rcx") new.pair.hi,
+                    inout("rax") old.pair.lo => prev_lo,
+                    inout("rdx") old.pair.hi => prev_hi,
+                    in($rdi) dst,
+                    out("r8b") r,
+                    // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                    options(nostack),
+                )
+            };
+        }
+        #[cfg(target_pointer_width = "32")]
+        cmpxchg16b!("edi");
+        #[cfg(target_pointer_width = "64")]
+        cmpxchg16b!("rdi");
+        (U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole, r != 0)
+    }
+}
+
+// VMOVDQA is atomic on Intel and AMD CPUs with AVX.
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details.
+//
+// Refs: https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64
+//
+// Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
+// https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
+#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
+#[cfg(target_feature = "sse")]
+#[target_feature(enable = "avx")]
+#[inline]
+unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 { // 128-bit load through an xmm register
+    debug_assert!(src as usize % 16 == 0);
+    debug_assert_vmovdqa_atomic!(); // debug-only: detection must report cmpxchg16b and atomic vmovdqa
+
+    // SAFETY: the caller must uphold the safety contract.
+    //
+    // atomic load by vmovdqa is always SeqCst.
+    unsafe {
+        let out: core::arch::x86_64::__m128;
+        asm!(
+            concat!("vmovdqa {out}, xmmword ptr [{src", ptr_modifier!(), "}]"),
+            src = in(reg) src,
+            out = out(xmm_reg) out,
+            options(nostack, preserves_flags),
+        );
+        core::mem::transmute(out) // reinterpret the 16-byte vector as u128
+    }
+}
+#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))]
+#[cfg(target_feature = "sse")]
+#[target_feature(enable = "avx")]
+#[inline]
+unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) { // panics on any order other than Relaxed/Release/SeqCst
+    debug_assert!(dst as usize % 16 == 0);
+    debug_assert_vmovdqa_atomic!(); // debug-only: detection must report cmpxchg16b and atomic vmovdqa
+
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        let val: core::arch::x86_64::__m128 = core::mem::transmute(val); // reinterpret u128 as a 16-byte vector
+        match order {
+            // Relaxed and Release stores are equivalent: a plain vmovdqa with no fence.
+            Ordering::Relaxed | Ordering::Release => {
+                asm!(
+                    concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"),
+                    dst = in(reg) dst,
+                    val = in(xmm_reg) val,
+                    options(nostack, preserves_flags),
+                );
+            }
+            Ordering::SeqCst => {
+                asm!(
+                    concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"),
+                    "mfence", // the only difference from the Relaxed/Release arm
+                    dst = in(reg) dst,
+                    val = in(xmm_reg) val,
+                    options(nostack, preserves_flags),
+                );
+            }
+            _ => unreachable!("{:?}", order),
+        }
+    }
+}
+
+#[cfg(not(all( // defined exactly when atomic_load takes its ifunc (run-time selected) path
+    any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+    any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
+)))]
+macro_rules! load_store_detect { // evaluates to one of the three given implementations, chosen from detect::detect()
+    (
+        vmovdqa = $vmovdqa:ident
+        cmpxchg16b = $cmpxchg16b:ident
+        fallback = $fallback:ident
+    ) => {{
+        let cpuid = detect::detect();
+        #[cfg(not(any(
+            target_feature = "cmpxchg16b",
+            portable_atomic_target_feature = "cmpxchg16b",
+        )))]
+        {
+            // Check CMPXCHG16B first to prevent mixing atomic and non-atomic access.
+            if cpuid.has_cmpxchg16b() {
+                // We do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
+                #[cfg(target_feature = "sse")]
+                {
+                    if cpuid.has_vmovdqa_atomic() {
+                        $vmovdqa
+                    } else {
+                        $cmpxchg16b
+                    }
+                }
+                #[cfg(not(target_feature = "sse"))]
+                {
+                    $cmpxchg16b
+                }
+            } else {
+                fallback::$fallback // no CMPXCHG16B at run time: use the `fallback` module
+            }
+        }
+        #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+        { // CMPXCHG16B is static here, and the outer cfg implies SSE is enabled, so only vmovdqa needs checking
+            if cpuid.has_vmovdqa_atomic() {
+                $vmovdqa
+            } else {
+                $cmpxchg16b
+            }
+        }
+    }};
+}
+
+#[inline]
+unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { // `_order` is unused on both paths
+    // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
+    // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
+    // SGX doesn't support CPUID.
+    #[cfg(all(
+        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
+    ))]
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that CMPXCHG16B is available at compile-time.
+    unsafe {
+        // cmpxchg16b is always SeqCst.
+        atomic_load_cmpxchg16b(src)
+    }
+    #[cfg(not(all(
+        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
+    )))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        ifunc!(unsafe fn(src: *mut u128) -> u128 {
+            load_store_detect! {
+                vmovdqa = atomic_load_vmovdqa
+                cmpxchg16b = atomic_load_cmpxchg16b
+                // Use SeqCst because cmpxchg16b and atomic load by vmovdqa are always SeqCst.
+                fallback = atomic_load_seqcst
+            }
+        })
+    }
+}
+#[cfg_attr(
+    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+    target_feature(enable = "cmpxchg16b")
+)]
+#[inline]
+unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 { // load implemented as a compare-exchange of 0 with 0
+    debug_assert!(src as usize % 16 == 0);
+    debug_assert_cmpxchg16b!();
+
+    // SAFETY: the caller must guarantee that `src` is valid for both writes and
+    // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+    // cfg guarantees that the CPU supports CMPXCHG16B.
+    //
+    // See cmpxchg16b function for more.
+    //
+    // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows
+    // omitting the storing of condition flags and avoid use of xchg to handle rbx.
+    unsafe {
+        // cmpxchg16b is always SeqCst.
+        let (out_lo, out_hi);
+        macro_rules! cmpxchg16b { // `$rdi` is the register name holding `src`; it depends on the pointer width
+            ($rdi:tt) => {
+                asm!(
+                    // rbx is reserved by LLVM, so it is saved and restored by hand around the instruction.
+                    "mov {rbx_tmp}, rbx",
+                    "xor rbx, rbx", // zeroed rbx
+                    concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                    "mov rbx, {rbx_tmp}", // restore rbx
+                    // set old/new args of cmpxchg16b to 0 (rbx is zeroed after saved to rbx_tmp, to avoid xchg)
+                    rbx_tmp = out(reg) _,
+                    in("rcx") 0_u64,
+                    inout("rax") 0_u64 => out_lo, // afterwards rdx:rax holds the value that was at `src`
+                    inout("rdx") 0_u64 => out_hi,
+                    in($rdi) src,
+                    // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                    options(nostack),
+                )
+            };
+        }
+        #[cfg(target_pointer_width = "32")]
+        cmpxchg16b!("edi");
+        #[cfg(target_pointer_width = "64")]
+        cmpxchg16b!("rdi");
+        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
+    }
+}
+
+#[inline]
+unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
+    // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled.
+    // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html
+    // SGX doesn't support CPUID.
+    #[cfg(all(
+        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
+    ))]
+    // SAFETY: the caller must uphold the safety contract.
+    // cfg guarantees that CMPXCHG16B is available at compile-time.
+    unsafe {
+        // cmpxchg16b is always SeqCst, so the requested ordering can be ignored.
+        let _ = order;
+        atomic_store_cmpxchg16b(dst, val);
+    }
+    #[cfg(not(all(
+        any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"),
+        any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")),
+    )))]
+    // SAFETY: the caller must uphold the safety contract.
+    unsafe {
+        #[cfg(target_feature = "sse")]
+        fn_alias! {
+            #[target_feature(enable = "avx")]
+            unsafe fn(dst: *mut u128, val: u128);
+            // An atomic store by vmovdqa has at least release semantics.
+            atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release);
+            atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst);
+        }
+        match order {
+            // Relaxed and Release stores are equivalent in all implementations
+            // that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback).
+            // core::arch's cmpxchg16b will never be called here.
+            Ordering::Relaxed | Ordering::Release => {
+                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                    load_store_detect! {
+                        vmovdqa = atomic_store_vmovdqa_non_seqcst
+                        cmpxchg16b = atomic_store_cmpxchg16b
+                        fallback = atomic_store_non_seqcst
+                    }
+                });
+            }
+            Ordering::SeqCst => {
+                ifunc!(unsafe fn(dst: *mut u128, val: u128) {
+                    load_store_detect! {
+                        vmovdqa = atomic_store_vmovdqa_seqcst
+                        cmpxchg16b = atomic_store_cmpxchg16b
+                        fallback = atomic_store_seqcst
+                    }
+                });
+            }
+            _ => unreachable!("{:?}", order), // remaining orderings (Acquire/AcqRel) are not valid for stores
+        }
+    }
+}
+#[cfg_attr(
+    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+    target_feature(enable = "cmpxchg16b")
+)]
+unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) { // store = swap with the previous value discarded
+    // SAFETY: the caller must uphold the safety contract of atomic_swap_cmpxchg16b.
+    unsafe {
+        // cmpxchg16b is always SeqCst.
+        atomic_swap_cmpxchg16b(dst, val, Ordering::SeqCst);
+    }
+}
+
+#[inline]
+unsafe fn atomic_compare_exchange(
+    dst: *mut u128,
+    old: u128,
+    new: u128,
+    _success: Ordering, // ignored: both code paths below are SeqCst
+    _failure: Ordering, // ignored for the same reason
+) -> Result<u128, u128> {
+    #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
+    // and cfg guarantees that CMPXCHG16B is available at compile-time.
+    let (prev, ok) = unsafe { cmpxchg16b(dst, old, new) };
+    #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses.
+    let (prev, ok) = unsafe {
+        ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) {
+            if detect::detect().has_cmpxchg16b() { // chosen by run-time CPU feature detection
+                cmpxchg16b
+            } else {
+                // Use SeqCst because cmpxchg16b is always SeqCst.
+                fallback::atomic_compare_exchange_seqcst
+            }
+        })
+    };
+    if ok { // Ok(previous) on success, Err(previous) on failure
+        Ok(prev)
+    } else {
+        Err(prev)
+    }
+}
+
+// cmpxchg16b is always strong.
+use atomic_compare_exchange as atomic_compare_exchange_weak;
+
+#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+use atomic_swap_cmpxchg16b as atomic_swap;
+#[cfg_attr(
+    not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+    target_feature(enable = "cmpxchg16b")
+)]
+#[inline]
+unsafe fn atomic_swap_cmpxchg16b(dst: *mut u128, val: u128, _order: Ordering) -> u128 { // returns the previous value
+    debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+    debug_assert_cmpxchg16b!();
+
+    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+    // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+    // cfg guarantees that the CPU supports CMPXCHG16B.
+    //
+    // See cmpxchg16b function for more.
+    //
+    // We could use a CAS loop by atomic_compare_exchange here, but using inline assembly allows
+    // omitting the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx.
+    //
+    // Do not use atomic_rmw_cas_3 because it needs an extra MOV to implement swap.
+    unsafe {
+        // cmpxchg16b is always SeqCst, so `_order` is ignored.
+        let val = U128 { whole: val };
+        let (mut prev_lo, mut prev_hi);
+        macro_rules! cmpxchg16b {
+            ($rdi:tt) => {
+                asm!(
+                    // rbx is reserved by LLVM: swap it with rbx_tmp, which holds val.lo on entry
+                    "xchg {rbx_tmp}, rbx",
+                    // These two 64-bit loads are not a single-copy atomic 128-bit read, but this
+                    // is ok because the subsequent CAS will check for consistency.
+                    //
+                    // This is based on the code generated for the first load in DW RMWs by LLVM.
+                    //
+                    // Note that the C++20 memory model does not allow mixed-sized atomic access,
+                    // so we must use inline assembly to implement this.
+                    // (i.e., byte-wise atomic based on the standard library's atomic types
+                    // cannot be used here).
+                    concat!("mov rax, qword ptr [", $rdi, "]"),
+                    concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
+                    "2:", // retry point
+                        concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                        "jne 2b", // ZF clear: CAS failed, rax/rdx now hold the current value; retry
+                    "mov rbx, {rbx_tmp}", // restore rbx
+                    rbx_tmp = inout(reg) val.pair.lo => _,
+                    in("rcx") val.pair.hi,
+                    out("rax") prev_lo,
+                    out("rdx") prev_hi,
+                    in($rdi) dst,
+                    // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                    options(nostack),
+                )
+            };
+        }
+        #[cfg(target_pointer_width = "32")]
+        cmpxchg16b!("edi"); // 32-bit pointers are addressed through edi
+        #[cfg(target_pointer_width = "64")]
+        cmpxchg16b!("rdi");
+        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
+    }
+}
+
+/// Atomic RMW by CAS loop (3 arguments)
+/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
+///
+/// `$op` can use the following registers:
+/// - rsi/r8 pair: val argument (read-only for `$op`)
+/// - rax/rdx pair: previous value loaded (read-only for `$op`)
+/// - rbx/rcx pair: new value that will be stored
+// We could use a CAS loop by atomic_compare_exchange here, but using inline assembly allows
+// omitting the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx.
+macro_rules! atomic_rmw_cas_3 {
+    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
+        #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+        use $name as $reexport_name; // with compile-time CMPXCHG16B the asm function is used directly
+        #[cfg_attr(
+            not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+            target_feature(enable = "cmpxchg16b")
+        )]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+            debug_assert_cmpxchg16b!();
+            // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+            // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+            // cfg guarantees that the CPU supports CMPXCHG16B.
+            //
+            // See cmpxchg16b function for more.
+            unsafe {
+                // cmpxchg16b is always SeqCst, so `_order` is ignored.
+                let val = U128 { whole: val };
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! cmpxchg16b {
+                    ($rdi:tt) => {
+                        asm!(
+                            // rbx is reserved by LLVM, so save it before `$op` overwrites it
+                            "mov {rbx_tmp}, rbx",
+                            // These two 64-bit loads are not a single-copy atomic 128-bit read, but
+                            // this is ok because the subsequent CAS will check for consistency.
+                            //
+                            // This is based on the code generated for the first load in DW RMWs by LLVM.
+                            //
+                            // Note that the C++20 memory model does not allow mixed-sized atomic access,
+                            // so we must use inline assembly to implement this.
+                            // (i.e., byte-wise atomic based on the standard library's atomic types
+                            // cannot be used here).
+                            concat!("mov rax, qword ptr [", $rdi, "]"),
+                            concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
+                            "2:", // retry point: `$op` recomputes rbx/rcx from rax/rdx each time
+                                $($op)*
+                                concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                                "jne 2b", // ZF clear: CAS failed, rax/rdx hold the current value; retry
+                            "mov rbx, {rbx_tmp}", // restore rbx
+                            rbx_tmp = out(reg) _,
+                            out("rcx") _,
+                            out("rax") prev_lo,
+                            out("rdx") prev_hi,
+                            in($rdi) dst,
+                            in("rsi") val.pair.lo,
+                            in("r8") val.pair.hi,
+                            // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                            options(nostack),
+                        )
+                    };
+                }
+                #[cfg(target_pointer_width = "32")]
+                cmpxchg16b!("edi"); // 32-bit pointers are addressed through edi
+                #[cfg(target_pointer_width = "64")]
+                cmpxchg16b!("rdi");
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // the value observed before the update
+            }
+        }
+    };
+}
+/// Atomic RMW by CAS loop (2 arguments)
+/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
+///
+/// `$op` can use the following registers:
+/// - rax/rdx pair: previous value loaded (read-only for `$op`)
+/// - rbx/rcx pair: new value that will be stored
+// We could use a CAS loop by atomic_compare_exchange here, but using inline assembly allows
+// omitting the storing of condition flags and avoids the use of xchg to handle rbx.
+macro_rules! atomic_rmw_cas_2 {
+    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
+        #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+        use $name as $reexport_name; // with compile-time CMPXCHG16B the asm function is used directly
+        #[cfg_attr(
+            not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")),
+            target_feature(enable = "cmpxchg16b")
+        )]
+        #[inline]
+        unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 {
+            debug_assert!(dst as usize % 16 == 0); // `dst` must be 16-byte aligned
+            debug_assert_cmpxchg16b!();
+            // SAFETY: the caller must guarantee that `dst` is valid for both writes and
+            // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
+            // cfg guarantees that the CPU supports CMPXCHG16B.
+            //
+            // See cmpxchg16b function for more.
+            unsafe {
+                // cmpxchg16b is always SeqCst, so `_order` is ignored.
+                let (mut prev_lo, mut prev_hi);
+                macro_rules! cmpxchg16b {
+                    ($rdi:tt) => {
+                        asm!(
+                            // rbx is reserved by LLVM, so save it before `$op` overwrites it
+                            "mov {rbx_tmp}, rbx",
+                            // These two 64-bit loads are not a single-copy atomic 128-bit read, but
+                            // this is ok because the subsequent CAS will check for consistency.
+                            //
+                            // This is based on the code generated for the first load in DW RMWs by LLVM.
+                            //
+                            // Note that the C++20 memory model does not allow mixed-sized atomic access,
+                            // so we must use inline assembly to implement this.
+                            // (i.e., byte-wise atomic based on the standard library's atomic types
+                            // cannot be used here).
+                            concat!("mov rax, qword ptr [", $rdi, "]"),
+                            concat!("mov rdx, qword ptr [", $rdi, " + 8]"),
+                            "2:", // retry point: `$op` recomputes rbx/rcx from rax/rdx each time
+                                $($op)*
+                                concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"),
+                                "jne 2b", // ZF clear: CAS failed, rax/rdx hold the current value; retry
+                            "mov rbx, {rbx_tmp}", // restore rbx
+                            rbx_tmp = out(reg) _,
+                            out("rcx") _,
+                            out("rax") prev_lo,
+                            out("rdx") prev_hi,
+                            in($rdi) dst,
+                            // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag.
+                            options(nostack),
+                        )
+                    };
+                }
+                #[cfg(target_pointer_width = "32")]
+                cmpxchg16b!("edi"); // 32-bit pointers are addressed through edi
+                #[cfg(target_pointer_width = "64")]
+                cmpxchg16b!("rdi");
+                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole // the value observed before the update
+            }
+        }
+    };
+}
+
+atomic_rmw_cas_3! {
+    atomic_add_cmpxchg16b as atomic_add,
+    "mov rbx, rax",
+    "add rbx, rsi", // new.lo = prev.lo + val.lo (carry goes to CF)
+    "mov rcx, rdx", // mov leaves CF untouched
+    "adc rcx, r8", // new.hi = prev.hi + val.hi + CF
+}
+atomic_rmw_cas_3! {
+    atomic_sub_cmpxchg16b as atomic_sub,
+    "mov rbx, rax",
+    "sub rbx, rsi", // new.lo = prev.lo - val.lo (borrow goes to CF)
+    "mov rcx, rdx", // mov leaves CF untouched
+    "sbb rcx, r8", // new.hi = prev.hi - val.hi - CF
+}
+atomic_rmw_cas_3! {
+    atomic_and_cmpxchg16b as atomic_and,
+    "mov rbx, rax",
+    "and rbx, rsi", // new.lo = prev.lo & val.lo
+    "mov rcx, rdx",
+    "and rcx, r8", // new.hi = prev.hi & val.hi
+}
+atomic_rmw_cas_3! {
+    atomic_nand_cmpxchg16b as atomic_nand,
+    "mov rbx, rax",
+    "and rbx, rsi",
+    "not rbx", // new.lo = !(prev.lo & val.lo)
+    "mov rcx, rdx",
+    "and rcx, r8",
+    "not rcx", // new.hi = !(prev.hi & val.hi)
+}
+atomic_rmw_cas_3! {
+    atomic_or_cmpxchg16b as atomic_or,
+    "mov rbx, rax",
+    "or rbx, rsi", // new.lo = prev.lo | val.lo
+    "mov rcx, rdx",
+    "or rcx, r8", // new.hi = prev.hi | val.hi
+}
+atomic_rmw_cas_3! {
+    atomic_xor_cmpxchg16b as atomic_xor,
+    "mov rbx, rax",
+    "xor rbx, rsi", // new.lo = prev.lo ^ val.lo
+    "mov rcx, rdx",
+    "xor rcx, r8", // new.hi = prev.hi ^ val.hi
+}
+
+atomic_rmw_cas_2! {
+    atomic_not_cmpxchg16b as atomic_not,
+    "mov rbx, rax",
+    "not rbx", // new.lo = !prev.lo
+    "mov rcx, rdx",
+    "not rcx", // new.hi = !prev.hi
+}
+atomic_rmw_cas_2! {
+    atomic_neg_cmpxchg16b as atomic_neg,
+    "mov rbx, rax",
+    "neg rbx", // new.lo = -prev.lo; CF is set iff prev.lo != 0
+    "mov rcx, 0", // mov (unlike xor) does not touch CF
+    "sbb rcx, rdx", // new.hi = 0 - prev.hi - CF
+}
+
+atomic_rmw_cas_3! {
+    atomic_max_cmpxchg16b as atomic_max,
+    "cmp rsi, rax", // val.lo - prev.lo, borrow goes to CF
+    "mov rcx, r8",
+    "sbb rcx, rdx", // val.hi - prev.hi - CF: flags now reflect the 128-bit val - prev
+    "mov rcx, r8", // reload val.hi; the sbb result was only needed for its flags
+    "cmovl rcx, rdx", // signed val < prev: keep prev.hi
+    "mov rbx, rsi",
+    "cmovl rbx, rax", // signed val < prev: keep prev.lo
+}
+atomic_rmw_cas_3! {
+    atomic_umax_cmpxchg16b as atomic_umax,
+    "cmp rsi, rax", // val.lo - prev.lo, borrow goes to CF
+    "mov rcx, r8",
+    "sbb rcx, rdx", // val.hi - prev.hi - CF: flags now reflect the 128-bit val - prev
+    "mov rcx, r8", // reload val.hi; the sbb result was only needed for its flags
+    "cmovb rcx, rdx", // unsigned val < prev: keep prev.hi
+    "mov rbx, rsi",
+    "cmovb rbx, rax", // unsigned val < prev: keep prev.lo
+}
+atomic_rmw_cas_3! {
+    atomic_min_cmpxchg16b as atomic_min,
+    "cmp rsi, rax", // val.lo - prev.lo, borrow goes to CF
+    "mov rcx, r8",
+    "sbb rcx, rdx", // val.hi - prev.hi - CF: flags now reflect the 128-bit val - prev
+    "mov rcx, r8", // reload val.hi; the sbb result was only needed for its flags
+    "cmovge rcx, rdx", // signed val >= prev: keep prev.hi
+    "mov rbx, rsi",
+    "cmovge rbx, rax", // signed val >= prev: keep prev.lo
+}
+atomic_rmw_cas_3! {
+    atomic_umin_cmpxchg16b as atomic_umin,
+    "cmp rsi, rax", // val.lo - prev.lo, borrow goes to CF
+    "mov rcx, r8",
+    "sbb rcx, rdx", // val.hi - prev.hi - CF: flags now reflect the 128-bit val - prev
+    "mov rcx, r8", // reload val.hi; the sbb result was only needed for its flags
+    "cmovae rcx, rdx", // unsigned val >= prev: keep prev.hi
+    "mov rbx, rsi",
+    "cmovae rbx, rax", // unsigned val >= prev: keep prev.lo
+}
+
+macro_rules! atomic_rmw_with_ifunc { // defines `$name` as a run-time dispatch between CMPXCHG16B and the fallback
+    (
+        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?;
+        cmpxchg16b = $cmpxchg16b_fn:ident;
+        fallback = $seqcst_fallback_fn:ident;
+    ) => {
+        #[cfg(not(any( // only emitted when CMPXCHG16B is not enabled at compile-time
+            target_feature = "cmpxchg16b",
+            portable_atomic_target_feature = "cmpxchg16b",
+        )))]
+        #[inline]
+        unsafe fn $name($($arg)*, _order: Ordering) $(-> $ret_ty)? {
+            fn_alias! {
+                #[cfg_attr(
+                    not(any(
+                        target_feature = "cmpxchg16b",
+                        portable_atomic_target_feature = "cmpxchg16b",
+                    )),
+                    target_feature(enable = "cmpxchg16b")
+                )]
+                unsafe fn($($arg)*) $(-> $ret_ty)?;
+                // cmpxchg16b is always SeqCst.
+                cmpxchg16b_seqcst_fn = $cmpxchg16b_fn(Ordering::SeqCst);
+            }
+            // SAFETY: the caller must uphold the safety contract.
+            // We only call cmpxchg16b_fn if cmpxchg16b is available.
+            unsafe {
+                ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? {
+                    if detect::detect().has_cmpxchg16b() {
+                        cmpxchg16b_seqcst_fn
+                    } else {
+                        // Use SeqCst because cmpxchg16b is always SeqCst.
+                        fallback::$seqcst_fallback_fn
+                    }
+                })
+            }
+        }
+    };
+}
+
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128; // the generated fn also takes a trailing `_order: Ordering`
+    cmpxchg16b = atomic_swap_cmpxchg16b; // used when CMPXCHG16B is detected at run-time
+    fallback = atomic_swap_seqcst; // resolved as `fallback::atomic_swap_seqcst` otherwise
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_add_cmpxchg16b;
+    fallback = atomic_add_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_sub_cmpxchg16b;
+    fallback = atomic_sub_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_and_cmpxchg16b;
+    fallback = atomic_and_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_nand_cmpxchg16b;
+    fallback = atomic_nand_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_or_cmpxchg16b;
+    fallback = atomic_or_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128;
+    cmpxchg16b = atomic_xor_cmpxchg16b;
+    fallback = atomic_xor_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128; // signed maximum
+    cmpxchg16b = atomic_max_cmpxchg16b;
+    fallback = atomic_max_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128; // unsigned maximum
+    cmpxchg16b = atomic_umax_cmpxchg16b;
+    fallback = atomic_umax_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128; // signed minimum
+    cmpxchg16b = atomic_min_cmpxchg16b;
+    fallback = atomic_min_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128; // unsigned minimum
+    cmpxchg16b = atomic_umin_cmpxchg16b;
+    fallback = atomic_umin_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_not(dst: *mut u128) -> u128; // no `val` operand
+    cmpxchg16b = atomic_not_cmpxchg16b;
+    fallback = atomic_not_seqcst;
+}
+atomic_rmw_with_ifunc! {
+    unsafe fn atomic_neg(dst: *mut u128) -> u128; // no `val` operand
+    cmpxchg16b = atomic_neg_cmpxchg16b;
+    fallback = atomic_neg_seqcst;
+}
+
+#[inline]
+fn is_lock_free() -> bool { // exactly one of the two cfg blocks below is compiled in
+    #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))]
+    {
+        // CMPXCHG16B is available at compile-time.
+        true
+    }
+    #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))]
+    {
+        detect::detect().has_cmpxchg16b() // otherwise ask run-time CPU feature detection
+    }
+}
+const IS_ALWAYS_LOCK_FREE: bool = // true only when CMPXCHG16B is enabled at compile-time
+    cfg!(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"));
+
+atomic128!(AtomicI128, i128, atomic_max, atomic_min); // signed type: signed max/min
+atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); // unsigned type: unsigned max/min
+
+#[allow(clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)] // wildcard: `use super::*`; unsafe: presumably from the test macros
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    test_atomic_int!(i128);
+    test_atomic_int!(u128);
+
+    // The load/store/swap implementation is not affected by signedness, so it is
+    // enough to stress-test only the unsigned type.
+    stress_test!(u128);
+}