/*! Wrapper routines for `memchr` and friends. These routines efficiently dispatch to the best implementation based on what the CPU supports. */ /// Provides a way to run a memchr-like function while amortizing the cost of /// runtime CPU feature detection. /// /// This works by loading a function pointer from an atomic global. Initially, /// this global is set to a function that does CPU feature detection. For /// example, if AVX2 is enabled, then the AVX2 implementation is used. /// Otherwise, at least on x86_64, the SSE2 implementation is used. (And /// in some niche cases, if SSE2 isn't available, then the architecture /// independent fallback implementation is used.) /// /// After the first call to this function, the atomic global is replaced with /// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then /// will directly call the chosen routine instead of needing to go through the /// CPU feature detection branching again. /// /// This particular macro is specifically written to provide the implementation /// of functions with the following signature: /// /// ```ignore /// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option; /// ``` /// /// Where you can also have `memchr2` and `memchr3`, but with `needle2` and /// `needle3`, respectively. The `start` and `end` parameters correspond to the /// start and end of the haystack, respectively. /// /// We use raw pointers here instead of the more obvious `haystack: &[u8]` so /// that the function is compatible with our lower level iterator logic that /// operates on raw pointers. We use this macro to implement "raw" memchr /// routines with the signature above, and then define memchr routines using /// regular slices on top of them. /// /// Note that we use `#[cfg(target_feature = "sse2")]` below even though /// it shouldn't be strictly necessary because without it, it seems to /// cause the compiler to blow up. I guess it can't handle a function /// pointer being created with a sse target feature? Dunno. See the /// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with /// this. /// /// # Safety /// /// Primarily callers must that `$fnty` is a correct function pointer type and /// not something else. /// /// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a /// routine that returns a valid function pointer when a match is found. That /// is, a pointer that is `>= start` and `< end`. /// /// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers /// correspond to valid pointers. macro_rules! unsafe_ifunc { ( $memchrty:ident, $memchrfind:ident, $fnty:ty, $retty:ty, $hay_start:ident, $hay_end:ident, $($needle:ident),+ ) => {{ #![allow(unused_unsafe)] use core::sync::atomic::{AtomicPtr, Ordering}; type Fn = *mut (); type RealFn = $fnty; static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn); #[cfg(target_feature = "sse2")] #[target_feature(enable = "sse2", enable = "avx2")] unsafe fn find_avx2( $($needle: u8),+, $hay_start: *const u8, $hay_end: *const u8, ) -> $retty { use crate::arch::x86_64::avx2::memchr::$memchrty; $memchrty::new_unchecked($($needle),+) .$memchrfind($hay_start, $hay_end) } #[cfg(target_feature = "sse2")] #[target_feature(enable = "sse2")] unsafe fn find_sse2( $($needle: u8),+, $hay_start: *const u8, $hay_end: *const u8, ) -> $retty { use crate::arch::x86_64::sse2::memchr::$memchrty; $memchrty::new_unchecked($($needle),+) .$memchrfind($hay_start, $hay_end) } unsafe fn find_fallback( $($needle: u8),+, $hay_start: *const u8, $hay_end: *const u8, ) -> $retty { use crate::arch::all::memchr::$memchrty; $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end) } unsafe fn detect( $($needle: u8),+, $hay_start: *const u8, $hay_end: *const u8, ) -> $retty { let fun = { #[cfg(not(target_feature = "sse2"))] { debug!( "no sse2 feature available, using fallback for {}", stringify!($memchrty), ); find_fallback as RealFn } #[cfg(target_feature = "sse2")] { use crate::arch::x86_64::{sse2, avx2}; if avx2::memchr::$memchrty::is_available() { debug!("chose AVX2 for {}", stringify!($memchrty)); find_avx2 as RealFn } else if sse2::memchr::$memchrty::is_available() { debug!("chose SSE2 for {}", stringify!($memchrty)); find_sse2 as RealFn } else { debug!("chose fallback for {}", stringify!($memchrty)); find_fallback as RealFn } } }; FN.store(fun as Fn, Ordering::Relaxed); // SAFETY: The only thing we need to uphold here is the // `#[target_feature]` requirements. Since we check is_available // above before using the corresponding implementation, we are // guaranteed to only call code that is supported on the current // CPU. fun($($needle),+, $hay_start, $hay_end) } // SAFETY: By virtue of the caller contract, RealFn is a function // pointer, which is always safe to transmute with a *mut (). Also, // since we use $memchrty::is_available, it is guaranteed to be safe // to call $memchrty::$memchrfind. unsafe { let fun = FN.load(Ordering::Relaxed); core::mem::transmute::(fun)( $($needle),+, $hay_start, $hay_end, ) } }}; } // The routines below dispatch to AVX2, SSE2 or a fallback routine based on // what's available in the current environment. The secret sauce here is that // we only check for which one to use approximately once, and then "cache" that // choice into a global function pointer. Subsequent invocations then just call // the appropriate function directly. /// memchr, but using raw pointers to represent the haystack. /// /// # Safety /// /// Pointers must be valid. See `One::find_raw`. #[inline(always)] pub(crate) fn memchr_raw( n1: u8, start: *const u8, end: *const u8, ) -> Option<*const u8> { // SAFETY: We provide a valid function pointer type. unsafe_ifunc!( One, find_raw, unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, Option<*const u8>, start, end, n1 ) } /// memrchr, but using raw pointers to represent the haystack. /// /// # Safety /// /// Pointers must be valid. See `One::rfind_raw`. #[inline(always)] pub(crate) fn memrchr_raw( n1: u8, start: *const u8, end: *const u8, ) -> Option<*const u8> { // SAFETY: We provide a valid function pointer type. unsafe_ifunc!( One, rfind_raw, unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, Option<*const u8>, start, end, n1 ) } /// memchr2, but using raw pointers to represent the haystack. /// /// # Safety /// /// Pointers must be valid. See `Two::find_raw`. #[inline(always)] pub(crate) fn memchr2_raw( n1: u8, n2: u8, start: *const u8, end: *const u8, ) -> Option<*const u8> { // SAFETY: We provide a valid function pointer type. unsafe_ifunc!( Two, find_raw, unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, Option<*const u8>, start, end, n1, n2 ) } /// memrchr2, but using raw pointers to represent the haystack. /// /// # Safety /// /// Pointers must be valid. See `Two::rfind_raw`. #[inline(always)] pub(crate) fn memrchr2_raw( n1: u8, n2: u8, start: *const u8, end: *const u8, ) -> Option<*const u8> { // SAFETY: We provide a valid function pointer type. unsafe_ifunc!( Two, rfind_raw, unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, Option<*const u8>, start, end, n1, n2 ) } /// memchr3, but using raw pointers to represent the haystack. /// /// # Safety /// /// Pointers must be valid. See `Three::find_raw`. #[inline(always)] pub(crate) fn memchr3_raw( n1: u8, n2: u8, n3: u8, start: *const u8, end: *const u8, ) -> Option<*const u8> { // SAFETY: We provide a valid function pointer type. unsafe_ifunc!( Three, find_raw, unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, Option<*const u8>, start, end, n1, n2, n3 ) } /// memrchr3, but using raw pointers to represent the haystack. /// /// # Safety /// /// Pointers must be valid. See `Three::rfind_raw`. #[inline(always)] pub(crate) fn memrchr3_raw( n1: u8, n2: u8, n3: u8, start: *const u8, end: *const u8, ) -> Option<*const u8> { // SAFETY: We provide a valid function pointer type. unsafe_ifunc!( Three, rfind_raw, unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, Option<*const u8>, start, end, n1, n2, n3 ) } /// Count all matching bytes, but using raw pointers to represent the haystack. /// /// # Safety /// /// Pointers must be valid. See `One::count_raw`. #[inline(always)] pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize { // SAFETY: We provide a valid function pointer type. unsafe_ifunc!( One, count_raw, unsafe fn(u8, *const u8, *const u8) -> usize, usize, start, end, n1 ) }