Diffstat (limited to 'vendor/memchr/src')
46 files changed, 15763 insertions, 0 deletions
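The hunks below all follow the same shape: each searcher type (`One`, `Two`, `Three`) exposes a constructor, slice-based `find`/`rfind` (plus `count` on `One`), raw-pointer variants of each, and a double-ended iterator. A minimal usage sketch of that API follows; it is illustrative only and assumes the crate is consumed under its usual name `memchr`, with the portable `arch::all::memchr` module publicly reachable as defined in the files below:

    use memchr::arch::all::memchr::One;

    fn main() {
        let haystack = b"foo\nbar\nbaz\n";
        // The portable fallback constructor is infallible; the vector-specific
        // searchers in this diff (e.g. the neon `One`) return `Option` instead
        // and are gated on `is_available()`.
        let searcher = One::new(b'\n');
        assert_eq!(Some(3), searcher.find(haystack));
        assert_eq!(Some(11), searcher.rfind(haystack));
        // `count` is specialized and is usually faster than calling `find` in a loop.
        assert_eq!(3, searcher.count(haystack));
        // The iterator is double ended, so matches can also be walked in reverse.
        let offsets: Vec<usize> = searcher.iter(haystack).collect();
        assert_eq!(offsets, vec![3, 7, 11]);
    }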
diff --git a/vendor/memchr/src/arch/aarch64/memchr.rs b/vendor/memchr/src/arch/aarch64/memchr.rs new file mode 100644 index 0000000..e0053b2 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/memchr.rs @@ -0,0 +1,137 @@ +/*! +Wrapper routines for `memchr` and friends. + +These routines choose the best implementation at compile time. (This is +different from `x86_64` because it is expected that `neon` is almost always +available for `aarch64` targets.) +*/ + +macro_rules! defraw { +    ($ty:ident, $find:ident, $start:ident, $end:ident, $($needles:ident),+) => {{ +        #[cfg(target_feature = "neon")] +        { +            use crate::arch::aarch64::neon::memchr::$ty; + +            debug!("chose neon for {}", stringify!($ty)); +            debug_assert!($ty::is_available()); +            // SAFETY: We know that neon memchr is always available whenever +            // code is compiled for `aarch64` with the `neon` target feature +            // enabled. +            $ty::new_unchecked($($needles),+).$find($start, $end) +        } +        #[cfg(not(target_feature = "neon"))] +        { +            use crate::arch::all::memchr::$ty; + +            debug!( +                "no neon feature available, using fallback for {}", +                stringify!($ty), +            ); +            $ty::new($($needles),+).$find($start, $end) +        } +    }} +} + +/// memchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr_raw( +    n1: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(One, find_raw, start, end, n1) +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr_raw( +    n1: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(One, rfind_raw, start, end, n1) +} + +/// memchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr2_raw( +    n1: u8, +    n2: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(Two, find_raw, start, end, n1, n2) +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr2_raw( +    n1: u8, +    n2: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(Two, rfind_raw, start, end, n1, n2) +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr3_raw( +    n1: u8, +    n2: u8, +    n3: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(Three, find_raw, start, end, n1, n2, n3) +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr3_raw( +    n1: u8, +    n2: u8, +    n3: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(Three, rfind_raw, start, end, n1, n2, n3) +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. 
+/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. +#[inline(always)] +pub(crate) unsafe fn count_raw( +    n1: u8, +    start: *const u8, +    end: *const u8, +) -> usize { +    defraw!(One, count_raw, start, end, n1) +} diff --git a/vendor/memchr/src/arch/aarch64/mod.rs b/vendor/memchr/src/arch/aarch64/mod.rs new file mode 100644 index 0000000..7b32912 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/mod.rs @@ -0,0 +1,7 @@ +/*! +Vector algorithms for the `aarch64` target. +*/ + +pub mod neon; + +pub(crate) mod memchr; diff --git a/vendor/memchr/src/arch/aarch64/neon/memchr.rs b/vendor/memchr/src/arch/aarch64/neon/memchr.rs new file mode 100644 index 0000000..5fcc762 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/neon/memchr.rs @@ -0,0 +1,1031 @@ +/*! +This module defines 128-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, than using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point, it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::aarch64::uint8x16_t; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One(generic::One<uint8x16_t>); + +impl One { +    /// Create a new searcher that finds occurrences of the needle byte given. +    /// +    /// This particular searcher is specialized to use neon vector instructions +    /// that typically make it quite fast. +    /// +    /// If neon is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle: u8) -> Option<One> { +        if One::is_available() { +            // SAFETY: we check that neon is available above. +            unsafe { Some(One::new_unchecked(needle)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to neon vectors and routines without +    /// checking that neon is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute `neon` instructions +    /// in the current environment. +    /// +    /// Note that it is a common misconception that if one compiles for an +    /// `aarch64` target, then they therefore automatically have access to neon +    /// instructions. While this is almost always the case, it isn't true in +    /// 100% of cases. 
+    #[target_feature(enable = "neon")] +    #[inline] +    pub unsafe fn new_unchecked(needle: u8) -> One { +        One(generic::One::new(needle)) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`One::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `One::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "neon")] +        { +            true +        } +        #[cfg(not(target_feature = "neon"))] +        { +            false +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Counts all occurrences of this byte in the given haystack. +    #[inline] +    pub fn count(&self, haystack: &[u8]) -> usize { +        // SAFETY: All of our pointers are derived directly from a borrowed +        // slice, which is guaranteed to be valid. +        unsafe { +            let start = haystack.as_ptr(); +            let end = start.add(haystack.len()); +            self.count_raw(start, end) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. 
+    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < uint8x16_t::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::fwd_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +            }); +        } +        // SAFETY: Building a `One` means it's safe to call 'neon' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.find_raw_impl(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < uint8x16_t::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::rev_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +            }); +        } +        // SAFETY: Building a `One` means it's safe to call 'neon' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.rfind_raw_impl(start, end) +    } + +    /// Like `count`, but accepts and returns raw pointers. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. 
+    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { +        if start >= end { +            return 0; +        } +        if end.distance(start) < uint8x16_t::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::count_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +            }); +        } +        // SAFETY: Building a `One` means it's safe to call 'neon' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.count_raw_impl(start, end) +    } + +    /// Execute a search using neon vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of a neon vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `neon` routines.) +    #[target_feature(enable = "neon")] +    #[inline] +    unsafe fn find_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.find_raw(start, end) +    } + +    /// Execute a search using neon vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of a neon vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `neon` routines.) +    #[target_feature(enable = "neon")] +    #[inline] +    unsafe fn rfind_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.rfind_raw(start, end) +    } + +    /// Execute a count using neon vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::count_raw`], except the distance between `start` and +    /// `end` must be at least the size of a neon vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `neon` routines.) +    #[target_feature(enable = "neon")] +    #[inline] +    unsafe fn count_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> usize { +        self.0.count_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle byte in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { +        OneIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of a single byte in a haystack. 
+/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { +    searcher: &'a One, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn count(self) -> usize { +        self.it.count(|s, e| { +            // SAFETY: We rely on our generic iterator to return valid start +            // and end pointers. +            unsafe { self.searcher.count_raw(s, e) } +        }) +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two(generic::Two<uint8x16_t>); + +impl Two { +    /// Create a new searcher that finds occurrences of the needle bytes given. +    /// +    /// This particular searcher is specialized to use neon vector instructions +    /// that typically make it quite fast. +    /// +    /// If neon is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle1: u8, needle2: u8) -> Option<Two> { +        if Two::is_available() { +            // SAFETY: we check that neon is available above. +            unsafe { Some(Two::new_unchecked(needle1, needle2)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to neon vectors and routines without +    /// checking that neon is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute `neon` instructions +    /// in the current environment. +    /// +    /// Note that it is a common misconception that if one compiles for an +    /// `aarch64` target, then they therefore automatically have access to neon +    /// instructions. While this is almost always the case, it isn't true in +    /// 100% of cases. 
+    #[target_feature(enable = "neon")] +    #[inline] +    pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { +        Two(generic::Two::new(needle1, needle2)) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Two::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `Two::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "neon")] +        { +            true +        } +        #[cfg(not(target_feature = "neon"))] +        { +            false +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. 
+    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < uint8x16_t::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::fwd_byte_by_byte(start, end, |b| { +                b == self.0.needle1() || b == self.0.needle2() +            }); +        } +        // SAFETY: Building a `Two` means it's safe to call 'neon' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.find_raw_impl(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < uint8x16_t::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::rev_byte_by_byte(start, end, |b| { +                b == self.0.needle1() || b == self.0.needle2() +            }); +        } +        // SAFETY: Building a `Two` means it's safe to call 'neon' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.rfind_raw_impl(start, end) +    } + +    /// Execute a search using neon vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Two::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of a neon vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Two`, which can only be constructed +    /// when it is safe to call `neon` routines.) +    #[target_feature(enable = "neon")] +    #[inline] +    unsafe fn find_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.find_raw(start, end) +    } + +    /// Execute a search using neon vectors and routines. 
+    /// +    /// # Safety +    /// +    /// Same as [`Two::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of a neon vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Two`, which can only be constructed +    /// when it is safe to call `neon` routines.) +    #[target_feature(enable = "neon")] +    #[inline] +    unsafe fn rfind_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.rfind_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle bytes in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { +        TwoIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { +    searcher: &'a Two, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three(generic::Three<uint8x16_t>); + +impl Three { +    /// Create a new searcher that finds occurrences of the needle bytes given. +    /// +    /// This particular searcher is specialized to use neon vector instructions +    /// that typically make it quite fast. +    /// +    /// If neon is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { +        if Three::is_available() { +            // SAFETY: we check that neon is available above. 
+            unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to neon vectors and routines without +    /// checking that neon is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute `neon` instructions +    /// in the current environment. +    /// +    /// Note that it is a common misconception that if one compiles for an +    /// `aarch64` target, then they therefore automatically have access to neon +    /// instructions. While this is almost always the case, it isn't true in +    /// 100% of cases. +    #[target_feature(enable = "neon")] +    #[inline] +    pub unsafe fn new_unchecked( +        needle1: u8, +        needle2: u8, +        needle3: u8, +    ) -> Three { +        Three(generic::Three::new(needle1, needle2, needle3)) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Three::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `Three::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "neon")] +        { +            true +        } +        #[cfg(not(target_feature = "neon"))] +        { +            false +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. 
+    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < uint8x16_t::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::fwd_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +                    || b == self.0.needle2() +                    || b == self.0.needle3() +            }); +        } +        // SAFETY: Building a `Three` means it's safe to call 'neon' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.find_raw_impl(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < uint8x16_t::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::rev_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +                    || b == self.0.needle2() +                    || b == self.0.needle3() +            }); +        } +        // SAFETY: Building a `Three` means it's safe to call 'neon' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.rfind_raw_impl(start, end) +    } + +    /// Execute a search using neon vectors and routines. 
+    /// +    /// # Safety +    /// +    /// Same as [`Three::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of a neon vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Three`, which can only be constructed +    /// when it is safe to call `neon` routines.) +    #[target_feature(enable = "neon")] +    #[inline] +    unsafe fn find_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.find_raw(start, end) +    } + +    /// Execute a search using neon vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Three::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of a neon vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Three`, which can only be constructed +    /// when it is safe to call `neon` routines.) +    #[target_feature(enable = "neon")] +    #[inline] +    unsafe fn rfind_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.rfind_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle byte in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { +        ThreeIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { +    searcher: &'a Three, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. 
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { +    use super::*; + +    define_memchr_quickcheck!(super); + +    #[test] +    fn forward_one() { +        crate::tests::memchr::Runner::new(1).forward_iter( +            |haystack, needles| { +                Some(One::new(needles[0])?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_one() { +        crate::tests::memchr::Runner::new(1).reverse_iter( +            |haystack, needles| { +                Some(One::new(needles[0])?.iter(haystack).rev().collect()) +            }, +        ) +    } + +    #[test] +    fn count_one() { +        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { +            Some(One::new(needles[0])?.iter(haystack).count()) +        }) +    } + +    #[test] +    fn forward_two() { +        crate::tests::memchr::Runner::new(2).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(Two::new(n1, n2)?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_two() { +        crate::tests::memchr::Runner::new(2).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) +            }, +        ) +    } + +    #[test] +    fn forward_three() { +        crate::tests::memchr::Runner::new(3).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_three() { +        crate::tests::memchr::Runner::new(3).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) +            }, +        ) +    } +} diff --git a/vendor/memchr/src/arch/aarch64/neon/mod.rs b/vendor/memchr/src/arch/aarch64/neon/mod.rs new file mode 100644 index 0000000..ccf9cf8 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/neon/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `aarch64` target using 128-bit vectors via NEON. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/aarch64/neon/packedpair.rs b/vendor/memchr/src/arch/aarch64/neon/packedpair.rs new file mode 100644 index 0000000..6884882 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/neon/packedpair.rs @@ -0,0 +1,236 @@ +/*! +A 128-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::aarch64::uint8x16_t; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 128-bit vector operations. 
+/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder(packedpair::Finder<uint8x16_t>); + +impl Finder { +    /// Create a new pair searcher. The searcher returned can either report +    /// exact matches of `needle` or act as a prefilter and report candidate +    /// positions of `needle`. +    /// +    /// If neon is unavailable in the current environment or if a [`Pair`] +    /// could not be constructed from the needle given, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle: &[u8]) -> Option<Finder> { +        Finder::with_pair(needle, Pair::new(needle)?) +    } + +    /// Create a new "packed pair" finder using the pair of bytes given. +    /// +    /// This constructor permits callers to control precisely which pair of +    /// bytes is used as a predicate. +    /// +    /// If neon is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { +        if Finder::is_available() { +            // SAFETY: we check that neon is available above. We are also +            // guaranteed to have needle.len() > 1 because we have a valid +            // Pair. +            unsafe { Some(Finder::with_pair_impl(needle, pair)) } +        } else { +            None +        } +    } + +    /// Create a new `Finder` specific to neon vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as the safety for `packedpair::Finder::new`, and callers must also +    /// ensure that neon is available. +    #[target_feature(enable = "neon")] +    #[inline] +    unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { +        let finder = packedpair::Finder::<uint8x16_t>::new(needle, pair); +        Finder(finder) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Finder::with_pair`] will +    /// return a `Some` value. Similarly, when it is false, it is guaranteed +    /// that `Finder::with_pair` will return a `None` value. Notice that this +    /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, +    /// even when `Finder::is_available` is true, it is not guaranteed that a +    /// valid [`Pair`] can be found from the needle given. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. 
+    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "neon")] +        { +            true +        } +        #[cfg(not(target_feature = "neon"))] +        { +            false +        } +    } + +    /// Execute a search using neon vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    #[inline] +    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { +        // SAFETY: Building a `Finder` means it's safe to call 'neon' routines. +        unsafe { self.find_impl(haystack, needle) } +    } + +    /// Execute a search using neon vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    #[inline] +    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: Building a `Finder` means it's safe to call 'neon' routines. +        unsafe { self.find_prefilter_impl(haystack) } +    } + +    /// Execute a search using neon vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    /// +    /// # Safety +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Finder`, which can only be constructed +    /// when it is safe to call `neon` routines.) +    #[target_feature(enable = "neon")] +    #[inline] +    unsafe fn find_impl( +        &self, +        haystack: &[u8], +        needle: &[u8], +    ) -> Option<usize> { +        self.0.find(haystack, needle) +    } + +    /// Execute a prefilter search using neon vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    /// +    /// # Safety +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Finder`, which can only be constructed +    /// when it is safe to call `neon` routines.) +    #[target_feature(enable = "neon")] +    #[inline] +    unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { +        self.0.find_prefilter(haystack) +    } + +    /// Returns the pair of offsets (into the needle) used to check as a +    /// predicate before confirming whether a needle exists at a particular +    /// position. +    #[inline] +    pub fn pair(&self) -> &Pair { +        self.0.pair() +    } + +    /// Returns the minimum haystack length that this `Finder` can search. +    /// +    /// Using a haystack with length smaller than this in a search will result +    /// in a panic. The reason for this restriction is that this finder is +    /// meant to be a low-level component that is part of a larger substring +    /// strategy. In that sense, it avoids trying to handle all cases and +    /// instead only handles the cases that it can handle very well. 
+    #[inline] +    pub fn min_haystack_len(&self) -> usize { +        self.0.min_haystack_len() +    } +} + +#[cfg(test)] +mod tests { +    use super::*; + +    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> { +        let f = Finder::new(needle)?; +        if haystack.len() < f.min_haystack_len() { +            return None; +        } +        Some(f.find(haystack, needle)) +    } + +    define_substring_forward_quickcheck!(find); + +    #[test] +    fn forward_substring() { +        crate::tests::substring::Runner::new().fwd(find).run() +    } + +    #[test] +    fn forward_packedpair() { +        fn find( +            haystack: &[u8], +            needle: &[u8], +            index1: u8, +            index2: u8, +        ) -> Option<Option<usize>> { +            let pair = Pair::with_indices(needle, index1, index2)?; +            let f = Finder::with_pair(needle, pair)?; +            if haystack.len() < f.min_haystack_len() { +                return None; +            } +            Some(f.find(haystack, needle)) +        } +        crate::tests::packedpair::Runner::new().fwd(find).run() +    } + +    #[test] +    fn forward_packedpair_prefilter() { +        fn find( +            haystack: &[u8], +            needle: &[u8], +            index1: u8, +            index2: u8, +        ) -> Option<Option<usize>> { +            let pair = Pair::with_indices(needle, index1, index2)?; +            let f = Finder::with_pair(needle, pair)?; +            if haystack.len() < f.min_haystack_len() { +                return None; +            } +            Some(f.find_prefilter(haystack)) +        } +        crate::tests::packedpair::Runner::new().fwd(find).run() +    } +} diff --git a/vendor/memchr/src/arch/all/memchr.rs b/vendor/memchr/src/arch/all/memchr.rs new file mode 100644 index 0000000..435b1be --- /dev/null +++ b/vendor/memchr/src/arch/all/memchr.rs @@ -0,0 +1,996 @@ +/*! +Provides architecture independent implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers +are typically slower than hand-coded vector routines accomplishing the same +task, but are also typically faster than naive scalar code. These routines +effectively work by treating a `usize` as a vector of 8-bit lanes, and thus +achieve some level of data parallelism even without explicit vector support. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, than using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point, it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use crate::{arch::generic::memchr as generic, ext::Pointer}; + +/// The number of bytes in a single `usize` value. 
+const USIZE_BYTES: usize = (usize::BITS / 8) as usize; +/// The bits that must be zero for a `*const usize` to be properly aligned. +const USIZE_ALIGN: usize = USIZE_BYTES - 1; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One { +    s1: u8, +    v1: usize, +} + +impl One { +    /// The number of bytes we examine per each iteration of our search loop. +    const LOOP_BYTES: usize = 2 * USIZE_BYTES; + +    /// Create a new searcher that finds occurrences of the byte given. +    #[inline] +    pub fn new(needle: u8) -> One { +        One { s1: needle, v1: splat(needle) } +    } + +    /// A test-only routine so that we can bundle a bunch of quickcheck +    /// properties into a single macro. Basically, this provides a constructor +    /// that makes it identical to most other memchr implementations, which +    /// have fallible constructors. +    #[cfg(test)] +    pub(crate) fn try_new(needle: u8) -> Option<One> { +        Some(One::new(needle)) +    } + +    /// Return the first occurrence of the needle in the given haystack. If no +    /// such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value for a non-empty haystack is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of the needle in the given haystack. If no +    /// such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value for a non-empty haystack is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Counts all occurrences of this byte in the given haystack. +    #[inline] +    pub fn count(&self, haystack: &[u8]) -> usize { +        // SAFETY: All of our pointers are derived directly from a borrowed +        // slice, which is guaranteed to be valid. +        unsafe { +            let start = haystack.as_ptr(); +            let end = start.add(haystack.len()); +            self.count_raw(start, end) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. 
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let confirm = |b| self.confirm(b); +        let len = end.distance(start); +        if len < USIZE_BYTES { +            return generic::fwd_byte_by_byte(start, end, confirm); +        } + +        // The start of the search may not be aligned to `*const usize`, +        // so we do an unaligned load here. +        let chunk = start.cast::<usize>().read_unaligned(); +        if self.has_needle(chunk) { +            return generic::fwd_byte_by_byte(start, end, confirm); +        } + +        // And now we start our search at a guaranteed aligned position. +        // The first iteration of the loop below will overlap with the the +        // unaligned chunk above in cases where the search starts at an +        // unaligned offset, but that's okay as we're only here if that +        // above didn't find a match. +        let mut cur = +            start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN)); +        debug_assert!(cur > start); +        if len <= One::LOOP_BYTES { +            return generic::fwd_byte_by_byte(cur, end, confirm); +        } +        debug_assert!(end.sub(One::LOOP_BYTES) >= start); +        while cur <= end.sub(One::LOOP_BYTES) { +            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + +            let a = cur.cast::<usize>().read(); +            let b = cur.add(USIZE_BYTES).cast::<usize>().read(); +            if self.has_needle(a) || self.has_needle(b) { +                break; +            } +            cur = cur.add(One::LOOP_BYTES); +        } +        generic::fwd_byte_by_byte(cur, end, confirm) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. 
+    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let confirm = |b| self.confirm(b); +        let len = end.distance(start); +        if len < USIZE_BYTES { +            return generic::rev_byte_by_byte(start, end, confirm); +        } + +        let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned(); +        if self.has_needle(chunk) { +            return generic::rev_byte_by_byte(start, end, confirm); +        } + +        let mut cur = end.sub(end.as_usize() & USIZE_ALIGN); +        debug_assert!(start <= cur && cur <= end); +        if len <= One::LOOP_BYTES { +            return generic::rev_byte_by_byte(start, cur, confirm); +        } +        while cur >= start.add(One::LOOP_BYTES) { +            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + +            let a = cur.sub(2 * USIZE_BYTES).cast::<usize>().read(); +            let b = cur.sub(1 * USIZE_BYTES).cast::<usize>().read(); +            if self.has_needle(a) || self.has_needle(b) { +                break; +            } +            cur = cur.sub(One::LOOP_BYTES); +        } +        generic::rev_byte_by_byte(start, cur, confirm) +    } + +    /// Counts all occurrences of this byte in the given haystack represented +    /// by raw pointers. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `0` will always be returned. +    #[inline] +    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { +        if start >= end { +            return 0; +        } +        // Sadly I couldn't get the SWAR approach to work here, so we just do +        // one byte at a time for now. PRs to improve this are welcome. +        let mut ptr = start; +        let mut count = 0; +        while ptr < end { +            count += (ptr.read() == self.s1) as usize; +            ptr = ptr.offset(1); +        } +        count +    } + +    /// Returns an iterator over all occurrences of the needle byte in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. 
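As a usage sketch (this assumes the `memchr::arch::all::memchr::One` path is publicly reachable, which the crate's `arch` module tree suggests but this file alone does not show), the iterator described above can be driven from either end:

```rust
use memchr::arch::all::memchr::One;

let searcher = One::new(b'\n');
let haystack = b"line one\nline two\n";

// Forward iteration yields the offset of every newline, in order...
let forward: Vec<usize> = searcher.iter(haystack).collect();
assert_eq!(forward, vec![8, 17]);

// ...and because the iterator is double ended, it can also walk the
// matches back-to-front.
let reverse: Vec<usize> = searcher.iter(haystack).rev().collect();
assert_eq!(reverse, vec![17, 8]);
```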
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { +        OneIter { searcher: self, it: generic::Iter::new(haystack) } +    } + +    #[inline(always)] +    fn has_needle(&self, chunk: usize) -> bool { +        has_zero_byte(self.v1 ^ chunk) +    } + +    #[inline(always)] +    fn confirm(&self, haystack_byte: u8) -> bool { +        self.s1 == haystack_byte +    } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { +    /// The underlying memchr searcher. +    searcher: &'a One, +    /// Generic iterator implementation. +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn count(self) -> usize { +        self.it.count(|s, e| { +            // SAFETY: We rely on our generic iterator to return valid start +            // and end pointers. +            unsafe { self.searcher.count_raw(s, e) } +        }) +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two { +    s1: u8, +    s2: u8, +    v1: usize, +    v2: usize, +} + +impl Two { +    /// Create a new searcher that finds occurrences of the two needle bytes +    /// given. +    #[inline] +    pub fn new(needle1: u8, needle2: u8) -> Two { +        Two { +            s1: needle1, +            s2: needle2, +            v1: splat(needle1), +            v2: splat(needle2), +        } +    } + +    /// A test-only routine so that we can bundle a bunch of quickcheck +    /// properties into a single macro. Basically, this provides a constructor +    /// that makes it identical to most other memchr implementations, which +    /// have fallible constructors. +    #[cfg(test)] +    pub(crate) fn try_new(needle1: u8, needle2: u8) -> Option<Two> { +        Some(Two::new(needle1, needle2)) +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. 
+    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value for a non-empty haystack is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value for a non-empty haystack is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let confirm = |b| self.confirm(b); +        let len = end.distance(start); +        if len < USIZE_BYTES { +            return generic::fwd_byte_by_byte(start, end, confirm); +        } + +        // The start of the search may not be aligned to `*const usize`, +        // so we do an unaligned load here. +        let chunk = start.cast::<usize>().read_unaligned(); +        if self.has_needle(chunk) { +            return generic::fwd_byte_by_byte(start, end, confirm); +        } + +        // And now we start our search at a guaranteed aligned position. +        // The first iteration of the loop below will overlap with the the +        // unaligned chunk above in cases where the search starts at an +        // unaligned offset, but that's okay as we're only here if that +        // above didn't find a match. 
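+        //
+        // `start.as_usize() & USIZE_ALIGN` is how far `start` sits past the
+        // previous word boundary, so adding `USIZE_BYTES` minus that amount
+        // rounds `cur` up to the next word boundary. (When `start` is
+        // already aligned this skips a full word, which is fine: those
+        // bytes were covered by the unaligned load above.)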
+        let mut cur = +            start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN)); +        debug_assert!(cur > start); +        debug_assert!(end.sub(USIZE_BYTES) >= start); +        while cur <= end.sub(USIZE_BYTES) { +            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + +            let chunk = cur.cast::<usize>().read(); +            if self.has_needle(chunk) { +                break; +            } +            cur = cur.add(USIZE_BYTES); +        } +        generic::fwd_byte_by_byte(cur, end, confirm) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let confirm = |b| self.confirm(b); +        let len = end.distance(start); +        if len < USIZE_BYTES { +            return generic::rev_byte_by_byte(start, end, confirm); +        } + +        let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned(); +        if self.has_needle(chunk) { +            return generic::rev_byte_by_byte(start, end, confirm); +        } + +        let mut cur = end.sub(end.as_usize() & USIZE_ALIGN); +        debug_assert!(start <= cur && cur <= end); +        while cur >= start.add(USIZE_BYTES) { +            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + +            let chunk = cur.sub(USIZE_BYTES).cast::<usize>().read(); +            if self.has_needle(chunk) { +                break; +            } +            cur = cur.sub(USIZE_BYTES); +        } +        generic::rev_byte_by_byte(start, cur, confirm) +    } + +    /// Returns an iterator over all occurrences of one of the needle bytes in +    /// the given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { +        TwoIter { searcher: self, it: generic::Iter::new(haystack) } +    } + +    #[inline(always)] +    fn has_needle(&self, chunk: usize) -> bool { +        has_zero_byte(self.v1 ^ chunk) || has_zero_byte(self.v2 ^ chunk) +    } + +    #[inline(always)] +    fn confirm(&self, haystack_byte: u8) -> bool { +        self.s1 == haystack_byte || self.s2 == haystack_byte +    } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. 
+/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { +    /// The underlying memchr searcher. +    searcher: &'a Two, +    /// Generic iterator implementation. +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three { +    s1: u8, +    s2: u8, +    s3: u8, +    v1: usize, +    v2: usize, +    v3: usize, +} + +impl Three { +    /// Create a new searcher that finds occurrences of the three needle bytes +    /// given. +    #[inline] +    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Three { +        Three { +            s1: needle1, +            s2: needle2, +            s3: needle3, +            v1: splat(needle1), +            v2: splat(needle2), +            v3: splat(needle3), +        } +    } + +    /// A test-only routine so that we can bundle a bunch of quickcheck +    /// properties into a single macro. Basically, this provides a constructor +    /// that makes it identical to most other memchr implementations, which +    /// have fallible constructors. +    #[cfg(test)] +    pub(crate) fn try_new( +        needle1: u8, +        needle2: u8, +        needle3: u8, +    ) -> Option<Three> { +        Some(Three::new(needle1, needle2, needle3)) +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value for a non-empty haystack is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. 
+        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value for a non-empty haystack is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let confirm = |b| self.confirm(b); +        let len = end.distance(start); +        if len < USIZE_BYTES { +            return generic::fwd_byte_by_byte(start, end, confirm); +        } + +        // The start of the search may not be aligned to `*const usize`, +        // so we do an unaligned load here. +        let chunk = start.cast::<usize>().read_unaligned(); +        if self.has_needle(chunk) { +            return generic::fwd_byte_by_byte(start, end, confirm); +        } + +        // And now we start our search at a guaranteed aligned position. +        // The first iteration of the loop below will overlap with the the +        // unaligned chunk above in cases where the search starts at an +        // unaligned offset, but that's okay as we're only here if that +        // above didn't find a match. 
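+        //
+        // As in `Two::find_raw`, this rounds `cur` up to the next word
+        // boundary; any bytes skipped here were already covered by the
+        // unaligned load above.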
+        let mut cur = +            start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN)); +        debug_assert!(cur > start); +        debug_assert!(end.sub(USIZE_BYTES) >= start); +        while cur <= end.sub(USIZE_BYTES) { +            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + +            let chunk = cur.cast::<usize>().read(); +            if self.has_needle(chunk) { +                break; +            } +            cur = cur.add(USIZE_BYTES); +        } +        generic::fwd_byte_by_byte(cur, end, confirm) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let confirm = |b| self.confirm(b); +        let len = end.distance(start); +        if len < USIZE_BYTES { +            return generic::rev_byte_by_byte(start, end, confirm); +        } + +        let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned(); +        if self.has_needle(chunk) { +            return generic::rev_byte_by_byte(start, end, confirm); +        } + +        let mut cur = end.sub(end.as_usize() & USIZE_ALIGN); +        debug_assert!(start <= cur && cur <= end); +        while cur >= start.add(USIZE_BYTES) { +            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + +            let chunk = cur.sub(USIZE_BYTES).cast::<usize>().read(); +            if self.has_needle(chunk) { +                break; +            } +            cur = cur.sub(USIZE_BYTES); +        } +        generic::rev_byte_by_byte(start, cur, confirm) +    } + +    /// Returns an iterator over all occurrences of one of the needle bytes in +    /// the given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. 
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { +        ThreeIter { searcher: self, it: generic::Iter::new(haystack) } +    } + +    #[inline(always)] +    fn has_needle(&self, chunk: usize) -> bool { +        has_zero_byte(self.v1 ^ chunk) +            || has_zero_byte(self.v2 ^ chunk) +            || has_zero_byte(self.v3 ^ chunk) +    } + +    #[inline(always)] +    fn confirm(&self, haystack_byte: u8) -> bool { +        self.s1 == haystack_byte +            || self.s2 == haystack_byte +            || self.s3 == haystack_byte +    } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { +    /// The underlying memchr searcher. +    searcher: &'a Three, +    /// Generic iterator implementation. +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +/// Return `true` if `x` contains any zero byte. +/// +/// That is, this routine treats `x` as a register of 8-bit lanes and returns +/// true when any of those lanes is `0`. +/// +/// From "Matters Computational" by J. Arndt. +#[inline(always)] +fn has_zero_byte(x: usize) -> bool { +    // "The idea is to subtract one from each of the bytes and then look for +    // bytes where the borrow propagated all the way to the most significant +    // bit." +    const LO: usize = splat(0x01); +    const HI: usize = splat(0x80); + +    (x.wrapping_sub(LO) & !x & HI) != 0 +} + +/// Repeat the given byte into a word size number. That is, every 8 bits +/// is equivalent to the given byte. For example, if `b` is `\x4E` or +/// `01001110` in binary, then the returned value on a 32-bit system would be: +/// `01001110_01001110_01001110_01001110`. +#[inline(always)] +const fn splat(b: u8) -> usize { +    // TODO: use `usize::from` once it can be used in const context. 
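+    //
+    // `usize::MAX / 255` is the constant with every byte equal to 0x01
+    // (0x0101...01), so this multiplication copies `b` into every 8-bit
+    // lane: e.g. splat(0x4E) is 0x4E4E_4E4E_4E4E_4E4E on a 64-bit target.
+    // `has_zero_byte` above relies on this to build its LO and HI masks.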
+    (b as usize) * (usize::MAX / 255) +} + +#[cfg(test)] +mod tests { +    use super::*; + +    define_memchr_quickcheck!(super, try_new); + +    #[test] +    fn forward_one() { +        crate::tests::memchr::Runner::new(1).forward_iter( +            |haystack, needles| { +                Some(One::new(needles[0]).iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_one() { +        crate::tests::memchr::Runner::new(1).reverse_iter( +            |haystack, needles| { +                Some(One::new(needles[0]).iter(haystack).rev().collect()) +            }, +        ) +    } + +    #[test] +    fn count_one() { +        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { +            Some(One::new(needles[0]).iter(haystack).count()) +        }) +    } + +    #[test] +    fn forward_two() { +        crate::tests::memchr::Runner::new(2).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(Two::new(n1, n2).iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_two() { +        crate::tests::memchr::Runner::new(2).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(Two::new(n1, n2).iter(haystack).rev().collect()) +            }, +        ) +    } + +    #[test] +    fn forward_three() { +        crate::tests::memchr::Runner::new(3).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(Three::new(n1, n2, n3).iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_three() { +        crate::tests::memchr::Runner::new(3).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(Three::new(n1, n2, n3).iter(haystack).rev().collect()) +            }, +        ) +    } + +    // This was found by quickcheck in the course of refactoring this crate +    // after memchr 2.5.0. +    #[test] +    fn regression_double_ended_iterator() { +        let finder = One::new(b'a'); +        let haystack = "a"; +        let mut it = finder.iter(haystack.as_bytes()); +        assert_eq!(Some(0), it.next()); +        assert_eq!(None, it.next_back()); +    } + +    // This regression test was caught by ripgrep's test suite on i686 when +    // upgrading to memchr 2.6. Namely, something about the \x0B bytes here +    // screws with the SWAR counting approach I was using. This regression test +    // prompted me to remove the SWAR counting approach and just replace it +    // with a byte-at-a-time loop. +    #[test] +    fn regression_count_new_lines() { +        let haystack = "01234567\x0b\n\x0b\n\x0b\n\x0b\nx"; +        let count = One::new(b'\n').count(haystack.as_bytes()); +        assert_eq!(4, count); +    } +} diff --git a/vendor/memchr/src/arch/all/mod.rs b/vendor/memchr/src/arch/all/mod.rs new file mode 100644 index 0000000..559cb75 --- /dev/null +++ b/vendor/memchr/src/arch/all/mod.rs @@ -0,0 +1,234 @@ +/*! +Contains architecture independent routines. 
+ +These routines are often used as a "fallback" implementation when the more +specialized architecture dependent routines are unavailable. +*/ + +pub mod memchr; +pub mod packedpair; +pub mod rabinkarp; +#[cfg(feature = "alloc")] +pub mod shiftor; +pub mod twoway; + +/// Returns true if and only if `needle` is a prefix of `haystack`. +/// +/// This uses a latency optimized variant of `memcmp` internally which *might* +/// make this faster for very short strings. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +#[inline(always)] +pub fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { +    needle.len() <= haystack.len() +        && is_equal(&haystack[..needle.len()], needle) +} + +/// Returns true if and only if `needle` is a suffix of `haystack`. +/// +/// This uses a latency optimized variant of `memcmp` internally which *might* +/// make this faster for very short strings. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +#[inline(always)] +pub fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { +    needle.len() <= haystack.len() +        && is_equal(&haystack[haystack.len() - needle.len()..], needle) +} + +/// Compare corresponding bytes in `x` and `y` for equality. +/// +/// That is, this returns true if and only if `x.len() == y.len()` and +/// `x[i] == y[i]` for all `0 <= i < x.len()`. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +/// +/// # Motivation +/// +/// Why not use slice equality instead? Well, slice equality usually results in +/// a call out to the current platform's `libc` which might not be inlineable +/// or have other overhead. This routine isn't guaranteed to be a win, but it +/// might be in some cases. +#[inline(always)] +pub fn is_equal(x: &[u8], y: &[u8]) -> bool { +    if x.len() != y.len() { +        return false; +    } +    // SAFETY: Our pointers are derived directly from borrowed slices which +    // uphold all of our safety guarantees except for length. We account for +    // length with the check above. +    unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) } +} + +/// Compare `n` bytes at the given pointers for equality. +/// +/// This returns true if and only if `*x.add(i) == *y.add(i)` for all +/// `0 <= i < n`. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +/// +/// # Motivation +/// +/// Why not use slice equality instead? Well, slice equality usually results in +/// a call out to the current platform's `libc` which might not be inlineable +/// or have other overhead. This routine isn't guaranteed to be a win, but it +/// might be in some cases. +/// +/// # Safety +/// +/// * Both `x` and `y` must be valid for reads of up to `n` bytes. +/// * Both `x` and `y` must point to an initialized value. 
+/// * Both `x` and `y` must each point to an allocated object and +/// must either be in bounds or at most one byte past the end of the +/// allocated object. `x` and `y` do not need to point to the same allocated +/// object, but they may. +/// * Both `x` and `y` must be _derived from_ a pointer to their respective +/// allocated objects. +/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly +/// for `y` and `y+n`. +/// * The distance being in bounds must not rely on "wrapping around" the +/// address space. +#[inline(always)] +pub unsafe fn is_equal_raw( +    mut x: *const u8, +    mut y: *const u8, +    mut n: usize, +) -> bool { +    // When we have 4 or more bytes to compare, then proceed in chunks of 4 at +    // a time using unaligned loads. +    // +    // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is +    // that this particular version of memcmp is likely to be called with tiny +    // needles. That means that if we do 8 byte loads, then a higher proportion +    // of memcmp calls will use the slower variant above. With that said, this +    // is a hypothesis and is only loosely supported by benchmarks. There's +    // likely some improvement that could be made here. The main thing here +    // though is to optimize for latency, not throughput. + +    // SAFETY: The caller is responsible for ensuring the pointers we get are +    // valid and readable for at least `n` bytes. We also do unaligned loads, +    // so there's no need to ensure we're aligned. (This is justified by this +    // routine being specifically for short strings.) +    while n >= 4 { +        let vx = x.cast::<u32>().read_unaligned(); +        let vy = y.cast::<u32>().read_unaligned(); +        if vx != vy { +            return false; +        } +        x = x.add(4); +        y = y.add(4); +        n -= 4; +    } +    // If we don't have enough bytes to do 4-byte at a time loads, then +    // do partial loads. Note that I used to have a byte-at-a-time +    // loop here and that turned out to be quite a bit slower for the +    // memmem/pathological/defeat-simple-vector-alphabet benchmark. 
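+    //
+    // At this point `n` is at most 3, so the tail needs at most one 2-byte
+    // load followed by at most one 1-byte load, and neither read goes past
+    // the `n` bytes the caller guaranteed are readable.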
+    if n >= 2 { +        let vx = x.cast::<u16>().read_unaligned(); +        let vy = y.cast::<u16>().read_unaligned(); +        if vx != vy { +            return false; +        } +        x = x.add(2); +        y = y.add(2); +        n -= 2; +    } +    if n > 0 { +        if x.read() != y.read() { +            return false; +        } +    } +    true +} + +#[cfg(test)] +mod tests { +    use super::*; + +    #[test] +    fn equals_different_lengths() { +        assert!(!is_equal(b"", b"a")); +        assert!(!is_equal(b"a", b"")); +        assert!(!is_equal(b"ab", b"a")); +        assert!(!is_equal(b"a", b"ab")); +    } + +    #[test] +    fn equals_mismatch() { +        let one_mismatch = [ +            (&b"a"[..], &b"x"[..]), +            (&b"ab"[..], &b"ax"[..]), +            (&b"abc"[..], &b"abx"[..]), +            (&b"abcd"[..], &b"abcx"[..]), +            (&b"abcde"[..], &b"abcdx"[..]), +            (&b"abcdef"[..], &b"abcdex"[..]), +            (&b"abcdefg"[..], &b"abcdefx"[..]), +            (&b"abcdefgh"[..], &b"abcdefgx"[..]), +            (&b"abcdefghi"[..], &b"abcdefghx"[..]), +            (&b"abcdefghij"[..], &b"abcdefghix"[..]), +            (&b"abcdefghijk"[..], &b"abcdefghijx"[..]), +            (&b"abcdefghijkl"[..], &b"abcdefghijkx"[..]), +            (&b"abcdefghijklm"[..], &b"abcdefghijklx"[..]), +            (&b"abcdefghijklmn"[..], &b"abcdefghijklmx"[..]), +        ]; +        for (x, y) in one_mismatch { +            assert_eq!(x.len(), y.len(), "lengths should match"); +            assert!(!is_equal(x, y)); +            assert!(!is_equal(y, x)); +        } +    } + +    #[test] +    fn equals_yes() { +        assert!(is_equal(b"", b"")); +        assert!(is_equal(b"a", b"a")); +        assert!(is_equal(b"ab", b"ab")); +        assert!(is_equal(b"abc", b"abc")); +        assert!(is_equal(b"abcd", b"abcd")); +        assert!(is_equal(b"abcde", b"abcde")); +        assert!(is_equal(b"abcdef", b"abcdef")); +        assert!(is_equal(b"abcdefg", b"abcdefg")); +        assert!(is_equal(b"abcdefgh", b"abcdefgh")); +        assert!(is_equal(b"abcdefghi", b"abcdefghi")); +    } + +    #[test] +    fn prefix() { +        assert!(is_prefix(b"", b"")); +        assert!(is_prefix(b"a", b"")); +        assert!(is_prefix(b"ab", b"")); +        assert!(is_prefix(b"foo", b"foo")); +        assert!(is_prefix(b"foobar", b"foo")); + +        assert!(!is_prefix(b"foo", b"fob")); +        assert!(!is_prefix(b"foobar", b"fob")); +    } + +    #[test] +    fn suffix() { +        assert!(is_suffix(b"", b"")); +        assert!(is_suffix(b"a", b"")); +        assert!(is_suffix(b"ab", b"")); +        assert!(is_suffix(b"foo", b"foo")); +        assert!(is_suffix(b"foobar", b"bar")); + +        assert!(!is_suffix(b"foo", b"goo")); +        assert!(!is_suffix(b"foobar", b"gar")); +    } +} diff --git a/vendor/memchr/src/arch/all/packedpair/default_rank.rs b/vendor/memchr/src/arch/all/packedpair/default_rank.rs new file mode 100644 index 0000000..6aa3895 --- /dev/null +++ b/vendor/memchr/src/arch/all/packedpair/default_rank.rs @@ -0,0 +1,258 @@ +pub(crate) const RANK: [u8; 256] = [ +    55,  // '\x00' +    52,  // '\x01' +    51,  // '\x02' +    50,  // '\x03' +    49,  // '\x04' +    48,  // '\x05' +    47,  // '\x06' +    46,  // '\x07' +    45,  // '\x08' +    103, // '\t' +    242, // '\n' +    66,  // '\x0b' +    67,  // '\x0c' +    229, // '\r' +    44,  // '\x0e' +    43,  // '\x0f' +    42,  // '\x10' +    41,  // '\x11' +    40,  // '\x12' +    39,  // '\x13' +    38,  // '\x14' +    
37,  // '\x15' +    36,  // '\x16' +    35,  // '\x17' +    34,  // '\x18' +    33,  // '\x19' +    56,  // '\x1a' +    32,  // '\x1b' +    31,  // '\x1c' +    30,  // '\x1d' +    29,  // '\x1e' +    28,  // '\x1f' +    255, // ' ' +    148, // '!' +    164, // '"' +    149, // '#' +    136, // '$' +    160, // '%' +    155, // '&' +    173, // "'" +    221, // '(' +    222, // ')' +    134, // '*' +    122, // '+' +    232, // ',' +    202, // '-' +    215, // '.' +    224, // '/' +    208, // '0' +    220, // '1' +    204, // '2' +    187, // '3' +    183, // '4' +    179, // '5' +    177, // '6' +    168, // '7' +    178, // '8' +    200, // '9' +    226, // ':' +    195, // ';' +    154, // '<' +    184, // '=' +    174, // '>' +    126, // '?' +    120, // '@' +    191, // 'A' +    157, // 'B' +    194, // 'C' +    170, // 'D' +    189, // 'E' +    162, // 'F' +    161, // 'G' +    150, // 'H' +    193, // 'I' +    142, // 'J' +    137, // 'K' +    171, // 'L' +    176, // 'M' +    185, // 'N' +    167, // 'O' +    186, // 'P' +    112, // 'Q' +    175, // 'R' +    192, // 'S' +    188, // 'T' +    156, // 'U' +    140, // 'V' +    143, // 'W' +    123, // 'X' +    133, // 'Y' +    128, // 'Z' +    147, // '[' +    138, // '\\' +    146, // ']' +    114, // '^' +    223, // '_' +    151, // '`' +    249, // 'a' +    216, // 'b' +    238, // 'c' +    236, // 'd' +    253, // 'e' +    227, // 'f' +    218, // 'g' +    230, // 'h' +    247, // 'i' +    135, // 'j' +    180, // 'k' +    241, // 'l' +    233, // 'm' +    246, // 'n' +    244, // 'o' +    231, // 'p' +    139, // 'q' +    245, // 'r' +    243, // 's' +    251, // 't' +    235, // 'u' +    201, // 'v' +    196, // 'w' +    240, // 'x' +    214, // 'y' +    152, // 'z' +    182, // '{' +    205, // '|' +    181, // '}' +    127, // '~' +    27,  // '\x7f' +    212, // '\x80' +    211, // '\x81' +    210, // '\x82' +    213, // '\x83' +    228, // '\x84' +    197, // '\x85' +    169, // '\x86' +    159, // '\x87' +    131, // '\x88' +    172, // '\x89' +    105, // '\x8a' +    80,  // '\x8b' +    98,  // '\x8c' +    96,  // '\x8d' +    97,  // '\x8e' +    81,  // '\x8f' +    207, // '\x90' +    145, // '\x91' +    116, // '\x92' +    115, // '\x93' +    144, // '\x94' +    130, // '\x95' +    153, // '\x96' +    121, // '\x97' +    107, // '\x98' +    132, // '\x99' +    109, // '\x9a' +    110, // '\x9b' +    124, // '\x9c' +    111, // '\x9d' +    82,  // '\x9e' +    108, // '\x9f' +    118, // '\xa0' +    141, // '¡' +    113, // '¢' +    129, // '£' +    119, // '¤' +    125, // '¥' +    165, // '¦' +    117, // '§' +    92,  // '¨' +    106, // '©' +    83,  // 'ª' +    72,  // '«' +    99,  // '¬' +    93,  // '\xad' +    65,  // '®' +    79,  // '¯' +    166, // '°' +    237, // '±' +    163, // '²' +    199, // '³' +    190, // '´' +    225, // 'µ' +    209, // '¶' +    203, // '·' +    198, // '¸' +    217, // '¹' +    219, // 'º' +    206, // '»' +    234, // '¼' +    248, // '½' +    158, // '¾' +    239, // '¿' +    255, // 'À' +    255, // 'Á' +    255, // 'Â' +    255, // 'Ã' +    255, // 'Ä' +    255, // 'Å' +    255, // 'Æ' +    255, // 'Ç' +    255, // 'È' +    255, // 'É' +    255, // 'Ê' +    255, // 'Ë' +    255, // 'Ì' +    255, // 'Í' +    255, // 'Î' +    255, // 'Ï' +    255, // 'Ð' +    255, // 'Ñ' +    255, // 'Ò' +    255, // 'Ó' +    255, // 'Ô' +    255, // 'Õ' +    255, // 'Ö' +    255, // '×' +    255, // 'Ø' +    255, // 'Ù' +    255, // 'Ú' +    255, // 'Û' +    255, // 'Ü' +    255, // 'Ý' +   
 255, // 'Þ' +    255, // 'ß' +    255, // 'à' +    255, // 'á' +    255, // 'â' +    255, // 'ã' +    255, // 'ä' +    255, // 'å' +    255, // 'æ' +    255, // 'ç' +    255, // 'è' +    255, // 'é' +    255, // 'ê' +    255, // 'ë' +    255, // 'ì' +    255, // 'í' +    255, // 'î' +    255, // 'ï' +    255, // 'ð' +    255, // 'ñ' +    255, // 'ò' +    255, // 'ó' +    255, // 'ô' +    255, // 'õ' +    255, // 'ö' +    255, // '÷' +    255, // 'ø' +    255, // 'ù' +    255, // 'ú' +    255, // 'û' +    255, // 'ü' +    255, // 'ý' +    255, // 'þ' +    255, // 'ÿ' +]; diff --git a/vendor/memchr/src/arch/all/packedpair/mod.rs b/vendor/memchr/src/arch/all/packedpair/mod.rs new file mode 100644 index 0000000..148a985 --- /dev/null +++ b/vendor/memchr/src/arch/all/packedpair/mod.rs @@ -0,0 +1,359 @@ +/*! +Provides an architecture independent implementation of the "packed pair" +algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. Note that +this module provides an architecture independent version that doesn't do as +good of a job keeping the search for candidates inside a SIMD hot path. It +however can be good enough in many circumstances. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use crate::memchr; + +mod default_rank; + +/// An architecture independent "packed pair" finder. +/// +/// This finder picks two bytes that it believes have high predictive power for +/// indicating an overall match of a needle. At search time, it reports offsets +/// where the needle could match based on whether the pair of bytes it chose +/// match. +/// +/// This is architecture independent because it utilizes `memchr` to find the +/// occurrence of one of the bytes in the pair, and then checks whether the +/// second byte matches. If it does, in the case of [`Finder::find_prefilter`], +/// the location at which the needle could match is returned. +/// +/// It is generally preferred to use architecture specific routines for a +/// "packed pair" prefilter, but this can be a useful fallback when the +/// architecture independent routines are unavailable. +#[derive(Clone, Copy, Debug)] +pub struct Finder { +    pair: Pair, +    byte1: u8, +    byte2: u8, +} + +impl Finder { +    /// Create a new prefilter that reports possible locations where the given +    /// needle matches. +    #[inline] +    pub fn new(needle: &[u8]) -> Option<Finder> { +        Finder::with_pair(needle, Pair::new(needle)?) +    } + +    /// Create a new prefilter using the pair given. +    /// +    /// If the prefilter could not be constructed, then `None` is returned. +    /// +    /// This constructor permits callers to control precisely which pair of +    /// bytes is used as a predicate. +    #[inline] +    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { +        let byte1 = needle[usize::from(pair.index1())]; +        let byte2 = needle[usize::from(pair.index2())]; +        // Currently this can never fail so we could just return a Finder, +        // but it's conceivable this could change. +        Some(Finder { pair, byte1, byte2 }) +    } + +    /// Run this finder on the given haystack as a prefilter. +    /// +    /// If a candidate match is found, then an offset where the needle *could* +    /// begin in the haystack is returned. 
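To make the prefilter contract concrete, here is a hedged sketch of a caller-side loop that confirms each candidate; the `memchr::arch::all::packedpair::Finder` import path and the confirmation strategy are assumptions of the sketch, not something this file dictates:

```rust
use memchr::arch::all::packedpair::Finder;

/// First occurrence of `needle` in `haystack`, using the packed pair
/// prefilter to skip ahead and confirming every candidate byte-wise.
/// Returns `None` if a `Finder` cannot be built (needles shorter than
/// two bytes) or if there is no match.
fn find_with_prefilter(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let finder = Finder::new(needle)?;
    let mut at = 0;
    while let Some(candidate) = finder.find_prefilter(&haystack[at..]) {
        let start = at + candidate;
        // The prefilter only says a match *could* begin here; confirm it.
        if haystack[start..].starts_with(needle) {
            return Some(start);
        }
        at = start + 1;
    }
    None
}
```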
+    #[inline] +    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { +        let mut i = 0; +        let index1 = usize::from(self.pair.index1()); +        let index2 = usize::from(self.pair.index2()); +        loop { +            // Use a fast vectorized implementation to skip to the next +            // occurrence of the rarest byte (heuristically chosen) in the +            // needle. +            i += memchr(self.byte1, &haystack[i..])?; +            let found = i; +            i += 1; + +            // If we can't align our first byte match with the haystack, then a +            // match is impossible. +            let aligned1 = match found.checked_sub(index1) { +                None => continue, +                Some(aligned1) => aligned1, +            }; + +            // Now align the second byte match with the haystack. A mismatch +            // means that a match is impossible. +            let aligned2 = match aligned1.checked_add(index2) { +                None => continue, +                Some(aligned_index2) => aligned_index2, +            }; +            if haystack.get(aligned2).map_or(true, |&b| b != self.byte2) { +                continue; +            } + +            // We've done what we can. There might be a match here. +            return Some(aligned1); +        } +    } + +    /// Returns the pair of offsets (into the needle) used to check as a +    /// predicate before confirming whether a needle exists at a particular +    /// position. +    #[inline] +    pub fn pair(&self) -> &Pair { +        &self.pair +    } +} + +/// A pair of byte offsets into a needle to use as a predicate. +/// +/// This pair is used as a predicate to quickly filter out positions in a +/// haystack in which a needle cannot match. In some cases, this pair can even +/// be used in vector algorithms such that the vector algorithm only switches +/// over to scalar code once this pair has been found. +/// +/// A pair of offsets can be used in both substring search implementations and +/// in prefilters. The former will report matches of a needle in a haystack +/// where as the latter will only report possible matches of a needle. +/// +/// The offsets are limited each to a maximum of 255 to keep memory usage low. +/// Moreover, it's rarely advantageous to create a predicate using offsets +/// greater than 255 anyway. +/// +/// The only guarantee enforced on the pair of offsets is that they are not +/// equivalent. It is not necessarily the case that `index1 < index2` for +/// example. By convention, `index1` corresponds to the byte in the needle +/// that is believed to be most the predictive. Note also that because of the +/// requirement that the indices be both valid for the needle used to build +/// the pair and not equal, it follows that a pair can only be constructed for +/// needles with length at least 2. +#[derive(Clone, Copy, Debug)] +pub struct Pair { +    index1: u8, +    index2: u8, +} + +impl Pair { +    /// Create a new pair of offsets from the given needle. +    /// +    /// If a pair could not be created (for example, if the needle is too +    /// short), then `None` is returned. +    /// +    /// This chooses the pair in the needle that is believed to be as +    /// predictive of an overall match of the needle as possible. +    #[inline] +    pub fn new(needle: &[u8]) -> Option<Pair> { +        Pair::with_ranker(needle, DefaultFrequencyRank) +    } + +    /// Create a new pair of offsets from the given needle and ranker. 
+    /// +    /// This permits the caller to choose a background frequency distribution +    /// with which bytes are selected. The idea is to select a pair of bytes +    /// that is believed to strongly predict a match in the haystack. This +    /// usually means selecting bytes that occur rarely in a haystack. +    /// +    /// If a pair could not be created (for example, if the needle is too +    /// short), then `None` is returned. +    #[inline] +    pub fn with_ranker<R: HeuristicFrequencyRank>( +        needle: &[u8], +        ranker: R, +    ) -> Option<Pair> { +        if needle.len() <= 1 { +            return None; +        } +        // Find the rarest two bytes. We make them distinct indices by +        // construction. (The actual byte value may be the same in degenerate +        // cases, but that's OK.) +        let (mut rare1, mut index1) = (needle[0], 0); +        let (mut rare2, mut index2) = (needle[1], 1); +        if ranker.rank(rare2) < ranker.rank(rare1) { +            core::mem::swap(&mut rare1, &mut rare2); +            core::mem::swap(&mut index1, &mut index2); +        } +        let max = usize::from(core::u8::MAX); +        for (i, &b) in needle.iter().enumerate().take(max).skip(2) { +            if ranker.rank(b) < ranker.rank(rare1) { +                rare2 = rare1; +                index2 = index1; +                rare1 = b; +                index1 = u8::try_from(i).unwrap(); +            } else if b != rare1 && ranker.rank(b) < ranker.rank(rare2) { +                rare2 = b; +                index2 = u8::try_from(i).unwrap(); +            } +        } +        // While not strictly required for how a Pair is normally used, we +        // really don't want these to be equivalent. If they were, it would +        // reduce the effectiveness of candidate searching using these rare +        // bytes by increasing the rate of false positives. +        assert_ne!(index1, index2); +        Some(Pair { index1, index2 }) +    } + +    /// Create a new pair using the offsets given for the needle given. +    /// +    /// This bypasses any sort of heuristic process for choosing the offsets +    /// and permits the caller to choose the offsets themselves. +    /// +    /// Indices are limited to valid `u8` values so that a `Pair` uses less +    /// memory. It is not possible to create a `Pair` with offsets bigger than +    /// `u8::MAX`. It's likely that such a thing is not needed, but if it is, +    /// it's suggested to build your own bespoke algorithm because you're +    /// likely working on a very niche case. (File an issue if this suggestion +    /// does not make sense to you.) +    /// +    /// If a pair could not be created (for example, if the needle is too +    /// short), then `None` is returned. +    #[inline] +    pub fn with_indices( +        needle: &[u8], +        index1: u8, +        index2: u8, +    ) -> Option<Pair> { +        // While not strictly required for how a Pair is normally used, we +        // really don't want these to be equivalent. If they were, it would +        // reduce the effectiveness of candidate searching using these rare +        // bytes by increasing the rate of false positives. +        if index1 == index2 { +            return None; +        } +        // Similarly, invalid indices means the Pair is invalid too. 
+        if usize::from(index1) >= needle.len() { +            return None; +        } +        if usize::from(index2) >= needle.len() { +            return None; +        } +        Some(Pair { index1, index2 }) +    } + +    /// Returns the first offset of the pair. +    #[inline] +    pub fn index1(&self) -> u8 { +        self.index1 +    } + +    /// Returns the second offset of the pair. +    #[inline] +    pub fn index2(&self) -> u8 { +        self.index2 +    } +} + +/// This trait allows the user to customize the heuristic used to determine the +/// relative frequency of a given byte in the dataset being searched. +/// +/// The use of this trait can have a dramatic impact on performance depending +/// on the type of data being searched. The details of why are explained in the +/// docs of [`crate::memmem::Prefilter`]. To summarize, the core algorithm uses +/// a prefilter to quickly identify candidate matches that are later verified +/// more slowly. This prefilter is implemented in terms of trying to find +/// `rare` bytes at specific offsets that will occur less frequently in the +/// dataset. While the concept of a `rare` byte is similar for most datasets, +/// there are some specific datasets (like binary executables) that have +/// dramatically different byte distributions. For these datasets customizing +/// the byte frequency heuristic can have a massive impact on performance, and +/// might even need to be done at runtime. +/// +/// The default implementation of `HeuristicFrequencyRank` reads from the +/// static frequency table defined in `src/memmem/byte_frequencies.rs`. This +/// is optimal for most inputs, so if you are unsure of the impact of using a +/// custom `HeuristicFrequencyRank` you should probably just use the default. +/// +/// # Example +/// +/// ``` +/// use memchr::{ +///     arch::all::packedpair::HeuristicFrequencyRank, +///     memmem::FinderBuilder, +/// }; +/// +/// /// A byte-frequency table that is good for scanning binary executables. +/// struct Binary; +/// +/// impl HeuristicFrequencyRank for Binary { +///     fn rank(&self, byte: u8) -> u8 { +///         const TABLE: [u8; 256] = [ +///             255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, +///             89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16, +///             68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11, +///             9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8, +///             48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27, +///             4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19, +///             19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24, +///             1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5, +///             51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15, +///             0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, +///             12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0, +///             2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5, +///             46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13, +///             3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2, +///             16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5, +///             8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175, +///         ]; +///         TABLE[byte as usize] +///     } +/// } +/// // Create a new finder with the custom heuristic. 
+/// let finder = FinderBuilder::new() +///     .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd"); +/// // Find needle with custom heuristic. +/// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some()); +/// ``` +pub trait HeuristicFrequencyRank { +    /// Return the heuristic frequency rank of the given byte. A lower rank +    /// means the byte is believed to occur less frequently in the haystack. +    /// +    /// Some uses of this heuristic may treat arbitrary absolute rank values as +    /// significant. For example, an implementation detail in this crate may +    /// determine that heuristic prefilters are inappropriate if every byte in +    /// the needle has a "high" rank. +    fn rank(&self, byte: u8) -> u8; +} + +/// The default byte frequency heuristic that is good for most haystacks. +pub(crate) struct DefaultFrequencyRank; + +impl HeuristicFrequencyRank for DefaultFrequencyRank { +    fn rank(&self, byte: u8) -> u8 { +        self::default_rank::RANK[usize::from(byte)] +    } +} + +/// This permits passing any implementation of `HeuristicFrequencyRank` as a +/// borrowed version of itself. +impl<'a, R> HeuristicFrequencyRank for &'a R +where +    R: HeuristicFrequencyRank, +{ +    fn rank(&self, byte: u8) -> u8 { +        (**self).rank(byte) +    } +} + +#[cfg(test)] +mod tests { +    use super::*; + +    #[test] +    fn forward_packedpair() { +        fn find( +            haystack: &[u8], +            needle: &[u8], +            _index1: u8, +            _index2: u8, +        ) -> Option<Option<usize>> { +            // We ignore the index positions requested since it winds up making +            // this test too slow overall. +            let f = Finder::new(needle)?; +            Some(f.find_prefilter(haystack)) +        } +        crate::tests::packedpair::Runner::new().fwd(find).run() +    } +} diff --git a/vendor/memchr/src/arch/all/rabinkarp.rs b/vendor/memchr/src/arch/all/rabinkarp.rs new file mode 100644 index 0000000..e0bafba --- /dev/null +++ b/vendor/memchr/src/arch/all/rabinkarp.rs @@ -0,0 +1,390 @@ +/*! +An implementation of the [Rabin-Karp substring search algorithm][rabinkarp]. + +Rabin-Karp works by creating a hash of the needle provided and then computing +a rolling hash for each needle sized window in the haystack. When the rolling +hash matches the hash of the needle, a byte-wise comparison is done to check +if a match exists. The worst case time complexity of Rabin-Karp is `O(m * +n)` where `m ~ len(needle)` and `n ~ len(haystack)`. Its worst case space +complexity is constant. + +The main utility of Rabin-Karp is that the searcher can be constructed very +quickly with very little memory. This makes it especially useful when searching +for small needles in small haystacks, as it might finish its search before a +beefier algorithm (like Two-Way) even starts. + +[rabinkarp]: https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm +*/ + +/* +(This was the comment I wrote for this module originally when it was not +exposed. The comment still looks useful, but it's a bit in the weeds, so it's +not public itself.) + +This module implements the classical Rabin-Karp substring search algorithm, +with no extra frills. While its use would seem to break our time complexity +guarantee of O(m+n) (RK's time complexity is O(mn)), we are careful to only +ever use RK on a constant subset of haystacks. The main point here is that +RK has good latency properties for small needles/haystacks. 
It's very quick +to compute a needle hash and zip through the haystack when compared to +initializing Two-Way, for example. And this is especially useful for cases +where the haystack is just too short for vector instructions to do much good. + +The hashing function used here is the same one recommended by ESMAJ. + +Another choice instead of Rabin-Karp would be Shift-Or. But its latency +isn't quite as good since its preprocessing time is a bit more expensive +(both in practice and in theory). However, perhaps Shift-Or has a place +somewhere else for short patterns. I think the main problem is that it +requires space proportional to the alphabet and the needle. If we, for +example, supported needles up to length 16, then the total table size would be +len(alphabet)*size_of::<u16>()==512 bytes. Which isn't exactly small, and it's +probably bad to put that on the stack. So ideally, we'd throw it on the heap, +but we'd really like to write as much code without using alloc/std as possible. +But maybe it's worth the special casing. It's a TODO to benchmark. + +Wikipedia has a decent explanation, if a bit heavy on the theory: +https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm + +But ESMAJ provides something a bit more concrete: +http://www-igm.univ-mlv.fr/~lecroq/string/node5.html + +Finally, aho-corasick uses Rabin-Karp for multiple pattern match in some cases: +https://github.com/BurntSushi/aho-corasick/blob/3852632f10587db0ff72ef29e88d58bf305a0946/src/packed/rabinkarp.rs +*/ + +use crate::ext::Pointer; + +/// A forward substring searcher using the Rabin-Karp algorithm. +/// +/// Note that, as a lower level API, a `Finder` does not have access to the +/// needle it was constructed with. For this reason, executing a search +/// with a `Finder` requires passing both the needle and the haystack, +/// where the needle is exactly equivalent to the one given to the `Finder` +/// at construction time. This design was chosen so that callers can have +/// more precise control over where and how many times a needle is stored. +/// For example, in cases where Rabin-Karp is just one of several possible +/// substring search algorithms. +#[derive(Clone, Debug)] +pub struct Finder { +    /// The actual hash. +    hash: Hash, +    /// The factor needed to multiply a byte by in order to subtract it from +    /// the hash. It is defined to be 2^(n-1) (using wrapping exponentiation), +    /// where n is the length of the needle. This is how we "remove" a byte +    /// from the hash once the hash window rolls past it. +    hash_2pow: u32, +} + +impl Finder { +    /// Create a new Rabin-Karp forward searcher for the given `needle`. +    /// +    /// The needle may be empty. The empty needle matches at every byte offset. +    /// +    /// Note that callers must pass the same needle to all search calls using +    /// this `Finder`. +    #[inline] +    pub fn new(needle: &[u8]) -> Finder { +        let mut s = Finder { hash: Hash::new(), hash_2pow: 1 }; +        let first_byte = match needle.get(0) { +            None => return s, +            Some(&first_byte) => first_byte, +        }; +        s.hash.add(first_byte); +        for b in needle.iter().copied().skip(1) { +            s.hash.add(b); +            s.hash_2pow = s.hash_2pow.wrapping_shl(1); +        } +        s +    } + +    /// Return the first occurrence of the `needle` in the `haystack` +    /// given. If no such occurrence exists, then `None` is returned. 
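+    ///
+    /// A rough usage sketch (the `memchr::arch::all::rabinkarp` path here is
+    /// inferred from this vendored layout):
+    ///
+    /// ```
+    /// use memchr::arch::all::rabinkarp::Finder;
+    ///
+    /// let needle = b"abc";
+    /// let finder = Finder::new(needle);
+    /// assert_eq!(Some(4), finder.find(b"xyz abc xyz", needle));
+    /// ```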
+    /// +    /// The `needle` provided must match the needle given to this finder at +    /// construction time. +    /// +    /// The maximum value this can return is `haystack.len()`, which can only +    /// occur when the needle and haystack both have length zero. Otherwise, +    /// for non-empty haystacks, the maximum value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { +        unsafe { +            let hstart = haystack.as_ptr(); +            let hend = hstart.add(haystack.len()); +            let nstart = needle.as_ptr(); +            let nend = nstart.add(needle.len()); +            let found = self.find_raw(hstart, hend, nstart, nend)?; +            Some(found.distance(hstart)) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `<= end`. The pointer returned is only ever equivalent +    /// to `end` when both the needle and haystack are empty. (That is, the +    /// empty string matches the empty string.) +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// Note that `start` and `end` below refer to both pairs of pointers given +    /// to this routine. That is, the conditions apply to both `hstart`/`hend` +    /// and `nstart`/`nend`. +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// * It must be the case that `start <= end`. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        hstart: *const u8, +        hend: *const u8, +        nstart: *const u8, +        nend: *const u8, +    ) -> Option<*const u8> { +        let hlen = hend.distance(hstart); +        let nlen = nend.distance(nstart); +        if nlen > hlen { +            return None; +        } +        let mut cur = hstart; +        let end = hend.sub(nlen); +        let mut hash = Hash::forward(cur, cur.add(nlen)); +        loop { +            if self.hash == hash && is_equal_raw(cur, nstart, nlen) { +                return Some(cur); +            } +            if cur >= end { +                return None; +            } +            hash.roll(self, cur.read(), cur.add(nlen).read()); +            cur = cur.add(1); +        } +    } +} + +/// A reverse substring searcher using the Rabin-Karp algorithm. +#[derive(Clone, Debug)] +pub struct FinderRev(Finder); + +impl FinderRev { +    /// Create a new Rabin-Karp reverse searcher for the given `needle`. 
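+    ///
+    /// The hash is built from the back of the needle toward the front, since
+    /// the search itself scans the haystack right to left. A quick sketch
+    /// (module path assumed from this vendored layout):
+    ///
+    /// ```
+    /// use memchr::arch::all::rabinkarp::FinderRev;
+    ///
+    /// let needle = b"abc";
+    /// let finder = FinderRev::new(needle);
+    /// assert_eq!(Some(4), finder.rfind(b"xabcabc", needle));
+    /// ```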
+    #[inline] +    pub fn new(needle: &[u8]) -> FinderRev { +        let mut s = FinderRev(Finder { hash: Hash::new(), hash_2pow: 1 }); +        let last_byte = match needle.last() { +            None => return s, +            Some(&last_byte) => last_byte, +        }; +        s.0.hash.add(last_byte); +        for b in needle.iter().rev().copied().skip(1) { +            s.0.hash.add(b); +            s.0.hash_2pow = s.0.hash_2pow.wrapping_shl(1); +        } +        s +    } + +    /// Return the last occurrence of the `needle` in the `haystack` +    /// given. If no such occurrence exists, then `None` is returned. +    /// +    /// The `needle` provided must match the needle given to this finder at +    /// construction time. +    /// +    /// The maximum value this can return is `haystack.len()`, which can only +    /// occur when the needle and haystack both have length zero. Otherwise, +    /// for non-empty haystacks, the maximum value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { +        unsafe { +            let hstart = haystack.as_ptr(); +            let hend = hstart.add(haystack.len()); +            let nstart = needle.as_ptr(); +            let nend = nstart.add(needle.len()); +            let found = self.rfind_raw(hstart, hend, nstart, nend)?; +            Some(found.distance(hstart)) +        } +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `<= end`. The pointer returned is only ever equivalent +    /// to `end` when both the needle and haystack are empty. (That is, the +    /// empty string matches the empty string.) +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// Note that `start` and `end` below refer to both pairs of pointers given +    /// to this routine. That is, the conditions apply to both `hstart`/`hend` +    /// and `nstart`/`nend`. +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// * It must be the case that `start <= end`. 
+    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        hstart: *const u8, +        hend: *const u8, +        nstart: *const u8, +        nend: *const u8, +    ) -> Option<*const u8> { +        let hlen = hend.distance(hstart); +        let nlen = nend.distance(nstart); +        if nlen > hlen { +            return None; +        } +        let mut cur = hend.sub(nlen); +        let start = hstart; +        let mut hash = Hash::reverse(cur, cur.add(nlen)); +        loop { +            if self.0.hash == hash && is_equal_raw(cur, nstart, nlen) { +                return Some(cur); +            } +            if cur <= start { +                return None; +            } +            cur = cur.sub(1); +            hash.roll(&self.0, cur.add(nlen).read(), cur.read()); +        } +    } +} + +/// Whether RK is believed to be very fast for the given needle/haystack. +#[inline] +pub(crate) fn is_fast(haystack: &[u8], _needle: &[u8]) -> bool { +    haystack.len() < 16 +} + +/// A Rabin-Karp hash. This might represent the hash of a needle, or the hash +/// of a rolling window in the haystack. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +struct Hash(u32); + +impl Hash { +    /// Create a new hash that represents the empty string. +    #[inline(always)] +    fn new() -> Hash { +        Hash(0) +    } + +    /// Create a new hash from the bytes given for use in forward searches. +    /// +    /// # Safety +    /// +    /// The given pointers must be valid to read from within their range. +    #[inline(always)] +    unsafe fn forward(mut start: *const u8, end: *const u8) -> Hash { +        let mut hash = Hash::new(); +        while start < end { +            hash.add(start.read()); +            start = start.add(1); +        } +        hash +    } + +    /// Create a new hash from the bytes given for use in reverse searches. +    /// +    /// # Safety +    /// +    /// The given pointers must be valid to read from within their range. +    #[inline(always)] +    unsafe fn reverse(start: *const u8, mut end: *const u8) -> Hash { +        let mut hash = Hash::new(); +        while start < end { +            end = end.sub(1); +            hash.add(end.read()); +        } +        hash +    } + +    /// Add 'new' and remove 'old' from this hash. The given needle hash should +    /// correspond to the hash computed for the needle being searched for. +    /// +    /// This is meant to be used when the rolling window of the haystack is +    /// advanced. +    #[inline(always)] +    fn roll(&mut self, finder: &Finder, old: u8, new: u8) { +        self.del(finder, old); +        self.add(new); +    } + +    /// Add a byte to this hash. +    #[inline(always)] +    fn add(&mut self, byte: u8) { +        self.0 = self.0.wrapping_shl(1).wrapping_add(u32::from(byte)); +    } + +    /// Remove a byte from this hash. The given needle hash should correspond +    /// to the hash computed for the needle being searched for. +    #[inline(always)] +    fn del(&mut self, finder: &Finder, byte: u8) { +        let factor = finder.hash_2pow; +        self.0 = self.0.wrapping_sub(u32::from(byte).wrapping_mul(factor)); +    } +} + +/// Returns true when `x[i] == y[i]` for all `0 <= i < n`. +/// +/// We forcefully don't inline this to hint at the compiler that it is unlikely +/// to be called. This causes the inner rabinkarp loop above to be a bit +/// tighter and leads to some performance improvement. See the +/// memmem/krate/prebuilt/sliceslice-words/words benchmark. 
+///
+/// # Safety
+///
+/// Same as `crate::arch::all::is_equal_raw`.
+#[cold]
+#[inline(never)]
+unsafe fn is_equal_raw(x: *const u8, y: *const u8, n: usize) -> bool {
+    crate::arch::all::is_equal_raw(x, y, n)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    define_substring_forward_quickcheck!(|h, n| Some(
+        Finder::new(n).find(h, n)
+    ));
+    define_substring_reverse_quickcheck!(|h, n| Some(
+        FinderRev::new(n).rfind(h, n)
+    ));
+
+    #[test]
+    fn forward() {
+        crate::tests::substring::Runner::new()
+            .fwd(|h, n| Some(Finder::new(n).find(h, n)))
+            .run();
+    }
+
+    #[test]
+    fn reverse() {
+        crate::tests::substring::Runner::new()
+            .rev(|h, n| Some(FinderRev::new(n).rfind(h, n)))
+            .run();
+    }
+}
diff --git a/vendor/memchr/src/arch/all/shiftor.rs b/vendor/memchr/src/arch/all/shiftor.rs
new file mode 100644
index 0000000..b690564
--- /dev/null
+++ b/vendor/memchr/src/arch/all/shiftor.rs
@@ -0,0 +1,89 @@
+/*!
+An implementation of the [Shift-Or substring search algorithm][shiftor].
+
+[shiftor]: https://en.wikipedia.org/wiki/Bitap_algorithm
+*/
+
+use alloc::boxed::Box;
+
+/// The type of our mask.
+///
+/// While we don't expose any way to configure this in the public API, if one
+/// really needs less memory usage or support for longer needles, then it is
+/// suggested to copy the code from this module and modify it to fit your
+/// needs. The code below is written to be correct regardless of whether Mask
+/// is a u8, u16, u32, u64 or u128.
+type Mask = u16;
+
+/// A forward substring searcher using the Shift-Or algorithm.
+#[derive(Debug)]
+pub struct Finder {
+    masks: Box<[Mask; 256]>,
+    needle_len: usize,
+}
+
+impl Finder {
+    const MAX_NEEDLE_LEN: usize = (Mask::BITS - 1) as usize;
+
+    /// Create a new Shift-Or forward searcher for the given `needle`.
+    ///
+    /// The needle may be empty. The empty needle matches at every byte offset.
+    #[inline]
+    pub fn new(needle: &[u8]) -> Option<Finder> {
+        let needle_len = needle.len();
+        if needle_len > Finder::MAX_NEEDLE_LEN {
+            // A match is found in the search routine below when the bit at
+            // position `needle_len` in 'result' becomes 0. Since `Mask` is a
+            // u16, that bit only exists for needles of length at most 15
+            // (that is, `Mask::BITS - 1`). Longer needles could be supported
+            // by widening `Mask` to a u32, u64 or u128, but a u16 is all we
+            // need here.
+            return None;
+        }
+        let mut searcher = Finder { masks: Box::from([!0; 256]), needle_len };
+        for (i, &byte) in needle.iter().enumerate() {
+            searcher.masks[usize::from(byte)] &= !(1 << i);
+        }
+        Some(searcher)
+    }
+
+    /// Return the first occurrence of the needle given to `Finder::new` in
+    /// the `haystack` given. If no such occurrence exists, then `None` is
+    /// returned.
+    ///
+    /// Unlike most other substring search implementations in this crate, this
+    /// finder does not require passing the needle at search time. A match can
+    /// be determined without the needle at all since the required information
+    /// is already encoded into this finder at construction time.
+    ///
+    /// The maximum value this can return is `haystack.len()`, which can only
+    /// occur when the needle and haystack both have length zero. Otherwise,
+    /// for non-empty haystacks, the maximum value is `haystack.len() - 1`.
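+    ///
+    /// A small usage sketch (assuming this module is reachable as
+    /// `memchr::arch::all::shiftor`):
+    ///
+    /// ```
+    /// use memchr::arch::all::shiftor::Finder;
+    ///
+    /// let finder = Finder::new(b"abc").unwrap();
+    /// assert_eq!(Some(2), finder.find(b"xxabcxx"));
+    /// assert_eq!(None, finder.find(b"xxabxcx"));
+    /// ```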
+    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        if self.needle_len == 0 { +            return Some(0); +        } +        let mut result = !1; +        for (i, &byte) in haystack.iter().enumerate() { +            result |= self.masks[usize::from(byte)]; +            result <<= 1; +            if result & (1 << self.needle_len) == 0 { +                return Some(i + 1 - self.needle_len); +            } +        } +        None +    } +} + +#[cfg(test)] +mod tests { +    use super::*; + +    define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n)?.find(h))); + +    #[test] +    fn forward() { +        crate::tests::substring::Runner::new() +            .fwd(|h, n| Some(Finder::new(n)?.find(h))) +            .run(); +    } +} diff --git a/vendor/memchr/src/arch/all/twoway.rs b/vendor/memchr/src/arch/all/twoway.rs new file mode 100644 index 0000000..0df3b4a --- /dev/null +++ b/vendor/memchr/src/arch/all/twoway.rs @@ -0,0 +1,877 @@ +/*! +An implementation of the [Two-Way substring search algorithm][two-way]. + +[`Finder`] can be built for forward searches, while [`FinderRev`] can be built +for reverse searches. + +Two-Way makes for a nice general purpose substring search algorithm because of +its time and space complexity properties. It also performs well in practice. +Namely, with `m = len(needle)` and `n = len(haystack)`, Two-Way takes `O(m)` +time to create a finder, `O(1)` space and `O(n)` search time. In other words, +the preprocessing step is quick, doesn't require any heap memory and the worst +case search time is guaranteed to be linear in the haystack regardless of the +size of the needle. + +While vector algorithms will usually beat Two-Way handedly, vector algorithms +also usually have pathological or edge cases that are better handled by Two-Way. +Moreover, not all targets support vector algorithms or implementations for them +simply may not exist yet. + +Two-Way can be found in the `memmem` implementations in at least [GNU libc] and +[musl]. + +[two-way]: https://en.wikipedia.org/wiki/Two-way_string-matching_algorithm +[GNU libc]: https://www.gnu.org/software/libc/ +[musl]: https://www.musl-libc.org/ +*/ + +use core::cmp; + +use crate::{ +    arch::all::{is_prefix, is_suffix}, +    memmem::Pre, +}; + +/// A forward substring searcher that uses the Two-Way algorithm. +#[derive(Clone, Copy, Debug)] +pub struct Finder(TwoWay); + +/// A reverse substring searcher that uses the Two-Way algorithm. +#[derive(Clone, Copy, Debug)] +pub struct FinderRev(TwoWay); + +/// An implementation of the TwoWay substring search algorithm. +/// +/// This searcher supports forward and reverse search, although not +/// simultaneously. It runs in `O(n + m)` time and `O(1)` space, where +/// `n ~ len(needle)` and `m ~ len(haystack)`. +/// +/// The implementation here roughly matches that which was developed by +/// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The +/// changes in this implementation are 1) the use of zero-based indices, 2) a +/// heuristic skip table based on the last byte (borrowed from Rust's standard +/// library) and 3) the addition of heuristics for a fast skip loop. For (3), +/// callers can pass any kind of prefilter they want, but usually it's one +/// based on a heuristic that uses an approximate background frequency of bytes +/// to choose rare bytes to quickly look for candidate match positions. Note +/// though that currently, this prefilter functionality is not exposed directly +/// in the public API. 
(File an issue if you want it and provide a use case +/// please.) +/// +/// The heuristic for fast skipping is automatically shut off if it's +/// detected to be ineffective at search time. Generally, this only occurs in +/// pathological cases. But this is generally necessary in order to preserve +/// a `O(n + m)` time bound. +/// +/// The code below is fairly complex and not obviously correct at all. It's +/// likely necessary to read the Two-Way paper cited above in order to fully +/// grok this code. The essence of it is: +/// +/// 1. Do something to detect a "critical" position in the needle. +/// 2. For the current position in the haystack, look if `needle[critical..]` +/// matches at that position. +/// 3. If so, look if `needle[..critical]` matches. +/// 4. If a mismatch occurs, shift the search by some amount based on the +/// critical position and a pre-computed shift. +/// +/// This type is wrapped in the forward and reverse finders that expose +/// consistent forward or reverse APIs. +#[derive(Clone, Copy, Debug)] +struct TwoWay { +    /// A small bitset used as a quick prefilter (in addition to any prefilter +    /// given by the caller). Namely, a bit `i` is set if and only if `b%64==i` +    /// for any `b == needle[i]`. +    /// +    /// When used as a prefilter, if the last byte at the current candidate +    /// position is NOT in this set, then we can skip that entire candidate +    /// position (the length of the needle). This is essentially the shift +    /// trick found in Boyer-Moore, but only applied to bytes that don't appear +    /// in the needle. +    /// +    /// N.B. This trick was inspired by something similar in std's +    /// implementation of Two-Way. +    byteset: ApproximateByteSet, +    /// A critical position in needle. Specifically, this position corresponds +    /// to beginning of either the minimal or maximal suffix in needle. (N.B. +    /// See SuffixType below for why "minimal" isn't quite the correct word +    /// here.) +    /// +    /// This is the position at which every search begins. Namely, search +    /// starts by scanning text to the right of this position, and only if +    /// there's a match does the text to the left of this position get scanned. +    critical_pos: usize, +    /// The amount we shift by in the Two-Way search algorithm. This +    /// corresponds to the "small period" and "large period" cases. +    shift: Shift, +} + +impl Finder { +    /// Create a searcher that finds occurrences of the given `needle`. +    /// +    /// An empty `needle` results in a match at every position in a haystack, +    /// including at `haystack.len()`. +    #[inline] +    pub fn new(needle: &[u8]) -> Finder { +        let byteset = ApproximateByteSet::new(needle); +        let min_suffix = Suffix::forward(needle, SuffixKind::Minimal); +        let max_suffix = Suffix::forward(needle, SuffixKind::Maximal); +        let (period_lower_bound, critical_pos) = +            if min_suffix.pos > max_suffix.pos { +                (min_suffix.period, min_suffix.pos) +            } else { +                (max_suffix.period, max_suffix.pos) +            }; +        let shift = Shift::forward(needle, period_lower_bound, critical_pos); +        Finder(TwoWay { byteset, critical_pos, shift }) +    } + +    /// Returns the first occurrence of `needle` in the given `haystack`, or +    /// `None` if no such occurrence could be found. +    /// +    /// The `needle` given must be the same as the `needle` provided to +    /// [`Finder::new`]. 
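+    ///
+    /// A short usage sketch (assuming this module is reachable as
+    /// `memchr::arch::all::twoway`):
+    ///
+    /// ```
+    /// use memchr::arch::all::twoway::Finder;
+    ///
+    /// let needle = b"abcabc";
+    /// let finder = Finder::new(needle);
+    /// assert_eq!(Some(3), finder.find(b"xyzabcabcxyz", needle));
+    /// ```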
+    /// +    /// An empty `needle` results in a match at every position in a haystack, +    /// including at `haystack.len()`. +    #[inline] +    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { +        self.find_with_prefilter(None, haystack, needle) +    } + +    /// This is like [`Finder::find`], but it accepts a prefilter for +    /// accelerating searches. +    /// +    /// Currently this is not exposed in the public API because, at the time +    /// of writing, I didn't want to spend time thinking about how to expose +    /// the prefilter infrastructure (if at all). If you have a compelling use +    /// case for exposing this routine, please create an issue. Do *not* open +    /// a PR that just exposes `Pre` and friends. Exporting this routine will +    /// require API design. +    #[inline(always)] +    pub(crate) fn find_with_prefilter( +        &self, +        pre: Option<Pre<'_>>, +        haystack: &[u8], +        needle: &[u8], +    ) -> Option<usize> { +        match self.0.shift { +            Shift::Small { period } => { +                self.find_small_imp(pre, haystack, needle, period) +            } +            Shift::Large { shift } => { +                self.find_large_imp(pre, haystack, needle, shift) +            } +        } +    } + +    // Each of the two search implementations below can be accelerated by a +    // prefilter, but it is not always enabled. To avoid its overhead when +    // its disabled, we explicitly inline each search implementation based on +    // whether a prefilter will be used or not. The decision on which to use +    // is made in the parent meta searcher. + +    #[inline(always)] +    fn find_small_imp( +        &self, +        mut pre: Option<Pre<'_>>, +        haystack: &[u8], +        needle: &[u8], +        period: usize, +    ) -> Option<usize> { +        let mut pos = 0; +        let mut shift = 0; +        let last_byte_pos = match needle.len().checked_sub(1) { +            None => return Some(pos), +            Some(last_byte) => last_byte, +        }; +        while pos + needle.len() <= haystack.len() { +            let mut i = cmp::max(self.0.critical_pos, shift); +            if let Some(pre) = pre.as_mut() { +                if pre.is_effective() { +                    pos += pre.find(&haystack[pos..])?; +                    shift = 0; +                    i = self.0.critical_pos; +                    if pos + needle.len() > haystack.len() { +                        return None; +                    } +                } +            } +            if !self.0.byteset.contains(haystack[pos + last_byte_pos]) { +                pos += needle.len(); +                shift = 0; +                continue; +            } +            while i < needle.len() && needle[i] == haystack[pos + i] { +                i += 1; +            } +            if i < needle.len() { +                pos += i - self.0.critical_pos + 1; +                shift = 0; +            } else { +                let mut j = self.0.critical_pos; +                while j > shift && needle[j] == haystack[pos + j] { +                    j -= 1; +                } +                if j <= shift && needle[shift] == haystack[pos + shift] { +                    return Some(pos); +                } +                pos += period; +                shift = needle.len() - period; +            } +        } +        None +    } + +    #[inline(always)] +    fn find_large_imp( +        &self, +        mut pre: Option<Pre<'_>>, +        haystack: &[u8], 
+        needle: &[u8], +        shift: usize, +    ) -> Option<usize> { +        let mut pos = 0; +        let last_byte_pos = match needle.len().checked_sub(1) { +            None => return Some(pos), +            Some(last_byte) => last_byte, +        }; +        'outer: while pos + needle.len() <= haystack.len() { +            if let Some(pre) = pre.as_mut() { +                if pre.is_effective() { +                    pos += pre.find(&haystack[pos..])?; +                    if pos + needle.len() > haystack.len() { +                        return None; +                    } +                } +            } + +            if !self.0.byteset.contains(haystack[pos + last_byte_pos]) { +                pos += needle.len(); +                continue; +            } +            let mut i = self.0.critical_pos; +            while i < needle.len() && needle[i] == haystack[pos + i] { +                i += 1; +            } +            if i < needle.len() { +                pos += i - self.0.critical_pos + 1; +            } else { +                for j in (0..self.0.critical_pos).rev() { +                    if needle[j] != haystack[pos + j] { +                        pos += shift; +                        continue 'outer; +                    } +                } +                return Some(pos); +            } +        } +        None +    } +} + +impl FinderRev { +    /// Create a searcher that finds occurrences of the given `needle`. +    /// +    /// An empty `needle` results in a match at every position in a haystack, +    /// including at `haystack.len()`. +    #[inline] +    pub fn new(needle: &[u8]) -> FinderRev { +        let byteset = ApproximateByteSet::new(needle); +        let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal); +        let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal); +        let (period_lower_bound, critical_pos) = +            if min_suffix.pos < max_suffix.pos { +                (min_suffix.period, min_suffix.pos) +            } else { +                (max_suffix.period, max_suffix.pos) +            }; +        let shift = Shift::reverse(needle, period_lower_bound, critical_pos); +        FinderRev(TwoWay { byteset, critical_pos, shift }) +    } + +    /// Returns the last occurrence of `needle` in the given `haystack`, or +    /// `None` if no such occurrence could be found. +    /// +    /// The `needle` given must be the same as the `needle` provided to +    /// [`FinderRev::new`]. +    /// +    /// An empty `needle` results in a match at every position in a haystack, +    /// including at `haystack.len()`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { +        // For the reverse case, we don't use a prefilter. It's plausible that +        // perhaps we should, but it's a lot of additional code to do it, and +        // it's not clear that it's actually worth it. If you have a really +        // compelling use case for this, please file an issue. 
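+        //
+        // As in the forward case, the shift computed at construction time
+        // selects between the "small period" and "large period" variants of
+        // Two-Way, and each variant gets its own search loop below.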
+        match self.0.shift { +            Shift::Small { period } => { +                self.rfind_small_imp(haystack, needle, period) +            } +            Shift::Large { shift } => { +                self.rfind_large_imp(haystack, needle, shift) +            } +        } +    } + +    #[inline(always)] +    fn rfind_small_imp( +        &self, +        haystack: &[u8], +        needle: &[u8], +        period: usize, +    ) -> Option<usize> { +        let nlen = needle.len(); +        let mut pos = haystack.len(); +        let mut shift = nlen; +        let first_byte = match needle.get(0) { +            None => return Some(pos), +            Some(&first_byte) => first_byte, +        }; +        while pos >= nlen { +            if !self.0.byteset.contains(haystack[pos - nlen]) { +                pos -= nlen; +                shift = nlen; +                continue; +            } +            let mut i = cmp::min(self.0.critical_pos, shift); +            while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { +                i -= 1; +            } +            if i > 0 || first_byte != haystack[pos - nlen] { +                pos -= self.0.critical_pos - i + 1; +                shift = nlen; +            } else { +                let mut j = self.0.critical_pos; +                while j < shift && needle[j] == haystack[pos - nlen + j] { +                    j += 1; +                } +                if j >= shift { +                    return Some(pos - nlen); +                } +                pos -= period; +                shift = period; +            } +        } +        None +    } + +    #[inline(always)] +    fn rfind_large_imp( +        &self, +        haystack: &[u8], +        needle: &[u8], +        shift: usize, +    ) -> Option<usize> { +        let nlen = needle.len(); +        let mut pos = haystack.len(); +        let first_byte = match needle.get(0) { +            None => return Some(pos), +            Some(&first_byte) => first_byte, +        }; +        while pos >= nlen { +            if !self.0.byteset.contains(haystack[pos - nlen]) { +                pos -= nlen; +                continue; +            } +            let mut i = self.0.critical_pos; +            while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { +                i -= 1; +            } +            if i > 0 || first_byte != haystack[pos - nlen] { +                pos -= self.0.critical_pos - i + 1; +            } else { +                let mut j = self.0.critical_pos; +                while j < nlen && needle[j] == haystack[pos - nlen + j] { +                    j += 1; +                } +                if j == nlen { +                    return Some(pos - nlen); +                } +                pos -= shift; +            } +        } +        None +    } +} + +/// A representation of the amount we're allowed to shift by during Two-Way +/// search. +/// +/// When computing a critical factorization of the needle, we find the position +/// of the critical factorization by finding the needle's maximal (or minimal) +/// suffix, along with the period of that suffix. It turns out that the period +/// of that suffix is a lower bound on the period of the needle itself. +/// +/// This lower bound is equivalent to the actual period of the needle in +/// some cases. To describe that case, we denote the needle as `x` where +/// `x = uv` and `v` is the lexicographic maximal suffix of `v`. The lower +/// bound given here is always the period of `v`, which is `<= period(x)`. 
The +/// case where `period(v) == period(x)` occurs when `len(u) < (len(x) / 2)` and +/// where `u` is a suffix of `v[0..period(v)]`. +/// +/// This case is important because the search algorithm for when the +/// periods are equivalent is slightly different than the search algorithm +/// for when the periods are not equivalent. In particular, when they aren't +/// equivalent, we know that the period of the needle is no less than half its +/// length. In this case, we shift by an amount less than or equal to the +/// period of the needle (determined by the maximum length of the components +/// of the critical factorization of `x`, i.e., `max(len(u), len(v))`).. +/// +/// The above two cases are represented by the variants below. Each entails +/// a different instantiation of the Two-Way search algorithm. +/// +/// N.B. If we could find a way to compute the exact period in all cases, +/// then we could collapse this case analysis and simplify the algorithm. The +/// Two-Way paper suggests this is possible, but more reading is required to +/// grok why the authors didn't pursue that path. +#[derive(Clone, Copy, Debug)] +enum Shift { +    Small { period: usize }, +    Large { shift: usize }, +} + +impl Shift { +    /// Compute the shift for a given needle in the forward direction. +    /// +    /// This requires a lower bound on the period and a critical position. +    /// These can be computed by extracting both the minimal and maximal +    /// lexicographic suffixes, and choosing the right-most starting position. +    /// The lower bound on the period is then the period of the chosen suffix. +    fn forward( +        needle: &[u8], +        period_lower_bound: usize, +        critical_pos: usize, +    ) -> Shift { +        let large = cmp::max(critical_pos, needle.len() - critical_pos); +        if critical_pos * 2 >= needle.len() { +            return Shift::Large { shift: large }; +        } + +        let (u, v) = needle.split_at(critical_pos); +        if !is_suffix(&v[..period_lower_bound], u) { +            return Shift::Large { shift: large }; +        } +        Shift::Small { period: period_lower_bound } +    } + +    /// Compute the shift for a given needle in the reverse direction. +    /// +    /// This requires a lower bound on the period and a critical position. +    /// These can be computed by extracting both the minimal and maximal +    /// lexicographic suffixes, and choosing the left-most starting position. +    /// The lower bound on the period is then the period of the chosen suffix. +    fn reverse( +        needle: &[u8], +        period_lower_bound: usize, +        critical_pos: usize, +    ) -> Shift { +        let large = cmp::max(critical_pos, needle.len() - critical_pos); +        if (needle.len() - critical_pos) * 2 >= needle.len() { +            return Shift::Large { shift: large }; +        } + +        let (v, u) = needle.split_at(critical_pos); +        if !is_prefix(&v[v.len() - period_lower_bound..], u) { +            return Shift::Large { shift: large }; +        } +        Shift::Small { period: period_lower_bound } +    } +} + +/// A suffix extracted from a needle along with its period. +#[derive(Debug)] +struct Suffix { +    /// The starting position of this suffix. +    /// +    /// If this is a forward suffix, then `&bytes[pos..]` can be used. If this +    /// is a reverse suffix, then `&bytes[..pos]` can be used. 
That is, for +    /// forward suffixes, this is an inclusive starting position, where as for +    /// reverse suffixes, this is an exclusive ending position. +    pos: usize, +    /// The period of this suffix. +    /// +    /// Note that this is NOT necessarily the period of the string from which +    /// this suffix comes from. (It is always less than or equal to the period +    /// of the original string.) +    period: usize, +} + +impl Suffix { +    fn forward(needle: &[u8], kind: SuffixKind) -> Suffix { +        // suffix represents our maximal (or minimal) suffix, along with +        // its period. +        let mut suffix = Suffix { pos: 0, period: 1 }; +        // The start of a suffix in `needle` that we are considering as a +        // more maximal (or minimal) suffix than what's in `suffix`. +        let mut candidate_start = 1; +        // The current offset of our suffixes that we're comparing. +        // +        // When the characters at this offset are the same, then we mush on +        // to the next position since no decision is possible. When the +        // candidate's character is greater (or lesser) than the corresponding +        // character than our current maximal (or minimal) suffix, then the +        // current suffix is changed over to the candidate and we restart our +        // search. Otherwise, the candidate suffix is no good and we restart +        // our search on the next candidate. +        // +        // The three cases above correspond to the three cases in the loop +        // below. +        let mut offset = 0; + +        while candidate_start + offset < needle.len() { +            let current = needle[suffix.pos + offset]; +            let candidate = needle[candidate_start + offset]; +            match kind.cmp(current, candidate) { +                SuffixOrdering::Accept => { +                    suffix = Suffix { pos: candidate_start, period: 1 }; +                    candidate_start += 1; +                    offset = 0; +                } +                SuffixOrdering::Skip => { +                    candidate_start += offset + 1; +                    offset = 0; +                    suffix.period = candidate_start - suffix.pos; +                } +                SuffixOrdering::Push => { +                    if offset + 1 == suffix.period { +                        candidate_start += suffix.period; +                        offset = 0; +                    } else { +                        offset += 1; +                    } +                } +            } +        } +        suffix +    } + +    fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix { +        // See the comments in `forward` for how this works. 
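+        //
+        // As a concrete example (mirroring the tests below): for the needle
+        // "abcabc", the reverse maximal suffix is "abcabc" itself with
+        // period 3, while the reverse minimal suffix is "abca" with period 3.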
+        let mut suffix = Suffix { pos: needle.len(), period: 1 }; +        if needle.len() == 1 { +            return suffix; +        } +        let mut candidate_start = match needle.len().checked_sub(1) { +            None => return suffix, +            Some(candidate_start) => candidate_start, +        }; +        let mut offset = 0; + +        while offset < candidate_start { +            let current = needle[suffix.pos - offset - 1]; +            let candidate = needle[candidate_start - offset - 1]; +            match kind.cmp(current, candidate) { +                SuffixOrdering::Accept => { +                    suffix = Suffix { pos: candidate_start, period: 1 }; +                    candidate_start -= 1; +                    offset = 0; +                } +                SuffixOrdering::Skip => { +                    candidate_start -= offset + 1; +                    offset = 0; +                    suffix.period = suffix.pos - candidate_start; +                } +                SuffixOrdering::Push => { +                    if offset + 1 == suffix.period { +                        candidate_start -= suffix.period; +                        offset = 0; +                    } else { +                        offset += 1; +                    } +                } +            } +        } +        suffix +    } +} + +/// The kind of suffix to extract. +#[derive(Clone, Copy, Debug)] +enum SuffixKind { +    /// Extract the smallest lexicographic suffix from a string. +    /// +    /// Technically, this doesn't actually pick the smallest lexicographic +    /// suffix. e.g., Given the choice between `a` and `aa`, this will choose +    /// the latter over the former, even though `a < aa`. The reasoning for +    /// this isn't clear from the paper, but it still smells like a minimal +    /// suffix. +    Minimal, +    /// Extract the largest lexicographic suffix from a string. +    /// +    /// Unlike `Minimal`, this really does pick the maximum suffix. e.g., Given +    /// the choice between `z` and `zz`, this will choose the latter over the +    /// former. +    Maximal, +} + +/// The result of comparing corresponding bytes between two suffixes. +#[derive(Clone, Copy, Debug)] +enum SuffixOrdering { +    /// This occurs when the given candidate byte indicates that the candidate +    /// suffix is better than the current maximal (or minimal) suffix. That is, +    /// the current candidate suffix should supplant the current maximal (or +    /// minimal) suffix. +    Accept, +    /// This occurs when the given candidate byte excludes the candidate suffix +    /// from being better than the current maximal (or minimal) suffix. That +    /// is, the current candidate suffix should be dropped and the next one +    /// should be considered. +    Skip, +    /// This occurs when no decision to accept or skip the candidate suffix +    /// can be made, e.g., when corresponding bytes are equivalent. In this +    /// case, the next corresponding bytes should be compared. +    Push, +} + +impl SuffixKind { +    /// Returns true if and only if the given candidate byte indicates that +    /// it should replace the current suffix as the maximal (or minimal) +    /// suffix. 
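+    ///
+    /// For example, under `SuffixKind::Maximal`, `cmp(b'a', b'b')` yields
+    /// `Accept` (the candidate byte is bigger), `cmp(b'b', b'a')` yields
+    /// `Skip` and `cmp(b'a', b'a')` yields `Push`.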
+    fn cmp(self, current: u8, candidate: u8) -> SuffixOrdering { +        use self::SuffixOrdering::*; + +        match self { +            SuffixKind::Minimal if candidate < current => Accept, +            SuffixKind::Minimal if candidate > current => Skip, +            SuffixKind::Minimal => Push, +            SuffixKind::Maximal if candidate > current => Accept, +            SuffixKind::Maximal if candidate < current => Skip, +            SuffixKind::Maximal => Push, +        } +    } +} + +/// A bitset used to track whether a particular byte exists in a needle or not. +/// +/// Namely, bit 'i' is set if and only if byte%64==i for any byte in the +/// needle. If a particular byte in the haystack is NOT in this set, then one +/// can conclude that it is also not in the needle, and thus, one can advance +/// in the haystack by needle.len() bytes. +#[derive(Clone, Copy, Debug)] +struct ApproximateByteSet(u64); + +impl ApproximateByteSet { +    /// Create a new set from the given needle. +    fn new(needle: &[u8]) -> ApproximateByteSet { +        let mut bits = 0; +        for &b in needle { +            bits |= 1 << (b % 64); +        } +        ApproximateByteSet(bits) +    } + +    /// Return true if and only if the given byte might be in this set. This +    /// may return a false positive, but will never return a false negative. +    #[inline(always)] +    fn contains(&self, byte: u8) -> bool { +        self.0 & (1 << (byte % 64)) != 0 +    } +} + +#[cfg(test)] +mod tests { +    use alloc::vec::Vec; + +    use super::*; + +    /// Convenience wrapper for computing the suffix as a byte string. +    fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { +        let s = Suffix::forward(needle, kind); +        (&needle[s.pos..], s.period) +    } + +    /// Convenience wrapper for computing the reverse suffix as a byte string. +    fn get_suffix_reverse(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { +        let s = Suffix::reverse(needle, kind); +        (&needle[..s.pos], s.period) +    } + +    /// Return all of the non-empty suffixes in the given byte string. +    fn suffixes(bytes: &[u8]) -> Vec<&[u8]> { +        (0..bytes.len()).map(|i| &bytes[i..]).collect() +    } + +    /// Return the lexicographically maximal suffix of the given byte string. +    fn naive_maximal_suffix_forward(needle: &[u8]) -> &[u8] { +        let mut sufs = suffixes(needle); +        sufs.sort(); +        sufs.pop().unwrap() +    } + +    /// Return the lexicographically maximal suffix of the reverse of the given +    /// byte string. +    fn naive_maximal_suffix_reverse(needle: &[u8]) -> Vec<u8> { +        let mut reversed = needle.to_vec(); +        reversed.reverse(); +        let mut got = naive_maximal_suffix_forward(&reversed).to_vec(); +        got.reverse(); +        got +    } + +    define_substring_forward_quickcheck!(|h, n| Some( +        Finder::new(n).find(h, n) +    )); +    define_substring_reverse_quickcheck!(|h, n| Some( +        FinderRev::new(n).rfind(h, n) +    )); + +    #[test] +    fn forward() { +        crate::tests::substring::Runner::new() +            .fwd(|h, n| Some(Finder::new(n).find(h, n))) +            .run(); +    } + +    #[test] +    fn reverse() { +        crate::tests::substring::Runner::new() +            .rev(|h, n| Some(FinderRev::new(n).rfind(h, n))) +            .run(); +    } + +    #[test] +    fn suffix_forward() { +        macro_rules! 
assert_suffix_min { +            ($given:expr, $expected:expr, $period:expr) => { +                let (got_suffix, got_period) = +                    get_suffix_forward($given.as_bytes(), SuffixKind::Minimal); +                let got_suffix = core::str::from_utf8(got_suffix).unwrap(); +                assert_eq!(($expected, $period), (got_suffix, got_period)); +            }; +        } + +        macro_rules! assert_suffix_max { +            ($given:expr, $expected:expr, $period:expr) => { +                let (got_suffix, got_period) = +                    get_suffix_forward($given.as_bytes(), SuffixKind::Maximal); +                let got_suffix = core::str::from_utf8(got_suffix).unwrap(); +                assert_eq!(($expected, $period), (got_suffix, got_period)); +            }; +        } + +        assert_suffix_min!("a", "a", 1); +        assert_suffix_max!("a", "a", 1); + +        assert_suffix_min!("ab", "ab", 2); +        assert_suffix_max!("ab", "b", 1); + +        assert_suffix_min!("ba", "a", 1); +        assert_suffix_max!("ba", "ba", 2); + +        assert_suffix_min!("abc", "abc", 3); +        assert_suffix_max!("abc", "c", 1); + +        assert_suffix_min!("acb", "acb", 3); +        assert_suffix_max!("acb", "cb", 2); + +        assert_suffix_min!("cba", "a", 1); +        assert_suffix_max!("cba", "cba", 3); + +        assert_suffix_min!("abcabc", "abcabc", 3); +        assert_suffix_max!("abcabc", "cabc", 3); + +        assert_suffix_min!("abcabcabc", "abcabcabc", 3); +        assert_suffix_max!("abcabcabc", "cabcabc", 3); + +        assert_suffix_min!("abczz", "abczz", 5); +        assert_suffix_max!("abczz", "zz", 1); + +        assert_suffix_min!("zzabc", "abc", 3); +        assert_suffix_max!("zzabc", "zzabc", 5); + +        assert_suffix_min!("aaa", "aaa", 1); +        assert_suffix_max!("aaa", "aaa", 1); + +        assert_suffix_min!("foobar", "ar", 2); +        assert_suffix_max!("foobar", "r", 1); +    } + +    #[test] +    fn suffix_reverse() { +        macro_rules! assert_suffix_min { +            ($given:expr, $expected:expr, $period:expr) => { +                let (got_suffix, got_period) = +                    get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal); +                let got_suffix = core::str::from_utf8(got_suffix).unwrap(); +                assert_eq!(($expected, $period), (got_suffix, got_period)); +            }; +        } + +        macro_rules! 
assert_suffix_max { +            ($given:expr, $expected:expr, $period:expr) => { +                let (got_suffix, got_period) = +                    get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal); +                let got_suffix = core::str::from_utf8(got_suffix).unwrap(); +                assert_eq!(($expected, $period), (got_suffix, got_period)); +            }; +        } + +        assert_suffix_min!("a", "a", 1); +        assert_suffix_max!("a", "a", 1); + +        assert_suffix_min!("ab", "a", 1); +        assert_suffix_max!("ab", "ab", 2); + +        assert_suffix_min!("ba", "ba", 2); +        assert_suffix_max!("ba", "b", 1); + +        assert_suffix_min!("abc", "a", 1); +        assert_suffix_max!("abc", "abc", 3); + +        assert_suffix_min!("acb", "a", 1); +        assert_suffix_max!("acb", "ac", 2); + +        assert_suffix_min!("cba", "cba", 3); +        assert_suffix_max!("cba", "c", 1); + +        assert_suffix_min!("abcabc", "abca", 3); +        assert_suffix_max!("abcabc", "abcabc", 3); + +        assert_suffix_min!("abcabcabc", "abcabca", 3); +        assert_suffix_max!("abcabcabc", "abcabcabc", 3); + +        assert_suffix_min!("abczz", "a", 1); +        assert_suffix_max!("abczz", "abczz", 5); + +        assert_suffix_min!("zzabc", "zza", 3); +        assert_suffix_max!("zzabc", "zz", 1); + +        assert_suffix_min!("aaa", "aaa", 1); +        assert_suffix_max!("aaa", "aaa", 1); +    } + +    #[cfg(not(miri))] +    quickcheck::quickcheck! { +        fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool { +            if bytes.is_empty() { +                return true; +            } + +            let (got, _) = get_suffix_forward(&bytes, SuffixKind::Maximal); +            let expected = naive_maximal_suffix_forward(&bytes); +            got == expected +        } + +        fn qc_suffix_reverse_maximal(bytes: Vec<u8>) -> bool { +            if bytes.is_empty() { +                return true; +            } + +            let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal); +            let expected = naive_maximal_suffix_reverse(&bytes); +            expected == got +        } +    } + +    // This is a regression test caught by quickcheck that exercised a bug in +    // the reverse small period handling. The bug was that we were using 'if j +    // == shift' to determine if a match occurred, but the correct guard is 'if +    // j >= shift', which matches the corresponding guard in the forward impl. +    #[test] +    fn regression_rev_small_period() { +        let rfind = |h, n| FinderRev::new(n).rfind(h, n); +        let haystack = "ababaz"; +        let needle = "abab"; +        assert_eq!(Some(0), rfind(haystack.as_bytes(), needle.as_bytes())); +    } +} diff --git a/vendor/memchr/src/arch/generic/memchr.rs b/vendor/memchr/src/arch/generic/memchr.rs new file mode 100644 index 0000000..580b3cc --- /dev/null +++ b/vendor/memchr/src/arch/generic/memchr.rs @@ -0,0 +1,1214 @@ +/*! +Generic crate-internal routines for the `memchr` family of functions. +*/ + +// What follows is a vector algorithm generic over the specific vector +// type to detect the position of one, two or three needles in a haystack. +// From what I know, this is a "classic" algorithm, although I don't +// believe it has been published in any peer reviewed journal. I believe +// it can be found in places like glibc and Go's standard library. 
It
+// appears to be well known and is elaborated on in more detail here:
+// https://gms.tf/stdfind-and-memchr-optimizations.html
+//
+// While the routine below is fairly long and perhaps intimidating, the basic
+// idea is actually very simple and can be expressed straightforwardly in
+// pseudo code. The pseudo code below is written for 128 bit vectors, but the
+// actual code below works for anything that implements the Vector trait.
+//
+//     needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1
+//     // Note: shift amount is in bytes
+//
+//     while i <= haystack.len() - 16:
+//       // A 16 byte vector. Each byte in chunk corresponds to a byte in
+//       // the haystack.
+//       chunk = haystack[i:i+16]
+//       // Compare bytes in needle with bytes in chunk. The result is a 16
+//       // byte chunk where each byte is 0xFF if the corresponding bytes
+//       // in needle and chunk were equal, or 0x00 otherwise.
+//       eqs = cmpeq(needle, chunk)
+//       // Return a 32 bit integer where the most significant 16 bits
+//       // are always 0 and the lower 16 bits correspond to whether the
+//       // most significant bit in the corresponding byte in `eqs` is set.
+//       // In other words, `mask as u16` has bit i set if and only if
+//       // needle[i] == chunk[i].
+//       mask = movemask(eqs)
+//
+//       // Mask is 0 if there is no match, and non-zero otherwise.
+//       if mask != 0:
+//         // trailing_zeros tells us the position of the least significant
+//         // bit that is set.
+//         return i + trailing_zeros(mask)
+//
+//     // haystack length may not be a multiple of 16, so search the rest.
+//     while i < haystack.len():
+//       if haystack[i] == n1:
+//         return i
+//
+//     // No match found.
+//     return NULL
+//
+// In fact, we could loosely translate the above code to Rust line-for-line
+// and it would be a pretty fast algorithm. But we pull out all the stops
+// to go as fast as possible:
+//
+// 1. We use aligned loads. That is, we do some finagling to make sure our
+//    primary loop not only proceeds in increments of 16 bytes, but that
+//    the address of haystack's pointer that we dereference is aligned to
+//    16 bytes. 16 is a magic number here because it is the size of an SSE2
+//    128-bit vector. (For the AVX2 algorithm, 32 is the magic number.)
+//    Therefore, to get aligned loads, our pointer's address must be evenly
+//    divisible by 16.
+// 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's
+//    kind of like loop unrolling, but we combine the equality comparisons
+//    using a vector OR such that we only need to extract a single mask to
+//    determine whether a match exists or not. If so, then we do some
+//    book-keeping to determine the precise location but otherwise mush on.
+// 3. We use our "chunk" comparison routine in as many places as possible,
+//    even if it means using unaligned loads. In particular, if haystack
+//    starts with an unaligned address, then we do an unaligned load to
+//    search the first 16 bytes. We then start our primary loop at the
+//    smallest subsequent aligned address, which will actually overlap with
+//    previously searched bytes. But we're OK with that. We do a similar
+//    dance at the end of our primary loop. Finally, to avoid a
+//    byte-at-a-time loop at the end, we do a final 16 byte unaligned load
+//    that may overlap with a previous load.
This is OK because it converts
+//    a loop into a small number of very fast vector instructions. The overlap
+//    is OK because we know the place where the overlap occurs does not
+//    contain a match.
+//
+// And that's pretty much all there is to it. Note that since the below is
+// generic and since it's meant to be inlined into routines with a
+// `#[target_feature(enable = "...")]` annotation, we must mark all routines as
+// both unsafe and `#[inline(always)]`.
+//
+// The fact that the code below is generic does somewhat inhibit us. For
+// example, I've noticed that introducing an uninlineable `#[cold]` function to
+// handle the match case in the loop generates tighter assembly, but there is
+// no way to do this in the generic code below because the generic code doesn't
+// know what `target_feature` annotation to apply to the uninlineable function.
+// We could make such functions part of the `Vector` trait, but we instead live
+// with the slightly sub-optimal codegen for now since it doesn't seem to have
+// a noticeable perf difference.
+
+use crate::{
+    ext::Pointer,
+    vector::{MoveMask, Vector},
+};
+
+/// Finds all occurrences of a single byte in a haystack.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct One<V> {
+    s1: u8,
+    v1: V,
+}
+
+impl<V: Vector> One<V> {
+    /// The number of bytes we examine per each iteration of our search loop.
+    const LOOP_SIZE: usize = 4 * V::BYTES;
+
+    /// Create a new searcher that finds occurrences of the byte given.
+    #[inline(always)]
+    pub(crate) unsafe fn new(needle: u8) -> One<V> {
+        One { s1: needle, v1: V::splat(needle) }
+    }
+
+    /// Returns the needle given to `One::new`.
+    #[inline(always)]
+    pub(crate) fn needle1(&self) -> u8 {
+        self.s1
+    }
+
+    /// Return a pointer to the first occurrence of the needle in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// # Safety
+    ///
+    /// * It must be the case that `start < end` and that the distance between
+    /// them is at least equal to `V::BYTES`. That is, it must always be valid
+    /// to do at least an unaligned load of `V` at `start`.
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    #[inline(always)]
+    pub(crate) unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        // If we want to support vectors bigger than 256 bits, we probably
+        // need to move up to using a u64 for the masks used below. Currently
+        // they are 32 bits, which means we're SOL for vectors that need masks
+        // bigger than 32 bits. Overall unclear until there's a use case.
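+        // The search below proceeds in four phases: (1) one unaligned load
+        // covering the first `V::BYTES` bytes, (2) an aligned loop examining
+        // `LOOP_SIZE = 4 * V::BYTES` bytes per iteration, (3) single vector
+        // chunks for whatever whole vectors remain and (4) a final load
+        // positioned flush against `end`, which may overlap bytes that
+        // previous loads already ruled out.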
+        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + +        let topos = V::Mask::first_offset; +        let len = end.distance(start); +        debug_assert!( +            len >= V::BYTES, +            "haystack has length {}, but must be at least {}", +            len, +            V::BYTES +        ); + +        // Search a possibly unaligned chunk at `start`. This covers any part +        // of the haystack prior to where aligned loads can start. +        if let Some(cur) = self.search_chunk(start, topos) { +            return Some(cur); +        } +        // Set `cur` to the first V-aligned pointer greater than `start`. +        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); +        debug_assert!(cur > start && end.sub(V::BYTES) >= start); +        if len >= Self::LOOP_SIZE { +            while cur <= end.sub(Self::LOOP_SIZE) { +                debug_assert_eq!(0, cur.as_usize() % V::BYTES); + +                let a = V::load_aligned(cur); +                let b = V::load_aligned(cur.add(1 * V::BYTES)); +                let c = V::load_aligned(cur.add(2 * V::BYTES)); +                let d = V::load_aligned(cur.add(3 * V::BYTES)); +                let eqa = self.v1.cmpeq(a); +                let eqb = self.v1.cmpeq(b); +                let eqc = self.v1.cmpeq(c); +                let eqd = self.v1.cmpeq(d); +                let or1 = eqa.or(eqb); +                let or2 = eqc.or(eqd); +                let or3 = or1.or(or2); +                if or3.movemask_will_have_non_zero() { +                    let mask = eqa.movemask(); +                    if mask.has_non_zero() { +                        return Some(cur.add(topos(mask))); +                    } + +                    let mask = eqb.movemask(); +                    if mask.has_non_zero() { +                        return Some(cur.add(1 * V::BYTES).add(topos(mask))); +                    } + +                    let mask = eqc.movemask(); +                    if mask.has_non_zero() { +                        return Some(cur.add(2 * V::BYTES).add(topos(mask))); +                    } + +                    let mask = eqd.movemask(); +                    debug_assert!(mask.has_non_zero()); +                    return Some(cur.add(3 * V::BYTES).add(topos(mask))); +                } +                cur = cur.add(Self::LOOP_SIZE); +            } +        } +        // Handle any leftovers after the aligned loop above. We use unaligned +        // loads here, but I believe we are guaranteed that they are aligned +        // since `cur` is aligned. +        while cur <= end.sub(V::BYTES) { +            debug_assert!(end.distance(cur) >= V::BYTES); +            if let Some(cur) = self.search_chunk(cur, topos) { +                return Some(cur); +            } +            cur = cur.add(V::BYTES); +        } +        // Finally handle any remaining bytes less than the size of V. In this +        // case, our pointer may indeed be unaligned and the load may overlap +        // with the previous one. But that's okay since we know the previous +        // load didn't lead to a match (otherwise we wouldn't be here). 
+        if cur < end { +            debug_assert!(end.distance(cur) < V::BYTES); +            cur = cur.sub(V::BYTES - end.distance(cur)); +            debug_assert_eq!(end.distance(cur), V::BYTES); +            return self.search_chunk(cur, topos); +        } +        None +    } + +    /// Return a pointer to the last occurrence of the needle in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// # Safety +    /// +    /// * It must be the case that `start < end` and that the distance between +    /// them is at least equal to `V::BYTES`. That is, it must always be valid +    /// to do at least an unaligned load of `V` at `start`. +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    #[inline(always)] +    pub(crate) unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        // If we want to support vectors bigger than 256 bits, we probably +        // need to move up to using a u64 for the masks used below. Currently +        // they are 32 bits, which means we're SOL for vectors that need masks +        // bigger than 32 bits. Overall unclear until there's a use case. 
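+        // This mirrors `find_raw` but walks backwards: one unaligned load
+        // flush against `end`, an aligned loop stepping back `LOOP_SIZE`
+        // bytes at a time, single vector chunks for what remains and finally
+        // one load anchored at `start` that may overlap bytes already ruled
+        // out.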
+        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + +        let topos = V::Mask::last_offset; +        let len = end.distance(start); +        debug_assert!( +            len >= V::BYTES, +            "haystack has length {}, but must be at least {}", +            len, +            V::BYTES +        ); + +        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { +            return Some(cur); +        } +        let mut cur = end.sub(end.as_usize() & V::ALIGN); +        debug_assert!(start <= cur && cur <= end); +        if len >= Self::LOOP_SIZE { +            while cur >= start.add(Self::LOOP_SIZE) { +                debug_assert_eq!(0, cur.as_usize() % V::BYTES); + +                cur = cur.sub(Self::LOOP_SIZE); +                let a = V::load_aligned(cur); +                let b = V::load_aligned(cur.add(1 * V::BYTES)); +                let c = V::load_aligned(cur.add(2 * V::BYTES)); +                let d = V::load_aligned(cur.add(3 * V::BYTES)); +                let eqa = self.v1.cmpeq(a); +                let eqb = self.v1.cmpeq(b); +                let eqc = self.v1.cmpeq(c); +                let eqd = self.v1.cmpeq(d); +                let or1 = eqa.or(eqb); +                let or2 = eqc.or(eqd); +                let or3 = or1.or(or2); +                if or3.movemask_will_have_non_zero() { +                    let mask = eqd.movemask(); +                    if mask.has_non_zero() { +                        return Some(cur.add(3 * V::BYTES).add(topos(mask))); +                    } + +                    let mask = eqc.movemask(); +                    if mask.has_non_zero() { +                        return Some(cur.add(2 * V::BYTES).add(topos(mask))); +                    } + +                    let mask = eqb.movemask(); +                    if mask.has_non_zero() { +                        return Some(cur.add(1 * V::BYTES).add(topos(mask))); +                    } + +                    let mask = eqa.movemask(); +                    debug_assert!(mask.has_non_zero()); +                    return Some(cur.add(topos(mask))); +                } +            } +        } +        while cur >= start.add(V::BYTES) { +            debug_assert!(cur.distance(start) >= V::BYTES); +            cur = cur.sub(V::BYTES); +            if let Some(cur) = self.search_chunk(cur, topos) { +                return Some(cur); +            } +        } +        if cur > start { +            debug_assert!(cur.distance(start) < V::BYTES); +            return self.search_chunk(start, topos); +        } +        None +    } + +    /// Return a count of all matching bytes in the given haystack. +    /// +    /// # Safety +    /// +    /// * It must be the case that `start < end` and that the distance between +    /// them is at least equal to `V::BYTES`. That is, it must always be valid +    /// to do at least an unaligned load of `V` at `start`. +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. 
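+    ///
+    /// As a rough usage sketch only (the `searcher` and `haystack` names
+    /// below are assumptions for illustration and not part of this crate's
+    /// API), a caller holding a `searcher: One<V>` might count matches in a
+    /// sufficiently long `haystack: &[u8]` like so:
+    ///
+    /// ```ignore
+    /// let start = haystack.as_ptr();
+    /// let end = start.wrapping_add(haystack.len());
+    /// // SAFETY: `start` and `end` are derived from the same slice and
+    /// // `haystack.len() >= V::BYTES`, satisfying the conditions above.
+    /// let count = unsafe { searcher.count_raw(start, end) };
+    /// ```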
+    #[inline(always)] +    pub(crate) unsafe fn count_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> usize { +        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + +        let confirm = |b| b == self.needle1(); +        let len = end.distance(start); +        debug_assert!( +            len >= V::BYTES, +            "haystack has length {}, but must be at least {}", +            len, +            V::BYTES +        ); + +        // Set `cur` to the first V-aligned pointer greater than `start`. +        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); +        // Count any matching bytes before we start our aligned loop. +        let mut count = count_byte_by_byte(start, cur, confirm); +        debug_assert!(cur > start && end.sub(V::BYTES) >= start); +        if len >= Self::LOOP_SIZE { +            while cur <= end.sub(Self::LOOP_SIZE) { +                debug_assert_eq!(0, cur.as_usize() % V::BYTES); + +                let a = V::load_aligned(cur); +                let b = V::load_aligned(cur.add(1 * V::BYTES)); +                let c = V::load_aligned(cur.add(2 * V::BYTES)); +                let d = V::load_aligned(cur.add(3 * V::BYTES)); +                let eqa = self.v1.cmpeq(a); +                let eqb = self.v1.cmpeq(b); +                let eqc = self.v1.cmpeq(c); +                let eqd = self.v1.cmpeq(d); +                count += eqa.movemask().count_ones(); +                count += eqb.movemask().count_ones(); +                count += eqc.movemask().count_ones(); +                count += eqd.movemask().count_ones(); +                cur = cur.add(Self::LOOP_SIZE); +            } +        } +        // Handle any leftovers after the aligned loop above. We use unaligned +        // loads here, but I believe we are guaranteed that they are aligned +        // since `cur` is aligned. +        while cur <= end.sub(V::BYTES) { +            debug_assert!(end.distance(cur) >= V::BYTES); +            let chunk = V::load_unaligned(cur); +            count += self.v1.cmpeq(chunk).movemask().count_ones(); +            cur = cur.add(V::BYTES); +        } +        // And finally count any leftovers that weren't caught above. +        count += count_byte_by_byte(cur, end, confirm); +        count +    } + +    /// Search `V::BYTES` starting at `cur` via an unaligned load. +    /// +    /// `mask_to_offset` should be a function that converts a `movemask` to +    /// an offset such that `cur.add(offset)` corresponds to a pointer to the +    /// match location if one is found. Generally it is expected to use either +    /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether +    /// one is implementing a forward or reverse search, respectively. +    /// +    /// # Safety +    /// +    /// `cur` must be a valid pointer and it must be valid to do an unaligned +    /// load of size `V::BYTES` at `cur`. +    #[inline(always)] +    unsafe fn search_chunk( +        &self, +        cur: *const u8, +        mask_to_offset: impl Fn(V::Mask) -> usize, +    ) -> Option<*const u8> { +        let chunk = V::load_unaligned(cur); +        let mask = self.v1.cmpeq(chunk).movemask(); +        if mask.has_non_zero() { +            Some(cur.add(mask_to_offset(mask))) +        } else { +            None +        } +    } +} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. 
For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Two<V> { +    s1: u8, +    s2: u8, +    v1: V, +    v2: V, +} + +impl<V: Vector> Two<V> { +    /// The number of bytes we examine per each iteration of our search loop. +    const LOOP_SIZE: usize = 2 * V::BYTES; + +    /// Create a new searcher that finds occurrences of the byte given. +    #[inline(always)] +    pub(crate) unsafe fn new(needle1: u8, needle2: u8) -> Two<V> { +        Two { +            s1: needle1, +            s2: needle2, +            v1: V::splat(needle1), +            v2: V::splat(needle2), +        } +    } + +    /// Returns the first needle given to `Two::new`. +    #[inline(always)] +    pub(crate) fn needle1(&self) -> u8 { +        self.s1 +    } + +    /// Returns the second needle given to `Two::new`. +    #[inline(always)] +    pub(crate) fn needle2(&self) -> u8 { +        self.s2 +    } + +    /// Return a pointer to the first occurrence of one of the needles in the +    /// given haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// # Safety +    /// +    /// * It must be the case that `start < end` and that the distance between +    /// them is at least equal to `V::BYTES`. That is, it must always be valid +    /// to do at least an unaligned load of `V` at `start`. +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    #[inline(always)] +    pub(crate) unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        // If we want to support vectors bigger than 256 bits, we probably +        // need to move up to using a u64 for the masks used below. Currently +        // they are 32 bits, which means we're SOL for vectors that need masks +        // bigger than 32 bits. Overall unclear until there's a use case. +        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + +        let topos = V::Mask::first_offset; +        let len = end.distance(start); +        debug_assert!( +            len >= V::BYTES, +            "haystack has length {}, but must be at least {}", +            len, +            V::BYTES +        ); + +        // Search a possibly unaligned chunk at `start`. This covers any part +        // of the haystack prior to where aligned loads can start. +        if let Some(cur) = self.search_chunk(start, topos) { +            return Some(cur); +        } +        // Set `cur` to the first V-aligned pointer greater than `start`. 
+        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); +        debug_assert!(cur > start && end.sub(V::BYTES) >= start); +        if len >= Self::LOOP_SIZE { +            while cur <= end.sub(Self::LOOP_SIZE) { +                debug_assert_eq!(0, cur.as_usize() % V::BYTES); + +                let a = V::load_aligned(cur); +                let b = V::load_aligned(cur.add(V::BYTES)); +                let eqa1 = self.v1.cmpeq(a); +                let eqb1 = self.v1.cmpeq(b); +                let eqa2 = self.v2.cmpeq(a); +                let eqb2 = self.v2.cmpeq(b); +                let or1 = eqa1.or(eqb1); +                let or2 = eqa2.or(eqb2); +                let or3 = or1.or(or2); +                if or3.movemask_will_have_non_zero() { +                    let mask = eqa1.movemask().or(eqa2.movemask()); +                    if mask.has_non_zero() { +                        return Some(cur.add(topos(mask))); +                    } + +                    let mask = eqb1.movemask().or(eqb2.movemask()); +                    debug_assert!(mask.has_non_zero()); +                    return Some(cur.add(V::BYTES).add(topos(mask))); +                } +                cur = cur.add(Self::LOOP_SIZE); +            } +        } +        // Handle any leftovers after the aligned loop above. We use unaligned +        // loads here, but I believe we are guaranteed that they are aligned +        // since `cur` is aligned. +        while cur <= end.sub(V::BYTES) { +            debug_assert!(end.distance(cur) >= V::BYTES); +            if let Some(cur) = self.search_chunk(cur, topos) { +                return Some(cur); +            } +            cur = cur.add(V::BYTES); +        } +        // Finally handle any remaining bytes less than the size of V. In this +        // case, our pointer may indeed be unaligned and the load may overlap +        // with the previous one. But that's okay since we know the previous +        // load didn't lead to a match (otherwise we wouldn't be here). +        if cur < end { +            debug_assert!(end.distance(cur) < V::BYTES); +            cur = cur.sub(V::BYTES - end.distance(cur)); +            debug_assert_eq!(end.distance(cur), V::BYTES); +            return self.search_chunk(cur, topos); +        } +        None +    } + +    /// Return a pointer to the last occurrence of the needle in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// # Safety +    /// +    /// * It must be the case that `start < end` and that the distance between +    /// them is at least equal to `V::BYTES`. That is, it must always be valid +    /// to do at least an unaligned load of `V` at `start`. +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. 
+    #[inline(always)] +    pub(crate) unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        // If we want to support vectors bigger than 256 bits, we probably +        // need to move up to using a u64 for the masks used below. Currently +        // they are 32 bits, which means we're SOL for vectors that need masks +        // bigger than 32 bits. Overall unclear until there's a use case. +        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + +        let topos = V::Mask::last_offset; +        let len = end.distance(start); +        debug_assert!( +            len >= V::BYTES, +            "haystack has length {}, but must be at least {}", +            len, +            V::BYTES +        ); + +        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { +            return Some(cur); +        } +        let mut cur = end.sub(end.as_usize() & V::ALIGN); +        debug_assert!(start <= cur && cur <= end); +        if len >= Self::LOOP_SIZE { +            while cur >= start.add(Self::LOOP_SIZE) { +                debug_assert_eq!(0, cur.as_usize() % V::BYTES); + +                cur = cur.sub(Self::LOOP_SIZE); +                let a = V::load_aligned(cur); +                let b = V::load_aligned(cur.add(V::BYTES)); +                let eqa1 = self.v1.cmpeq(a); +                let eqb1 = self.v1.cmpeq(b); +                let eqa2 = self.v2.cmpeq(a); +                let eqb2 = self.v2.cmpeq(b); +                let or1 = eqa1.or(eqb1); +                let or2 = eqa2.or(eqb2); +                let or3 = or1.or(or2); +                if or3.movemask_will_have_non_zero() { +                    let mask = eqb1.movemask().or(eqb2.movemask()); +                    if mask.has_non_zero() { +                        return Some(cur.add(V::BYTES).add(topos(mask))); +                    } + +                    let mask = eqa1.movemask().or(eqa2.movemask()); +                    debug_assert!(mask.has_non_zero()); +                    return Some(cur.add(topos(mask))); +                } +            } +        } +        while cur >= start.add(V::BYTES) { +            debug_assert!(cur.distance(start) >= V::BYTES); +            cur = cur.sub(V::BYTES); +            if let Some(cur) = self.search_chunk(cur, topos) { +                return Some(cur); +            } +        } +        if cur > start { +            debug_assert!(cur.distance(start) < V::BYTES); +            return self.search_chunk(start, topos); +        } +        None +    } + +    /// Search `V::BYTES` starting at `cur` via an unaligned load. +    /// +    /// `mask_to_offset` should be a function that converts a `movemask` to +    /// an offset such that `cur.add(offset)` corresponds to a pointer to the +    /// match location if one is found. Generally it is expected to use either +    /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether +    /// one is implementing a forward or reverse search, respectively. +    /// +    /// # Safety +    /// +    /// `cur` must be a valid pointer and it must be valid to do an unaligned +    /// load of size `V::BYTES` at `cur`. 
+#[inline(always)]
+    unsafe fn search_chunk(
+        &self,
+        cur: *const u8,
+        mask_to_offset: impl Fn(V::Mask) -> usize,
+    ) -> Option<*const u8> {
+        let chunk = V::load_unaligned(cur);
+        let eq1 = self.v1.cmpeq(chunk);
+        let eq2 = self.v2.cmpeq(chunk);
+        let mask = eq1.or(eq2).movemask();
+        if mask.has_non_zero() {
+            let mask1 = eq1.movemask();
+            let mask2 = eq2.movemask();
+            Some(cur.add(mask_to_offset(mask1.or(mask2))))
+        } else {
+            None
+        }
+    }
+}
+
+/// Finds all occurrences of three bytes in a haystack.
+///
+/// That is, this reports matches of one of three possible bytes. For example,
+/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
+/// `0`, `2`, `3`, `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Three<V> {
+    s1: u8,
+    s2: u8,
+    s3: u8,
+    v1: V,
+    v2: V,
+    v3: V,
+}
+
+impl<V: Vector> Three<V> {
+    /// The number of bytes we examine per each iteration of our search loop.
+    const LOOP_SIZE: usize = 2 * V::BYTES;
+
+    /// Create a new searcher that finds occurrences of the bytes given.
+    #[inline(always)]
+    pub(crate) unsafe fn new(
+        needle1: u8,
+        needle2: u8,
+        needle3: u8,
+    ) -> Three<V> {
+        Three {
+            s1: needle1,
+            s2: needle2,
+            s3: needle3,
+            v1: V::splat(needle1),
+            v2: V::splat(needle2),
+            v3: V::splat(needle3),
+        }
+    }
+
+    /// Returns the first needle given to `Three::new`.
+    #[inline(always)]
+    pub(crate) fn needle1(&self) -> u8 {
+        self.s1
+    }
+
+    /// Returns the second needle given to `Three::new`.
+    #[inline(always)]
+    pub(crate) fn needle2(&self) -> u8 {
+        self.s2
+    }
+
+    /// Returns the third needle given to `Three::new`.
+    #[inline(always)]
+    pub(crate) fn needle3(&self) -> u8 {
+        self.s3
+    }
+
+    /// Return a pointer to the first occurrence of one of the needles in the
+    /// given haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// # Safety
+    ///
+    /// * It must be the case that `start < end` and that the distance between
+    /// them is at least equal to `V::BYTES`. That is, it must always be valid
+    /// to do at least an unaligned load of `V` at `start`.
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    #[inline(always)]
+    pub(crate) unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        // If we want to support vectors bigger than 256 bits, we probably
+        // need to move up to using a u64 for the masks used below. Currently
+        // they are 32 bits, which means we're SOL for vectors that need masks
+        // bigger than 32 bits.
Overall unclear until there's a use case. +        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + +        let topos = V::Mask::first_offset; +        let len = end.distance(start); +        debug_assert!( +            len >= V::BYTES, +            "haystack has length {}, but must be at least {}", +            len, +            V::BYTES +        ); + +        // Search a possibly unaligned chunk at `start`. This covers any part +        // of the haystack prior to where aligned loads can start. +        if let Some(cur) = self.search_chunk(start, topos) { +            return Some(cur); +        } +        // Set `cur` to the first V-aligned pointer greater than `start`. +        let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); +        debug_assert!(cur > start && end.sub(V::BYTES) >= start); +        if len >= Self::LOOP_SIZE { +            while cur <= end.sub(Self::LOOP_SIZE) { +                debug_assert_eq!(0, cur.as_usize() % V::BYTES); + +                let a = V::load_aligned(cur); +                let b = V::load_aligned(cur.add(V::BYTES)); +                let eqa1 = self.v1.cmpeq(a); +                let eqb1 = self.v1.cmpeq(b); +                let eqa2 = self.v2.cmpeq(a); +                let eqb2 = self.v2.cmpeq(b); +                let eqa3 = self.v3.cmpeq(a); +                let eqb3 = self.v3.cmpeq(b); +                let or1 = eqa1.or(eqb1); +                let or2 = eqa2.or(eqb2); +                let or3 = eqa3.or(eqb3); +                let or4 = or1.or(or2); +                let or5 = or3.or(or4); +                if or5.movemask_will_have_non_zero() { +                    let mask = eqa1 +                        .movemask() +                        .or(eqa2.movemask()) +                        .or(eqa3.movemask()); +                    if mask.has_non_zero() { +                        return Some(cur.add(topos(mask))); +                    } + +                    let mask = eqb1 +                        .movemask() +                        .or(eqb2.movemask()) +                        .or(eqb3.movemask()); +                    debug_assert!(mask.has_non_zero()); +                    return Some(cur.add(V::BYTES).add(topos(mask))); +                } +                cur = cur.add(Self::LOOP_SIZE); +            } +        } +        // Handle any leftovers after the aligned loop above. We use unaligned +        // loads here, but I believe we are guaranteed that they are aligned +        // since `cur` is aligned. +        while cur <= end.sub(V::BYTES) { +            debug_assert!(end.distance(cur) >= V::BYTES); +            if let Some(cur) = self.search_chunk(cur, topos) { +                return Some(cur); +            } +            cur = cur.add(V::BYTES); +        } +        // Finally handle any remaining bytes less than the size of V. In this +        // case, our pointer may indeed be unaligned and the load may overlap +        // with the previous one. But that's okay since we know the previous +        // load didn't lead to a match (otherwise we wouldn't be here). +        if cur < end { +            debug_assert!(end.distance(cur) < V::BYTES); +            cur = cur.sub(V::BYTES - end.distance(cur)); +            debug_assert_eq!(end.distance(cur), V::BYTES); +            return self.search_chunk(cur, topos); +        } +        None +    } + +    /// Return a pointer to the last occurrence of the needle in the given +    /// haystack. If no such occurrence exists, then `None` is returned. 
+    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// # Safety +    /// +    /// * It must be the case that `start < end` and that the distance between +    /// them is at least equal to `V::BYTES`. That is, it must always be valid +    /// to do at least an unaligned load of `V` at `start`. +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    #[inline(always)] +    pub(crate) unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        // If we want to support vectors bigger than 256 bits, we probably +        // need to move up to using a u64 for the masks used below. Currently +        // they are 32 bits, which means we're SOL for vectors that need masks +        // bigger than 32 bits. Overall unclear until there's a use case. +        debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + +        let topos = V::Mask::last_offset; +        let len = end.distance(start); +        debug_assert!( +            len >= V::BYTES, +            "haystack has length {}, but must be at least {}", +            len, +            V::BYTES +        ); + +        if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { +            return Some(cur); +        } +        let mut cur = end.sub(end.as_usize() & V::ALIGN); +        debug_assert!(start <= cur && cur <= end); +        if len >= Self::LOOP_SIZE { +            while cur >= start.add(Self::LOOP_SIZE) { +                debug_assert_eq!(0, cur.as_usize() % V::BYTES); + +                cur = cur.sub(Self::LOOP_SIZE); +                let a = V::load_aligned(cur); +                let b = V::load_aligned(cur.add(V::BYTES)); +                let eqa1 = self.v1.cmpeq(a); +                let eqb1 = self.v1.cmpeq(b); +                let eqa2 = self.v2.cmpeq(a); +                let eqb2 = self.v2.cmpeq(b); +                let eqa3 = self.v3.cmpeq(a); +                let eqb3 = self.v3.cmpeq(b); +                let or1 = eqa1.or(eqb1); +                let or2 = eqa2.or(eqb2); +                let or3 = eqa3.or(eqb3); +                let or4 = or1.or(or2); +                let or5 = or3.or(or4); +                if or5.movemask_will_have_non_zero() { +                    let mask = eqb1 +                        .movemask() +                        .or(eqb2.movemask()) +                        .or(eqb3.movemask()); +                    if mask.has_non_zero() { +                        return Some(cur.add(V::BYTES).add(topos(mask))); +                    } + +                    let mask = eqa1 +                        .movemask() +                        .or(eqa2.movemask()) +                        .or(eqa3.movemask()); +                    debug_assert!(mask.has_non_zero()); +                    return Some(cur.add(topos(mask))); +                } +            } +        } +        while cur >= start.add(V::BYTES) { +            
debug_assert!(cur.distance(start) >= V::BYTES); +            cur = cur.sub(V::BYTES); +            if let Some(cur) = self.search_chunk(cur, topos) { +                return Some(cur); +            } +        } +        if cur > start { +            debug_assert!(cur.distance(start) < V::BYTES); +            return self.search_chunk(start, topos); +        } +        None +    } + +    /// Search `V::BYTES` starting at `cur` via an unaligned load. +    /// +    /// `mask_to_offset` should be a function that converts a `movemask` to +    /// an offset such that `cur.add(offset)` corresponds to a pointer to the +    /// match location if one is found. Generally it is expected to use either +    /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether +    /// one is implementing a forward or reverse search, respectively. +    /// +    /// # Safety +    /// +    /// `cur` must be a valid pointer and it must be valid to do an unaligned +    /// load of size `V::BYTES` at `cur`. +    #[inline(always)] +    unsafe fn search_chunk( +        &self, +        cur: *const u8, +        mask_to_offset: impl Fn(V::Mask) -> usize, +    ) -> Option<*const u8> { +        let chunk = V::load_unaligned(cur); +        let eq1 = self.v1.cmpeq(chunk); +        let eq2 = self.v2.cmpeq(chunk); +        let eq3 = self.v3.cmpeq(chunk); +        let mask = eq1.or(eq2).or(eq3).movemask(); +        if mask.has_non_zero() { +            let mask1 = eq1.movemask(); +            let mask2 = eq2.movemask(); +            let mask3 = eq3.movemask(); +            Some(cur.add(mask_to_offset(mask1.or(mask2).or(mask3)))) +        } else { +            None +        } +    } +} + +/// An iterator over all occurrences of a set of bytes in a haystack. +/// +/// This iterator implements the routines necessary to provide a +/// `DoubleEndedIterator` impl, which means it can also be used to find +/// occurrences in reverse order. +/// +/// The lifetime parameters are as follows: +/// +/// * `'h` refers to the lifetime of the haystack being searched. +/// +/// This type is intended to be used to implement all iterators for the +/// `memchr` family of functions. It handles a tiny bit of marginally tricky +/// raw pointer math, but otherwise expects the caller to provide `find_raw` +/// and `rfind_raw` routines for each call of `next` and `next_back`, +/// respectively. +#[derive(Clone, Debug)] +pub(crate) struct Iter<'h> { +    /// The original starting point into the haystack. We use this to convert +    /// pointers to offsets. +    original_start: *const u8, +    /// The current starting point into the haystack. That is, where the next +    /// search will begin. +    start: *const u8, +    /// The current ending point into the haystack. That is, where the next +    /// reverse search will begin. +    end: *const u8, +    /// A marker for tracking the lifetime of the start/cur_start/cur_end +    /// pointers above, which all point into the haystack. +    haystack: core::marker::PhantomData<&'h [u8]>, +} + +// SAFETY: Iter contains no shared references to anything that performs any +// interior mutations. Also, the lifetime guarantees that Iter will not outlive +// the haystack. +unsafe impl<'h> Send for Iter<'h> {} + +// SAFETY: Iter perform no interior mutations, therefore no explicit +// synchronization is necessary. Also, the lifetime guarantees that Iter will +// not outlive the haystack. +unsafe impl<'h> Sync for Iter<'h> {} + +impl<'h> Iter<'h> { +    /// Create a new generic memchr iterator. 
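+    ///
+    /// As a sketch of intended use (the wrapper shown is hypothetical and
+    /// not itself part of this module), an architecture specific iterator
+    /// typically stores an `Iter` next to its searcher and forwards each
+    /// `Iterator::next` call to it:
+    ///
+    /// ```ignore
+    /// fn next(&mut self) -> Option<usize> {
+    ///     // SAFETY: find_raw only returns pointers in [start, end).
+    ///     unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    /// }
+    /// ```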
+    #[inline(always)] +    pub(crate) fn new(haystack: &'h [u8]) -> Iter<'h> { +        Iter { +            original_start: haystack.as_ptr(), +            start: haystack.as_ptr(), +            end: haystack.as_ptr().wrapping_add(haystack.len()), +            haystack: core::marker::PhantomData, +        } +    } + +    /// Returns the next occurrence in the forward direction. +    /// +    /// # Safety +    /// +    /// Callers must ensure that if a pointer is returned from the closure +    /// provided, then it must be greater than or equal to the start pointer +    /// and less than the end pointer. +    #[inline(always)] +    pub(crate) unsafe fn next( +        &mut self, +        mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, +    ) -> Option<usize> { +        // SAFETY: Pointers are derived directly from the same &[u8] haystack. +        // We only ever modify start/end corresponding to a matching offset +        // found between start and end. Thus all changes to start/end maintain +        // our safety requirements. +        // +        // The only other assumption we rely on is that the pointer returned +        // by `find_raw` satisfies `self.start <= found < self.end`, and that +        // safety contract is forwarded to the caller. +        let found = find_raw(self.start, self.end)?; +        let result = found.distance(self.original_start); +        self.start = found.add(1); +        Some(result) +    } + +    /// Returns the number of remaining elements in this iterator. +    #[inline(always)] +    pub(crate) fn count( +        self, +        mut count_raw: impl FnMut(*const u8, *const u8) -> usize, +    ) -> usize { +        // SAFETY: Pointers are derived directly from the same &[u8] haystack. +        // We only ever modify start/end corresponding to a matching offset +        // found between start and end. Thus all changes to start/end maintain +        // our safety requirements. +        count_raw(self.start, self.end) +    } + +    /// Returns the next occurrence in reverse. +    /// +    /// # Safety +    /// +    /// Callers must ensure that if a pointer is returned from the closure +    /// provided, then it must be greater than or equal to the start pointer +    /// and less than the end pointer. +    #[inline(always)] +    pub(crate) unsafe fn next_back( +        &mut self, +        mut rfind_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, +    ) -> Option<usize> { +        // SAFETY: Pointers are derived directly from the same &[u8] haystack. +        // We only ever modify start/end corresponding to a matching offset +        // found between start and end. Thus all changes to start/end maintain +        // our safety requirements. +        // +        // The only other assumption we rely on is that the pointer returned +        // by `rfind_raw` satisfies `self.start <= found < self.end`, and that +        // safety contract is forwarded to the caller. +        let found = rfind_raw(self.start, self.end)?; +        let result = found.distance(self.original_start); +        self.end = found; +        Some(result) +    } + +    /// Provides an implementation of `Iterator::size_hint`. +    #[inline(always)] +    pub(crate) fn size_hint(&self) -> (usize, Option<usize>) { +        (0, Some(self.end.as_usize().saturating_sub(self.start.as_usize()))) +    } +} + +/// Search a slice using a function that operates on raw pointers. 
+/// +/// Given a function to search a contiguous sequence of memory for the location +/// of a non-empty set of bytes, this will execute that search on a slice of +/// bytes. The pointer returned by the given function will be converted to an +/// offset relative to the starting point of the given slice. That is, if a +/// match is found, the offset returned by this routine is guaranteed to be a +/// valid index into `haystack`. +/// +/// Callers may use this for a forward or reverse search. +/// +/// # Safety +/// +/// Callers must ensure that if a pointer is returned by `find_raw`, then the +/// pointer must be greater than or equal to the starting pointer and less than +/// the end pointer. +#[inline(always)] +pub(crate) unsafe fn search_slice_with_raw( +    haystack: &[u8], +    mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, +) -> Option<usize> { +    // SAFETY: We rely on `find_raw` to return a correct and valid pointer, but +    // otherwise, `start` and `end` are valid due to the guarantees provided by +    // a &[u8]. +    let start = haystack.as_ptr(); +    let end = start.add(haystack.len()); +    let found = find_raw(start, end)?; +    Some(found.distance(start)) +} + +/// Performs a forward byte-at-a-time loop until either `ptr >= end_ptr` or +/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is +/// returned. If the latter occurs, then the pointer at which `confirm` returns +/// `true` is returned. +/// +/// # Safety +/// +/// Callers must provide valid pointers and they must satisfy `start_ptr <= +/// ptr` and `ptr <= end_ptr`. +#[inline(always)] +pub(crate) unsafe fn fwd_byte_by_byte<F: Fn(u8) -> bool>( +    start: *const u8, +    end: *const u8, +    confirm: F, +) -> Option<*const u8> { +    debug_assert!(start <= end); +    let mut ptr = start; +    while ptr < end { +        if confirm(*ptr) { +            return Some(ptr); +        } +        ptr = ptr.offset(1); +    } +    None +} + +/// Performs a reverse byte-at-a-time loop until either `ptr < start_ptr` or +/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is +/// returned. If the latter occurs, then the pointer at which `confirm` returns +/// `true` is returned. +/// +/// # Safety +/// +/// Callers must provide valid pointers and they must satisfy `start_ptr <= +/// ptr` and `ptr <= end_ptr`. +#[inline(always)] +pub(crate) unsafe fn rev_byte_by_byte<F: Fn(u8) -> bool>( +    start: *const u8, +    end: *const u8, +    confirm: F, +) -> Option<*const u8> { +    debug_assert!(start <= end); + +    let mut ptr = end; +    while ptr > start { +        ptr = ptr.offset(-1); +        if confirm(*ptr) { +            return Some(ptr); +        } +    } +    None +} + +/// Performs a forward byte-at-a-time loop until `ptr >= end_ptr` and returns +/// the number of times `confirm(*ptr)` returns `true`. +/// +/// # Safety +/// +/// Callers must provide valid pointers and they must satisfy `start_ptr <= +/// ptr` and `ptr <= end_ptr`. 
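+///
+/// This is the scalar companion to the vectorized `One::count_raw` above: it
+/// counts matches in the unaligned head and tail of the haystack that the
+/// vector loop does not cover.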
+#[inline(always)] +pub(crate) unsafe fn count_byte_by_byte<F: Fn(u8) -> bool>( +    start: *const u8, +    end: *const u8, +    confirm: F, +) -> usize { +    debug_assert!(start <= end); +    let mut ptr = start; +    let mut count = 0; +    while ptr < end { +        if confirm(*ptr) { +            count += 1; +        } +        ptr = ptr.offset(1); +    } +    count +} diff --git a/vendor/memchr/src/arch/generic/mod.rs b/vendor/memchr/src/arch/generic/mod.rs new file mode 100644 index 0000000..63ee3f0 --- /dev/null +++ b/vendor/memchr/src/arch/generic/mod.rs @@ -0,0 +1,14 @@ +/*! +This module defines "generic" routines that can be specialized to specific +architectures. + +We don't expose this module primarily because it would require exposing all +of the internal infrastructure required to write these generic routines. +That infrastructure should be treated as an implementation detail so that +it is allowed to evolve. Instead, what we expose are architecture specific +instantiations of these generic implementations. The generic code just lets us +write the code once (usually). +*/ + +pub(crate) mod memchr; +pub(crate) mod packedpair; diff --git a/vendor/memchr/src/arch/generic/packedpair.rs b/vendor/memchr/src/arch/generic/packedpair.rs new file mode 100644 index 0000000..8d97cf2 --- /dev/null +++ b/vendor/memchr/src/arch/generic/packedpair.rs @@ -0,0 +1,317 @@ +/*! +Generic crate-internal routines for the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use crate::{ +    arch::all::{is_equal_raw, packedpair::Pair}, +    ext::Pointer, +    vector::{MoveMask, Vector}, +}; + +/// A generic architecture dependent "packed pair" finder. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +/// +/// This is architecture dependent because it uses specific vector operations +/// to look for occurrences of the pair of bytes. +/// +/// This type is not meant to be exported and is instead meant to be used as +/// the implementation for architecture specific facades. Why? Because it's a +/// bit of a quirky API that requires `inline(always)` annotations. And pretty +/// much everything has safety obligations due (at least) to the caller needing +/// to inline calls into routines marked with +/// `#[target_feature(enable = "...")]`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Finder<V> { +    pair: Pair, +    v1: V, +    v2: V, +    min_haystack_len: usize, +} + +impl<V: Vector> Finder<V> { +    /// Create a new pair searcher. The searcher returned can either report +    /// exact matches of `needle` or act as a prefilter and report candidate +    /// positions of `needle`. +    /// +    /// # Safety +    /// +    /// Callers must ensure that whatever vector type this routine is called +    /// with is supported by the current environment. +    /// +    /// Callers must also ensure that `needle.len() >= 2`. 
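+    ///
+    /// A construction sketch, for illustration only (whether `Pair::new` is
+    /// the appropriate constructor depends on the caller, and the chosen
+    /// vector type `V` must be verified as available by other means):
+    ///
+    /// ```ignore
+    /// let pair = Pair::new(needle)?;
+    /// // SAFETY: only reached after confirming that `V` is supported and
+    /// // that `needle.len() >= 2`.
+    /// let finder = unsafe { Finder::<V>::new(needle, pair) };
+    /// ```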
+    #[inline(always)] +    pub(crate) unsafe fn new(needle: &[u8], pair: Pair) -> Finder<V> { +        let max_index = pair.index1().max(pair.index2()); +        let min_haystack_len = +            core::cmp::max(needle.len(), usize::from(max_index) + V::BYTES); +        let v1 = V::splat(needle[usize::from(pair.index1())]); +        let v2 = V::splat(needle[usize::from(pair.index2())]); +        Finder { pair, v1, v2, min_haystack_len } +    } + +    /// Searches the given haystack for the given needle. The needle given +    /// should be the same as the needle that this finder was initialized +    /// with. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    /// +    /// # Safety +    /// +    /// Since this is meant to be used with vector functions, callers need to +    /// specialize this inside of a function with a `target_feature` attribute. +    /// Therefore, callers must ensure that whatever target feature is being +    /// used supports the vector functions that this function is specialized +    /// for. (For the specific vector functions used, see the Vector trait +    /// implementations.) +    #[inline(always)] +    pub(crate) unsafe fn find( +        &self, +        haystack: &[u8], +        needle: &[u8], +    ) -> Option<usize> { +        assert!( +            haystack.len() >= self.min_haystack_len, +            "haystack too small, should be at least {} but got {}", +            self.min_haystack_len, +            haystack.len(), +        ); + +        let all = V::Mask::all_zeros_except_least_significant(0); +        let start = haystack.as_ptr(); +        let end = start.add(haystack.len()); +        let max = end.sub(self.min_haystack_len); +        let mut cur = start; + +        // N.B. I did experiment with unrolling the loop to deal with size(V) +        // bytes at a time and 2*size(V) bytes at a time. The double unroll +        // was marginally faster while the quadruple unroll was unambiguously +        // slower. In the end, I decided the complexity from unrolling wasn't +        // worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to +        // compare. 
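+        // Each iteration below looks at one vector's worth of candidate
+        // positions: `find_in_chunk` loads the bytes at `cur + index1` and
+        // `cur + index2`, compares them against the splatted pair bytes and,
+        // for every position where both compare equal, confirms the full
+        // needle with a byte-wise comparison before reporting a match.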
+        while cur <= max { +            if let Some(chunki) = self.find_in_chunk(needle, cur, end, all) { +                return Some(matched(start, cur, chunki)); +            } +            cur = cur.add(V::BYTES); +        } +        if cur < end { +            let remaining = end.distance(cur); +            debug_assert!( +                remaining < self.min_haystack_len, +                "remaining bytes should be smaller than the minimum haystack \ +                 length of {}, but there are {} bytes remaining", +                self.min_haystack_len, +                remaining, +            ); +            if remaining < needle.len() { +                return None; +            } +            debug_assert!( +                max < cur, +                "after main loop, cur should have exceeded max", +            ); +            let overlap = cur.distance(max); +            debug_assert!( +                overlap > 0, +                "overlap ({}) must always be non-zero", +                overlap, +            ); +            debug_assert!( +                overlap < V::BYTES, +                "overlap ({}) cannot possibly be >= than a vector ({})", +                overlap, +                V::BYTES, +            ); +            // The mask has all of its bits set except for the first N least +            // significant bits, where N=overlap. This way, any matches that +            // occur in find_in_chunk within the overlap are automatically +            // ignored. +            let mask = V::Mask::all_zeros_except_least_significant(overlap); +            cur = max; +            let m = self.find_in_chunk(needle, cur, end, mask); +            if let Some(chunki) = m { +                return Some(matched(start, cur, chunki)); +            } +        } +        None +    } + +    /// Searches the given haystack for offsets that represent candidate +    /// matches of the `needle` given to this finder's constructor. The offsets +    /// returned, if they are a match, correspond to the starting offset of +    /// `needle` in the given `haystack`. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    /// +    /// # Safety +    /// +    /// Since this is meant to be used with vector functions, callers need to +    /// specialize this inside of a function with a `target_feature` attribute. +    /// Therefore, callers must ensure that whatever target feature is being +    /// used supports the vector functions that this function is specialized +    /// for. (For the specific vector functions used, see the Vector trait +    /// implementations.) +    #[inline(always)] +    pub(crate) unsafe fn find_prefilter( +        &self, +        haystack: &[u8], +    ) -> Option<usize> { +        assert!( +            haystack.len() >= self.min_haystack_len, +            "haystack too small, should be at least {} but got {}", +            self.min_haystack_len, +            haystack.len(), +        ); + +        let start = haystack.as_ptr(); +        let end = start.add(haystack.len()); +        let max = end.sub(self.min_haystack_len); +        let mut cur = start; + +        // N.B. I did experiment with unrolling the loop to deal with size(V) +        // bytes at a time and 2*size(V) bytes at a time. The double unroll +        // was marginally faster while the quadruple unroll was unambiguously +        // slower. In the end, I decided the complexity from unrolling wasn't +        // worth it. 
I used the memmem/krate/prebuilt/huge-en/ benchmarks to +        // compare. +        while cur <= max { +            if let Some(chunki) = self.find_prefilter_in_chunk(cur) { +                return Some(matched(start, cur, chunki)); +            } +            cur = cur.add(V::BYTES); +        } +        if cur < end { +            // This routine immediately quits if a candidate match is found. +            // That means that if we're here, no candidate matches have been +            // found at or before 'ptr'. Thus, we don't need to mask anything +            // out even though we might technically search part of the haystack +            // that we've already searched (because we know it can't match). +            cur = max; +            if let Some(chunki) = self.find_prefilter_in_chunk(cur) { +                return Some(matched(start, cur, chunki)); +            } +        } +        None +    } + +    /// Search for an occurrence of our byte pair from the needle in the chunk +    /// pointed to by cur, with the end of the haystack pointed to by end. +    /// When an occurrence is found, memcmp is run to check if a match occurs +    /// at the corresponding position. +    /// +    /// `mask` should have bits set corresponding the positions in the chunk +    /// in which matches are considered. This is only used for the last vector +    /// load where the beginning of the vector might have overlapped with the +    /// last load in the main loop. The mask lets us avoid visiting positions +    /// that have already been discarded as matches. +    /// +    /// # Safety +    /// +    /// It must be safe to do an unaligned read of size(V) bytes starting at +    /// both (cur + self.index1) and (cur + self.index2). It must also be safe +    /// to do unaligned loads on cur up to (end - needle.len()). +    #[inline(always)] +    unsafe fn find_in_chunk( +        &self, +        needle: &[u8], +        cur: *const u8, +        end: *const u8, +        mask: V::Mask, +    ) -> Option<usize> { +        let index1 = usize::from(self.pair.index1()); +        let index2 = usize::from(self.pair.index2()); +        let chunk1 = V::load_unaligned(cur.add(index1)); +        let chunk2 = V::load_unaligned(cur.add(index2)); +        let eq1 = chunk1.cmpeq(self.v1); +        let eq2 = chunk2.cmpeq(self.v2); + +        let mut offsets = eq1.and(eq2).movemask().and(mask); +        while offsets.has_non_zero() { +            let offset = offsets.first_offset(); +            let cur = cur.add(offset); +            if end.sub(needle.len()) < cur { +                return None; +            } +            if is_equal_raw(needle.as_ptr(), cur, needle.len()) { +                return Some(offset); +            } +            offsets = offsets.clear_least_significant_bit(); +        } +        None +    } + +    /// Search for an occurrence of our byte pair from the needle in the chunk +    /// pointed to by cur, with the end of the haystack pointed to by end. +    /// When an occurrence is found, memcmp is run to check if a match occurs +    /// at the corresponding position. +    /// +    /// # Safety +    /// +    /// It must be safe to do an unaligned read of size(V) bytes starting at +    /// both (cur + self.index1) and (cur + self.index2). It must also be safe +    /// to do unaligned reads on cur up to (end - needle.len()). 
+    #[inline(always)] +    unsafe fn find_prefilter_in_chunk(&self, cur: *const u8) -> Option<usize> { +        let index1 = usize::from(self.pair.index1()); +        let index2 = usize::from(self.pair.index2()); +        let chunk1 = V::load_unaligned(cur.add(index1)); +        let chunk2 = V::load_unaligned(cur.add(index2)); +        let eq1 = chunk1.cmpeq(self.v1); +        let eq2 = chunk2.cmpeq(self.v2); + +        let offsets = eq1.and(eq2).movemask(); +        if !offsets.has_non_zero() { +            return None; +        } +        Some(offsets.first_offset()) +    } + +    /// Returns the pair of offsets (into the needle) used to check as a +    /// predicate before confirming whether a needle exists at a particular +    /// position. +    #[inline] +    pub(crate) fn pair(&self) -> &Pair { +        &self.pair +    } + +    /// Returns the minimum haystack length that this `Finder` can search. +    /// +    /// Providing a haystack to this `Finder` shorter than this length is +    /// guaranteed to result in a panic. +    #[inline(always)] +    pub(crate) fn min_haystack_len(&self) -> usize { +        self.min_haystack_len +    } +} + +/// Accepts a chunk-relative offset and returns a haystack relative offset. +/// +/// This used to be marked `#[cold]` and `#[inline(never)]`, but I couldn't +/// observe a consistent measureable difference between that and just inlining +/// it. So we go with inlining it. +/// +/// # Safety +/// +/// Same at `ptr::offset_from` in addition to `cur >= start`. +#[inline(always)] +unsafe fn matched(start: *const u8, cur: *const u8, chunki: usize) -> usize { +    cur.distance(start) + chunki +} + +// If you're looking for tests, those are run for each instantiation of the +// above code. So for example, see arch::x86_64::sse2::packedpair. diff --git a/vendor/memchr/src/arch/mod.rs b/vendor/memchr/src/arch/mod.rs new file mode 100644 index 0000000..2f63a1a --- /dev/null +++ b/vendor/memchr/src/arch/mod.rs @@ -0,0 +1,16 @@ +/*! +A module with low-level architecture dependent routines. + +These routines are useful as primitives for tasks not covered by the higher +level crate API. +*/ + +pub mod all; +pub(crate) mod generic; + +#[cfg(target_arch = "aarch64")] +pub mod aarch64; +#[cfg(target_arch = "wasm32")] +pub mod wasm32; +#[cfg(target_arch = "x86_64")] +pub mod x86_64; diff --git a/vendor/memchr/src/arch/wasm32/memchr.rs b/vendor/memchr/src/arch/wasm32/memchr.rs new file mode 100644 index 0000000..b0bbd1c --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/memchr.rs @@ -0,0 +1,137 @@ +/*! +Wrapper routines for `memchr` and friends. + +These routines choose the best implementation at compile time. (This is +different from `x86_64` because it is expected that `simd128` is almost always +available for `wasm32` targets.) +*/ + +macro_rules! defraw { +    ($ty:ident, $find:ident, $start:ident, $end:ident, $($needles:ident),+) => {{ +        #[cfg(target_feature = "simd128")] +        { +            use crate::arch::wasm32::simd128::memchr::$ty; + +            debug!("chose simd128 for {}", stringify!($ty)); +            debug_assert!($ty::is_available()); +            // SAFETY: We know that wasm memchr is always available whenever +            // code is compiled for `wasm32` with the `simd128` target feature +            // enabled. 
+            $ty::new_unchecked($($needles),+).$find($start, $end) +        } +        #[cfg(not(target_feature = "simd128"))] +        { +            use crate::arch::all::memchr::$ty; + +            debug!( +                "no simd128 feature available, using fallback for {}", +                stringify!($ty), +            ); +            $ty::new($($needles),+).$find($start, $end) +        } +    }} +} + +/// memchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr_raw( +    n1: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(One, find_raw, start, end, n1) +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr_raw( +    n1: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(One, rfind_raw, start, end, n1) +} + +/// memchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr2_raw( +    n1: u8, +    n2: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(Two, find_raw, start, end, n1, n2) +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr2_raw( +    n1: u8, +    n2: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(Two, rfind_raw, start, end, n1, n2) +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr3_raw( +    n1: u8, +    n2: u8, +    n3: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(Three, find_raw, start, end, n1, n2, n3) +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr3_raw( +    n1: u8, +    n2: u8, +    n3: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    defraw!(Three, rfind_raw, start, end, n1, n2, n3) +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. +#[inline(always)] +pub(crate) unsafe fn count_raw( +    n1: u8, +    start: *const u8, +    end: *const u8, +) -> usize { +    defraw!(One, count_raw, start, end, n1) +} diff --git a/vendor/memchr/src/arch/wasm32/mod.rs b/vendor/memchr/src/arch/wasm32/mod.rs new file mode 100644 index 0000000..209f876 --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/mod.rs @@ -0,0 +1,7 @@ +/*! +Vector algorithms for the `wasm32` target. +*/ + +pub mod simd128; + +pub(crate) mod memchr; diff --git a/vendor/memchr/src/arch/wasm32/simd128/memchr.rs b/vendor/memchr/src/arch/wasm32/simd128/memchr.rs new file mode 100644 index 0000000..fa314c9 --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/simd128/memchr.rs @@ -0,0 +1,1020 @@ +/*! +This module defines 128-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. 
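The wrapper routines above commit to an implementation purely at compile time via `cfg(target_feature = "simd128")`. A minimal sketch of the same dispatch pattern from a downstream crate, assuming a `wasm32` target and using a plain scalar scan as the fallback arm rather than the crate's internal fallback:

    fn find_byte(haystack: &[u8], needle: u8) -> Option<usize> {
        #[cfg(target_feature = "simd128")]
        {
            use memchr::arch::wasm32::simd128::memchr::One;
            // With simd128 enabled at compile time, `One::new` returns `Some`.
            One::new(needle)?.find(haystack)
        }
        #[cfg(not(target_feature = "simd128"))]
        {
            haystack.iter().position(|&b| b == needle)
        }
    }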
They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point and it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::wasm32::v128; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One(generic::One<v128>); + +impl One { +    /// Create a new searcher that finds occurrences of the needle byte given. +    /// +    /// This particular searcher is specialized to use simd128 vector +    /// instructions that typically make it quite fast. +    /// +    /// If simd128 is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle: u8) -> Option<One> { +        if One::is_available() { +            // SAFETY: we check that simd128 is available above. +            unsafe { Some(One::new_unchecked(needle)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to simd128 vectors and routines without +    /// checking that simd128 is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute `simd128` +    /// instructions in the current environment. +    #[target_feature(enable = "simd128")] +    #[inline] +    pub unsafe fn new_unchecked(needle: u8) -> One { +        One(generic::One::new(needle)) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`One::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `One::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "simd128")] +        { +            true +        } +        #[cfg(not(target_feature = "simd128"))] +        { +            false +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. 
+        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Counts all occurrences of this byte in the given haystack. +    #[inline] +    pub fn count(&self, haystack: &[u8]) -> usize { +        // SAFETY: All of our pointers are derived directly from a borrowed +        // slice, which is guaranteed to be valid. +        unsafe { +            let start = haystack.as_ptr(); +            let end = start.add(haystack.len()); +            self.count_raw(start, end) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < v128::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::fwd_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +            }); +        } +        // SAFETY: Building a `One` means it's safe to call 'simd128' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.find_raw_impl(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. 
+    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < v128::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::rev_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +            }); +        } +        // SAFETY: Building a `One` means it's safe to call 'simd128' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.rfind_raw_impl(start, end) +    } + +    /// Counts all occurrences of this byte in the given haystack represented +    /// by raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { +        if start >= end { +            return 0; +        } +        if end.distance(start) < v128::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::count_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +            }); +        } +        // SAFETY: Building a `One` means it's safe to call 'simd128' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.count_raw_impl(start, end) +    } + +    /// Execute a search using simd128 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of a simd128 vector (in bytes). 
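Putting the public `One` API together, a short usage sketch from a downstream crate (assumes a `wasm32` target built with simd128 so that `One::new` returns `Some`; the function name is hypothetical):

    use memchr::arch::wasm32::simd128::memchr::One;

    fn newline_stats(
        haystack: &[u8],
    ) -> Option<(Option<usize>, Option<usize>, usize)> {
        let searcher = One::new(b'\n')?;
        let first = searcher.find(haystack); // offset of the first b'\n'
        let last = searcher.rfind(haystack); // offset of the last b'\n'
        let total = searcher.count(haystack); // number of b'\n' bytes
        Some((first, last, total))
    }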
+    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `simd128` routines.) +    #[target_feature(enable = "simd128")] +    #[inline] +    unsafe fn find_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.find_raw(start, end) +    } + +    /// Execute a search using simd128 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of a simd128 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `simd128` routines.) +    #[target_feature(enable = "simd128")] +    #[inline] +    unsafe fn rfind_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.rfind_raw(start, end) +    } + +    /// Execute a count using simd128 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::count_raw`], except the distance between `start` and +    /// `end` must be at least the size of a simd128 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `simd128` routines.) +    #[target_feature(enable = "simd128")] +    #[inline] +    unsafe fn count_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> usize { +        self.0.count_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle byte in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { +        OneIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { +    searcher: &'a One, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn count(self) -> usize { +        self.it.count(|s, e| { +            // SAFETY: We rely on our generic iterator to return valid start +            // and end pointers. 
+            unsafe { self.searcher.count_raw(s, e) } +        }) +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two(generic::Two<v128>); + +impl Two { +    /// Create a new searcher that finds occurrences of the needle bytes given. +    /// +    /// This particular searcher is specialized to use simd128 vector +    /// instructions that typically make it quite fast. +    /// +    /// If simd128 is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle1: u8, needle2: u8) -> Option<Two> { +        if Two::is_available() { +            // SAFETY: we check that simd128 is available above. +            unsafe { Some(Two::new_unchecked(needle1, needle2)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to simd128 vectors and routines without +    /// checking that simd128 is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute `simd128` +    /// instructions in the current environment. +    #[target_feature(enable = "simd128")] +    #[inline] +    pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { +        Two(generic::Two::new(needle1, needle2)) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Two::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `Two::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "simd128")] +        { +            true +        } +        #[cfg(not(target_feature = "simd128"))] +        { +            false +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. 
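The `OneIter` iterator shown earlier can also be consumed forwards, backwards, or counted; its `count` is specialized to use the dedicated counting routine rather than repeated searches. A small sketch under the same assumptions as above:

    use memchr::arch::wasm32::simd128::memchr::One;

    fn line_offsets(haystack: &[u8]) -> Option<(Vec<usize>, Vec<usize>, usize)> {
        let searcher = One::new(b'\n')?;
        let forward: Vec<usize> = searcher.iter(haystack).collect();
        // DoubleEndedIterator: the same offsets, reported from the back.
        let reverse: Vec<usize> = searcher.iter(haystack).rev().collect();
        // Uses the specialized counting routine under the hood.
        let count = searcher.iter(haystack).count();
        Some((forward, reverse, count))
    }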
If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < v128::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::fwd_byte_by_byte(start, end, |b| { +                b == self.0.needle1() || b == self.0.needle2() +            }); +        } +        // SAFETY: Building a `Two` means it's safe to call 'simd128' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.find_raw_impl(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. 
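A corresponding sketch for `Two`, which reports the earliest position of either needle byte (same assumptions as the `One` examples above):

    use memchr::arch::wasm32::simd128::memchr::Two;

    fn find_line_ending(haystack: &[u8]) -> Option<usize> {
        // Matches at the first b'\r' or b'\n', whichever comes first.
        Two::new(b'\r', b'\n')?.find(haystack)
    }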
+    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < v128::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::rev_byte_by_byte(start, end, |b| { +                b == self.0.needle1() || b == self.0.needle2() +            }); +        } +        // SAFETY: Building a `Two` means it's safe to call 'simd128' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.rfind_raw_impl(start, end) +    } + +    /// Execute a search using simd128 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Two::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of a simd128 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Two`, which can only be constructed +    /// when it is safe to call `simd128` routines.) +    #[target_feature(enable = "simd128")] +    #[inline] +    unsafe fn find_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.find_raw(start, end) +    } + +    /// Execute a search using simd128 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Two::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of a simd128 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Two`, which can only be constructed +    /// when it is safe to call `simd128` routines.) +    #[target_feature(enable = "simd128")] +    #[inline] +    unsafe fn rfind_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.rfind_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle bytes in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { +        TwoIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { +    searcher: &'a Two, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. 
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three(generic::Three<v128>); + +impl Three { +    /// Create a new searcher that finds occurrences of the needle bytes given. +    /// +    /// This particular searcher is specialized to use simd128 vector +    /// instructions that typically make it quite fast. +    /// +    /// If simd128 is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { +        if Three::is_available() { +            // SAFETY: we check that simd128 is available above. +            unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to simd128 vectors and routines without +    /// checking that simd128 is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute `simd128` +    /// instructions in the current environment. +    #[target_feature(enable = "simd128")] +    #[inline] +    pub unsafe fn new_unchecked( +        needle1: u8, +        needle2: u8, +        needle3: u8, +    ) -> Three { +        Three(generic::Three::new(needle1, needle2, needle3)) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Three::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `Three::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "simd128")] +        { +            true +        } +        #[cfg(not(target_feature = "simd128"))] +        { +            false +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. 
+        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < v128::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::fwd_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +                    || b == self.0.needle2() +                    || b == self.0.needle3() +            }); +        } +        // SAFETY: Building a `Three` means it's safe to call 'simd128' +        // routines. Also, we've checked that our haystack is big enough to run +        // on the vector routine. Pointer validity is caller's responsibility. +        self.find_raw_impl(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. 
+    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < v128::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::rev_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +                    || b == self.0.needle2() +                    || b == self.0.needle3() +            }); +        } +        // SAFETY: Building a `Three` means it's safe to call 'simd128' +        // routines. Also, we've checked that our haystack is big enough to run +        // on the vector routine. Pointer validity is caller's responsibility. +        self.rfind_raw_impl(start, end) +    } + +    /// Execute a search using simd128 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Three::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of a simd128 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Three`, which can only be constructed +    /// when it is safe to call `simd128` routines.) +    #[target_feature(enable = "simd128")] +    #[inline] +    unsafe fn find_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.find_raw(start, end) +    } + +    /// Execute a search using simd128 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Three::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of a simd128 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Three`, which can only be constructed +    /// when it is safe to call `simd128` routines.) +    #[target_feature(enable = "simd128")] +    #[inline] +    unsafe fn rfind_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.rfind_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle byte in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { +        ThreeIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. 
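And for `Three`, a sketch that collects every occurrence of any of three delimiter bytes via the iterator described above (same assumptions as the earlier examples):

    use memchr::arch::wasm32::simd128::memchr::Three;

    fn delimiter_offsets(haystack: &[u8]) -> Option<Vec<usize>> {
        let searcher = Three::new(b',', b';', b'\t')?;
        // Offsets of every b',', b';' or b'\t', in ascending order.
        Some(searcher.iter(haystack).collect())
    }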
+#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { +    searcher: &'a Three, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { +    use super::*; + +    define_memchr_quickcheck!(super); + +    #[test] +    fn forward_one() { +        crate::tests::memchr::Runner::new(1).forward_iter( +            |haystack, needles| { +                Some(One::new(needles[0])?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_one() { +        crate::tests::memchr::Runner::new(1).reverse_iter( +            |haystack, needles| { +                Some(One::new(needles[0])?.iter(haystack).rev().collect()) +            }, +        ) +    } + +    #[test] +    fn count_one() { +        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { +            Some(One::new(needles[0])?.iter(haystack).count()) +        }) +    } + +    #[test] +    fn forward_two() { +        crate::tests::memchr::Runner::new(2).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(Two::new(n1, n2)?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_two() { +        crate::tests::memchr::Runner::new(2).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) +            }, +        ) +    } + +    #[test] +    fn forward_three() { +        crate::tests::memchr::Runner::new(3).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_three() { +        crate::tests::memchr::Runner::new(3).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) +            }, +        ) +    } +} diff --git a/vendor/memchr/src/arch/wasm32/simd128/mod.rs b/vendor/memchr/src/arch/wasm32/simd128/mod.rs new file mode 100644 index 
0000000..b55d1f0 --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/simd128/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `wasm32` target using 128-bit vectors via simd128. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/wasm32/simd128/packedpair.rs b/vendor/memchr/src/arch/wasm32/simd128/packedpair.rs new file mode 100644 index 0000000..b629377 --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/simd128/packedpair.rs @@ -0,0 +1,229 @@ +/*! +A 128-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::wasm32::v128; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 128-bit vector operations. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder(packedpair::Finder<v128>); + +impl Finder { +    /// Create a new pair searcher. The searcher returned can either report +    /// exact matches of `needle` or act as a prefilter and report candidate +    /// positions of `needle`. +    /// +    /// If simd128 is unavailable in the current environment or if a [`Pair`] +    /// could not be constructed from the needle given, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle: &[u8]) -> Option<Finder> { +        Finder::with_pair(needle, Pair::new(needle)?) +    } + +    /// Create a new "packed pair" finder using the pair of bytes given. +    /// +    /// This constructor permits callers to control precisely which pair of +    /// bytes is used as a predicate. +    /// +    /// If simd128 is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { +        if Finder::is_available() { +            // SAFETY: we check that simd128 is available above. We are also +            // guaranteed to have needle.len() > 1 because we have a valid +            // Pair. +            unsafe { Some(Finder::with_pair_impl(needle, pair)) } +        } else { +            None +        } +    } + +    /// Create a new `Finder` specific to simd128 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as the safety for `packedpair::Finder::new`, and callers must also +    /// ensure that simd128 is available. +    #[target_feature(enable = "simd128")] +    #[inline] +    unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { +        let finder = packedpair::Finder::<v128>::new(needle, pair); +        Finder(finder) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Finder::with_pair`] will +    /// return a `Some` value. Similarly, when it is false, it is guaranteed +    /// that `Finder::with_pair` will return a `None` value. 
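Mirroring the helper used by this module's own tests further below, a sketch of constructing and driving the packed pair `Finder` from a downstream crate (the outer `Option` distinguishes "this finder cannot be used" from "no match"):

    use memchr::arch::wasm32::simd128::packedpair::Finder;

    fn find_substring(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
        // `new` returns `None` when simd128 is unavailable or when no
        // suitable `Pair` can be derived from the needle.
        let finder = Finder::new(needle)?;
        // The finder panics on haystacks shorter than its minimum length,
        // so bail out (a real caller would use a scalar fallback instead).
        if haystack.len() < finder.min_haystack_len() {
            return None;
        }
        Some(finder.find(haystack, needle))
    }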
Notice that this +    /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, +    /// even when `Finder::is_available` is true, it is not guaranteed that a +    /// valid [`Pair`] can be found from the needle given. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "simd128")] +        { +            true +        } +        #[cfg(not(target_feature = "simd128"))] +        { +            false +        } +    } + +    /// Execute a search using wasm32 v128 vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    #[inline] +    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { +        self.find_impl(haystack, needle) +    } + +    /// Execute a search using wasm32 v128 vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    #[inline] +    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { +        self.find_prefilter_impl(haystack) +    } + +    /// Execute a search using wasm32 v128 vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    /// +    /// # Safety +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Finder`, which can only be constructed +    /// when it is safe to call `simd128` routines.) +    #[target_feature(enable = "simd128")] +    #[inline] +    fn find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { +        // SAFETY: The target feature safety obligation is automatically +        // fulfilled by virtue of being a method on `Finder`, which can only be +        // constructed when it is safe to call `simd128` routines. +        unsafe { self.0.find(haystack, needle) } +    } + +    /// Execute a prefilter search using wasm32 v128 vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    /// +    /// # Safety +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Finder`, which can only be constructed +    /// when it is safe to call `simd128` routines.) +    #[target_feature(enable = "simd128")] +    #[inline] +    fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: The target feature safety obligation is automatically +        // fulfilled by virtue of being a method on `Finder`, which can only be +        // constructed when it is safe to call `simd128` routines. +        unsafe { self.0.find_prefilter(haystack) } +    } + +    /// Returns the pair of offsets (into the needle) used to check as a +    /// predicate before confirming whether a needle exists at a particular +    /// position. +    #[inline] +    pub fn pair(&self) -> &Pair { +        self.0.pair() +    } + +    /// Returns the minimum haystack length that this `Finder` can search. +    /// +    /// Using a haystack with length smaller than this in a search will result +    /// in a panic. The reason for this restriction is that this finder is +    /// meant to be a low-level component that is part of a larger substring +    /// strategy. 
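When used as a prefilter inside a larger substring strategy, `find_prefilter` only reports candidate positions, so confirming the full needle is the caller's job. A minimal sketch of that division of labor (illustrative; not how the crate's higher-level searchers are actually wired up):

    use memchr::arch::wasm32::simd128::packedpair::Finder;

    // Report the first candidate position and whether it is a real match.
    fn first_candidate(haystack: &[u8], needle: &[u8]) -> Option<(usize, bool)> {
        let finder = Finder::new(needle)?;
        if haystack.len() < finder.min_haystack_len() {
            // Too short for the vector prefilter; callers fall back to a
            // scalar search in this case.
            return None;
        }
        let at = finder.find_prefilter(haystack)?;
        // The candidate only says the packed pair matched at `at`.
        Some((at, haystack[at..].starts_with(needle)))
    }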
In that sense, it avoids trying to handle all cases and +    /// instead only handles the cases that it can handle very well. +    #[inline] +    pub fn min_haystack_len(&self) -> usize { +        self.0.min_haystack_len() +    } +} + +#[cfg(test)] +mod tests { +    use super::*; + +    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> { +        let f = Finder::new(needle)?; +        if haystack.len() < f.min_haystack_len() { +            return None; +        } +        Some(f.find(haystack, needle)) +    } + +    define_substring_forward_quickcheck!(find); + +    #[test] +    fn forward_substring() { +        crate::tests::substring::Runner::new().fwd(find).run() +    } + +    #[test] +    fn forward_packedpair() { +        fn find( +            haystack: &[u8], +            needle: &[u8], +            index1: u8, +            index2: u8, +        ) -> Option<Option<usize>> { +            let pair = Pair::with_indices(needle, index1, index2)?; +            let f = Finder::with_pair(needle, pair)?; +            if haystack.len() < f.min_haystack_len() { +                return None; +            } +            Some(f.find(haystack, needle)) +        } +        crate::tests::packedpair::Runner::new().fwd(find).run() +    } + +    #[test] +    fn forward_packedpair_prefilter() { +        fn find( +            haystack: &[u8], +            needle: &[u8], +            index1: u8, +            index2: u8, +        ) -> Option<Option<usize>> { +            let pair = Pair::with_indices(needle, index1, index2)?; +            let f = Finder::with_pair(needle, pair)?; +            if haystack.len() < f.min_haystack_len() { +                return None; +            } +            Some(f.find_prefilter(haystack)) +        } +        crate::tests::packedpair::Runner::new().fwd(find).run() +    } +} diff --git a/vendor/memchr/src/arch/x86_64/avx2/memchr.rs b/vendor/memchr/src/arch/x86_64/avx2/memchr.rs new file mode 100644 index 0000000..59f8c7f --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/avx2/memchr.rs @@ -0,0 +1,1352 @@ +/*! +This module defines 256-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point and it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::x86_64::{__m128i, __m256i}; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One { +    /// Used for haystacks less than 32 bytes. 
+    sse2: generic::One<__m128i>, +    /// Used for haystacks bigger than 32 bytes. +    avx2: generic::One<__m256i>, +} + +impl One { +    /// Create a new searcher that finds occurrences of the needle byte given. +    /// +    /// This particular searcher is specialized to use AVX2 vector instructions +    /// that typically make it quite fast. (SSE2 is used for haystacks that +    /// are too short to accommodate an AVX2 vector.) +    /// +    /// If either SSE2 or AVX2 is unavailable in the current environment, then +    /// `None` is returned. +    #[inline] +    pub fn new(needle: u8) -> Option<One> { +        if One::is_available() { +            // SAFETY: we check that sse2 and avx2 are available above. +            unsafe { Some(One::new_unchecked(needle)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to AVX2 vectors and routines without +    /// checking that either SSE2 or AVX2 is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute both `sse2` and +    /// `avx2` instructions in the current environment. +    /// +    /// Note that it is a common misconception that if one compiles for an +    /// `x86_64` target, then they therefore automatically have access to SSE2 +    /// instructions. While this is almost always the case, it isn't true in +    /// 100% of cases. +    #[target_feature(enable = "sse2", enable = "avx2")] +    #[inline] +    pub unsafe fn new_unchecked(needle: u8) -> One { +        One { +            sse2: generic::One::new(needle), +            avx2: generic::One::new(needle), +        } +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`One::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `One::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(not(target_feature = "sse2"))] +        { +            false +        } +        #[cfg(target_feature = "sse2")] +        { +            #[cfg(target_feature = "avx2")] +            { +                true +            } +            #[cfg(not(target_feature = "avx2"))] +            { +                #[cfg(feature = "std")] +                { +                    std::is_x86_feature_detected!("avx2") +                } +                #[cfg(not(feature = "std"))] +                { +                    false +                } +            } +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. 
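Unlike the wasm32 module above, availability here is partly a runtime question: with the `std` feature enabled, `is_available` consults `is_x86_feature_detected!`. A usage sketch with a scalar fallback when AVX2 is not present (assumes an `x86_64` target; the function name is hypothetical):

    use memchr::arch::x86_64::avx2::memchr::One;

    fn memchr_avx2_or_fallback(haystack: &[u8], needle: u8) -> Option<usize> {
        match One::new(needle) {
            // SSE2 and AVX2 confirmed, either at compile time or via runtime
            // CPU feature detection.
            Some(searcher) => searcher.find(haystack),
            // Otherwise use a plain scalar scan.
            None => haystack.iter().position(|&b| b == needle),
        }
    }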
+    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Counts all occurrences of this byte in the given haystack. +    #[inline] +    pub fn count(&self, haystack: &[u8]) -> usize { +        // SAFETY: All of our pointers are derived directly from a borrowed +        // slice, which is guaranteed to be valid. +        unsafe { +            let start = haystack.as_ptr(); +            let end = start.add(haystack.len()); +            self.count_raw(start, end) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let len = end.distance(start); +        if len < __m256i::BYTES { +            return if len < __m128i::BYTES { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                generic::fwd_byte_by_byte(start, end, |b| { +                    b == self.sse2.needle1() +                }) +            } else { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                self.find_raw_sse2(start, end) +            }; +        } +        // SAFETY: Building a `One` means it's safe to call both 'sse2' and +        // 'avx2' routines. Also, we've checked that our haystack is big +        // enough to run on the vector routine. Pointer validity is caller's +        // responsibility. +        // +        // Note that we could call `self.avx2.find_raw` directly here. But that +        // means we'd have to annotate this routine with `target_feature`. +        // Which is fine, because this routine is `unsafe` anyway and the +        // `target_feature` obligation is met by virtue of building a `One`. 
+        // The real problem is that a routine with a `target_feature` +        // annotation generally can't be inlined into caller code unless +        // the caller code has the same target feature annotations. Namely, +        // the common case (at time of writing) is for calling code to not +        // have the `avx2` target feature enabled *at compile time*. Without +        // `target_feature` on this routine, it can be inlined which will +        // handle some of the short-haystack cases above without touching the +        // architecture specific code. +        self.find_raw_avx2(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let len = end.distance(start); +        if len < __m256i::BYTES { +            return if len < __m128i::BYTES { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                generic::rev_byte_by_byte(start, end, |b| { +                    b == self.sse2.needle1() +                }) +            } else { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                self.rfind_raw_sse2(start, end) +            }; +        } +        // SAFETY: Building a `One` means it's safe to call both 'sse2' and +        // 'avx2' routines. Also, we've checked that our haystack is big +        // enough to run on the vector routine. Pointer validity is caller's +        // responsibility. +        // +        // See note in forward routine above for why we don't just call +        // `self.avx2.rfind_raw` directly here. +        self.rfind_raw_avx2(start, end) +    } + +    /// Counts all occurrences of this byte in the given haystack represented +    /// by raw pointers. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. 
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `0` will always be returned. +    #[inline] +    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { +        if start >= end { +            return 0; +        } +        let len = end.distance(start); +        if len < __m256i::BYTES { +            return if len < __m128i::BYTES { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                generic::count_byte_by_byte(start, end, |b| { +                    b == self.sse2.needle1() +                }) +            } else { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                self.count_raw_sse2(start, end) +            }; +        } +        // SAFETY: Building a `One` means it's safe to call both 'sse2' and +        // 'avx2' routines. Also, we've checked that our haystack is big +        // enough to run on the vector routine. Pointer validity is caller's +        // responsibility. +        self.count_raw_avx2(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn find_raw_sse2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.sse2.find_raw(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn rfind_raw_sse2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.sse2.rfind_raw(start, end) +    } + +    /// Execute a count using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::count_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) 
+    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn count_raw_sse2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> usize { +        self.sse2.count_raw(start, end) +    } + +    /// Execute a search using AVX2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of an AVX2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "avx2")] +    #[inline] +    unsafe fn find_raw_avx2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.avx2.find_raw(start, end) +    } + +    /// Execute a search using AVX2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of an AVX2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "avx2")] +    #[inline] +    unsafe fn rfind_raw_avx2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.avx2.rfind_raw(start, end) +    } + +    /// Execute a count using AVX2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::count_raw`], except the distance between `start` and +    /// `end` must be at least the size of an AVX2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "avx2")] +    #[inline] +    unsafe fn count_raw_avx2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> usize { +        self.avx2.count_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle byte in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { +        OneIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. 
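+///
+/// A usage sketch (illustrative only; the public
+/// `memchr::arch::x86_64::avx2::memchr` path and runtime AVX2/SSE2 support
+/// are assumptions, so this is not compiled as a doctest):
+///
+/// ```ignore
+/// use memchr::arch::x86_64::avx2::memchr::One;
+///
+/// // `One::new` returns `None` when the required CPU features are missing.
+/// if let Some(searcher) = One::new(b'z') {
+///     let offsets: Vec<usize> = searcher.iter(b"xyzzy").collect();
+///     assert_eq!(offsets, vec![2, 3]);
+///     // The iterator is double ended, so matches can be visited in reverse.
+///     let rev: Vec<usize> = searcher.iter(b"xyzzy").rev().collect();
+///     assert_eq!(rev, vec![3, 2]);
+///     // `Iterator::count` is specialized to use the vectorized `One::count`.
+///     assert_eq!(2, searcher.iter(b"xyzzy").count());
+/// }
+/// ```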
+#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { +    searcher: &'a One, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn count(self) -> usize { +        self.it.count(|s, e| { +            // SAFETY: We rely on our generic iterator to return valid start +            // and end pointers. +            unsafe { self.searcher.count_raw(s, e) } +        }) +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two { +    /// Used for haystacks less than 32 bytes. +    sse2: generic::Two<__m128i>, +    /// Used for haystacks bigger than 32 bytes. +    avx2: generic::Two<__m256i>, +} + +impl Two { +    /// Create a new searcher that finds occurrences of the needle bytes given. +    /// +    /// This particular searcher is specialized to use AVX2 vector instructions +    /// that typically make it quite fast. (SSE2 is used for haystacks that +    /// are too short to accommodate an AVX2 vector.) +    /// +    /// If either SSE2 or AVX2 is unavailable in the current environment, then +    /// `None` is returned. +    #[inline] +    pub fn new(needle1: u8, needle2: u8) -> Option<Two> { +        if Two::is_available() { +            // SAFETY: we check that sse2 and avx2 are available above. +            unsafe { Some(Two::new_unchecked(needle1, needle2)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to AVX2 vectors and routines without +    /// checking that either SSE2 or AVX2 is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute both `sse2` and +    /// `avx2` instructions in the current environment. +    /// +    /// Note that it is a common misconception that if one compiles for an +    /// `x86_64` target, then they therefore automatically have access to SSE2 +    /// instructions. While this is almost always the case, it isn't true in +    /// 100% of cases. 
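+    ///
+    /// A hedged sketch of the intended guard-then-construct pattern
+    /// (illustrative only; the public module path is an assumption):
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::avx2::memchr::Two;
+    ///
+    /// let finder = if Two::is_available() {
+    ///     // SAFETY: `is_available` returned true, so executing SSE2 and
+    ///     // AVX2 instructions in this environment is sound.
+    ///     Some(unsafe { Two::new_unchecked(b'\r', b'\n') })
+    /// } else {
+    ///     None
+    /// };
+    /// assert_eq!(finder.is_some(), Two::is_available());
+    /// ```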
+    #[target_feature(enable = "sse2", enable = "avx2")] +    #[inline] +    pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { +        Two { +            sse2: generic::Two::new(needle1, needle2), +            avx2: generic::Two::new(needle1, needle2), +        } +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Two::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `Two::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(not(target_feature = "sse2"))] +        { +            false +        } +        #[cfg(target_feature = "sse2")] +        { +            #[cfg(target_feature = "avx2")] +            { +                true +            } +            #[cfg(not(target_feature = "avx2"))] +            { +                #[cfg(feature = "std")] +                { +                    std::is_x86_feature_detected!("avx2") +                } +                #[cfg(not(feature = "std"))] +                { +                    false +                } +            } +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. 
+    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let len = end.distance(start); +        if len < __m256i::BYTES { +            return if len < __m128i::BYTES { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                generic::fwd_byte_by_byte(start, end, |b| { +                    b == self.sse2.needle1() || b == self.sse2.needle2() +                }) +            } else { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                self.find_raw_sse2(start, end) +            }; +        } +        // SAFETY: Building a `Two` means it's safe to call both 'sse2' and +        // 'avx2' routines. Also, we've checked that our haystack is big +        // enough to run on the vector routine. Pointer validity is caller's +        // responsibility. +        // +        // Note that we could call `self.avx2.find_raw` directly here. But that +        // means we'd have to annotate this routine with `target_feature`. +        // Which is fine, because this routine is `unsafe` anyway and the +        // `target_feature` obligation is met by virtue of building a `Two`. +        // The real problem is that a routine with a `target_feature` +        // annotation generally can't be inlined into caller code unless +        // the caller code has the same target feature annotations. Namely, +        // the common case (at time of writing) is for calling code to not +        // have the `avx2` target feature enabled *at compile time*. Without +        // `target_feature` on this routine, it can be inlined which will +        // handle some of the short-haystack cases above without touching the +        // architecture specific code. +        self.find_raw_avx2(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. 
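+    ///
+    /// A hedged sketch of raw pointer usage (illustrative only; upholding the
+    /// safety contract above is entirely the caller's responsibility):
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::avx2::memchr::Two;
+    ///
+    /// let finder = Two::new(b'a', b'b').unwrap();
+    /// let haystack = b"xxxaxxb";
+    /// let start = haystack.as_ptr();
+    /// // SAFETY: `end` is one past the end of the same allocation as `start`.
+    /// let end = unsafe { start.add(haystack.len()) };
+    /// // SAFETY: Both pointers are derived from `haystack` and valid for reads.
+    /// let found = unsafe { finder.rfind_raw(start, end) };
+    /// // Convert the matching pointer back into an offset into the haystack.
+    /// let offset = found.map(|p| p as usize - start as usize);
+    /// assert_eq!(offset, Some(6));
+    /// ```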
+    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let len = end.distance(start); +        if len < __m256i::BYTES { +            return if len < __m128i::BYTES { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                generic::rev_byte_by_byte(start, end, |b| { +                    b == self.sse2.needle1() || b == self.sse2.needle2() +                }) +            } else { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                self.rfind_raw_sse2(start, end) +            }; +        } +        // SAFETY: Building a `Two` means it's safe to call both 'sse2' and +        // 'avx2' routines. Also, we've checked that our haystack is big +        // enough to run on the vector routine. Pointer validity is caller's +        // responsibility. +        // +        // See note in forward routine above for why we don't just call +        // `self.avx2.rfind_raw` directly here. +        self.rfind_raw_avx2(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Two::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Two`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn find_raw_sse2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.sse2.find_raw(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Two::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Two`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn rfind_raw_sse2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.sse2.rfind_raw(start, end) +    } + +    /// Execute a search using AVX2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Two::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of an AVX2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Two`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "avx2")] +    #[inline] +    unsafe fn find_raw_avx2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.avx2.find_raw(start, end) +    } + +    /// Execute a search using AVX2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Two::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of an AVX2 vector (in bytes). 
+    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Two`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "avx2")] +    #[inline] +    unsafe fn rfind_raw_avx2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.avx2.rfind_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle bytes in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { +        TwoIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { +    searcher: &'a Two, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three { +    /// Used for haystacks less than 32 bytes. +    sse2: generic::Three<__m128i>, +    /// Used for haystacks bigger than 32 bytes. +    avx2: generic::Three<__m256i>, +} + +impl Three { +    /// Create a new searcher that finds occurrences of the needle bytes given. +    /// +    /// This particular searcher is specialized to use AVX2 vector instructions +    /// that typically make it quite fast. (SSE2 is used for haystacks that +    /// are too short to accommodate an AVX2 vector.) +    /// +    /// If either SSE2 or AVX2 is unavailable in the current environment, then +    /// `None` is returned. 
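+    ///
+    /// An illustrative sketch (the public module path and runtime AVX2/SSE2
+    /// support are assumptions):
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::avx2::memchr::Three;
+    ///
+    /// if let Some(finder) = Three::new(b'a', b'b', b'o') {
+    ///     // First occurrence of any of the three bytes.
+    ///     assert_eq!(Some(1), finder.find(b"xafoobar"));
+    ///     // Last occurrence, searching from the end of the haystack.
+    ///     assert_eq!(Some(6), finder.rfind(b"xafoobar"));
+    /// }
+    /// ```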
+    #[inline] +    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { +        if Three::is_available() { +            // SAFETY: we check that sse2 and avx2 are available above. +            unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to AVX2 vectors and routines without +    /// checking that either SSE2 or AVX2 is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute both `sse2` and +    /// `avx2` instructions in the current environment. +    /// +    /// Note that it is a common misconception that if one compiles for an +    /// `x86_64` target, then they therefore automatically have access to SSE2 +    /// instructions. While this is almost always the case, it isn't true in +    /// 100% of cases. +    #[target_feature(enable = "sse2", enable = "avx2")] +    #[inline] +    pub unsafe fn new_unchecked( +        needle1: u8, +        needle2: u8, +        needle3: u8, +    ) -> Three { +        Three { +            sse2: generic::Three::new(needle1, needle2, needle3), +            avx2: generic::Three::new(needle1, needle2, needle3), +        } +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Three::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `Three::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(not(target_feature = "sse2"))] +        { +            false +        } +        #[cfg(target_feature = "sse2")] +        { +            #[cfg(target_feature = "avx2")] +            { +                true +            } +            #[cfg(not(target_feature = "avx2"))] +            { +                #[cfg(feature = "std")] +                { +                    std::is_x86_feature_detected!("avx2") +                } +                #[cfg(not(feature = "std"))] +                { +                    false +                } +            } +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. 
+        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let len = end.distance(start); +        if len < __m256i::BYTES { +            return if len < __m128i::BYTES { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                generic::fwd_byte_by_byte(start, end, |b| { +                    b == self.sse2.needle1() +                        || b == self.sse2.needle2() +                        || b == self.sse2.needle3() +                }) +            } else { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                self.find_raw_sse2(start, end) +            }; +        } +        // SAFETY: Building a `Three` means it's safe to call both 'sse2' and +        // 'avx2' routines. Also, we've checked that our haystack is big +        // enough to run on the vector routine. Pointer validity is caller's +        // responsibility. +        // +        // Note that we could call `self.avx2.find_raw` directly here. But that +        // means we'd have to annotate this routine with `target_feature`. +        // Which is fine, because this routine is `unsafe` anyway and the +        // `target_feature` obligation is met by virtue of building a `Three`. +        // The real problem is that a routine with a `target_feature` +        // annotation generally can't be inlined into caller code unless +        // the caller code has the same target feature annotations. Namely, +        // the common case (at time of writing) is for calling code to not +        // have the `avx2` target feature enabled *at compile time*. Without +        // `target_feature` on this routine, it can be inlined which will +        // handle some of the short-haystack cases above without touching the +        // architecture specific code. +        self.find_raw_avx2(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. 
+    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        let len = end.distance(start); +        if len < __m256i::BYTES { +            return if len < __m128i::BYTES { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                generic::rev_byte_by_byte(start, end, |b| { +                    b == self.sse2.needle1() +                        || b == self.sse2.needle2() +                        || b == self.sse2.needle3() +                }) +            } else { +                // SAFETY: We require the caller to pass valid start/end +                // pointers. +                self.rfind_raw_sse2(start, end) +            }; +        } +        // SAFETY: Building a `Three` means it's safe to call both 'sse2' and +        // 'avx2' routines. Also, we've checked that our haystack is big +        // enough to run on the vector routine. Pointer validity is caller's +        // responsibility. +        // +        // See note in forward routine above for why we don't just call +        // `self.avx2.rfind_raw` directly here. +        self.rfind_raw_avx2(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Three::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Three`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn find_raw_sse2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.sse2.find_raw(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Three::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Three`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) 
+    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn rfind_raw_sse2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.sse2.rfind_raw(start, end) +    } + +    /// Execute a search using AVX2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Three::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of an AVX2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Three`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "avx2")] +    #[inline] +    unsafe fn find_raw_avx2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.avx2.find_raw(start, end) +    } + +    /// Execute a search using AVX2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Three::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of an AVX2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Three`, which can only be constructed +    /// when it is safe to call `sse2`/`avx2` routines.) +    #[target_feature(enable = "avx2")] +    #[inline] +    unsafe fn rfind_raw_avx2( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.avx2.rfind_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle bytes in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { +        ThreeIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { +    searcher: &'a Three, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. 
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { +    use super::*; + +    define_memchr_quickcheck!(super); + +    #[test] +    fn forward_one() { +        crate::tests::memchr::Runner::new(1).forward_iter( +            |haystack, needles| { +                Some(One::new(needles[0])?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_one() { +        crate::tests::memchr::Runner::new(1).reverse_iter( +            |haystack, needles| { +                Some(One::new(needles[0])?.iter(haystack).rev().collect()) +            }, +        ) +    } + +    #[test] +    fn count_one() { +        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { +            Some(One::new(needles[0])?.iter(haystack).count()) +        }) +    } + +    #[test] +    fn forward_two() { +        crate::tests::memchr::Runner::new(2).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(Two::new(n1, n2)?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_two() { +        crate::tests::memchr::Runner::new(2).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) +            }, +        ) +    } + +    #[test] +    fn forward_three() { +        crate::tests::memchr::Runner::new(3).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_three() { +        crate::tests::memchr::Runner::new(3).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) +            }, +        ) +    } +} diff --git a/vendor/memchr/src/arch/x86_64/avx2/mod.rs b/vendor/memchr/src/arch/x86_64/avx2/mod.rs new file mode 100644 index 0000000..ee4097d --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/avx2/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `x86_64` target using 256-bit vectors via AVX2. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs b/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs new file mode 100644 index 0000000..efae7b6 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs @@ -0,0 +1,272 @@ +/*! +A 256-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::x86_64::{__m128i, __m256i}; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 256-bit vector operations. 
+/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder { +    sse2: packedpair::Finder<__m128i>, +    avx2: packedpair::Finder<__m256i>, +} + +impl Finder { +    /// Create a new pair searcher. The searcher returned can either report +    /// exact matches of `needle` or act as a prefilter and report candidate +    /// positions of `needle`. +    /// +    /// If AVX2 is unavailable in the current environment or if a [`Pair`] +    /// could not be constructed from the needle given, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle: &[u8]) -> Option<Finder> { +        Finder::with_pair(needle, Pair::new(needle)?) +    } + +    /// Create a new "packed pair" finder using the pair of bytes given. +    /// +    /// This constructor permits callers to control precisely which pair of +    /// bytes is used as a predicate. +    /// +    /// If AVX2 is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { +        if Finder::is_available() { +            // SAFETY: we check that sse2/avx2 is available above. We are also +            // guaranteed to have needle.len() > 1 because we have a valid +            // Pair. +            unsafe { Some(Finder::with_pair_impl(needle, pair)) } +        } else { +            None +        } +    } + +    /// Create a new `Finder` specific to SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as the safety for `packedpair::Finder::new`, and callers must also +    /// ensure that both SSE2 and AVX2 are available. +    #[target_feature(enable = "sse2", enable = "avx2")] +    #[inline] +    unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { +        let sse2 = packedpair::Finder::<__m128i>::new(needle, pair); +        let avx2 = packedpair::Finder::<__m256i>::new(needle, pair); +        Finder { sse2, avx2 } +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Finder::with_pair`] will +    /// return a `Some` value. Similarly, when it is false, it is guaranteed +    /// that `Finder::with_pair` will return a `None` value. Notice that this +    /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, +    /// even when `Finder::is_available` is true, it is not guaranteed that a +    /// valid [`Pair`] can be found from the needle given. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. 
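+    ///
+    /// A usage sketch (illustrative only; the public module path is an
+    /// assumption, and the haystack length is checked against
+    /// [`Finder::min_haystack_len`] because shorter haystacks panic):
+    ///
+    /// ```ignore
+    /// use memchr::arch::x86_64::avx2::packedpair::Finder;
+    ///
+    /// if Finder::is_available() {
+    ///     // `new` can still return `None` if no suitable byte pair can be
+    ///     // chosen from the needle.
+    ///     if let Some(finder) = Finder::new(b"needle") {
+    ///         let haystack = b"a big needle in a haystack";
+    ///         if haystack.len() >= finder.min_haystack_len() {
+    ///             assert_eq!(Some(6), finder.find(haystack, b"needle"));
+    ///         }
+    ///     }
+    /// }
+    /// ```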
+    #[inline] +    pub fn is_available() -> bool { +        #[cfg(not(target_feature = "sse2"))] +        { +            false +        } +        #[cfg(target_feature = "sse2")] +        { +            #[cfg(target_feature = "avx2")] +            { +                true +            } +            #[cfg(not(target_feature = "avx2"))] +            { +                #[cfg(feature = "std")] +                { +                    std::is_x86_feature_detected!("avx2") +                } +                #[cfg(not(feature = "std"))] +                { +                    false +                } +            } +        } +    } + +    /// Execute a search using AVX2 vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    #[inline] +    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { +        // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. +        unsafe { self.find_impl(haystack, needle) } +    } + +    /// Run this finder on the given haystack as a prefilter. +    /// +    /// If a candidate match is found, then an offset where the needle *could* +    /// begin in the haystack is returned. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    #[inline] +    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. +        unsafe { self.find_prefilter_impl(haystack) } +    } + +    /// Execute a search using AVX2 vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    /// +    /// # Safety +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Finder`, which can only be constructed +    /// when it is safe to call `sse2` and `avx2` routines.) +    #[target_feature(enable = "sse2", enable = "avx2")] +    #[inline] +    unsafe fn find_impl( +        &self, +        haystack: &[u8], +        needle: &[u8], +    ) -> Option<usize> { +        if haystack.len() < self.avx2.min_haystack_len() { +            self.sse2.find(haystack, needle) +        } else { +            self.avx2.find(haystack, needle) +        } +    } + +    /// Execute a prefilter search using AVX2 vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    /// +    /// # Safety +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Finder`, which can only be constructed +    /// when it is safe to call `sse2` and `avx2` routines.) +    #[target_feature(enable = "sse2", enable = "avx2")] +    #[inline] +    unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { +        if haystack.len() < self.avx2.min_haystack_len() { +            self.sse2.find_prefilter(haystack) +        } else { +            self.avx2.find_prefilter(haystack) +        } +    } + +    /// Returns the pair of offsets (into the needle) used to check as a +    /// predicate before confirming whether a needle exists at a particular +    /// position. +    #[inline] +    pub fn pair(&self) -> &Pair { +        self.avx2.pair() +    } + +    /// Returns the minimum haystack length that this `Finder` can search. 
+    /// +    /// Using a haystack with length smaller than this in a search will result +    /// in a panic. The reason for this restriction is that this finder is +    /// meant to be a low-level component that is part of a larger substring +    /// strategy. In that sense, it avoids trying to handle all cases and +    /// instead only handles the cases that it can handle very well. +    #[inline] +    pub fn min_haystack_len(&self) -> usize { +        // The caller doesn't need to care about AVX2's min_haystack_len +        // since this implementation will automatically switch to the SSE2 +        // implementation if the haystack is too short for AVX2. Therefore, the +        // caller only needs to care about SSE2's min_haystack_len. +        // +        // This does assume that SSE2's min_haystack_len is less than or +        // equal to AVX2's min_haystack_len. In practice, this is true and +        // there is no way it could be false based on how this Finder is +        // implemented. Namely, both SSE2 and AVX2 use the same `Pair`. If +        // they used different pairs, then it's possible (although perhaps +        // pathological) for SSE2's min_haystack_len to be bigger than AVX2's. +        self.sse2.min_haystack_len() +    } +} + +#[cfg(test)] +mod tests { +    use super::*; + +    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> { +        let f = Finder::new(needle)?; +        if haystack.len() < f.min_haystack_len() { +            return None; +        } +        Some(f.find(haystack, needle)) +    } + +    define_substring_forward_quickcheck!(find); + +    #[test] +    fn forward_substring() { +        crate::tests::substring::Runner::new().fwd(find).run() +    } + +    #[test] +    fn forward_packedpair() { +        fn find( +            haystack: &[u8], +            needle: &[u8], +            index1: u8, +            index2: u8, +        ) -> Option<Option<usize>> { +            let pair = Pair::with_indices(needle, index1, index2)?; +            let f = Finder::with_pair(needle, pair)?; +            if haystack.len() < f.min_haystack_len() { +                return None; +            } +            Some(f.find(haystack, needle)) +        } +        crate::tests::packedpair::Runner::new().fwd(find).run() +    } + +    #[test] +    fn forward_packedpair_prefilter() { +        fn find( +            haystack: &[u8], +            needle: &[u8], +            index1: u8, +            index2: u8, +        ) -> Option<Option<usize>> { +            if !cfg!(target_feature = "sse2") { +                return None; +            } +            let pair = Pair::with_indices(needle, index1, index2)?; +            let f = Finder::with_pair(needle, pair)?; +            if haystack.len() < f.min_haystack_len() { +                return None; +            } +            Some(f.find_prefilter(haystack)) +        } +        crate::tests::packedpair::Runner::new().fwd(find).run() +    } +} diff --git a/vendor/memchr/src/arch/x86_64/memchr.rs b/vendor/memchr/src/arch/x86_64/memchr.rs new file mode 100644 index 0000000..fcb1399 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/memchr.rs @@ -0,0 +1,335 @@ +/*! +Wrapper routines for `memchr` and friends. + +These routines efficiently dispatch to the best implementation based on what +the CPU supports. +*/ + +/// Provides a way to run a memchr-like function while amortizing the cost of +/// runtime CPU feature detection. +/// +/// This works by loading a function pointer from an atomic global. 
Initially, +/// this global is set to a function that does CPU feature detection. For +/// example, if AVX2 is available, then the AVX2 implementation is used. +/// Otherwise, at least on x86_64, the SSE2 implementation is used. (And +/// in some niche cases, if SSE2 isn't available, then the architecture +/// independent fallback implementation is used.) +/// +/// After the first call to this function, the atomic global is replaced with +/// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then +/// will directly call the chosen routine instead of needing to go through the +/// CPU feature detection branching again. +/// +/// This particular macro is specifically written to provide the implementation +/// of functions with the following signature: +/// +/// ```ignore +/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<*const u8>; +/// ``` +/// +/// Where you can also have `memchr2` and `memchr3`, but with `needle2` and +/// `needle3`, respectively. The `start` and `end` parameters correspond to the +/// start and end of the haystack, respectively. +/// +/// We use raw pointers here instead of the more obvious `haystack: &[u8]` so +/// that the function is compatible with our lower level iterator logic that +/// operates on raw pointers. We use this macro to implement "raw" memchr +/// routines with the signature above, and then define memchr routines using +/// regular slices on top of them. +/// +/// Note that we use `#[cfg(target_feature = "sse2")]` below even though +/// it shouldn't be strictly necessary because without it, it seems to +/// cause the compiler to blow up. I guess it can't handle a function +/// pointer being created with an sse target feature? Dunno. See the +/// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with +/// this. +/// +/// # Safety +/// +/// Primarily, callers must ensure that `$fnty` is a correct function pointer +/// type and not something else. +/// +/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a +/// routine that returns a valid pointer when a match is found. That is, a +/// pointer that is `>= start` and `< end`. +/// +/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers +/// correspond to valid pointers. +macro_rules!
unsafe_ifunc { +    ( +        $memchrty:ident, +        $memchrfind:ident, +        $fnty:ty, +        $retty:ty, +        $hay_start:ident, +        $hay_end:ident, +        $($needle:ident),+ +    ) => {{ +        #![allow(unused_unsafe)] + +        use core::sync::atomic::{AtomicPtr, Ordering}; + +        type Fn = *mut (); +        type RealFn = $fnty; +        static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn); + +        #[cfg(target_feature = "sse2")] +        #[target_feature(enable = "sse2", enable = "avx2")] +        unsafe fn find_avx2( +            $($needle: u8),+, +            $hay_start: *const u8, +            $hay_end: *const u8, +        ) -> $retty { +            use crate::arch::x86_64::avx2::memchr::$memchrty; +            $memchrty::new_unchecked($($needle),+) +                .$memchrfind($hay_start, $hay_end) +        } + +        #[cfg(target_feature = "sse2")] +        #[target_feature(enable = "sse2")] +        unsafe fn find_sse2( +            $($needle: u8),+, +            $hay_start: *const u8, +            $hay_end: *const u8, +        ) -> $retty { +            use crate::arch::x86_64::sse2::memchr::$memchrty; +            $memchrty::new_unchecked($($needle),+) +                .$memchrfind($hay_start, $hay_end) +        } + +        unsafe fn find_fallback( +            $($needle: u8),+, +            $hay_start: *const u8, +            $hay_end: *const u8, +        ) -> $retty { +            use crate::arch::all::memchr::$memchrty; +            $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end) +        } + +        unsafe fn detect( +            $($needle: u8),+, +            $hay_start: *const u8, +            $hay_end: *const u8, +        ) -> $retty { +            let fun = { +                #[cfg(not(target_feature = "sse2"))] +                { +                    debug!( +                        "no sse2 feature available, using fallback for {}", +                        stringify!($memchrty), +                    ); +                    find_fallback as RealFn +                } +                #[cfg(target_feature = "sse2")] +                { +                    use crate::arch::x86_64::{sse2, avx2}; +                    if avx2::memchr::$memchrty::is_available() { +                        debug!("chose AVX2 for {}", stringify!($memchrty)); +                        find_avx2 as RealFn +                    } else if sse2::memchr::$memchrty::is_available() { +                        debug!("chose SSE2 for {}", stringify!($memchrty)); +                        find_sse2 as RealFn +                    } else { +                        debug!("chose fallback for {}", stringify!($memchrty)); +                        find_fallback as RealFn +                    } +                } +            }; +            FN.store(fun as Fn, Ordering::Relaxed); +            // SAFETY: The only thing we need to uphold here is the +            // `#[target_feature]` requirements. Since we check is_available +            // above before using the corresponding implementation, we are +            // guaranteed to only call code that is supported on the current +            // CPU. +            fun($($needle),+, $hay_start, $hay_end) +        } + +        // SAFETY: By virtue of the caller contract, RealFn is a function +        // pointer, which is always safe to transmute with a *mut (). Also, +        // since we use $memchrty::is_available, it is guaranteed to be safe +        // to call $memchrty::$memchrfind. 
+        unsafe { +            let fun = FN.load(Ordering::Relaxed); +            core::mem::transmute::<Fn, RealFn>(fun)( +                $($needle),+, +                $hay_start, +                $hay_end, +            ) +        } +    }}; +} + +// The routines below dispatch to AVX2, SSE2 or a fallback routine based on +// what's available in the current environment. The secret sauce here is that +// we only check for which one to use approximately once, and then "cache" that +// choice into a global function pointer. Subsequent invocations then just call +// the appropriate function directly. + +/// memchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::find_raw`. +#[inline(always)] +pub(crate) fn memchr_raw( +    n1: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    // SAFETY: We provide a valid function pointer type. +    unsafe_ifunc!( +        One, +        find_raw, +        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, +        Option<*const u8>, +        start, +        end, +        n1 +    ) +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline(always)] +pub(crate) fn memrchr_raw( +    n1: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    // SAFETY: We provide a valid function pointer type. +    unsafe_ifunc!( +        One, +        rfind_raw, +        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, +        Option<*const u8>, +        start, +        end, +        n1 +    ) +} + +/// memchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. +#[inline(always)] +pub(crate) fn memchr2_raw( +    n1: u8, +    n2: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    // SAFETY: We provide a valid function pointer type. +    unsafe_ifunc!( +        Two, +        find_raw, +        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, +        Option<*const u8>, +        start, +        end, +        n1, +        n2 +    ) +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`. +#[inline(always)] +pub(crate) fn memrchr2_raw( +    n1: u8, +    n2: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    // SAFETY: We provide a valid function pointer type. +    unsafe_ifunc!( +        Two, +        rfind_raw, +        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, +        Option<*const u8>, +        start, +        end, +        n1, +        n2 +    ) +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. +#[inline(always)] +pub(crate) fn memchr3_raw( +    n1: u8, +    n2: u8, +    n3: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    // SAFETY: We provide a valid function pointer type. +    unsafe_ifunc!( +        Three, +        find_raw, +        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, +        Option<*const u8>, +        start, +        end, +        n1, +        n2, +        n3 +    ) +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. 
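All of these wrapper routines expand to the same caching trick. As a standalone, hedged sketch of that "ifunc" pattern in isolation (x86_64 and `std` only; the function names below are illustrative and not part of this crate):

```rust
use core::mem;
use core::sync::atomic::{AtomicPtr, Ordering};

// The function signature being dispatched. Illustrative only.
type FindFn = fn(haystack: &[u8], needle: u8) -> Option<usize>;

// The global starts out pointing at `detect`, which picks an implementation
// on first use and caches it for every later call.
static FN: AtomicPtr<()> = AtomicPtr::new(detect as FindFn as *mut ());

fn find_fallback(haystack: &[u8], needle: u8) -> Option<usize> {
    haystack.iter().position(|&b| b == needle)
}

fn find_fast(haystack: &[u8], needle: u8) -> Option<usize> {
    // Stand-in for a vectorized routine selected when the CPU supports it.
    haystack.iter().position(|&b| b == needle)
}

fn detect(haystack: &[u8], needle: u8) -> Option<usize> {
    // Runtime feature detection happens once, here.
    let chosen: FindFn = if std::is_x86_feature_detected!("avx2") {
        find_fast
    } else {
        find_fallback
    };
    // Replace the global so subsequent calls skip detection entirely.
    FN.store(chosen as *mut (), Ordering::Relaxed);
    chosen(haystack, needle)
}

pub fn find(haystack: &[u8], needle: u8) -> Option<usize> {
    let f = FN.load(Ordering::Relaxed);
    // SAFETY: `FN` only ever holds values of type `FindFn` stored above.
    let f: FindFn = unsafe { mem::transmute::<*mut (), FindFn>(f) };
    f(haystack, needle)
}

fn main() {
    assert_eq!(find(b"abcxyz", b'x'), Some(3));
    assert_eq!(find(b"abcxyz", b'q'), None);
}
```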
+#[inline(always)] +pub(crate) fn memrchr3_raw( +    n1: u8, +    n2: u8, +    n3: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    // SAFETY: We provide a valid function pointer type. +    unsafe_ifunc!( +        Three, +        rfind_raw, +        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, +        Option<*const u8>, +        start, +        end, +        n1, +        n2, +        n3 +    ) +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. +#[inline(always)] +pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize { +    // SAFETY: We provide a valid function pointer type. +    unsafe_ifunc!( +        One, +        count_raw, +        unsafe fn(u8, *const u8, *const u8) -> usize, +        usize, +        start, +        end, +        n1 +    ) +} diff --git a/vendor/memchr/src/arch/x86_64/mod.rs b/vendor/memchr/src/arch/x86_64/mod.rs new file mode 100644 index 0000000..5dad721 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/mod.rs @@ -0,0 +1,8 @@ +/*! +Vector algorithms for the `x86_64` target. +*/ + +pub mod avx2; +pub mod sse2; + +pub(crate) mod memchr; diff --git a/vendor/memchr/src/arch/x86_64/sse2/memchr.rs b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs new file mode 100644 index 0000000..c6f75df --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs @@ -0,0 +1,1077 @@ +/*! +This module defines 128-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point and it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::x86_64::__m128i; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One(generic::One<__m128i>); + +impl One { +    /// Create a new searcher that finds occurrences of the needle byte given. +    /// +    /// This particular searcher is specialized to use SSE2 vector instructions +    /// that typically make it quite fast. +    /// +    /// If SSE2 is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle: u8) -> Option<One> { +        if One::is_available() { +            // SAFETY: we check that sse2 is available above. 
+            unsafe { Some(One::new_unchecked(needle)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to SSE2 vectors and routines without +    /// checking that SSE2 is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute `sse2` instructions +    /// in the current environment. +    /// +    /// Note that it is a common misconception that if one compiles for an +    /// `x86_64` target, then they therefore automatically have access to SSE2 +    /// instructions. While this is almost always the case, it isn't true in +    /// 100% of cases. +    #[target_feature(enable = "sse2")] +    #[inline] +    pub unsafe fn new_unchecked(needle: u8) -> One { +        One(generic::One::new(needle)) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`One::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `One::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "sse2")] +        { +            true +        } +        #[cfg(not(target_feature = "sse2"))] +        { +            false +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Counts all occurrences of this byte in the given haystack. +    #[inline] +    pub fn count(&self, haystack: &[u8]) -> usize { +        // SAFETY: All of our pointers are derived directly from a borrowed +        // slice, which is guaranteed to be valid. +        unsafe { +            let start = haystack.as_ptr(); +            let end = start.add(haystack.len()); +            self.count_raw(start, end) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. 
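For contrast with the raw-pointer routines, here is a hedged sketch of the slice-level entry points shown above (`One::new`, `find`, `rfind`, `count`, and `iter`), assuming this vendored tree is consumed as the `memchr` dependency on an x86_64 target:

```rust
use memchr::arch::x86_64::sse2::memchr::One;

fn main() {
    let hay = b"foo\nbar\nbaz\n";
    // `new` returns `None` only when SSE2 is unavailable.
    let searcher = One::new(b'\n').expect("SSE2 should be available on x86_64");

    // First and last occurrence of the needle byte.
    assert_eq!(searcher.find(hay), Some(3));
    assert_eq!(searcher.rfind(hay), Some(11));

    // Counting matches; `iter().count()` uses the same specialized routine.
    assert_eq!(searcher.count(hay), 3);
    assert_eq!(searcher.iter(hay).count(), 3);

    // The iterator is double ended, so matches can be walked in reverse.
    let rev: Vec<usize> = searcher.iter(hay).rev().collect();
    assert_eq!(rev, vec![11, 7, 3]);
}
```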
+    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < __m128i::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::fwd_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +            }); +        } +        // SAFETY: Building a `One` means it's safe to call 'sse2' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        // +        // Note that we could call `self.0.find_raw` directly here. But that +        // means we'd have to annotate this routine with `target_feature`. +        // Which is fine, because this routine is `unsafe` anyway and the +        // `target_feature` obligation is met by virtue of building a `One`. +        // The real problem is that a routine with a `target_feature` +        // annotation generally can't be inlined into caller code unless the +        // caller code has the same target feature annotations. Which is maybe +        // okay for SSE2, but we do the same thing for AVX2 where caller code +        // probably usually doesn't have AVX2 enabled. That means that this +        // routine can be inlined which will handle some of the short-haystack +        // cases above without touching the architecture specific code. +        self.find_raw_impl(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. 
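The same contract governs `find_raw` above. A hedged sketch of satisfying it by deriving both pointers from a single slice (so the same-allocation, in-bounds, and no-overflow requirements hold by construction):

```rust
use memchr::arch::x86_64::sse2::memchr::One;

fn main() {
    let hay = b"abc\ndef";
    let searcher = One::new(b'\n').expect("SSE2 should be available on x86_64");

    // Derive both pointers from the same live slice.
    let start = hay.as_ptr();
    // `end` is one past the last byte, which the contract permits.
    let end = unsafe { start.add(hay.len()) };

    // SAFETY: `start` and `end` come from the same `&[u8]` as required above.
    let found = unsafe { searcher.find_raw(start, end) };

    // Convert the returned pointer back into an offset into the slice.
    let offset = found.map(|p| unsafe { p.offset_from(start) } as usize);
    assert_eq!(offset, Some(3));
}
```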
+    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < __m128i::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::rev_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +            }); +        } +        // SAFETY: Building a `One` means it's safe to call 'sse2' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        // +        // See note in forward routine above for why we don't just call +        // `self.0.rfind_raw` directly here. +        self.rfind_raw_impl(start, end) +    } + +    /// Counts all occurrences of this byte in the given haystack represented +    /// by raw pointers. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `0` will always be returned. +    #[inline] +    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { +        if start >= end { +            return 0; +        } +        if end.distance(start) < __m128i::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::count_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +            }); +        } +        // SAFETY: Building a `One` means it's safe to call 'sse2' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        self.count_raw_impl(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `sse2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn find_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.find_raw(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). 
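This `*_impl` split, an inlinable outer `unsafe fn` delegating to a `#[target_feature]`-annotated inner one, is the pattern the comments above describe. A minimal, hedged sketch of it outside this crate (names are illustrative, and a scalar loop stands in for the vector code):

```rust
#[derive(Clone, Copy)]
pub struct Searcher {
    needle: u8,
}

impl Searcher {
    /// Construction is where the caller promises SSE2 is available,
    /// mirroring `One::new_unchecked` above.
    ///
    /// # Safety
    ///
    /// Callers must ensure SSE2 is supported by the current CPU.
    #[target_feature(enable = "sse2")]
    pub unsafe fn new_unchecked(needle: u8) -> Searcher {
        Searcher { needle }
    }

    /// Outer routine: `unsafe` but *not* `#[target_feature]`, so it can be
    /// inlined into callers compiled without SSE2 enabled and handle the
    /// short-haystack path without entering vector code.
    #[inline]
    pub unsafe fn find(&self, haystack: &[u8]) -> Option<usize> {
        if haystack.len() < 16 {
            // Scalar path for inputs shorter than one 128-bit vector.
            return haystack.iter().position(|&b| b == self.needle);
        }
        self.find_impl(haystack)
    }

    /// Inner routine carrying the target feature annotation. In the real
    /// code this is where the SSE2 loop lives; a scalar body stands in here.
    #[target_feature(enable = "sse2")]
    #[inline]
    unsafe fn find_impl(&self, haystack: &[u8]) -> Option<usize> {
        haystack.iter().position(|&b| b == self.needle)
    }
}

fn main() {
    // SAFETY: assume an x86_64 CPU with SSE2 for this sketch.
    let s = unsafe { Searcher::new_unchecked(b'z') };
    assert_eq!(unsafe { s.find(b"abcz") }, Some(3));
    assert_eq!(unsafe { s.find(b"0123456789abcdefz") }, Some(16));
}
```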
+    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `sse2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn rfind_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.rfind_raw(start, end) +    } + +    /// Execute a count using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`One::count_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `One`, which can only be constructed +    /// when it is safe to call `sse2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn count_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> usize { +        self.0.count_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle byte in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { +        OneIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { +    searcher: &'a One, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn count(self) -> usize { +        self.it.count(|s, e| { +            // SAFETY: We rely on our generic iterator to return valid start +            // and end pointers. +            unsafe { self.searcher.count_raw(s, e) } +        }) +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. 
For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two(generic::Two<__m128i>); + +impl Two { +    /// Create a new searcher that finds occurrences of the needle bytes given. +    /// +    /// This particular searcher is specialized to use SSE2 vector instructions +    /// that typically make it quite fast. +    /// +    /// If SSE2 is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle1: u8, needle2: u8) -> Option<Two> { +        if Two::is_available() { +            // SAFETY: we check that sse2 is available above. +            unsafe { Some(Two::new_unchecked(needle1, needle2)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to SSE2 vectors and routines without +    /// checking that SSE2 is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute `sse2` instructions +    /// in the current environment. +    /// +    /// Note that it is a common misconception that if one compiles for an +    /// `x86_64` target, then they therefore automatically have access to SSE2 +    /// instructions. While this is almost always the case, it isn't true in +    /// 100% of cases. +    #[target_feature(enable = "sse2")] +    #[inline] +    pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { +        Two(generic::Two::new(needle1, needle2)) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Two::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `Two::new` will return a `None` value. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "sse2")] +        { +            true +        } +        #[cfg(not(target_feature = "sse2"))] +        { +            false +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. 
+        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < __m128i::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::fwd_byte_by_byte(start, end, |b| { +                b == self.0.needle1() || b == self.0.needle2() +            }); +        } +        // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        // +        // Note that we could call `self.0.find_raw` directly here. But that +        // means we'd have to annotate this routine with `target_feature`. +        // Which is fine, because this routine is `unsafe` anyway and the +        // `target_feature` obligation is met by virtue of building a `Two`. +        // The real problem is that a routine with a `target_feature` +        // annotation generally can't be inlined into caller code unless the +        // caller code has the same target feature annotations. Which is maybe +        // okay for SSE2, but we do the same thing for AVX2 where caller code +        // probably usually doesn't have AVX2 enabled. That means that this +        // routine can be inlined which will handle some of the short-haystack +        // cases above without touching the architecture specific code. +        self.find_raw_impl(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. 
+    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < __m128i::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::rev_byte_by_byte(start, end, |b| { +                b == self.0.needle1() || b == self.0.needle2() +            }); +        } +        // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        // +        // See note in forward routine above for why we don't just call +        // `self.0.rfind_raw` directly here. +        self.rfind_raw_impl(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Two::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Two`, which can only be constructed +    /// when it is safe to call `sse2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn find_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.find_raw(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Two::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Two`, which can only be constructed +    /// when it is safe to call `sse2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn rfind_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.rfind_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle bytes in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { +        TwoIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. 
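A hedged usage sketch of `Two` and this iterator, reproducing the `afoobar` example from the type's documentation (again assuming this vendored tree is the `memchr` dependency on x86_64):

```rust
use memchr::arch::x86_64::sse2::memchr::Two;

fn main() {
    let hay = b"afoobar";
    let searcher = Two::new(b'a', b'b').expect("SSE2 should be available");

    // Matches of either byte, in order: `a` at 0, `b` at 4, `a` at 5.
    let fwd: Vec<usize> = searcher.iter(hay).collect();
    assert_eq!(fwd, vec![0, 4, 5]);

    // The same matches walked back to front via `DoubleEndedIterator`.
    let rev: Vec<usize> = searcher.iter(hay).rev().collect();
    assert_eq!(rev, vec![5, 4, 0]);

    assert_eq!(searcher.find(hay), Some(0));
    assert_eq!(searcher.rfind(hay), Some(5));
}
```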
+/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { +    searcher: &'a Two, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three(generic::Three<__m128i>); + +impl Three { +    /// Create a new searcher that finds occurrences of the needle bytes given. +    /// +    /// This particular searcher is specialized to use SSE2 vector instructions +    /// that typically make it quite fast. +    /// +    /// If SSE2 is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> { +        if Three::is_available() { +            // SAFETY: we check that sse2 is available above. +            unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } +        } else { +            None +        } +    } + +    /// Create a new finder specific to SSE2 vectors and routines without +    /// checking that SSE2 is available. +    /// +    /// # Safety +    /// +    /// Callers must guarantee that it is safe to execute `sse2` instructions +    /// in the current environment. +    /// +    /// Note that it is a common misconception that if one compiles for an +    /// `x86_64` target, then they therefore automatically have access to SSE2 +    /// instructions. While this is almost always the case, it isn't true in +    /// 100% of cases. +    #[target_feature(enable = "sse2")] +    #[inline] +    pub unsafe fn new_unchecked( +        needle1: u8, +        needle2: u8, +        needle3: u8, +    ) -> Three { +        Three(generic::Three::new(needle1, needle2, needle3)) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Three::new`] will return +    /// a `Some` value. Similarly, when it is false, it is guaranteed that +    /// `Three::new` will return a `None` value. 
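That guarantee lets portable callers branch on the constructor alone. A hedged sketch with a plain scalar fallback for when the searcher cannot be built:

```rust
use memchr::arch::x86_64::sse2::memchr::Three;

/// Find the first of three delimiter bytes, falling back to a scalar scan
/// when the SSE2 searcher cannot be constructed.
fn find_delim(haystack: &[u8]) -> Option<usize> {
    match Three::new(b',', b';', b'\n') {
        // `is_available()` returning true guarantees this arm is taken.
        Some(searcher) => searcher.find(haystack),
        // Guaranteed arm when `Three::is_available()` is false.
        None => haystack
            .iter()
            .position(|&b| b == b',' || b == b';' || b == b'\n'),
    }
}

fn main() {
    assert_eq!(find_delim(b"key=value;next"), Some(9));
    assert_eq!(find_delim(b"no delimiters here"), None);
}
```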
+    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(target_feature = "sse2")] +        { +            true +        } +        #[cfg(not(target_feature = "sse2"))] +        { +            false +        } +    } + +    /// Return the first occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `find_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.find_raw(s, e) +            }) +        } +    } + +    /// Return the last occurrence of one of the needle bytes in the given +    /// haystack. If no such occurrence exists, then `None` is returned. +    /// +    /// The occurrence is reported as an offset into `haystack`. Its maximum +    /// value is `haystack.len() - 1`. +    #[inline] +    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it +        // falls within the bounds of the start and end pointers. +        unsafe { +            generic::search_slice_with_raw(haystack, |s, e| { +                self.rfind_raw(s, e) +            }) +        } +    } + +    /// Like `find`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn find_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < __m128i::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::fwd_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +                    || b == self.0.needle2() +                    || b == self.0.needle3() +            }); +        } +        // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. 
Pointer validity is caller's responsibility. +        // +        // Note that we could call `self.0.find_raw` directly here. But that +        // means we'd have to annotate this routine with `target_feature`. +        // Which is fine, because this routine is `unsafe` anyway and the +        // `target_feature` obligation is met by virtue of building a `Three`. +        // The real problem is that a routine with a `target_feature` +        // annotation generally can't be inlined into caller code unless the +        // caller code has the same target feature annotations. Which is maybe +        // okay for SSE2, but we do the same thing for AVX2 where caller code +        // probably usually doesn't have AVX2 enabled. That means that this +        // routine can be inlined which will handle some of the short-haystack +        // cases above without touching the architecture specific code. +        self.find_raw_impl(start, end) +    } + +    /// Like `rfind`, but accepts and returns raw pointers. +    /// +    /// When a match is found, the pointer returned is guaranteed to be +    /// `>= start` and `< end`. +    /// +    /// This routine is useful if you're already using raw pointers and would +    /// like to avoid converting back to a slice before executing a search. +    /// +    /// # Safety +    /// +    /// * Both `start` and `end` must be valid for reads. +    /// * Both `start` and `end` must point to an initialized value. +    /// * Both `start` and `end` must point to the same allocated object and +    /// must either be in bounds or at most one byte past the end of the +    /// allocated object. +    /// * Both `start` and `end` must be _derived from_ a pointer to the same +    /// object. +    /// * The distance between `start` and `end` must not overflow `isize`. +    /// * The distance being in bounds must not rely on "wrapping around" the +    /// address space. +    /// +    /// Note that callers may pass a pair of pointers such that `start >= end`. +    /// In that case, `None` will always be returned. +    #[inline] +    pub unsafe fn rfind_raw( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        if start >= end { +            return None; +        } +        if end.distance(start) < __m128i::BYTES { +            // SAFETY: We require the caller to pass valid start/end pointers. +            return generic::rev_byte_by_byte(start, end, |b| { +                b == self.0.needle1() +                    || b == self.0.needle2() +                    || b == self.0.needle3() +            }); +        } +        // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. +        // Also, we've checked that our haystack is big enough to run on the +        // vector routine. Pointer validity is caller's responsibility. +        // +        // See note in forward routine above for why we don't just call +        // `self.0.rfind_raw` directly here. +        self.rfind_raw_impl(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Three::find_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Three`, which can only be constructed +    /// when it is safe to call `sse2` routines.) 
+    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn find_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.find_raw(start, end) +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as [`Three::rfind_raw`], except the distance between `start` and +    /// `end` must be at least the size of an SSE2 vector (in bytes). +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Three`, which can only be constructed +    /// when it is safe to call `sse2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn rfind_raw_impl( +        &self, +        start: *const u8, +        end: *const u8, +    ) -> Option<*const u8> { +        self.0.rfind_raw(start, end) +    } + +    /// Returns an iterator over all occurrences of the needle byte in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { +        ThreeIter { searcher: self, it: generic::Iter::new(haystack) } +    } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { +    searcher: &'a Three, +    it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'find_raw' falls within the bounds of the start and end pointer. +        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: We rely on the generic iterator to provide valid start +        // and end pointers, but we guarantee that any pointer returned by +        // 'rfind_raw' falls within the bounds of the start and end pointer. 
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } +    } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { +    use super::*; + +    define_memchr_quickcheck!(super); + +    #[test] +    fn forward_one() { +        crate::tests::memchr::Runner::new(1).forward_iter( +            |haystack, needles| { +                Some(One::new(needles[0])?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_one() { +        crate::tests::memchr::Runner::new(1).reverse_iter( +            |haystack, needles| { +                Some(One::new(needles[0])?.iter(haystack).rev().collect()) +            }, +        ) +    } + +    #[test] +    fn count_one() { +        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { +            Some(One::new(needles[0])?.iter(haystack).count()) +        }) +    } + +    #[test] +    fn forward_two() { +        crate::tests::memchr::Runner::new(2).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(Two::new(n1, n2)?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_two() { +        crate::tests::memchr::Runner::new(2).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) +            }, +        ) +    } + +    #[test] +    fn forward_three() { +        crate::tests::memchr::Runner::new(3).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse_three() { +        crate::tests::memchr::Runner::new(3).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) +            }, +        ) +    } +} diff --git a/vendor/memchr/src/arch/x86_64/sse2/mod.rs b/vendor/memchr/src/arch/x86_64/sse2/mod.rs new file mode 100644 index 0000000..bcb8307 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `x86_64` target using 128-bit vectors via SSE2. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs new file mode 100644 index 0000000..c8b5b99 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs @@ -0,0 +1,232 @@ +/*! +A 128-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::x86_64::__m128i; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 128-bit vector operations. 
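Before the SIMD details, a scalar, hedged sketch of the packed-pair predicate the module doc describes: pick two needle offsets, and only compare the full needle at positions where both of those bytes line up. Unlike the real finder, the pair here is passed in by hand rather than chosen from a byte-frequency table, and positions are checked one at a time rather than sixteen per SSE2 vector.

```rust
/// Scalar illustration of the packed-pair predicate. `idx1` and `idx2` are
/// the two needle offsets acting as the cheap pre-check.
fn packed_pair_find(
    haystack: &[u8],
    needle: &[u8],
    idx1: usize,
    idx2: usize,
) -> Option<usize> {
    assert!(idx1 < needle.len() && idx2 < needle.len());
    let (b1, b2) = (needle[idx1], needle[idx2]);
    let mut i = 0;
    while i + needle.len() <= haystack.len() {
        // Cheap two-byte predicate; most positions fail here without a full
        // needle comparison.
        if haystack[i + idx1] == b1 && haystack[i + idx2] == b2 {
            if &haystack[i..i + needle.len()] == needle {
                return Some(i);
            }
        }
        i += 1;
    }
    None
}

fn main() {
    let hay = b"searching for a needle in a haystack";
    // First and last needle bytes used as the pair, purely for illustration.
    assert_eq!(packed_pair_find(hay, b"needle", 0, 5), Some(16));
    assert_eq!(packed_pair_find(hay, b"pin", 0, 2), None);
}
```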
+/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder(packedpair::Finder<__m128i>); + +impl Finder { +    /// Create a new pair searcher. The searcher returned can either report +    /// exact matches of `needle` or act as a prefilter and report candidate +    /// positions of `needle`. +    /// +    /// If SSE2 is unavailable in the current environment or if a [`Pair`] +    /// could not be constructed from the needle given, then `None` is +    /// returned. +    #[inline] +    pub fn new(needle: &[u8]) -> Option<Finder> { +        Finder::with_pair(needle, Pair::new(needle)?) +    } + +    /// Create a new "packed pair" finder using the pair of bytes given. +    /// +    /// This constructor permits callers to control precisely which pair of +    /// bytes is used as a predicate. +    /// +    /// If SSE2 is unavailable in the current environment, then `None` is +    /// returned. +    #[inline] +    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { +        if Finder::is_available() { +            // SAFETY: we check that sse2 is available above. We are also +            // guaranteed to have needle.len() > 1 because we have a valid +            // Pair. +            unsafe { Some(Finder::with_pair_impl(needle, pair)) } +        } else { +            None +        } +    } + +    /// Create a new `Finder` specific to SSE2 vectors and routines. +    /// +    /// # Safety +    /// +    /// Same as the safety for `packedpair::Finder::new`, and callers must also +    /// ensure that SSE2 is available. +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { +        let finder = packedpair::Finder::<__m128i>::new(needle, pair); +        Finder(finder) +    } + +    /// Returns true when this implementation is available in the current +    /// environment. +    /// +    /// When this is true, it is guaranteed that [`Finder::with_pair`] will +    /// return a `Some` value. Similarly, when it is false, it is guaranteed +    /// that `Finder::with_pair` will return a `None` value. Notice that this +    /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, +    /// even when `Finder::is_available` is true, it is not guaranteed that a +    /// valid [`Pair`] can be found from the needle given. +    /// +    /// Note also that for the lifetime of a single program, if this returns +    /// true then it will always return true. +    #[inline] +    pub fn is_available() -> bool { +        #[cfg(not(target_feature = "sse2"))] +        { +            false +        } +        #[cfg(target_feature = "sse2")] +        { +            true +        } +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    #[inline] +    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { +        // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. +        unsafe { self.find_impl(haystack, needle) } +    } + +    /// Run this finder on the given haystack as a prefilter. 
+    /// +    /// If a candidate match is found, then an offset where the needle *could* +    /// begin in the haystack is returned. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    #[inline] +    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. +        unsafe { self.find_prefilter_impl(haystack) } +    } + +    /// Execute a search using SSE2 vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    /// +    /// # Safety +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Finder`, which can only be constructed +    /// when it is safe to call `sse2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn find_impl( +        &self, +        haystack: &[u8], +        needle: &[u8], +    ) -> Option<usize> { +        self.0.find(haystack, needle) +    } + +    /// Execute a prefilter search using SSE2 vectors and routines. +    /// +    /// # Panics +    /// +    /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. +    /// +    /// # Safety +    /// +    /// (The target feature safety obligation is automatically fulfilled by +    /// virtue of being a method on `Finder`, which can only be constructed +    /// when it is safe to call `sse2` routines.) +    #[target_feature(enable = "sse2")] +    #[inline] +    unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { +        self.0.find_prefilter(haystack) +    } + +    /// Returns the pair of offsets (into the needle) used to check as a +    /// predicate before confirming whether a needle exists at a particular +    /// position. +    #[inline] +    pub fn pair(&self) -> &Pair { +        self.0.pair() +    } + +    /// Returns the minimum haystack length that this `Finder` can search. +    /// +    /// Using a haystack with length smaller than this in a search will result +    /// in a panic. The reason for this restriction is that this finder is +    /// meant to be a low-level component that is part of a larger substring +    /// strategy. In that sense, it avoids trying to handle all cases and +    /// instead only handles the cases that it can handle very well. 
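+    ///
+    /// # Example
+    ///
+    /// A sketch of the intended usage pattern (the needle, haystack and
+    /// expected offset below are made up for illustration):
+    ///
+    /// ```
+    /// use memchr::arch::x86_64::sse2::packedpair::Finder;
+    ///
+    /// let needle = b"foobar";
+    /// let haystack = b"this is a long enough haystack to contain foobar!";
+    /// if let Some(finder) = Finder::new(needle) {
+    ///     if haystack.len() >= finder.min_haystack_len() {
+    ///         assert_eq!(Some(42), finder.find(haystack, needle));
+    ///     }
+    /// }
+    /// ```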
+    #[inline] +    pub fn min_haystack_len(&self) -> usize { +        self.0.min_haystack_len() +    } +} + +#[cfg(test)] +mod tests { +    use super::*; + +    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> { +        let f = Finder::new(needle)?; +        if haystack.len() < f.min_haystack_len() { +            return None; +        } +        Some(f.find(haystack, needle)) +    } + +    define_substring_forward_quickcheck!(find); + +    #[test] +    fn forward_substring() { +        crate::tests::substring::Runner::new().fwd(find).run() +    } + +    #[test] +    fn forward_packedpair() { +        fn find( +            haystack: &[u8], +            needle: &[u8], +            index1: u8, +            index2: u8, +        ) -> Option<Option<usize>> { +            let pair = Pair::with_indices(needle, index1, index2)?; +            let f = Finder::with_pair(needle, pair)?; +            if haystack.len() < f.min_haystack_len() { +                return None; +            } +            Some(f.find(haystack, needle)) +        } +        crate::tests::packedpair::Runner::new().fwd(find).run() +    } + +    #[test] +    fn forward_packedpair_prefilter() { +        fn find( +            haystack: &[u8], +            needle: &[u8], +            index1: u8, +            index2: u8, +        ) -> Option<Option<usize>> { +            let pair = Pair::with_indices(needle, index1, index2)?; +            let f = Finder::with_pair(needle, pair)?; +            if haystack.len() < f.min_haystack_len() { +                return None; +            } +            Some(f.find_prefilter(haystack)) +        } +        crate::tests::packedpair::Runner::new().fwd(find).run() +    } +} diff --git a/vendor/memchr/src/cow.rs b/vendor/memchr/src/cow.rs new file mode 100644 index 0000000..f291645 --- /dev/null +++ b/vendor/memchr/src/cow.rs @@ -0,0 +1,107 @@ +use core::ops; + +/// A specialized copy-on-write byte string. +/// +/// The purpose of this type is to permit usage of a "borrowed or owned +/// byte string" in a way that keeps std/no-std compatibility. That is, in +/// no-std/alloc mode, this type devolves into a simple &[u8] with no owned +/// variant available. We can't just use a plain Cow because Cow is not in +/// core. +#[derive(Clone, Debug)] +pub struct CowBytes<'a>(Imp<'a>); + +// N.B. We don't use alloc::borrow::Cow here since we can get away with a +// Box<[u8]> for our use case, which is 1/3 smaller than the Vec<u8> that +// a Cow<[u8]> would use. +#[cfg(feature = "alloc")] +#[derive(Clone, Debug)] +enum Imp<'a> { +    Borrowed(&'a [u8]), +    Owned(alloc::boxed::Box<[u8]>), +} + +#[cfg(not(feature = "alloc"))] +#[derive(Clone, Debug)] +struct Imp<'a>(&'a [u8]); + +impl<'a> ops::Deref for CowBytes<'a> { +    type Target = [u8]; + +    #[inline(always)] +    fn deref(&self) -> &[u8] { +        self.as_slice() +    } +} + +impl<'a> CowBytes<'a> { +    /// Create a new borrowed CowBytes. +    #[inline(always)] +    pub(crate) fn new<B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> CowBytes<'a> { +        CowBytes(Imp::new(bytes.as_ref())) +    } + +    /// Create a new owned CowBytes. +    #[cfg(feature = "alloc")] +    #[inline(always)] +    fn new_owned(bytes: alloc::boxed::Box<[u8]>) -> CowBytes<'static> { +        CowBytes(Imp::Owned(bytes)) +    } + +    /// Return a borrowed byte string, regardless of whether this is an owned +    /// or borrowed byte string internally. 
+    #[inline(always)] +    pub(crate) fn as_slice(&self) -> &[u8] { +        self.0.as_slice() +    } + +    /// Return an owned version of this copy-on-write byte string. +    /// +    /// If this is already an owned byte string internally, then this is a +    /// no-op. Otherwise, the internal byte string is copied. +    #[cfg(feature = "alloc")] +    #[inline(always)] +    pub(crate) fn into_owned(self) -> CowBytes<'static> { +        match self.0 { +            Imp::Borrowed(b) => { +                CowBytes::new_owned(alloc::boxed::Box::from(b)) +            } +            Imp::Owned(b) => CowBytes::new_owned(b), +        } +    } +} + +impl<'a> Imp<'a> { +    #[inline(always)] +    pub fn new(bytes: &'a [u8]) -> Imp<'a> { +        #[cfg(feature = "alloc")] +        { +            Imp::Borrowed(bytes) +        } +        #[cfg(not(feature = "alloc"))] +        { +            Imp(bytes) +        } +    } + +    #[cfg(feature = "alloc")] +    #[inline(always)] +    pub fn as_slice(&self) -> &[u8] { +        #[cfg(feature = "alloc")] +        { +            match self { +                Imp::Owned(ref x) => x, +                Imp::Borrowed(x) => x, +            } +        } +        #[cfg(not(feature = "alloc"))] +        { +            self.0 +        } +    } + +    #[cfg(not(feature = "alloc"))] +    #[inline(always)] +    pub fn as_slice(&self) -> &[u8] { +        self.0 +    } +} diff --git a/vendor/memchr/src/ext.rs b/vendor/memchr/src/ext.rs new file mode 100644 index 0000000..1bb21dd --- /dev/null +++ b/vendor/memchr/src/ext.rs @@ -0,0 +1,52 @@ +/// A trait for adding some helper routines to pointers. +pub(crate) trait Pointer { +    /// Returns the distance, in units of `T`, between `self` and `origin`. +    /// +    /// # Safety +    /// +    /// Same as `ptr::offset_from` in addition to `self >= origin`. +    unsafe fn distance(self, origin: Self) -> usize; + +    /// Casts this pointer to `usize`. +    /// +    /// Callers should not convert the `usize` back to a pointer if at all +    /// possible. (And if you believe it's necessary, open an issue to discuss +    /// why. Otherwise, it has the potential to violate pointer provenance.) +    /// The purpose of this function is just to be able to do arithmetic, i.e., +    /// computing offsets or alignments. +    fn as_usize(self) -> usize; +} + +impl<T> Pointer for *const T { +    unsafe fn distance(self, origin: *const T) -> usize { +        // TODO: Replace with `ptr::sub_ptr` once stabilized. +        usize::try_from(self.offset_from(origin)).unwrap_unchecked() +    } + +    fn as_usize(self) -> usize { +        self as usize +    } +} + +impl<T> Pointer for *mut T { +    unsafe fn distance(self, origin: *mut T) -> usize { +        (self as *const T).distance(origin as *const T) +    } + +    fn as_usize(self) -> usize { +        (self as *const T).as_usize() +    } +} + +/// A trait for adding some helper routines to raw bytes. +pub(crate) trait Byte { +    /// Converts this byte to a `char` if it's ASCII. Otherwise panics. +    fn to_char(self) -> char; +} + +impl Byte for u8 { +    fn to_char(self) -> char { +        assert!(self.is_ascii()); +        char::from(self) +    } +} diff --git a/vendor/memchr/src/lib.rs b/vendor/memchr/src/lib.rs new file mode 100644 index 0000000..de366fb --- /dev/null +++ b/vendor/memchr/src/lib.rs @@ -0,0 +1,221 @@ +/*! +This library provides heavily optimized routines for string search primitives. + +# Overview + +This section gives a brief high level overview of what this crate offers. 
+ +* The top-level module provides routines for searching for 1, 2 or 3 bytes +  in the forward or reverse direction. When searching for more than one byte, +  positions are considered a match if the byte at that position matches any +  of the bytes. +* The [`memmem`] sub-module provides forward and reverse substring search +  routines. + +In all such cases, routines operate on `&[u8]` without regard to encoding. This +is exactly what you want when searching either UTF-8 or arbitrary bytes. + +# Example: using `memchr` + +This example shows how to use `memchr` to find the first occurrence of `z` in +a haystack: + +``` +use memchr::memchr; + +let haystack = b"foo bar baz quuz"; +assert_eq!(Some(10), memchr(b'z', haystack)); +``` + +# Example: matching one of three possible bytes + +This examples shows how to use `memrchr3` to find occurrences of `a`, `b` or +`c`, starting at the end of the haystack. + +``` +use memchr::memchr3_iter; + +let haystack = b"xyzaxyzbxyzc"; + +let mut it = memchr3_iter(b'a', b'b', b'c', haystack).rev(); +assert_eq!(Some(11), it.next()); +assert_eq!(Some(7), it.next()); +assert_eq!(Some(3), it.next()); +assert_eq!(None, it.next()); +``` + +# Example: iterating over substring matches + +This example shows how to use the [`memmem`] sub-module to find occurrences of +a substring in a haystack. + +``` +use memchr::memmem; + +let haystack = b"foo bar foo baz foo"; + +let mut it = memmem::find_iter(haystack, "foo"); +assert_eq!(Some(0), it.next()); +assert_eq!(Some(8), it.next()); +assert_eq!(Some(16), it.next()); +assert_eq!(None, it.next()); +``` + +# Example: repeating a search for the same needle + +It may be possible for the overhead of constructing a substring searcher to be +measurable in some workloads. In cases where the same needle is used to search +many haystacks, it is possible to do construction once and thus to avoid it for +subsequent searches. This can be done with a [`memmem::Finder`]: + +``` +use memchr::memmem; + +let finder = memmem::Finder::new("foo"); + +assert_eq!(Some(4), finder.find(b"baz foo quux")); +assert_eq!(None, finder.find(b"quux baz bar")); +``` + +# Why use this crate? + +At first glance, the APIs provided by this crate might seem weird. Why provide +a dedicated routine like `memchr` for something that could be implemented +clearly and trivially in one line: + +``` +fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> { +    haystack.iter().position(|&b| b == needle) +} +``` + +Or similarly, why does this crate provide substring search routines when Rust's +core library already provides them? + +``` +fn search(haystack: &str, needle: &str) -> Option<usize> { +    haystack.find(needle) +} +``` + +The primary reason for both of them to exist is performance. When it comes to +performance, at a high level at least, there are two primary ways to look at +it: + +* **Throughput**: For this, think about it as, "given some very large haystack +  and a byte that never occurs in that haystack, how long does it take to +  search through it and determine that it, in fact, does not occur?" +* **Latency**: For this, think about it as, "given a tiny haystack---just a +  few bytes---how long does it take to determine if a byte is in it?" + +The `memchr` routine in this crate has _slightly_ worse latency than the +solution presented above, however, its throughput can easily be over an +order of magnitude faster. This is a good general purpose trade off to make. +You rarely lose, but often gain big. 
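+
+Since the two are semantically identical, one quick (if unscientific) sanity
+check is to compare them directly. This is only a sketch for illustration; it
+is not part of this crate's test suite:
+
+```
+fn memchr_naive(needle: u8, haystack: &[u8]) -> Option<usize> {
+    haystack.iter().position(|&b| b == needle)
+}
+
+let haystack = b"the quick brown fox";
+for needle in 0u8..=255 {
+    let expected = memchr_naive(needle, haystack);
+    assert_eq!(expected, memchr::memchr(needle, haystack));
+}
+```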
+ +**NOTE:** The name `memchr` comes from the corresponding routine in `libc`. A +key advantage of using this library is that its performance is not tied to its +quality of implementation in the `libc` you happen to be using, which can vary +greatly from platform to platform. + +But what about substring search? This one is a bit more complicated. The +primary reason for its existence is still indeed performance, but it's also +useful because Rust's core library doesn't actually expose any substring +search routine on arbitrary bytes. The only substring search routine that +exists works exclusively on valid UTF-8. + +So if you have valid UTF-8, is there a reason to use this over the standard +library substring search routine? Yes. This routine is faster on almost every +metric, including latency. The natural question then, is why isn't this +implementation in the standard library, even if only for searching on UTF-8? +The reason is that the implementation details for using SIMD in the standard +library haven't quite been worked out yet. + +**NOTE:** Currently, only `x86_64`, `wasm32` and `aarch64` targets have vector +accelerated implementations of `memchr` (and friends) and `memmem`. + +# Crate features + +* **std** - When enabled (the default), this will permit features specific to +the standard library. Currently, the only thing used from the standard library +is runtime SIMD CPU feature detection. This means that this feature must be +enabled to get AVX2 accelerated routines on `x86_64` targets without enabling +the `avx2` feature at compile time, for example. When `std` is not enabled, +this crate will still attempt to use SSE2 accelerated routines on `x86_64`. It +will also use AVX2 accelerated routines when the `avx2` feature is enabled at +compile time. In general, enable this feature if you can. +* **alloc** - When enabled (the default), APIs in this crate requiring some +kind of allocation will become available. For example, the +[`memmem::Finder::into_owned`](crate::memmem::Finder::into_owned) API and the +[`arch::all::shiftor`](crate::arch::all::shiftor) substring search +implementation. Otherwise, this crate is designed from the ground up to be +usable in core-only contexts, so the `alloc` feature doesn't add much +currently. Notably, disabling `std` but enabling `alloc` will **not** result +in the use of AVX2 on `x86_64` targets unless the `avx2` feature is enabled +at compile time. (With `std` enabled, AVX2 can be used even without the `avx2` +feature enabled at compile time by way of runtime CPU feature detection.) +* **logging** - When enabled (disabled by default), the `log` crate is used +to emit log messages about what kinds of `memchr` and `memmem` algorithms +are used. Namely, both `memchr` and `memmem` have a number of different +implementation choices depending on the target and CPU, and the log messages +can help show what specific implementations are being used. Generally, this is +useful for debugging performance issues. +* **libc** - **DEPRECATED**. Previously, this enabled the use of the target's +`memchr` function from whatever `libc` was linked into the program. This +feature is now a no-op because this crate's implementation of `memchr` should +now be sufficiently fast on a number of platforms that `libc` should no longer +be needed. (This feature is somewhat of a holdover from this crate's origins. +Originally, this crate was literally just a safe wrapper function around the +`memchr` function from `libc`.) 
+*/ + +#![deny(missing_docs)] +#![no_std] +// It's just not worth trying to squash all dead code warnings. Pretty +// unfortunate IMO. Not really sure how to fix this other than to either +// live with it or sprinkle a whole mess of `cfg` annotations everywhere. +#![cfg_attr( +    not(any( +        all(target_arch = "x86_64", target_feature = "sse2"), +        target_arch = "wasm32", +        target_arch = "aarch64", +    )), +    allow(dead_code) +)] +// Same deal for miri. +#![cfg_attr(miri, allow(dead_code, unused_macros))] + +// Supporting 8-bit (or others) would be fine. If you need it, please submit a +// bug report at https://github.com/BurntSushi/memchr +#[cfg(not(any( +    target_pointer_width = "16", +    target_pointer_width = "32", +    target_pointer_width = "64" +)))] +compile_error!("memchr currently not supported on non-{16,32,64}"); + +#[cfg(any(test, feature = "std"))] +extern crate std; + +#[cfg(any(test, feature = "alloc"))] +extern crate alloc; + +pub use crate::memchr::{ +    memchr, memchr2, memchr2_iter, memchr3, memchr3_iter, memchr_iter, +    memrchr, memrchr2, memrchr2_iter, memrchr3, memrchr3_iter, memrchr_iter, +    Memchr, Memchr2, Memchr3, +}; + +#[macro_use] +mod macros; + +#[cfg(test)] +#[macro_use] +mod tests; + +pub mod arch; +mod cow; +mod ext; +mod memchr; +pub mod memmem; +mod vector; diff --git a/vendor/memchr/src/macros.rs b/vendor/memchr/src/macros.rs new file mode 100644 index 0000000..31b4ca3 --- /dev/null +++ b/vendor/memchr/src/macros.rs @@ -0,0 +1,20 @@ +// Some feature combinations result in some of these macros never being used. +// Which is fine. Just squash the warnings. +#![allow(unused_macros)] + +macro_rules! log { +    ($($tt:tt)*) => { +        #[cfg(feature = "logging")] +        { +            $($tt)* +        } +    } +} + +macro_rules! debug { +    ($($tt:tt)*) => { log!(log::debug!($($tt)*)) } +} + +macro_rules! trace { +    ($($tt:tt)*) => { log!(log::trace!($($tt)*)) } +} diff --git a/vendor/memchr/src/memchr.rs b/vendor/memchr/src/memchr.rs new file mode 100644 index 0000000..68adb9a --- /dev/null +++ b/vendor/memchr/src/memchr.rs @@ -0,0 +1,903 @@ +use core::iter::Rev; + +use crate::arch::generic::memchr as generic; + +/// Search for the first occurrence of a byte in a slice. +/// +/// This returns the index corresponding to the first occurrence of `needle` in +/// `haystack`, or `None` if one is not found. If an index is returned, it is +/// guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().position(|&b| b == needle)`, this routine will attempt to +/// use highly optimized vector operations that can be an order of magnitude +/// faster (or more). +/// +/// # Example +/// +/// This shows how to find the first position of a byte in a byte string. +/// +/// ``` +/// use memchr::memchr; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memchr(b'k', haystack), Some(8)); +/// ``` +#[inline] +pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> { +    // SAFETY: memchr_raw, when a match is found, always returns a valid +    // pointer between start and end. +    unsafe { +        generic::search_slice_with_raw(haystack, |start, end| { +            memchr_raw(needle, start, end) +        }) +    } +} + +/// Search for the last occurrence of a byte in a slice. +/// +/// This returns the index corresponding to the last occurrence of `needle` in +/// `haystack`, or `None` if one is not found. 
If an index is returned, it is +/// guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().rposition(|&b| b == needle)`, this routine will attempt to +/// use highly optimized vector operations that can be an order of magnitude +/// faster (or more). +/// +/// # Example +/// +/// This shows how to find the last position of a byte in a byte string. +/// +/// ``` +/// use memchr::memrchr; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memrchr(b'o', haystack), Some(17)); +/// ``` +#[inline] +pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> { +    // SAFETY: memrchr_raw, when a match is found, always returns a valid +    // pointer between start and end. +    unsafe { +        generic::search_slice_with_raw(haystack, |start, end| { +            memrchr_raw(needle, start, end) +        }) +    } +} + +/// Search for the first occurrence of two possible bytes in a haystack. +/// +/// This returns the index corresponding to the first occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().position(|&b| b == needle1 || b == needle2)`, this routine +/// will attempt to use highly optimized vector operations that can be an order +/// of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the first position of one of two possible bytes in a +/// haystack. +/// +/// ``` +/// use memchr::memchr2; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memchr2(b'k', b'q', haystack), Some(4)); +/// ``` +#[inline] +pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> { +    // SAFETY: memchr2_raw, when a match is found, always returns a valid +    // pointer between start and end. +    unsafe { +        generic::search_slice_with_raw(haystack, |start, end| { +            memchr2_raw(needle1, needle2, start, end) +        }) +    } +} + +/// Search for the last occurrence of two possible bytes in a haystack. +/// +/// This returns the index corresponding to the last occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2)`, this +/// routine will attempt to use highly optimized vector operations that can be +/// an order of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the last position of one of two possible bytes in a +/// haystack. +/// +/// ``` +/// use memchr::memrchr2; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memrchr2(b'k', b'o', haystack), Some(17)); +/// ``` +#[inline] +pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> { +    // SAFETY: memrchr2_raw, when a match is found, always returns a valid +    // pointer between start and end. +    unsafe { +        generic::search_slice_with_raw(haystack, |start, end| { +            memrchr2_raw(needle1, needle2, start, end) +        }) +    } +} + +/// Search for the first occurrence of three possible bytes in a haystack. +/// +/// This returns the index corresponding to the first occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. 
If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().position(|&b| b == needle1 || b == needle2 || b == needle3)`, +/// this routine will attempt to use highly optimized vector operations that +/// can be an order of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the first position of one of three possible bytes in +/// a haystack. +/// +/// ``` +/// use memchr::memchr3; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memchr3(b'k', b'q', b'u', haystack), Some(4)); +/// ``` +#[inline] +pub fn memchr3( +    needle1: u8, +    needle2: u8, +    needle3: u8, +    haystack: &[u8], +) -> Option<usize> { +    // SAFETY: memchr3_raw, when a match is found, always returns a valid +    // pointer between start and end. +    unsafe { +        generic::search_slice_with_raw(haystack, |start, end| { +            memchr3_raw(needle1, needle2, needle3, start, end) +        }) +    } +} + +/// Search for the last occurrence of three possible bytes in a haystack. +/// +/// This returns the index corresponding to the last occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2 || b == needle3)`, +/// this routine will attempt to use highly optimized vector operations that +/// can be an order of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the last position of one of three possible bytes in +/// a haystack. +/// +/// ``` +/// use memchr::memrchr3; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memrchr3(b'k', b'o', b'n', haystack), Some(17)); +/// ``` +#[inline] +pub fn memrchr3( +    needle1: u8, +    needle2: u8, +    needle3: u8, +    haystack: &[u8], +) -> Option<usize> { +    // SAFETY: memrchr3_raw, when a match is found, always returns a valid +    // pointer between start and end. +    unsafe { +        generic::search_slice_with_raw(haystack, |start, end| { +            memrchr3_raw(needle1, needle2, needle3, start, end) +        }) +    } +} + +/// Returns an iterator over all occurrences of the needle in a haystack. +/// +/// The iterator returned implements `DoubleEndedIterator`. This means it +/// can also be used to find occurrences in reverse order. +#[inline] +pub fn memchr_iter<'h>(needle: u8, haystack: &'h [u8]) -> Memchr<'h> { +    Memchr::new(needle, haystack) +} + +/// Returns an iterator over all occurrences of the needle in a haystack, in +/// reverse. +#[inline] +pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev<Memchr<'_>> { +    Memchr::new(needle, haystack).rev() +} + +/// Returns an iterator over all occurrences of the needles in a haystack. +/// +/// The iterator returned implements `DoubleEndedIterator`. This means it +/// can also be used to find occurrences in reverse order. +#[inline] +pub fn memchr2_iter<'h>( +    needle1: u8, +    needle2: u8, +    haystack: &'h [u8], +) -> Memchr2<'h> { +    Memchr2::new(needle1, needle2, haystack) +} + +/// Returns an iterator over all occurrences of the needles in a haystack, in +/// reverse. 
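+///
+/// # Example
+///
+/// A short sketch (added for illustration) of searching for either of two
+/// bytes, starting from the end of the haystack:
+///
+/// ```
+/// use memchr::memrchr2_iter;
+///
+/// let haystack = b"xyzaxyzb";
+///
+/// let mut it = memrchr2_iter(b'a', b'b', haystack);
+/// assert_eq!(Some(7), it.next());
+/// assert_eq!(Some(3), it.next());
+/// assert_eq!(None, it.next());
+/// ```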
+#[inline] +pub fn memrchr2_iter( +    needle1: u8, +    needle2: u8, +    haystack: &[u8], +) -> Rev<Memchr2<'_>> { +    Memchr2::new(needle1, needle2, haystack).rev() +} + +/// Returns an iterator over all occurrences of the needles in a haystack. +/// +/// The iterator returned implements `DoubleEndedIterator`. This means it +/// can also be used to find occurrences in reverse order. +#[inline] +pub fn memchr3_iter<'h>( +    needle1: u8, +    needle2: u8, +    needle3: u8, +    haystack: &'h [u8], +) -> Memchr3<'h> { +    Memchr3::new(needle1, needle2, needle3, haystack) +} + +/// Returns an iterator over all occurrences of the needles in a haystack, in +/// reverse. +#[inline] +pub fn memrchr3_iter( +    needle1: u8, +    needle2: u8, +    needle3: u8, +    haystack: &[u8], +) -> Rev<Memchr3<'_>> { +    Memchr3::new(needle1, needle2, needle3, haystack).rev() +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`memchr_iter`] or `[memrchr_iter`] +/// functions. It can also be created with the [`Memchr::new`] method. +/// +/// The lifetime parameter `'h` refers to the lifetime of the haystack being +/// searched. +#[derive(Clone, Debug)] +pub struct Memchr<'h> { +    needle1: u8, +    it: crate::arch::generic::memchr::Iter<'h>, +} + +impl<'h> Memchr<'h> { +    /// Returns an iterator over all occurrences of the needle byte in the +    /// given haystack. +    /// +    /// The iterator returned implements `DoubleEndedIterator`. This means it +    /// can also be used to find occurrences in reverse order. +    #[inline] +    pub fn new(needle1: u8, haystack: &'h [u8]) -> Memchr<'h> { +        Memchr { +            needle1, +            it: crate::arch::generic::memchr::Iter::new(haystack), +        } +    } +} + +impl<'h> Iterator for Memchr<'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: All of our implementations of memchr ensure that any +        // pointers returns will fall within the start and end bounds, and this +        // upholds the safety contract of `self.it.next`. +        unsafe { +            // NOTE: I attempted to define an enum of previously created +            // searchers and then switch on those here instead of just +            // calling `memchr_raw` (or `One::new(..).find_raw(..)`). But +            // that turned out to have a fair bit of extra overhead when +            // searching very small haystacks. +            self.it.next(|s, e| memchr_raw(self.needle1, s, e)) +        } +    } + +    #[inline] +    fn count(self) -> usize { +        self.it.count(|s, e| { +            // SAFETY: We rely on our generic iterator to return valid start +            // and end pointers. +            unsafe { count_raw(self.needle1, s, e) } +        }) +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'h> DoubleEndedIterator for Memchr<'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: All of our implementations of memchr ensure that any +        // pointers returns will fall within the start and end bounds, and this +        // upholds the safety contract of `self.it.next_back`. 
+        unsafe { self.it.next_back(|s, e| memrchr_raw(self.needle1, s, e)) }
+    }
+}
+
+impl<'h> core::iter::FusedIterator for Memchr<'h> {}
+
+/// An iterator over all occurrences of two possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`memchr2_iter`] or [`memrchr2_iter`]
+/// functions. It can also be created with the [`Memchr2::new`] method.
+///
+/// The lifetime parameter `'h` refers to the lifetime of the haystack being
+/// searched.
+#[derive(Clone, Debug)]
+pub struct Memchr2<'h> {
+    needle1: u8,
+    needle2: u8,
+    it: crate::arch::generic::memchr::Iter<'h>,
+}
+
+impl<'h> Memchr2<'h> {
+    /// Returns an iterator over all occurrences of the needle bytes in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8, haystack: &'h [u8]) -> Memchr2<'h> {
+        Memchr2 {
+            needle1,
+            needle2,
+            it: crate::arch::generic::memchr::Iter::new(haystack),
+        }
+    }
+}
+
+impl<'h> Iterator for Memchr2<'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: All of our implementations of memchr ensure that any
+        // pointers returned will fall within the start and end bounds, and
+        // this upholds the safety contract of `self.it.next`.
+        unsafe {
+            self.it.next(|s, e| memchr2_raw(self.needle1, self.needle2, s, e))
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'h> DoubleEndedIterator for Memchr2<'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: All of our implementations of memchr ensure that any
+        // pointers returned will fall within the start and end bounds, and
+        // this upholds the safety contract of `self.it.next_back`.
+        unsafe {
+            self.it.next_back(|s, e| {
+                memrchr2_raw(self.needle1, self.needle2, s, e)
+            })
+        }
+    }
+}
+
+impl<'h> core::iter::FusedIterator for Memchr2<'h> {}
+
+/// An iterator over all occurrences of three possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`memchr3_iter`] or [`memrchr3_iter`]
+/// functions. It can also be created with the [`Memchr3::new`] method.
+///
+/// The lifetime parameter `'h` refers to the lifetime of the haystack being
+/// searched.
+#[derive(Clone, Debug)]
+pub struct Memchr3<'h> {
+    needle1: u8,
+    needle2: u8,
+    needle3: u8,
+    it: crate::arch::generic::memchr::Iter<'h>,
+}
+
+impl<'h> Memchr3<'h> {
+    /// Returns an iterator over all occurrences of the needle bytes in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
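+    ///
+    /// # Example
+    ///
+    /// A short sketch (added for illustration) of constructing the iterator
+    /// directly instead of going through [`memchr3_iter`]:
+    ///
+    /// ```
+    /// use memchr::Memchr3;
+    ///
+    /// let haystack = b"xyzaxyzbxyzc";
+    ///
+    /// let mut it = Memchr3::new(b'a', b'b', b'c', haystack);
+    /// assert_eq!(Some(3), it.next());
+    /// assert_eq!(Some(7), it.next());
+    /// assert_eq!(Some(11), it.next());
+    /// assert_eq!(None, it.next());
+    /// ```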
+    #[inline] +    pub fn new( +        needle1: u8, +        needle2: u8, +        needle3: u8, +        haystack: &'h [u8], +    ) -> Memchr3<'h> { +        Memchr3 { +            needle1, +            needle2, +            needle3, +            it: crate::arch::generic::memchr::Iter::new(haystack), +        } +    } +} + +impl<'h> Iterator for Memchr3<'h> { +    type Item = usize; + +    #[inline] +    fn next(&mut self) -> Option<usize> { +        // SAFETY: All of our implementations of memchr ensure that any +        // pointers returns will fall within the start and end bounds, and this +        // upholds the safety contract of `self.it.next`. +        unsafe { +            self.it.next(|s, e| { +                memchr3_raw(self.needle1, self.needle2, self.needle3, s, e) +            }) +        } +    } + +    #[inline] +    fn size_hint(&self) -> (usize, Option<usize>) { +        self.it.size_hint() +    } +} + +impl<'h> DoubleEndedIterator for Memchr3<'h> { +    #[inline] +    fn next_back(&mut self) -> Option<usize> { +        // SAFETY: All of our implementations of memchr ensure that any +        // pointers returns will fall within the start and end bounds, and this +        // upholds the safety contract of `self.it.next_back`. +        unsafe { +            self.it.next_back(|s, e| { +                memrchr3_raw(self.needle1, self.needle2, self.needle3, s, e) +            }) +        } +    } +} + +impl<'h> core::iter::FusedIterator for Memchr3<'h> {} + +/// memchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::find_raw`. +#[inline] +unsafe fn memchr_raw( +    needle: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    #[cfg(target_arch = "x86_64")] +    { +        // x86_64 does CPU feature detection at runtime in order to use AVX2 +        // instructions even when the `avx2` feature isn't enabled at compile +        // time. This function also handles using a fallback if neither AVX2 +        // nor SSE2 (unusual) are available. +        crate::arch::x86_64::memchr::memchr_raw(needle, start, end) +    } +    #[cfg(target_arch = "wasm32")] +    { +        crate::arch::wasm32::memchr::memchr_raw(needle, start, end) +    } +    #[cfg(target_arch = "aarch64")] +    { +        crate::arch::aarch64::memchr::memchr_raw(needle, start, end) +    } +    #[cfg(not(any( +        target_arch = "x86_64", +        target_arch = "wasm32", +        target_arch = "aarch64" +    )))] +    { +        crate::arch::all::memchr::One::new(needle).find_raw(start, end) +    } +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline] +unsafe fn memrchr_raw( +    needle: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    #[cfg(target_arch = "x86_64")] +    { +        crate::arch::x86_64::memchr::memrchr_raw(needle, start, end) +    } +    #[cfg(target_arch = "wasm32")] +    { +        crate::arch::wasm32::memchr::memrchr_raw(needle, start, end) +    } +    #[cfg(target_arch = "aarch64")] +    { +        crate::arch::aarch64::memchr::memrchr_raw(needle, start, end) +    } +    #[cfg(not(any( +        target_arch = "x86_64", +        target_arch = "wasm32", +        target_arch = "aarch64" +    )))] +    { +        crate::arch::all::memchr::One::new(needle).rfind_raw(start, end) +    } +} + +/// memchr2, but using raw pointers to represent the haystack. 
+/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. +#[inline] +unsafe fn memchr2_raw( +    needle1: u8, +    needle2: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    #[cfg(target_arch = "x86_64")] +    { +        crate::arch::x86_64::memchr::memchr2_raw(needle1, needle2, start, end) +    } +    #[cfg(target_arch = "wasm32")] +    { +        crate::arch::wasm32::memchr::memchr2_raw(needle1, needle2, start, end) +    } +    #[cfg(target_arch = "aarch64")] +    { +        crate::arch::aarch64::memchr::memchr2_raw(needle1, needle2, start, end) +    } +    #[cfg(not(any( +        target_arch = "x86_64", +        target_arch = "wasm32", +        target_arch = "aarch64" +    )))] +    { +        crate::arch::all::memchr::Two::new(needle1, needle2) +            .find_raw(start, end) +    } +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`. +#[inline] +unsafe fn memrchr2_raw( +    needle1: u8, +    needle2: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    #[cfg(target_arch = "x86_64")] +    { +        crate::arch::x86_64::memchr::memrchr2_raw(needle1, needle2, start, end) +    } +    #[cfg(target_arch = "wasm32")] +    { +        crate::arch::wasm32::memchr::memrchr2_raw(needle1, needle2, start, end) +    } +    #[cfg(target_arch = "aarch64")] +    { +        crate::arch::aarch64::memchr::memrchr2_raw( +            needle1, needle2, start, end, +        ) +    } +    #[cfg(not(any( +        target_arch = "x86_64", +        target_arch = "wasm32", +        target_arch = "aarch64" +    )))] +    { +        crate::arch::all::memchr::Two::new(needle1, needle2) +            .rfind_raw(start, end) +    } +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. +#[inline] +unsafe fn memchr3_raw( +    needle1: u8, +    needle2: u8, +    needle3: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    #[cfg(target_arch = "x86_64")] +    { +        crate::arch::x86_64::memchr::memchr3_raw( +            needle1, needle2, needle3, start, end, +        ) +    } +    #[cfg(target_arch = "wasm32")] +    { +        crate::arch::wasm32::memchr::memchr3_raw( +            needle1, needle2, needle3, start, end, +        ) +    } +    #[cfg(target_arch = "aarch64")] +    { +        crate::arch::aarch64::memchr::memchr3_raw( +            needle1, needle2, needle3, start, end, +        ) +    } +    #[cfg(not(any( +        target_arch = "x86_64", +        target_arch = "wasm32", +        target_arch = "aarch64" +    )))] +    { +        crate::arch::all::memchr::Three::new(needle1, needle2, needle3) +            .find_raw(start, end) +    } +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. 
+#[inline] +unsafe fn memrchr3_raw( +    needle1: u8, +    needle2: u8, +    needle3: u8, +    start: *const u8, +    end: *const u8, +) -> Option<*const u8> { +    #[cfg(target_arch = "x86_64")] +    { +        crate::arch::x86_64::memchr::memrchr3_raw( +            needle1, needle2, needle3, start, end, +        ) +    } +    #[cfg(target_arch = "wasm32")] +    { +        crate::arch::wasm32::memchr::memrchr3_raw( +            needle1, needle2, needle3, start, end, +        ) +    } +    #[cfg(target_arch = "aarch64")] +    { +        crate::arch::aarch64::memchr::memrchr3_raw( +            needle1, needle2, needle3, start, end, +        ) +    } +    #[cfg(not(any( +        target_arch = "x86_64", +        target_arch = "wasm32", +        target_arch = "aarch64" +    )))] +    { +        crate::arch::all::memchr::Three::new(needle1, needle2, needle3) +            .rfind_raw(start, end) +    } +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. +#[inline] +unsafe fn count_raw(needle: u8, start: *const u8, end: *const u8) -> usize { +    #[cfg(target_arch = "x86_64")] +    { +        crate::arch::x86_64::memchr::count_raw(needle, start, end) +    } +    #[cfg(target_arch = "wasm32")] +    { +        crate::arch::wasm32::memchr::count_raw(needle, start, end) +    } +    #[cfg(target_arch = "aarch64")] +    { +        crate::arch::aarch64::memchr::count_raw(needle, start, end) +    } +    #[cfg(not(any( +        target_arch = "x86_64", +        target_arch = "wasm32", +        target_arch = "aarch64" +    )))] +    { +        crate::arch::all::memchr::One::new(needle).count_raw(start, end) +    } +} + +#[cfg(test)] +mod tests { +    use super::*; + +    #[test] +    fn forward1_iter() { +        crate::tests::memchr::Runner::new(1).forward_iter( +            |haystack, needles| { +                Some(memchr_iter(needles[0], haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn forward1_oneshot() { +        crate::tests::memchr::Runner::new(1).forward_oneshot( +            |haystack, needles| Some(memchr(needles[0], haystack)), +        ) +    } + +    #[test] +    fn reverse1_iter() { +        crate::tests::memchr::Runner::new(1).reverse_iter( +            |haystack, needles| { +                Some(memrchr_iter(needles[0], haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse1_oneshot() { +        crate::tests::memchr::Runner::new(1).reverse_oneshot( +            |haystack, needles| Some(memrchr(needles[0], haystack)), +        ) +    } + +    #[test] +    fn count1_iter() { +        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { +            Some(memchr_iter(needles[0], haystack).count()) +        }) +    } + +    #[test] +    fn forward2_iter() { +        crate::tests::memchr::Runner::new(2).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(memchr2_iter(n1, n2, haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn forward2_oneshot() { +        crate::tests::memchr::Runner::new(2).forward_oneshot( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(memchr2(n1, n2, haystack)) +            }, +        ) +    } + +    #[test] +    fn reverse2_iter() { + 
       crate::tests::memchr::Runner::new(2).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(memrchr2_iter(n1, n2, haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse2_oneshot() { +        crate::tests::memchr::Runner::new(2).reverse_oneshot( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                Some(memrchr2(n1, n2, haystack)) +            }, +        ) +    } + +    #[test] +    fn forward3_iter() { +        crate::tests::memchr::Runner::new(3).forward_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(memchr3_iter(n1, n2, n3, haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn forward3_oneshot() { +        crate::tests::memchr::Runner::new(3).forward_oneshot( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(memchr3(n1, n2, n3, haystack)) +            }, +        ) +    } + +    #[test] +    fn reverse3_iter() { +        crate::tests::memchr::Runner::new(3).reverse_iter( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(memrchr3_iter(n1, n2, n3, haystack).collect()) +            }, +        ) +    } + +    #[test] +    fn reverse3_oneshot() { +        crate::tests::memchr::Runner::new(3).reverse_oneshot( +            |haystack, needles| { +                let n1 = needles.get(0).copied()?; +                let n2 = needles.get(1).copied()?; +                let n3 = needles.get(2).copied()?; +                Some(memrchr3(n1, n2, n3, haystack)) +            }, +        ) +    } + +    // Prior to memchr 2.6, the memchr iterators both implemented Send and +    // Sync. But in memchr 2.6, the iterator changed to use raw pointers +    // internally and I didn't add explicit Send/Sync impls. This ended up +    // regressing the API. This test ensures we don't do that again. +    // +    // See: https://github.com/BurntSushi/memchr/issues/133 +    #[test] +    fn sync_regression() { +        use core::panic::{RefUnwindSafe, UnwindSafe}; + +        fn assert_send_sync<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {} +        assert_send_sync::<Memchr>(); +        assert_send_sync::<Memchr2>(); +        assert_send_sync::<Memchr3>() +    } +} diff --git a/vendor/memchr/src/memmem/mod.rs b/vendor/memchr/src/memmem/mod.rs new file mode 100644 index 0000000..4f04943 --- /dev/null +++ b/vendor/memchr/src/memmem/mod.rs @@ -0,0 +1,737 @@ +/*! +This module provides forward and reverse substring search routines. + +Unlike the standard library's substring search routines, these work on +arbitrary bytes. For all non-empty needles, these routines will report exactly +the same values as the corresponding routines in the standard library. For +the empty needle, the standard library reports matches only at valid UTF-8 +boundaries, where as these routines will report matches at every position. 
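+
+For example, here is a sketch (for illustration; not one of this module's own
+examples) of the empty needle behavior described above. `é` is two bytes in
+UTF-8, so byte offset 1 is not a valid UTF-8 boundary:
+
+```
+use memchr::memmem;
+
+let haystack = "é";
+// The standard library only reports empty matches at UTF-8 boundaries...
+let std_positions: Vec<usize> =
+    haystack.match_indices("").map(|(i, _)| i).collect();
+assert_eq!(vec![0, 2], std_positions);
+// ...while these routines report a match at every byte position.
+let memmem_positions: Vec<usize> =
+    memmem::find_iter(haystack.as_bytes(), "").collect();
+assert_eq!(vec![0, 1, 2], memmem_positions);
+```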
+ +Other than being able to work on arbitrary bytes, the primary reason to prefer +these routines over the standard library routines is that these will generally +be faster. In some cases, significantly so. + +# Example: iterating over substring matches + +This example shows how to use [`find_iter`] to find occurrences of a substring +in a haystack. + +``` +use memchr::memmem; + +let haystack = b"foo bar foo baz foo"; + +let mut it = memmem::find_iter(haystack, "foo"); +assert_eq!(Some(0), it.next()); +assert_eq!(Some(8), it.next()); +assert_eq!(Some(16), it.next()); +assert_eq!(None, it.next()); +``` + +# Example: iterating over substring matches in reverse + +This example shows how to use [`rfind_iter`] to find occurrences of a substring +in a haystack starting from the end of the haystack. + +**NOTE:** This module does not implement double ended iterators, so reverse +searches aren't done by calling `rev` on a forward iterator. + +``` +use memchr::memmem; + +let haystack = b"foo bar foo baz foo"; + +let mut it = memmem::rfind_iter(haystack, "foo"); +assert_eq!(Some(16), it.next()); +assert_eq!(Some(8), it.next()); +assert_eq!(Some(0), it.next()); +assert_eq!(None, it.next()); +``` + +# Example: repeating a search for the same needle + +It may be possible for the overhead of constructing a substring searcher to be +measurable in some workloads. In cases where the same needle is used to search +many haystacks, it is possible to do construction once and thus to avoid it for +subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for +reverse searches). + +``` +use memchr::memmem; + +let finder = memmem::Finder::new("foo"); + +assert_eq!(Some(4), finder.find(b"baz foo quux")); +assert_eq!(None, finder.find(b"quux baz bar")); +``` +*/ + +pub use crate::memmem::searcher::PrefilterConfig as Prefilter; + +// This is exported here for use in the crate::arch::all::twoway +// implementation. This is essentially an abstraction breaker. Namely, the +// public API of twoway doesn't support providing a prefilter, but its crate +// internal API does. The main reason for this is that I didn't want to do the +// API design required to support it without a concrete use case. +pub(crate) use crate::memmem::searcher::Pre; + +use crate::{ +    arch::all::{ +        packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank}, +        rabinkarp, +    }, +    cow::CowBytes, +    memmem::searcher::{PrefilterState, Searcher, SearcherRev}, +}; + +mod searcher; + +/// Returns an iterator over all non-overlapping occurrences of a substring in +/// a haystack. +/// +/// # Complexity +/// +/// This routine is guaranteed to have worst case linear time complexity +/// with respect to both the needle and the haystack. That is, this runs +/// in `O(needle.len() + haystack.len())` time. +/// +/// This routine is also guaranteed to have worst case constant space +/// complexity. 
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use memchr::memmem;
+///
+/// let haystack = b"foo bar foo baz foo";
+/// let mut it = memmem::find_iter(haystack, b"foo");
+/// assert_eq!(Some(0), it.next());
+/// assert_eq!(Some(8), it.next());
+/// assert_eq!(Some(16), it.next());
+/// assert_eq!(None, it.next());
+/// ```
+#[inline]
+pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(
+    haystack: &'h [u8],
+    needle: &'n N,
+) -> FindIter<'h, 'n> {
+    FindIter::new(haystack, Finder::new(needle))
+}
+
+/// Returns a reverse iterator over all non-overlapping occurrences of a
+/// substring in a haystack.
+///
+/// # Complexity
+///
+/// This routine is guaranteed to have worst case linear time complexity
+/// with respect to both the needle and the haystack. That is, this runs
+/// in `O(needle.len() + haystack.len())` time.
+///
+/// This routine is also guaranteed to have worst case constant space
+/// complexity.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use memchr::memmem;
+///
+/// let haystack = b"foo bar foo baz foo";
+/// let mut it = memmem::rfind_iter(haystack, b"foo");
+/// assert_eq!(Some(16), it.next());
+/// assert_eq!(Some(8), it.next());
+/// assert_eq!(Some(0), it.next());
+/// assert_eq!(None, it.next());
+/// ```
+#[inline]
+pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(
+    haystack: &'h [u8],
+    needle: &'n N,
+) -> FindRevIter<'h, 'n> {
+    FindRevIter::new(haystack, FinderRev::new(needle))
+}
+
+/// Returns the index of the first occurrence of the given needle.
+///
+/// Note that if you're searching for the same needle in many different
+/// small haystacks, it may be faster to initialize a [`Finder`] once,
+/// and reuse it for each search.
+///
+/// # Complexity
+///
+/// This routine is guaranteed to have worst case linear time complexity
+/// with respect to both the needle and the haystack. That is, this runs
+/// in `O(needle.len() + haystack.len())` time.
+///
+/// This routine is also guaranteed to have worst case constant space
+/// complexity.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use memchr::memmem;
+///
+/// let haystack = b"foo bar baz";
+/// assert_eq!(Some(0), memmem::find(haystack, b"foo"));
+/// assert_eq!(Some(4), memmem::find(haystack, b"bar"));
+/// assert_eq!(None, memmem::find(haystack, b"quux"));
+/// ```
+#[inline]
+pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
+    if haystack.len() < 64 {
+        rabinkarp::Finder::new(needle).find(haystack, needle)
+    } else {
+        Finder::new(needle).find(haystack)
+    }
+}
+
+/// Returns the index of the last occurrence of the given needle.
+///
+/// Note that if you're searching for the same needle in many different
+/// small haystacks, it may be faster to initialize a [`FinderRev`] once,
+/// and reuse it for each search.
+///
+/// # Complexity
+///
+/// This routine is guaranteed to have worst case linear time complexity
+/// with respect to both the needle and the haystack. That is, this runs
+/// in `O(needle.len() + haystack.len())` time.
+///
+/// This routine is also guaranteed to have worst case constant space
+/// complexity.
+/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use memchr::memmem; +/// +/// let haystack = b"foo bar baz"; +/// assert_eq!(Some(0), memmem::rfind(haystack, b"foo")); +/// assert_eq!(Some(4), memmem::rfind(haystack, b"bar")); +/// assert_eq!(Some(8), memmem::rfind(haystack, b"ba")); +/// assert_eq!(None, memmem::rfind(haystack, b"quux")); +/// ``` +#[inline] +pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { +    if haystack.len() < 64 { +        rabinkarp::FinderRev::new(needle).rfind(haystack, needle) +    } else { +        FinderRev::new(needle).rfind(haystack) +    } +} + +/// An iterator over non-overlapping substring matches. +/// +/// Matches are reported by the byte offset at which they begin. +/// +/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the +/// needle. +#[derive(Debug, Clone)] +pub struct FindIter<'h, 'n> { +    haystack: &'h [u8], +    prestate: PrefilterState, +    finder: Finder<'n>, +    pos: usize, +} + +impl<'h, 'n> FindIter<'h, 'n> { +    #[inline(always)] +    pub(crate) fn new( +        haystack: &'h [u8], +        finder: Finder<'n>, +    ) -> FindIter<'h, 'n> { +        let prestate = PrefilterState::new(); +        FindIter { haystack, prestate, finder, pos: 0 } +    } + +    /// Convert this iterator into its owned variant, such that it no longer +    /// borrows the finder and needle. +    /// +    /// If this is already an owned iterator, then this is a no-op. Otherwise, +    /// this copies the needle. +    /// +    /// This is only available when the `alloc` feature is enabled. +    #[cfg(feature = "alloc")] +    #[inline] +    pub fn into_owned(self) -> FindIter<'h, 'static> { +        FindIter { +            haystack: self.haystack, +            prestate: self.prestate, +            finder: self.finder.into_owned(), +            pos: self.pos, +        } +    } +} + +impl<'h, 'n> Iterator for FindIter<'h, 'n> { +    type Item = usize; + +    fn next(&mut self) -> Option<usize> { +        let needle = self.finder.needle(); +        let haystack = self.haystack.get(self.pos..)?; +        let idx = +            self.finder.searcher.find(&mut self.prestate, haystack, needle)?; + +        let pos = self.pos + idx; +        self.pos = pos + needle.len().max(1); + +        Some(pos) +    } + +    fn size_hint(&self) -> (usize, Option<usize>) { +        // The largest possible number of non-overlapping matches is the +        // quotient of the haystack and the needle (or the length of the +        // haystack, if the needle is empty) +        match self.haystack.len().checked_sub(self.pos) { +            None => (0, Some(0)), +            Some(haystack_len) => match self.finder.needle().len() { +                // Empty needles always succeed and match at every point +                // (including the very end) +                0 => ( +                    haystack_len.saturating_add(1), +                    haystack_len.checked_add(1), +                ), +                needle_len => (0, Some(haystack_len / needle_len)), +            }, +        } +    } +} + +/// An iterator over non-overlapping substring matches in reverse. +/// +/// Matches are reported by the byte offset at which they begin. +/// +/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the +/// needle. 
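+///
+/// # Example
+///
+/// A short sketch (added for illustration); this iterator is normally
+/// obtained from [`rfind_iter`] or [`FinderRev::rfind_iter`]:
+///
+/// ```
+/// use memchr::memmem;
+///
+/// let haystack = b"foo bar foo";
+/// let mut it = memmem::rfind_iter(haystack, "foo");
+/// assert_eq!(Some(8), it.next());
+/// assert_eq!(Some(0), it.next());
+/// assert_eq!(None, it.next());
+/// ```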
+#[derive(Clone, Debug)] +pub struct FindRevIter<'h, 'n> { +    haystack: &'h [u8], +    finder: FinderRev<'n>, +    /// When searching with an empty needle, this gets set to `None` after +    /// we've yielded the last element at `0`. +    pos: Option<usize>, +} + +impl<'h, 'n> FindRevIter<'h, 'n> { +    #[inline(always)] +    pub(crate) fn new( +        haystack: &'h [u8], +        finder: FinderRev<'n>, +    ) -> FindRevIter<'h, 'n> { +        let pos = Some(haystack.len()); +        FindRevIter { haystack, finder, pos } +    } + +    /// Convert this iterator into its owned variant, such that it no longer +    /// borrows the finder and needle. +    /// +    /// If this is already an owned iterator, then this is a no-op. Otherwise, +    /// this copies the needle. +    /// +    /// This is only available when the `std` feature is enabled. +    #[cfg(feature = "alloc")] +    #[inline] +    pub fn into_owned(self) -> FindRevIter<'h, 'static> { +        FindRevIter { +            haystack: self.haystack, +            finder: self.finder.into_owned(), +            pos: self.pos, +        } +    } +} + +impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { +    type Item = usize; + +    fn next(&mut self) -> Option<usize> { +        let pos = match self.pos { +            None => return None, +            Some(pos) => pos, +        }; +        let result = self.finder.rfind(&self.haystack[..pos]); +        match result { +            None => None, +            Some(i) => { +                if pos == i { +                    self.pos = pos.checked_sub(1); +                } else { +                    self.pos = Some(i); +                } +                Some(i) +            } +        } +    } +} + +/// A single substring searcher fixed to a particular needle. +/// +/// The purpose of this type is to permit callers to construct a substring +/// searcher that can be used to search haystacks without the overhead of +/// constructing the searcher in the first place. This is a somewhat niche +/// concern when it's necessary to re-use the same needle to search multiple +/// different haystacks with as little overhead as possible. In general, using +/// [`find`] is good enough, but `Finder` is useful when you can meaningfully +/// observe searcher construction time in a profile. +/// +/// When the `std` feature is enabled, then this type has an `into_owned` +/// version which permits building a `Finder` that is not connected to +/// the lifetime of its needle. +#[derive(Clone, Debug)] +pub struct Finder<'n> { +    needle: CowBytes<'n>, +    searcher: Searcher, +} + +impl<'n> Finder<'n> { +    /// Create a new finder for the given needle. +    #[inline] +    pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> { +        FinderBuilder::new().build_forward(needle) +    } + +    /// Returns the index of the first occurrence of this needle in the given +    /// haystack. +    /// +    /// # Complexity +    /// +    /// This routine is guaranteed to have worst case linear time complexity +    /// with respect to both the needle and the haystack. That is, this runs +    /// in `O(needle.len() + haystack.len())` time. +    /// +    /// This routine is also guaranteed to have worst case constant space +    /// complexity. 
+    /// +    /// # Examples +    /// +    /// Basic usage: +    /// +    /// ``` +    /// use memchr::memmem::Finder; +    /// +    /// let haystack = b"foo bar baz"; +    /// assert_eq!(Some(0), Finder::new("foo").find(haystack)); +    /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); +    /// assert_eq!(None, Finder::new("quux").find(haystack)); +    /// ``` +    #[inline] +    pub fn find(&self, haystack: &[u8]) -> Option<usize> { +        let mut prestate = PrefilterState::new(); +        let needle = self.needle.as_slice(); +        self.searcher.find(&mut prestate, haystack, needle) +    } + +    /// Returns an iterator over all occurrences of a substring in a haystack. +    /// +    /// # Complexity +    /// +    /// This routine is guaranteed to have worst case linear time complexity +    /// with respect to both the needle and the haystack. That is, this runs +    /// in `O(needle.len() + haystack.len())` time. +    /// +    /// This routine is also guaranteed to have worst case constant space +    /// complexity. +    /// +    /// # Examples +    /// +    /// Basic usage: +    /// +    /// ``` +    /// use memchr::memmem::Finder; +    /// +    /// let haystack = b"foo bar foo baz foo"; +    /// let finder = Finder::new(b"foo"); +    /// let mut it = finder.find_iter(haystack); +    /// assert_eq!(Some(0), it.next()); +    /// assert_eq!(Some(8), it.next()); +    /// assert_eq!(Some(16), it.next()); +    /// assert_eq!(None, it.next()); +    /// ``` +    #[inline] +    pub fn find_iter<'a, 'h>( +        &'a self, +        haystack: &'h [u8], +    ) -> FindIter<'h, 'a> { +        FindIter::new(haystack, self.as_ref()) +    } + +    /// Convert this finder into its owned variant, such that it no longer +    /// borrows the needle. +    /// +    /// If this is already an owned finder, then this is a no-op. Otherwise, +    /// this copies the needle. +    /// +    /// This is only available when the `alloc` feature is enabled. +    #[cfg(feature = "alloc")] +    #[inline] +    pub fn into_owned(self) -> Finder<'static> { +        Finder { +            needle: self.needle.into_owned(), +            searcher: self.searcher.clone(), +        } +    } + +    /// Convert this finder into its borrowed variant. +    /// +    /// This is primarily useful if your finder is owned and you'd like to +    /// store its borrowed variant in some intermediate data structure. +    /// +    /// Note that the lifetime parameter of the returned finder is tied to the +    /// lifetime of `self`, and may be shorter than the `'n` lifetime of the +    /// needle itself. Namely, a finder's needle can be either borrowed or +    /// owned, so the lifetime of the needle returned must necessarily be the +    /// shorter of the two. +    #[inline] +    pub fn as_ref(&self) -> Finder<'_> { +        Finder { +            needle: CowBytes::new(self.needle()), +            searcher: self.searcher.clone(), +        } +    } + +    /// Returns the needle that this finder searches for. +    /// +    /// Note that the lifetime of the needle returned is tied to the lifetime +    /// of the finder, and may be shorter than the `'n` lifetime. Namely, a +    /// finder's needle can be either borrowed or owned, so the lifetime of the +    /// needle returned must necessarily be the shorter of the two. +    #[inline] +    pub fn needle(&self) -> &[u8] { +        self.needle.as_slice() +    } +} + +/// A single substring reverse searcher fixed to a particular needle. 
+/// +/// The purpose of this type is to permit callers to construct a substring +/// searcher that can be used to search haystacks without the overhead of +/// constructing the searcher in the first place. This is a somewhat niche +/// concern when it's necessary to re-use the same needle to search multiple +/// different haystacks with as little overhead as possible. In general, +/// using [`rfind`] is good enough, but `FinderRev` is useful when you can +/// meaningfully observe searcher construction time in a profile. +/// +/// When the `std` feature is enabled, then this type has an `into_owned` +/// version which permits building a `FinderRev` that is not connected to +/// the lifetime of its needle. +#[derive(Clone, Debug)] +pub struct FinderRev<'n> { +    needle: CowBytes<'n>, +    searcher: SearcherRev, +} + +impl<'n> FinderRev<'n> { +    /// Create a new reverse finder for the given needle. +    #[inline] +    pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> { +        FinderBuilder::new().build_reverse(needle) +    } + +    /// Returns the index of the last occurrence of this needle in the given +    /// haystack. +    /// +    /// The haystack may be any type that can be cheaply converted into a +    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. +    /// +    /// # Complexity +    /// +    /// This routine is guaranteed to have worst case linear time complexity +    /// with respect to both the needle and the haystack. That is, this runs +    /// in `O(needle.len() + haystack.len())` time. +    /// +    /// This routine is also guaranteed to have worst case constant space +    /// complexity. +    /// +    /// # Examples +    /// +    /// Basic usage: +    /// +    /// ``` +    /// use memchr::memmem::FinderRev; +    /// +    /// let haystack = b"foo bar baz"; +    /// assert_eq!(Some(0), FinderRev::new("foo").rfind(haystack)); +    /// assert_eq!(Some(4), FinderRev::new("bar").rfind(haystack)); +    /// assert_eq!(None, FinderRev::new("quux").rfind(haystack)); +    /// ``` +    pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { +        self.searcher.rfind(haystack.as_ref(), self.needle.as_slice()) +    } + +    /// Returns a reverse iterator over all occurrences of a substring in a +    /// haystack. +    /// +    /// # Complexity +    /// +    /// This routine is guaranteed to have worst case linear time complexity +    /// with respect to both the needle and the haystack. That is, this runs +    /// in `O(needle.len() + haystack.len())` time. +    /// +    /// This routine is also guaranteed to have worst case constant space +    /// complexity. +    /// +    /// # Examples +    /// +    /// Basic usage: +    /// +    /// ``` +    /// use memchr::memmem::FinderRev; +    /// +    /// let haystack = b"foo bar foo baz foo"; +    /// let finder = FinderRev::new(b"foo"); +    /// let mut it = finder.rfind_iter(haystack); +    /// assert_eq!(Some(16), it.next()); +    /// assert_eq!(Some(8), it.next()); +    /// assert_eq!(Some(0), it.next()); +    /// assert_eq!(None, it.next()); +    /// ``` +    #[inline] +    pub fn rfind_iter<'a, 'h>( +        &'a self, +        haystack: &'h [u8], +    ) -> FindRevIter<'h, 'a> { +        FindRevIter::new(haystack, self.as_ref()) +    } + +    /// Convert this finder into its owned variant, such that it no longer +    /// borrows the needle. +    /// +    /// If this is already an owned finder, then this is a no-op. Otherwise, +    /// this copies the needle. 
+    /// +    /// This is only available when the `std` feature is enabled. +    #[cfg(feature = "alloc")] +    #[inline] +    pub fn into_owned(self) -> FinderRev<'static> { +        FinderRev { +            needle: self.needle.into_owned(), +            searcher: self.searcher.clone(), +        } +    } + +    /// Convert this finder into its borrowed variant. +    /// +    /// This is primarily useful if your finder is owned and you'd like to +    /// store its borrowed variant in some intermediate data structure. +    /// +    /// Note that the lifetime parameter of the returned finder is tied to the +    /// lifetime of `self`, and may be shorter than the `'n` lifetime of the +    /// needle itself. Namely, a finder's needle can be either borrowed or +    /// owned, so the lifetime of the needle returned must necessarily be the +    /// shorter of the two. +    #[inline] +    pub fn as_ref(&self) -> FinderRev<'_> { +        FinderRev { +            needle: CowBytes::new(self.needle()), +            searcher: self.searcher.clone(), +        } +    } + +    /// Returns the needle that this finder searches for. +    /// +    /// Note that the lifetime of the needle returned is tied to the lifetime +    /// of the finder, and may be shorter than the `'n` lifetime. Namely, a +    /// finder's needle can be either borrowed or owned, so the lifetime of the +    /// needle returned must necessarily be the shorter of the two. +    #[inline] +    pub fn needle(&self) -> &[u8] { +        self.needle.as_slice() +    } +} + +/// A builder for constructing non-default forward or reverse memmem finders. +/// +/// A builder is primarily useful for configuring a substring searcher. +/// Currently, the only configuration exposed is the ability to disable +/// heuristic prefilters used to speed up certain searches. +#[derive(Clone, Debug, Default)] +pub struct FinderBuilder { +    prefilter: Prefilter, +} + +impl FinderBuilder { +    /// Create a new finder builder with default settings. +    pub fn new() -> FinderBuilder { +        FinderBuilder::default() +    } + +    /// Build a forward finder using the given needle from the current +    /// settings. +    pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>( +        &self, +        needle: &'n B, +    ) -> Finder<'n> { +        self.build_forward_with_ranker(DefaultFrequencyRank, needle) +    } + +    /// Build a forward finder using the given needle and a custom heuristic for +    /// determining the frequency of a given byte in the dataset. +    /// See [`HeuristicFrequencyRank`] for more details. +    pub fn build_forward_with_ranker< +        'n, +        R: HeuristicFrequencyRank, +        B: ?Sized + AsRef<[u8]>, +    >( +        &self, +        ranker: R, +        needle: &'n B, +    ) -> Finder<'n> { +        let needle = needle.as_ref(); +        Finder { +            needle: CowBytes::new(needle), +            searcher: Searcher::new(self.prefilter, ranker, needle), +        } +    } + +    /// Build a reverse finder using the given needle from the current +    /// settings. +    pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>( +        &self, +        needle: &'n B, +    ) -> FinderRev<'n> { +        let needle = needle.as_ref(); +        FinderRev { +            needle: CowBytes::new(needle), +            searcher: SearcherRev::new(needle), +        } +    } + +    /// Configure the prefilter setting for the finder. 
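+    // A sketch of what this configuration looks like from the public API,
+    // assuming the `Prefilter` options re-exported by this module (defined
+    // below in `searcher.rs` as `PrefilterConfig`):
+    //
+    //     use memchr::memmem::{FinderBuilder, Prefilter};
+    //
+    //     let finder = FinderBuilder::new()
+    //         .prefilter(Prefilter::None)
+    //         .build_forward("needle");
+    //     assert_eq!(Some(4), finder.find(b"foo needle bar"));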
+    ///
+    /// See the documentation for [`Prefilter`] for more discussion on why
+    /// you might want to configure this.
+    pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder {
+        self.prefilter = prefilter;
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h)));
+    define_substring_reverse_quickcheck!(|h, n| Some(
+        FinderRev::new(n).rfind(h)
+    ));
+
+    #[test]
+    fn forward() {
+        crate::tests::substring::Runner::new()
+            .fwd(|h, n| Some(Finder::new(n).find(h)))
+            .run();
+    }
+
+    #[test]
+    fn reverse() {
+        crate::tests::substring::Runner::new()
+            .rev(|h, n| Some(FinderRev::new(n).rfind(h)))
+            .run();
+    }
+}
diff --git a/vendor/memchr/src/memmem/searcher.rs b/vendor/memchr/src/memmem/searcher.rs
new file mode 100644
index 0000000..98b9bd6
--- /dev/null
+++ b/vendor/memchr/src/memmem/searcher.rs
@@ -0,0 +1,1030 @@
+use crate::arch::all::{
+    packedpair::{HeuristicFrequencyRank, Pair},
+    rabinkarp, twoway,
+};
+
+#[cfg(target_arch = "aarch64")]
+use crate::arch::aarch64::neon::packedpair as neon;
+#[cfg(target_arch = "wasm32")]
+use crate::arch::wasm32::simd128::packedpair as simd128;
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+use crate::arch::x86_64::{
+    avx2::packedpair as avx2, sse2::packedpair as sse2,
+};
+
+/// A "meta" substring searcher.
+///
+/// To a first approximation, this chooses what it believes to be the "best"
+/// substring search implementation based on the needle at construction time.
+/// Then, every call to `find` will execute that particular implementation. To
+/// a second approximation, multiple substring search algorithms may be used,
+/// depending on the haystack. For example, for supremely short haystacks,
+/// Rabin-Karp is typically used.
+///
+/// See the documentation on `Prefilter` for an explanation of the dispatching
+/// mechanism. The quick summary is that an enum has too much overhead and
+/// we can't use dynamic dispatch via traits because we need to work in a
+/// core-only environment. (Dynamic dispatch works in core-only, but you
+/// need `&dyn Trait` and we really need a `Box<dyn Trait>` here. The latter
+/// requires `alloc`.) So instead, we use a union and an appropriately paired
+/// free function to read from the correct field on the union and execute the
+/// chosen substring search implementation.
+#[derive(Clone)]
+pub(crate) struct Searcher {
+    call: SearcherKindFn,
+    kind: SearcherKind,
+    rabinkarp: rabinkarp::Finder,
+}
+
+impl Searcher {
+    /// Creates a new "meta" substring searcher that attempts to choose the
+    /// best algorithm based on the needle, heuristics and what the current
+    /// target supports.
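+    // The union-plus-function-pointer dispatch described on `Searcher` above,
+    // reduced to a minimal standalone sketch. The names here are illustrative
+    // only; they are not the crate's types:
+    //
+    //     #[derive(Clone, Copy)]
+    //     union Kind {
+    //         byte: u8,
+    //         pair: (u8, u8),
+    //     }
+    //
+    //     type CallFn = unsafe fn(&Dispatch, &[u8]) -> Option<usize>;
+    //
+    //     struct Dispatch {
+    //         call: CallFn,
+    //         kind: Kind,
+    //     }
+    //
+    //     // SAFETY: only paired with a `Dispatch` whose `kind.byte` field
+    //     // was the one written at construction time.
+    //     unsafe fn call_byte(d: &Dispatch, hay: &[u8]) -> Option<usize> {
+    //         let b = d.kind.byte;
+    //         hay.iter().position(|&x| x == b)
+    //     }
+    //
+    //     impl Dispatch {
+    //         fn for_byte(b: u8) -> Dispatch {
+    //             Dispatch { call: call_byte, kind: Kind { byte: b } }
+    //         }
+    //
+    //         fn find(&self, hay: &[u8]) -> Option<usize> {
+    //             // SAFETY: `call` is always the function chosen for the
+    //             // union field written at construction.
+    //             unsafe { (self.call)(self, hay) }
+    //         }
+    //     }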
+    #[inline] +    pub(crate) fn new<R: HeuristicFrequencyRank>( +        prefilter: PrefilterConfig, +        ranker: R, +        needle: &[u8], +    ) -> Searcher { +        let rabinkarp = rabinkarp::Finder::new(needle); +        if needle.len() <= 1 { +            return if needle.is_empty() { +                trace!("building empty substring searcher"); +                Searcher { +                    call: searcher_kind_empty, +                    kind: SearcherKind { empty: () }, +                    rabinkarp, +                } +            } else { +                trace!("building one-byte substring searcher"); +                debug_assert_eq!(1, needle.len()); +                Searcher { +                    call: searcher_kind_one_byte, +                    kind: SearcherKind { one_byte: needle[0] }, +                    rabinkarp, +                } +            }; +        } +        let pair = match Pair::with_ranker(needle, &ranker) { +            Some(pair) => pair, +            None => return Searcher::twoway(needle, rabinkarp, None), +        }; +        debug_assert_ne!( +            pair.index1(), +            pair.index2(), +            "pair offsets should not be equivalent" +        ); +        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +        { +            if let Some(pp) = avx2::Finder::with_pair(needle, pair) { +                if do_packed_search(needle) { +                    trace!("building x86_64 AVX2 substring searcher"); +                    let kind = SearcherKind { avx2: pp }; +                    Searcher { call: searcher_kind_avx2, kind, rabinkarp } +                } else if prefilter.is_none() { +                    Searcher::twoway(needle, rabinkarp, None) +                } else { +                    let prestrat = Prefilter::avx2(pp, needle); +                    Searcher::twoway(needle, rabinkarp, Some(prestrat)) +                } +            } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) { +                if do_packed_search(needle) { +                    trace!("building x86_64 SSE2 substring searcher"); +                    let kind = SearcherKind { sse2: pp }; +                    Searcher { call: searcher_kind_sse2, kind, rabinkarp } +                } else if prefilter.is_none() { +                    Searcher::twoway(needle, rabinkarp, None) +                } else { +                    let prestrat = Prefilter::sse2(pp, needle); +                    Searcher::twoway(needle, rabinkarp, Some(prestrat)) +                } +            } else if prefilter.is_none() { +                Searcher::twoway(needle, rabinkarp, None) +            } else { +                // We're pretty unlikely to get to this point, but it is +                // possible to be running on x86_64 without SSE2. Namely, it's +                // really up to the OS whether it wants to support vector +                // registers or not. 
+                let prestrat = Prefilter::fallback(ranker, pair, needle); +                Searcher::twoway(needle, rabinkarp, prestrat) +            } +        } +        #[cfg(target_arch = "wasm32")] +        { +            if let Some(pp) = simd128::Finder::with_pair(needle, pair) { +                if do_packed_search(needle) { +                    trace!("building wasm32 simd128 substring searcher"); +                    let kind = SearcherKind { simd128: pp }; +                    Searcher { call: searcher_kind_simd128, kind, rabinkarp } +                } else if prefilter.is_none() { +                    Searcher::twoway(needle, rabinkarp, None) +                } else { +                    let prestrat = Prefilter::simd128(pp, needle); +                    Searcher::twoway(needle, rabinkarp, Some(prestrat)) +                } +            } else if prefilter.is_none() { +                Searcher::twoway(needle, rabinkarp, None) +            } else { +                let prestrat = Prefilter::fallback(ranker, pair, needle); +                Searcher::twoway(needle, rabinkarp, prestrat) +            } +        } +        #[cfg(target_arch = "aarch64")] +        { +            if let Some(pp) = neon::Finder::with_pair(needle, pair) { +                if do_packed_search(needle) { +                    trace!("building aarch64 neon substring searcher"); +                    let kind = SearcherKind { neon: pp }; +                    Searcher { call: searcher_kind_neon, kind, rabinkarp } +                } else if prefilter.is_none() { +                    Searcher::twoway(needle, rabinkarp, None) +                } else { +                    let prestrat = Prefilter::neon(pp, needle); +                    Searcher::twoway(needle, rabinkarp, Some(prestrat)) +                } +            } else if prefilter.is_none() { +                Searcher::twoway(needle, rabinkarp, None) +            } else { +                let prestrat = Prefilter::fallback(ranker, pair, needle); +                Searcher::twoway(needle, rabinkarp, prestrat) +            } +        } +        #[cfg(not(any( +            all(target_arch = "x86_64", target_feature = "sse2"), +            target_arch = "wasm32", +            target_arch = "aarch64" +        )))] +        { +            if prefilter.is_none() { +                Searcher::twoway(needle, rabinkarp, None) +            } else { +                let prestrat = Prefilter::fallback(ranker, pair, needle); +                Searcher::twoway(needle, rabinkarp, prestrat) +            } +        } +    } + +    /// Creates a new searcher that always uses the Two-Way algorithm. This is +    /// typically used when vector algorithms are unavailable or inappropriate. +    /// (For example, when the needle is "too long.") +    /// +    /// If a prefilter is given, then the searcher returned will be accelerated +    /// by the prefilter. 
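+    // The scalar Two-Way finder selected below is also exposed publicly. A
+    // minimal sketch of driving it directly (assuming the public
+    // `arch::all::twoway` API; note that `find` takes the needle again
+    // because the finder does not store it):
+    //
+    //     use memchr::arch::all::twoway::Finder;
+    //
+    //     let needle = b"needle";
+    //     let finder = Finder::new(needle);
+    //     assert_eq!(Some(11), finder.find(b"a haystack needle", needle));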
+    #[inline] +    fn twoway( +        needle: &[u8], +        rabinkarp: rabinkarp::Finder, +        prestrat: Option<Prefilter>, +    ) -> Searcher { +        let finder = twoway::Finder::new(needle); +        match prestrat { +            None => { +                trace!("building scalar two-way substring searcher"); +                let kind = SearcherKind { two_way: finder }; +                Searcher { call: searcher_kind_two_way, kind, rabinkarp } +            } +            Some(prestrat) => { +                trace!( +                    "building scalar two-way \ +                     substring searcher with a prefilter" +                ); +                let two_way_with_prefilter = +                    TwoWayWithPrefilter { finder, prestrat }; +                let kind = SearcherKind { two_way_with_prefilter }; +                Searcher { +                    call: searcher_kind_two_way_with_prefilter, +                    kind, +                    rabinkarp, +                } +            } +        } +    } + +    /// Searches the given haystack for the given needle. The needle given +    /// should be the same as the needle that this finder was initialized +    /// with. +    /// +    /// Inlining this can lead to big wins for latency, and #[inline] doesn't +    /// seem to be enough in some cases. +    #[inline(always)] +    pub(crate) fn find( +        &self, +        prestate: &mut PrefilterState, +        haystack: &[u8], +        needle: &[u8], +    ) -> Option<usize> { +        if haystack.len() < needle.len() { +            None +        } else { +            // SAFETY: By construction, we've ensured that the function +            // in `self.call` is properly paired with the union used in +            // `self.kind`. +            unsafe { (self.call)(self, prestate, haystack, needle) } +        } +    } +} + +impl core::fmt::Debug for Searcher { +    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { +        f.debug_struct("Searcher") +            .field("call", &"<searcher function>") +            .field("kind", &"<searcher kind union>") +            .field("rabinkarp", &self.rabinkarp) +            .finish() +    } +} + +/// A union indicating one of several possible substring search implementations +/// that are in active use. +/// +/// This union should only be read by one of the functions prefixed with +/// `searcher_kind_`. Namely, the correct function is meant to be paired with +/// the union by the caller, such that the function always reads from the +/// designated union field. +#[derive(Clone, Copy)] +union SearcherKind { +    empty: (), +    one_byte: u8, +    two_way: twoway::Finder, +    two_way_with_prefilter: TwoWayWithPrefilter, +    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +    sse2: crate::arch::x86_64::sse2::packedpair::Finder, +    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +    avx2: crate::arch::x86_64::avx2::packedpair::Finder, +    #[cfg(target_arch = "wasm32")] +    simd128: crate::arch::wasm32::simd128::packedpair::Finder, +    #[cfg(target_arch = "aarch64")] +    neon: crate::arch::aarch64::neon::packedpair::Finder, +} + +/// A two-way substring searcher with a prefilter. +#[derive(Copy, Clone, Debug)] +struct TwoWayWithPrefilter { +    finder: twoway::Finder, +    prestrat: Prefilter, +} + +/// The type of a substring search function. 
+/// +/// # Safety +/// +/// When using a function of this type, callers must ensure that the correct +/// function is paired with the value populated in `SearcherKind` union. +type SearcherKindFn = unsafe fn( +    searcher: &Searcher, +    prestate: &mut PrefilterState, +    haystack: &[u8], +    needle: &[u8], +) -> Option<usize>; + +/// Reads from the `empty` field of `SearcherKind` to handle the case of +/// searching for the empty needle. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.empty` union field is set. +unsafe fn searcher_kind_empty( +    _searcher: &Searcher, +    _prestate: &mut PrefilterState, +    _haystack: &[u8], +    _needle: &[u8], +) -> Option<usize> { +    Some(0) +} + +/// Reads from the `one_byte` field of `SearcherKind` to handle the case of +/// searching for a single byte needle. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.one_byte` union field is set. +unsafe fn searcher_kind_one_byte( +    searcher: &Searcher, +    _prestate: &mut PrefilterState, +    haystack: &[u8], +    _needle: &[u8], +) -> Option<usize> { +    let needle = searcher.kind.one_byte; +    crate::memchr(needle, haystack) +} + +/// Reads from the `two_way` field of `SearcherKind` to handle the case of +/// searching for an arbitrary needle without prefilter acceleration. Works on +/// all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.two_way` union field is set. +unsafe fn searcher_kind_two_way( +    searcher: &Searcher, +    _prestate: &mut PrefilterState, +    haystack: &[u8], +    needle: &[u8], +) -> Option<usize> { +    if rabinkarp::is_fast(haystack, needle) { +        searcher.rabinkarp.find(haystack, needle) +    } else { +        searcher.kind.two_way.find(haystack, needle) +    } +} + +/// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle +/// the case of searching for an arbitrary needle with prefilter acceleration. +/// Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union +/// field is set. +unsafe fn searcher_kind_two_way_with_prefilter( +    searcher: &Searcher, +    prestate: &mut PrefilterState, +    haystack: &[u8], +    needle: &[u8], +) -> Option<usize> { +    if rabinkarp::is_fast(haystack, needle) { +        searcher.rabinkarp.find(haystack, needle) +    } else { +        let TwoWayWithPrefilter { ref finder, ref prestrat } = +            searcher.kind.two_way_with_prefilter; +        let pre = Pre { prestate, prestrat }; +        finder.find_with_prefilter(Some(pre), haystack, needle) +    } +} + +/// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2 +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.sse2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn searcher_kind_sse2( +    searcher: &Searcher, +    _prestate: &mut PrefilterState, +    haystack: &[u8], +    needle: &[u8], +) -> Option<usize> { +    let finder = &searcher.kind.sse2; +    if haystack.len() < finder.min_haystack_len() { +        searcher.rabinkarp.find(haystack, needle) +    } else { +        finder.find(haystack, needle) +    } +} + +/// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2 +/// vectorized substring search implementation. 
+/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.avx2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn searcher_kind_avx2( +    searcher: &Searcher, +    _prestate: &mut PrefilterState, +    haystack: &[u8], +    needle: &[u8], +) -> Option<usize> { +    let finder = &searcher.kind.avx2; +    if haystack.len() < finder.min_haystack_len() { +        searcher.rabinkarp.find(haystack, needle) +    } else { +        finder.find(haystack, needle) +    } +} + +/// Reads from the `simd128` field of `SearcherKind` to execute the wasm32 +/// simd128 vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.simd128` union field is set. +#[cfg(target_arch = "wasm32")] +unsafe fn searcher_kind_simd128( +    searcher: &Searcher, +    _prestate: &mut PrefilterState, +    haystack: &[u8], +    needle: &[u8], +) -> Option<usize> { +    let finder = &searcher.kind.simd128; +    if haystack.len() < finder.min_haystack_len() { +        searcher.rabinkarp.find(haystack, needle) +    } else { +        finder.find(haystack, needle) +    } +} + +/// Reads from the `neon` field of `SearcherKind` to execute the aarch64 neon +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.neon` union field is set. +#[cfg(target_arch = "aarch64")] +unsafe fn searcher_kind_neon( +    searcher: &Searcher, +    _prestate: &mut PrefilterState, +    haystack: &[u8], +    needle: &[u8], +) -> Option<usize> { +    let finder = &searcher.kind.neon; +    if haystack.len() < finder.min_haystack_len() { +        searcher.rabinkarp.find(haystack, needle) +    } else { +        finder.find(haystack, needle) +    } +} + +/// A reverse substring searcher. +#[derive(Clone, Debug)] +pub(crate) struct SearcherRev { +    kind: SearcherRevKind, +    rabinkarp: rabinkarp::FinderRev, +} + +/// The kind of the reverse searcher. +/// +/// For the reverse case, we don't do any SIMD acceleration or prefilters. +/// There is no specific technical reason why we don't, but rather don't do it +/// because it's not clear it's worth the extra code to do so. If you have a +/// use case for it, please file an issue. +/// +/// We also don't do the union trick as we do with the forward case and +/// prefilters. Basically for the same reason we don't have prefilters or +/// vector algorithms for reverse searching: it's not clear it's worth doing. +/// Please file an issue if you have a compelling use case for fast reverse +/// substring search. +#[derive(Clone, Debug)] +enum SearcherRevKind { +    Empty, +    OneByte { needle: u8 }, +    TwoWay { finder: twoway::FinderRev }, +} + +impl SearcherRev { +    /// Creates a new searcher for finding occurrences of the given needle in +    /// reverse. That is, it reports the last (instead of the first) occurrence +    /// of a needle in a haystack. 
+    #[inline] +    pub(crate) fn new(needle: &[u8]) -> SearcherRev { +        let kind = if needle.len() <= 1 { +            if needle.is_empty() { +                trace!("building empty reverse substring searcher"); +                SearcherRevKind::Empty +            } else { +                trace!("building one-byte reverse substring searcher"); +                debug_assert_eq!(1, needle.len()); +                SearcherRevKind::OneByte { needle: needle[0] } +            } +        } else { +            trace!("building scalar two-way reverse substring searcher"); +            let finder = twoway::FinderRev::new(needle); +            SearcherRevKind::TwoWay { finder } +        }; +        let rabinkarp = rabinkarp::FinderRev::new(needle); +        SearcherRev { kind, rabinkarp } +    } + +    /// Searches the given haystack for the last occurrence of the given +    /// needle. The needle given should be the same as the needle that this +    /// finder was initialized with. +    #[inline] +    pub(crate) fn rfind( +        &self, +        haystack: &[u8], +        needle: &[u8], +    ) -> Option<usize> { +        if haystack.len() < needle.len() { +            return None; +        } +        match self.kind { +            SearcherRevKind::Empty => Some(haystack.len()), +            SearcherRevKind::OneByte { needle } => { +                crate::memrchr(needle, haystack) +            } +            SearcherRevKind::TwoWay { ref finder } => { +                if rabinkarp::is_fast(haystack, needle) { +                    self.rabinkarp.rfind(haystack, needle) +                } else { +                    finder.rfind(haystack, needle) +                } +            } +        } +    } +} + +/// Prefilter controls whether heuristics are used to accelerate searching. +/// +/// A prefilter refers to the idea of detecting candidate matches very quickly, +/// and then confirming whether those candidates are full matches. This +/// idea can be quite effective since it's often the case that looking for +/// candidates can be a lot faster than running a complete substring search +/// over the entire input. Namely, looking for candidates can be done with +/// extremely fast vectorized code. +/// +/// The downside of a prefilter is that it assumes false positives (which are +/// candidates generated by a prefilter that aren't matches) are somewhat rare +/// relative to the frequency of full matches. That is, if a lot of false +/// positives are generated, then it's possible for search time to be worse +/// than if the prefilter wasn't enabled in the first place. +/// +/// Another downside of a prefilter is that it can result in highly variable +/// performance, where some cases are extraordinarily fast and others aren't. +/// Typically, variable performance isn't a problem, but it may be for your use +/// case. +/// +/// The use of prefilters in this implementation does use a heuristic to detect +/// when a prefilter might not be carrying its weight, and will dynamically +/// disable its use. Nevertheless, this configuration option gives callers +/// the ability to disable prefilters if you have knowledge that they won't be +/// useful. +#[derive(Clone, Copy, Debug)] +#[non_exhaustive] +pub enum PrefilterConfig { +    /// Never used a prefilter in substring search. +    None, +    /// Automatically detect whether a heuristic prefilter should be used. 
If +    /// it is used, then heuristics will be used to dynamically disable the +    /// prefilter if it is believed to not be carrying its weight. +    Auto, +} + +impl Default for PrefilterConfig { +    fn default() -> PrefilterConfig { +        PrefilterConfig::Auto +    } +} + +impl PrefilterConfig { +    /// Returns true when this prefilter is set to the `None` variant. +    fn is_none(&self) -> bool { +        matches!(*self, PrefilterConfig::None) +    } +} + +/// The implementation of a prefilter. +/// +/// This type encapsulates dispatch to one of several possible choices for a +/// prefilter. Generally speaking, all prefilters have the same approximate +/// algorithm: they choose a couple of bytes from the needle that are believed +/// to be rare, use a fast vector algorithm to look for those bytes and return +/// positions as candidates for some substring search algorithm (currently only +/// Two-Way) to confirm as a match or not. +/// +/// The differences between the algorithms are actually at the vector +/// implementation level. Namely, we need different routines based on both +/// which target architecture we're on and what CPU features are supported. +/// +/// The straight-forwardly obvious approach here is to use an enum, and make +/// `Prefilter::find` do case analysis to determine which algorithm was +/// selected and invoke it. However, I've observed that this leads to poor +/// codegen in some cases, especially in latency sensitive benchmarks. That is, +/// this approach comes with overhead that I wasn't able to eliminate. +/// +/// The second obvious approach is to use dynamic dispatch with traits. Doing +/// that in this context where `Prefilter` owns the selection generally +/// requires heap allocation, and this code is designed to run in core-only +/// environments. +/// +/// So we settle on using a union (that's `PrefilterKind`) and a function +/// pointer (that's `PrefilterKindFn`). We select the right function pointer +/// based on which field in the union we set, and that function in turn +/// knows which field of the union to access. The downside of this approach +/// is that it forces us to think about safety, but the upside is that +/// there are some nice latency improvements to benchmarks. (Especially the +/// `memmem/sliceslice/short` benchmark.) +/// +/// In cases where we've selected a vector algorithm and the haystack given +/// is too short, we fallback to the scalar version of `memchr` on the +/// `rarest_byte`. (The scalar version of `memchr` is still better than a naive +/// byte-at-a-time loop because it will read in `usize`-sized chunks at a +/// time.) +#[derive(Clone, Copy)] +struct Prefilter { +    call: PrefilterKindFn, +    kind: PrefilterKind, +    rarest_byte: u8, +    rarest_offset: u8, +} + +impl Prefilter { +    /// Return a "fallback" prefilter, but only if it is believed to be +    /// effective. +    #[inline] +    fn fallback<R: HeuristicFrequencyRank>( +        ranker: R, +        pair: Pair, +        needle: &[u8], +    ) -> Option<Prefilter> { +        /// The maximum frequency rank permitted for the fallback prefilter. +        /// If the rarest byte in the needle has a frequency rank above this +        /// value, then no prefilter is used if the fallback prefilter would +        /// otherwise be selected. 
+        const MAX_FALLBACK_RANK: u8 = 250; + +        trace!("building fallback prefilter"); +        let rarest_offset = pair.index1(); +        let rarest_byte = needle[usize::from(rarest_offset)]; +        let rarest_rank = ranker.rank(rarest_byte); +        if rarest_rank > MAX_FALLBACK_RANK { +            None +        } else { +            let finder = crate::arch::all::packedpair::Finder::with_pair( +                needle, +                pair.clone(), +            )?; +            let call = prefilter_kind_fallback; +            let kind = PrefilterKind { fallback: finder }; +            Some(Prefilter { call, kind, rarest_byte, rarest_offset }) +        } +    } + +    /// Return a prefilter using a x86_64 SSE2 vector algorithm. +    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +    #[inline] +    fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter { +        trace!("building x86_64 SSE2 prefilter"); +        let rarest_offset = finder.pair().index1(); +        let rarest_byte = needle[usize::from(rarest_offset)]; +        Prefilter { +            call: prefilter_kind_sse2, +            kind: PrefilterKind { sse2: finder }, +            rarest_byte, +            rarest_offset, +        } +    } + +    /// Return a prefilter using a x86_64 AVX2 vector algorithm. +    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +    #[inline] +    fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter { +        trace!("building x86_64 AVX2 prefilter"); +        let rarest_offset = finder.pair().index1(); +        let rarest_byte = needle[usize::from(rarest_offset)]; +        Prefilter { +            call: prefilter_kind_avx2, +            kind: PrefilterKind { avx2: finder }, +            rarest_byte, +            rarest_offset, +        } +    } + +    /// Return a prefilter using a wasm32 simd128 vector algorithm. +    #[cfg(target_arch = "wasm32")] +    #[inline] +    fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter { +        trace!("building wasm32 simd128 prefilter"); +        let rarest_offset = finder.pair().index1(); +        let rarest_byte = needle[usize::from(rarest_offset)]; +        Prefilter { +            call: prefilter_kind_simd128, +            kind: PrefilterKind { simd128: finder }, +            rarest_byte, +            rarest_offset, +        } +    } + +    /// Return a prefilter using a aarch64 neon vector algorithm. +    #[cfg(target_arch = "aarch64")] +    #[inline] +    fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter { +        trace!("building aarch64 neon prefilter"); +        let rarest_offset = finder.pair().index1(); +        let rarest_byte = needle[usize::from(rarest_offset)]; +        Prefilter { +            call: prefilter_kind_neon, +            kind: PrefilterKind { neon: finder }, +            rarest_byte, +            rarest_offset, +        } +    } + +    /// Return a *candidate* position for a match. +    /// +    /// When this returns an offset, it implies that a match could begin at +    /// that offset, but it may not. That is, it is possible for a false +    /// positive to be returned. +    /// +    /// When `None` is returned, then it is guaranteed that there are no +    /// matches for the needle in the given haystack. That is, it is impossible +    /// for a false negative to be returned. +    /// +    /// The purpose of this routine is to look for candidate matching positions +    /// as quickly as possible before running a (likely) slower confirmation +    /// step. 
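+    // The candidate/confirm contract above is what the calling search loop
+    // relies on. A minimal standalone sketch of that loop, with a trivial
+    // "first byte" scan standing in for the vectorized prefilters
+    // (illustrative only, not the crate's implementation):
+    //
+    //     fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
+    //         if needle.is_empty() {
+    //             return Some(0);
+    //         }
+    //         let mut at = 0;
+    //         while at + needle.len() <= haystack.len() {
+    //             // Prefilter: cheap scan for a candidate position. `None`
+    //             // here means no match is possible anywhere.
+    //             let i = haystack[at..].iter().position(|&b| b == needle[0])?;
+    //             let candidate = at + i;
+    //             if candidate + needle.len() > haystack.len() {
+    //                 return None;
+    //             }
+    //             // Confirm: full comparison at the candidate position.
+    //             if &haystack[candidate..candidate + needle.len()] == needle {
+    //                 return Some(candidate);
+    //             }
+    //             at = candidate + 1;
+    //         }
+    //         None
+    //     }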
+    #[inline] +    fn find(&self, haystack: &[u8]) -> Option<usize> { +        // SAFETY: By construction, we've ensured that the function in +        // `self.call` is properly paired with the union used in `self.kind`. +        unsafe { (self.call)(self, haystack) } +    } + +    /// A "simple" prefilter that just looks for the occurrence of the rarest +    /// byte from the needle. This is generally only used for very small +    /// haystacks. +    #[inline] +    fn find_simple(&self, haystack: &[u8]) -> Option<usize> { +        // We don't use crate::memchr here because the haystack should be small +        // enough that memchr won't be able to use vector routines anyway. So +        // we just skip straight to the fallback implementation which is likely +        // faster. (A byte-at-a-time loop is only used when the haystack is +        // smaller than `size_of::<usize>()`.) +        crate::arch::all::memchr::One::new(self.rarest_byte) +            .find(haystack) +            .map(|i| i.saturating_sub(usize::from(self.rarest_offset))) +    } +} + +impl core::fmt::Debug for Prefilter { +    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { +        f.debug_struct("Prefilter") +            .field("call", &"<prefilter function>") +            .field("kind", &"<prefilter kind union>") +            .field("rarest_byte", &self.rarest_byte) +            .field("rarest_offset", &self.rarest_offset) +            .finish() +    } +} + +/// A union indicating one of several possible prefilters that are in active +/// use. +/// +/// This union should only be read by one of the functions prefixed with +/// `prefilter_kind_`. Namely, the correct function is meant to be paired with +/// the union by the caller, such that the function always reads from the +/// designated union field. +#[derive(Clone, Copy)] +union PrefilterKind { +    fallback: crate::arch::all::packedpair::Finder, +    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +    sse2: crate::arch::x86_64::sse2::packedpair::Finder, +    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +    avx2: crate::arch::x86_64::avx2::packedpair::Finder, +    #[cfg(target_arch = "wasm32")] +    simd128: crate::arch::wasm32::simd128::packedpair::Finder, +    #[cfg(target_arch = "aarch64")] +    neon: crate::arch::aarch64::neon::packedpair::Finder, +} + +/// The type of a prefilter function. +/// +/// # Safety +/// +/// When using a function of this type, callers must ensure that the correct +/// function is paired with the value populated in `PrefilterKind` union. +type PrefilterKindFn = +    unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option<usize>; + +/// Reads from the `fallback` field of `PrefilterKind` to execute the fallback +/// prefilter. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.fallback` union field is set. +unsafe fn prefilter_kind_fallback( +    strat: &Prefilter, +    haystack: &[u8], +) -> Option<usize> { +    strat.kind.fallback.find_prefilter(haystack) +} + +/// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2 +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.sse2` union field is set. 
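+// For haystacks shorter than a vector, the prefilters below fall back to
+// `find_simple` above: a scalar scan for the rarest needle byte whose hit is
+// pulled back by that byte's offset in the needle. A small sketch of that
+// idea using the public `arch::all::memchr::One` API (the byte and offset
+// values are made up for illustration):
+//
+//     use memchr::arch::all::memchr::One;
+//
+//     let rarest_byte = b'q';   // assumed rarest byte in the needle
+//     let rarest_offset = 2;    // its position within the needle
+//     let haystack = b"aaqzz";
+//     let candidate = One::new(rarest_byte)
+//         .find(haystack)
+//         .map(|i| i.saturating_sub(rarest_offset));
+//     assert_eq!(Some(0), candidate);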
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn prefilter_kind_sse2( +    strat: &Prefilter, +    haystack: &[u8], +) -> Option<usize> { +    let finder = &strat.kind.sse2; +    if haystack.len() < finder.min_haystack_len() { +        strat.find_simple(haystack) +    } else { +        finder.find_prefilter(haystack) +    } +} + +/// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2 +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.avx2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn prefilter_kind_avx2( +    strat: &Prefilter, +    haystack: &[u8], +) -> Option<usize> { +    let finder = &strat.kind.avx2; +    if haystack.len() < finder.min_haystack_len() { +        strat.find_simple(haystack) +    } else { +        finder.find_prefilter(haystack) +    } +} + +/// Reads from the `simd128` field of `PrefilterKind` to execute the wasm32 +/// simd128 prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.simd128` union field is set. +#[cfg(target_arch = "wasm32")] +unsafe fn prefilter_kind_simd128( +    strat: &Prefilter, +    haystack: &[u8], +) -> Option<usize> { +    let finder = &strat.kind.simd128; +    if haystack.len() < finder.min_haystack_len() { +        strat.find_simple(haystack) +    } else { +        finder.find_prefilter(haystack) +    } +} + +/// Reads from the `neon` field of `PrefilterKind` to execute the aarch64 neon +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.neon` union field is set. +#[cfg(target_arch = "aarch64")] +unsafe fn prefilter_kind_neon( +    strat: &Prefilter, +    haystack: &[u8], +) -> Option<usize> { +    let finder = &strat.kind.neon; +    if haystack.len() < finder.min_haystack_len() { +        strat.find_simple(haystack) +    } else { +        finder.find_prefilter(haystack) +    } +} + +/// PrefilterState tracks state associated with the effectiveness of a +/// prefilter. It is used to track how many bytes, on average, are skipped by +/// the prefilter. If this average dips below a certain threshold over time, +/// then the state renders the prefilter inert and stops using it. +/// +/// A prefilter state should be created for each search. (Where creating an +/// iterator is treated as a single search.) A prefilter state should only be +/// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert +/// `PrefilterState`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct PrefilterState { +    /// The number of skips that has been executed. This is always 1 greater +    /// than the actual number of skips. The special sentinel value of 0 +    /// indicates that the prefilter is inert. This is useful to avoid +    /// additional checks to determine whether the prefilter is still +    /// "effective." Once a prefilter becomes inert, it should no longer be +    /// used (according to our heuristics). +    skips: u32, +    /// The total number of bytes that have been skipped. +    skipped: u32, +} + +impl PrefilterState { +    /// The minimum number of skip attempts to try before considering whether +    /// a prefilter is effective or not. +    const MIN_SKIPS: u32 = 50; + +    /// The minimum amount of bytes that skipping must average. +    /// +    /// This value was chosen based on varying it and checking +    /// the microbenchmarks. 
In particular, this can impact the +    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set +    /// too low. +    const MIN_SKIP_BYTES: u32 = 8; + +    /// Create a fresh prefilter state. +    #[inline] +    pub(crate) fn new() -> PrefilterState { +        PrefilterState { skips: 1, skipped: 0 } +    } + +    /// Update this state with the number of bytes skipped on the last +    /// invocation of the prefilter. +    #[inline] +    fn update(&mut self, skipped: usize) { +        self.skips = self.skips.saturating_add(1); +        // We need to do this dance since it's technically possible for +        // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the +        // size of a prefilter state.) +        self.skipped = match u32::try_from(skipped) { +            Err(_) => core::u32::MAX, +            Ok(skipped) => self.skipped.saturating_add(skipped), +        }; +    } + +    /// Return true if and only if this state indicates that a prefilter is +    /// still effective. +    #[inline] +    fn is_effective(&mut self) -> bool { +        if self.is_inert() { +            return false; +        } +        if self.skips() < PrefilterState::MIN_SKIPS { +            return true; +        } +        if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() { +            return true; +        } + +        // We're inert. +        self.skips = 0; +        false +    } + +    /// Returns true if the prefilter this state represents should no longer +    /// be used. +    #[inline] +    fn is_inert(&self) -> bool { +        self.skips == 0 +    } + +    /// Returns the total number of times the prefilter has been used. +    #[inline] +    fn skips(&self) -> u32 { +        // Remember, `0` is a sentinel value indicating inertness, so we +        // always need to subtract `1` to get our actual number of skips. +        self.skips.saturating_sub(1) +    } +} + +/// A combination of prefilter effectiveness state and the prefilter itself. +#[derive(Debug)] +pub(crate) struct Pre<'a> { +    /// State that tracks the effectiveness of a prefilter. +    prestate: &'a mut PrefilterState, +    /// The actual prefilter. +    prestrat: &'a Prefilter, +} + +impl<'a> Pre<'a> { +    /// Call this prefilter on the given haystack with the given needle. +    #[inline] +    pub(crate) fn find(&mut self, haystack: &[u8]) -> Option<usize> { +        let result = self.prestrat.find(haystack); +        self.prestate.update(result.unwrap_or(haystack.len())); +        result +    } + +    /// Return true if and only if this prefilter should be used. +    #[inline] +    pub(crate) fn is_effective(&mut self) -> bool { +        self.prestate.is_effective() +    } +} + +/// Returns true if the needle has the right characteristics for a vector +/// algorithm to handle the entirety of substring search. +/// +/// Vector algorithms can be used for prefilters for other substring search +/// algorithms (like Two-Way), but they can also be used for substring search +/// on their own. When used for substring search, vector algorithms will +/// quickly identify candidate match positions (just like in the prefilter +/// case), but instead of returning the candidate position they will try to +/// confirm the match themselves. Confirmation happens via `memcmp`. This +/// works well for short needles, but can break down when many false candidate +/// positions are generated for large needles. 
Thus, we only permit vector +/// algorithms to own substring search when the needle is of a certain length. +#[inline] +fn do_packed_search(needle: &[u8]) -> bool { +    /// The minimum length of a needle required for this algorithm. The minimum +    /// is 2 since a length of 1 should just use memchr and a length of 0 isn't +    /// a case handled by this searcher. +    const MIN_LEN: usize = 2; + +    /// The maximum length of a needle required for this algorithm. +    /// +    /// In reality, there is no hard max here. The code below can handle any +    /// length needle. (Perhaps that suggests there are missing optimizations.) +    /// Instead, this is a heuristic and a bound guaranteeing our linear time +    /// complexity. +    /// +    /// It is a heuristic because when a candidate match is found, memcmp is +    /// run. For very large needles with lots of false positives, memcmp can +    /// make the code run quite slow. +    /// +    /// It is a bound because the worst case behavior with memcmp is +    /// multiplicative in the size of the needle and haystack, and we want +    /// to keep that additive. This bound ensures we still meet that bound +    /// theoretically, since it's just a constant. We aren't acting in bad +    /// faith here, memcmp on tiny needles is so fast that even in pathological +    /// cases (see pathological vector benchmarks), this is still just as fast +    /// or faster in practice. +    /// +    /// This specific number was chosen by tweaking a bit and running +    /// benchmarks. The rare-medium-needle, for example, gets about 5% faster +    /// by using this algorithm instead of a prefilter-accelerated Two-Way. +    /// There's also a theoretical desire to keep this number reasonably +    /// low, to mitigate the impact of pathological cases. I did try 64, and +    /// some benchmarks got a little better, and others (particularly the +    /// pathological ones), got a lot worse. So... 32 it is? 
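+    ///
+    /// To make the linear-time claim concrete: even if every position of
+    /// the haystack produced a candidate, each confirmation compares at
+    /// most `MAX_LEN` (32) bytes, so the total work is bounded by
+    /// `32 * haystack.len()`, which is still linear in the haystack.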
+    const MAX_LEN: usize = 32;
+    MIN_LEN <= needle.len() && needle.len() <= MAX_LEN
+}
diff --git a/vendor/memchr/src/tests/memchr/mod.rs b/vendor/memchr/src/tests/memchr/mod.rs
new file mode 100644
index 0000000..0564ad4
--- /dev/null
+++ b/vendor/memchr/src/tests/memchr/mod.rs
@@ -0,0 +1,307 @@
+use alloc::{
+    string::{String, ToString},
+    vec,
+    vec::Vec,
+};
+
+use crate::ext::Byte;
+
+pub(crate) mod naive;
+#[macro_use]
+pub(crate) mod prop;
+
+const SEEDS: &'static [Seed] = &[
+    Seed { haystack: "a", needles: &[b'a'], positions: &[0] },
+    Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] },
+    Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] },
+    Seed { haystack: "", needles: &[b'a'], positions: &[] },
+    Seed { haystack: "z", needles: &[b'a'], positions: &[] },
+    Seed { haystack: "zz", needles: &[b'a'], positions: &[] },
+    Seed { haystack: "zza", needles: &[b'a'], positions: &[2] },
+    Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] },
+    Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] },
+    Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] },
+    Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] },
+    Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] },
+    Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] },
+    Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] },
+    Seed {
+        haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
+        needles: &[b'a'],
+        positions: &[32],
+    },
+    // two needles (applied to memchr2 + memchr3)
+    Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
+    Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
+    Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] },
+    Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] },
+    Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] },
+    Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] },
+    Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] },
+    // three needles (applied to memchr3)
+    Seed {
+        haystack: "xyz",
+        needles: &[b'x', b'y', b'z'],
+        positions: &[0, 1, 2],
+    },
+    Seed {
+        haystack: "zxy",
+        needles: &[b'x', b'y', b'z'],
+        positions: &[0, 1, 2],
+    },
+    Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] },
+    Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] },
+    Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] },
+];
+
+/// Runs a host of substring search tests.
+///
+/// This has support for "partial" substring search implementations that only
+/// work for a subset of needles/haystacks. For example, the "packed pair"
+/// substring search implementation only works for haystacks of some minimum
+/// length based on the pair of bytes selected and the size of the vector
+/// used.
+pub(crate) struct Runner {
+    needle_len: usize,
+}
+
+impl Runner {
+    /// Create a new test runner for forward and reverse byte search
+    /// implementations.
+    ///
+    /// The `needle_len` given must be at most `3` and at least `1`. It
+    /// corresponds to the number of needle bytes to search for.
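+    // A sketch of how a memchr test might drive this runner, based on the
+    // closure signatures defined below (returning `None` from the closure
+    // skips a configuration the implementation under test does not support):
+    //
+    //     Runner::new(2).forward_oneshot(|haystack, needles| {
+    //         Some(crate::memchr2(needles[0], needles[1], haystack))
+    //     });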
+    pub(crate) fn new(needle_len: usize) -> Runner { +        assert!(needle_len >= 1, "needle_len must be at least 1"); +        assert!(needle_len <= 3, "needle_len must be at most 3"); +        Runner { needle_len } +    } + +    /// Run all tests. This panics on the first failure. +    /// +    /// If the implementation being tested returns `None` for a particular +    /// haystack/needle combination, then that test is skipped. +    pub(crate) fn forward_iter<F>(self, mut test: F) +    where +        F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static, +    { +        for seed in SEEDS.iter() { +            if seed.needles.len() > self.needle_len { +                continue; +            } +            for t in seed.generate() { +                let results = match test(t.haystack.as_bytes(), &t.needles) { +                    None => continue, +                    Some(results) => results, +                }; +                assert_eq!( +                    t.expected, +                    results, +                    "needles: {:?}, haystack: {:?}", +                    t.needles +                        .iter() +                        .map(|&b| b.to_char()) +                        .collect::<Vec<char>>(), +                    t.haystack, +                ); +            } +        } +    } + +    /// Run all tests in the reverse direction. This panics on the first +    /// failure. +    /// +    /// If the implementation being tested returns `None` for a particular +    /// haystack/needle combination, then that test is skipped. +    pub(crate) fn reverse_iter<F>(self, mut test: F) +    where +        F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static, +    { +        for seed in SEEDS.iter() { +            if seed.needles.len() > self.needle_len { +                continue; +            } +            for t in seed.generate() { +                let mut results = match test(t.haystack.as_bytes(), &t.needles) +                { +                    None => continue, +                    Some(results) => results, +                }; +                results.reverse(); +                assert_eq!( +                    t.expected, +                    results, +                    "needles: {:?}, haystack: {:?}", +                    t.needles +                        .iter() +                        .map(|&b| b.to_char()) +                        .collect::<Vec<char>>(), +                    t.haystack, +                ); +            } +        } +    } + +    /// Run all tests as counting tests. This panics on the first failure. +    /// +    /// That is, this only checks that the number of matches is correct and +    /// not whether the offsets of each match are. 
+    pub(crate) fn count_iter<F>(self, mut test: F) +    where +        F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static, +    { +        for seed in SEEDS.iter() { +            if seed.needles.len() > self.needle_len { +                continue; +            } +            for t in seed.generate() { +                let got = match test(t.haystack.as_bytes(), &t.needles) { +                    None => continue, +                    Some(got) => got, +                }; +                assert_eq!( +                    t.expected.len(), +                    got, +                    "needles: {:?}, haystack: {:?}", +                    t.needles +                        .iter() +                        .map(|&b| b.to_char()) +                        .collect::<Vec<char>>(), +                    t.haystack, +                ); +            } +        } +    } + +    /// Like `Runner::forward`, but for a function that returns only the next +    /// match and not all matches. +    /// +    /// If the function returns `None`, then it is skipped. +    pub(crate) fn forward_oneshot<F>(self, mut test: F) +    where +        F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, +    { +        self.forward_iter(move |haystack, needles| { +            let mut start = 0; +            let mut results = vec![]; +            while let Some(i) = test(&haystack[start..], needles)? { +                results.push(start + i); +                start += i + 1; +            } +            Some(results) +        }) +    } + +    /// Like `Runner::reverse`, but for a function that returns only the last +    /// match and not all matches. +    /// +    /// If the function returns `None`, then it is skipped. +    pub(crate) fn reverse_oneshot<F>(self, mut test: F) +    where +        F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, +    { +        self.reverse_iter(move |haystack, needles| { +            let mut end = haystack.len(); +            let mut results = vec![]; +            while let Some(i) = test(&haystack[..end], needles)? { +                results.push(i); +                end = i; +            } +            Some(results) +        }) +    } +} + +/// A single test for memr?chr{,2,3}. +#[derive(Clone, Debug)] +struct Test { +    /// The string to search in. +    haystack: String, +    /// The needles to look for. +    needles: Vec<u8>, +    /// The offsets that are expected to be found for all needles in the +    /// forward direction. +    expected: Vec<usize>, +} + +impl Test { +    fn new(seed: &Seed) -> Test { +        Test { +            haystack: seed.haystack.to_string(), +            needles: seed.needles.to_vec(), +            expected: seed.positions.to_vec(), +        } +    } +} + +/// Data that can be expanded into many memchr tests by padding out the corpus. +#[derive(Clone, Debug)] +struct Seed { +    /// The thing to search. We use `&str` instead of `&[u8]` because they +    /// are nicer to write in tests, and we don't miss much since memchr +    /// doesn't care about UTF-8. +    /// +    /// Corpora cannot contain either '%' or '#'. We use these bytes when +    /// expanding test cases into many test cases, and we assume they are not +    /// used. If they are used, `memchr_tests` will panic. +    haystack: &'static str, +    /// The needles to search for. This is intended to be an alternation of +    /// needles. The number of needles may cause this test to be skipped for +    /// some memchr variants. 
For example, a test with 2 needles cannot be used +    /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. +    /// However, a test with only 1 needle can be used to test all of `memchr`, +    /// `memchr2` and `memchr3`. We achieve this by filling in the needles with +    /// bytes that we never used in the corpus (such as '#'). +    needles: &'static [u8], +    /// The positions expected to match for all of the needles. +    positions: &'static [usize], +} + +impl Seed { +    /// Controls how much we expand the haystack on either side for each test. +    /// We lower this on Miri because otherwise running the tests would take +    /// forever. +    const EXPAND_LEN: usize = { +        #[cfg(not(miri))] +        { +            515 +        } +        #[cfg(miri)] +        { +            6 +        } +    }; + +    /// Expand this test into many variations of the same test. +    /// +    /// In particular, this will generate more tests with larger corpus sizes. +    /// The expected positions are updated to maintain the integrity of the +    /// test. +    /// +    /// This is important in testing a memchr implementation, because there are +    /// often different cases depending on the length of the corpus. +    /// +    /// Note that we extend the corpus by adding `%` bytes, which we +    /// don't otherwise use as a needle. +    fn generate(&self) -> impl Iterator<Item = Test> { +        let mut more = vec![]; + +        // Add bytes to the start of the corpus. +        for i in 0..Seed::EXPAND_LEN { +            let mut t = Test::new(self); +            let mut new: String = core::iter::repeat('%').take(i).collect(); +            new.push_str(&t.haystack); +            t.haystack = new; +            t.expected = t.expected.into_iter().map(|p| p + i).collect(); +            more.push(t); +        } +        // Add bytes to the end of the corpus. +        for i in 1..Seed::EXPAND_LEN { +            let mut t = Test::new(self); +            let padding: String = core::iter::repeat('%').take(i).collect(); +            t.haystack.push_str(&padding); +            more.push(t); +        } + +        more.into_iter() +    } +} diff --git a/vendor/memchr/src/tests/memchr/naive.rs b/vendor/memchr/src/tests/memchr/naive.rs new file mode 100644 index 0000000..6ebcdae --- /dev/null +++ b/vendor/memchr/src/tests/memchr/naive.rs @@ -0,0 +1,33 @@ +pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> { +    haystack.iter().position(|&b| b == n1) +} + +pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> { +    haystack.iter().position(|&b| b == n1 || b == n2) +} + +pub(crate) fn memchr3( +    n1: u8, +    n2: u8, +    n3: u8, +    haystack: &[u8], +) -> Option<usize> { +    haystack.iter().position(|&b| b == n1 || b == n2 || b == n3) +} + +pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> { +    haystack.iter().rposition(|&b| b == n1) +} + +pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> { +    haystack.iter().rposition(|&b| b == n1 || b == n2) +} + +pub(crate) fn memrchr3( +    n1: u8, +    n2: u8, +    n3: u8, +    haystack: &[u8], +) -> Option<usize> { +    haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3) +} diff --git a/vendor/memchr/src/tests/memchr/prop.rs b/vendor/memchr/src/tests/memchr/prop.rs new file mode 100644 index 0000000..b988260 --- /dev/null +++ b/vendor/memchr/src/tests/memchr/prop.rs @@ -0,0 +1,321 @@ +#[cfg(miri)] +#[macro_export] +macro_rules! 
define_memchr_quickcheck { +    ($($tt:tt)*) => {}; +} + +#[cfg(not(miri))] +#[macro_export] +macro_rules! define_memchr_quickcheck { +    ($mod:ident) => { +        define_memchr_quickcheck!($mod, new); +    }; +    ($mod:ident, $cons:ident) => { +        use alloc::vec::Vec; + +        use quickcheck::TestResult; + +        use crate::tests::memchr::{ +            naive, +            prop::{double_ended_take, naive1_iter, naive2_iter, naive3_iter}, +        }; + +        quickcheck::quickcheck! { +            fn qc_memchr_matches_naive(n1: u8, corpus: Vec<u8>) -> TestResult { +                let expected = naive::memchr(n1, &corpus); +                let got = match $mod::One::$cons(n1) { +                    None => return TestResult::discard(), +                    Some(f) => f.find(&corpus), +                }; +                TestResult::from_bool(expected == got) +            } + +            fn qc_memrchr_matches_naive(n1: u8, corpus: Vec<u8>) -> TestResult { +                let expected = naive::memrchr(n1, &corpus); +                let got = match $mod::One::$cons(n1) { +                    None => return TestResult::discard(), +                    Some(f) => f.rfind(&corpus), +                }; +                TestResult::from_bool(expected == got) +            } + +            fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> TestResult { +                let expected = naive::memchr2(n1, n2, &corpus); +                let got = match $mod::Two::$cons(n1, n2) { +                    None => return TestResult::discard(), +                    Some(f) => f.find(&corpus), +                }; +                TestResult::from_bool(expected == got) +            } + +            fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec<u8>) -> TestResult { +                let expected = naive::memrchr2(n1, n2, &corpus); +                let got = match $mod::Two::$cons(n1, n2) { +                    None => return TestResult::discard(), +                    Some(f) => f.rfind(&corpus), +                }; +                TestResult::from_bool(expected == got) +            } + +            fn qc_memchr3_matches_naive( +                n1: u8, n2: u8, n3: u8, +                corpus: Vec<u8> +            ) -> TestResult { +                let expected = naive::memchr3(n1, n2, n3, &corpus); +                let got = match $mod::Three::$cons(n1, n2, n3) { +                    None => return TestResult::discard(), +                    Some(f) => f.find(&corpus), +                }; +                TestResult::from_bool(expected == got) +            } + +            fn qc_memrchr3_matches_naive( +                n1: u8, n2: u8, n3: u8, +                corpus: Vec<u8> +            ) -> TestResult { +                let expected = naive::memrchr3(n1, n2, n3, &corpus); +                let got = match $mod::Three::$cons(n1, n2, n3) { +                    None => return TestResult::discard(), +                    Some(f) => f.rfind(&corpus), +                }; +                TestResult::from_bool(expected == got) +            } + +            fn qc_memchr_double_ended_iter( +                needle: u8, data: Vec<u8>, take_side: Vec<bool> +            ) -> TestResult { +                // make nonempty +                let mut take_side = take_side; +                if take_side.is_empty() { take_side.push(true) }; + +                let finder = match $mod::One::$cons(needle) { +                    None => return TestResult::discard(), +                    Some(finder) 
=> finder, +                }; +                let iter = finder.iter(&data); +                let got = double_ended_take( +                    iter, +                    take_side.iter().cycle().cloned(), +                ); +                let expected = naive1_iter(needle, &data); + +                TestResult::from_bool(got.iter().cloned().eq(expected)) +            } + +            fn qc_memchr2_double_ended_iter( +                needle1: u8, needle2: u8, data: Vec<u8>, take_side: Vec<bool> +            ) -> TestResult { +                // make nonempty +                let mut take_side = take_side; +                if take_side.is_empty() { take_side.push(true) }; + +                let finder = match $mod::Two::$cons(needle1, needle2) { +                    None => return TestResult::discard(), +                    Some(finder) => finder, +                }; +                let iter = finder.iter(&data); +                let got = double_ended_take( +                    iter, +                    take_side.iter().cycle().cloned(), +                ); +                let expected = naive2_iter(needle1, needle2, &data); + +                TestResult::from_bool(got.iter().cloned().eq(expected)) +            } + +            fn qc_memchr3_double_ended_iter( +                needle1: u8, needle2: u8, needle3: u8, +                data: Vec<u8>, take_side: Vec<bool> +            ) -> TestResult { +                // make nonempty +                let mut take_side = take_side; +                if take_side.is_empty() { take_side.push(true) }; + +                let finder = match $mod::Three::$cons(needle1, needle2, needle3) { +                    None => return TestResult::discard(), +                    Some(finder) => finder, +                }; +                let iter = finder.iter(&data); +                let got = double_ended_take( +                    iter, +                    take_side.iter().cycle().cloned(), +                ); +                let expected = naive3_iter(needle1, needle2, needle3, &data); + +                TestResult::from_bool(got.iter().cloned().eq(expected)) +            } + +            fn qc_memchr1_iter(data: Vec<u8>) -> TestResult { +                let needle = 0; +                let finder = match $mod::One::$cons(needle) { +                    None => return TestResult::discard(), +                    Some(finder) => finder, +                }; +                let got = finder.iter(&data); +                let expected = naive1_iter(needle, &data); +                TestResult::from_bool(got.eq(expected)) +            } + +            fn qc_memchr1_rev_iter(data: Vec<u8>) -> TestResult { +                let needle = 0; + +                let finder = match $mod::One::$cons(needle) { +                    None => return TestResult::discard(), +                    Some(finder) => finder, +                }; +                let got = finder.iter(&data).rev(); +                let expected = naive1_iter(needle, &data).rev(); +                TestResult::from_bool(got.eq(expected)) +            } + +            fn qc_memchr2_iter(data: Vec<u8>) -> TestResult { +                let needle1 = 0; +                let needle2 = 1; + +                let finder = match $mod::Two::$cons(needle1, needle2) { +                    None => return TestResult::discard(), +                    Some(finder) => finder, +                }; +                let got = finder.iter(&data); +                let expected = naive2_iter(needle1, needle2, &data); +     
           TestResult::from_bool(got.eq(expected)) +            } + +            fn qc_memchr2_rev_iter(data: Vec<u8>) -> TestResult { +                let needle1 = 0; +                let needle2 = 1; + +                let finder = match $mod::Two::$cons(needle1, needle2) { +                    None => return TestResult::discard(), +                    Some(finder) => finder, +                }; +                let got = finder.iter(&data).rev(); +                let expected = naive2_iter(needle1, needle2, &data).rev(); +                TestResult::from_bool(got.eq(expected)) +            } + +            fn qc_memchr3_iter(data: Vec<u8>) -> TestResult { +                let needle1 = 0; +                let needle2 = 1; +                let needle3 = 2; + +                let finder = match $mod::Three::$cons(needle1, needle2, needle3) { +                    None => return TestResult::discard(), +                    Some(finder) => finder, +                }; +                let got = finder.iter(&data); +                let expected = naive3_iter(needle1, needle2, needle3, &data); +                TestResult::from_bool(got.eq(expected)) +            } + +            fn qc_memchr3_rev_iter(data: Vec<u8>) -> TestResult { +                let needle1 = 0; +                let needle2 = 1; +                let needle3 = 2; + +                let finder = match $mod::Three::$cons(needle1, needle2, needle3) { +                    None => return TestResult::discard(), +                    Some(finder) => finder, +                }; +                let got = finder.iter(&data).rev(); +                let expected = naive3_iter(needle1, needle2, needle3, &data).rev(); +                TestResult::from_bool(got.eq(expected)) +            } + +            fn qc_memchr1_iter_size_hint(data: Vec<u8>) -> TestResult { +                // test that the size hint is within reasonable bounds +                let needle = 0; +                let finder = match $mod::One::$cons(needle) { +                    None => return TestResult::discard(), +                    Some(finder) => finder, +                }; +                let mut iter = finder.iter(&data); +                let mut real_count = data +                    .iter() +                    .filter(|&&elt| elt == needle) +                    .count(); + +                while let Some(index) = iter.next() { +                    real_count -= 1; +                    let (lower, upper) = iter.size_hint(); +                    assert!(lower <= real_count); +                    assert!(upper.unwrap() >= real_count); +                    assert!(upper.unwrap() <= data.len() - index); +                } +                TestResult::passed() +            } +        } +    }; +} + +// take items from a DEI, taking front for each true and back for each false. +// Return a vector with the concatenation of the fronts and the reverse of the +// backs. 
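+// For example, over an iterator yielding indices [0, 5, 9] with
+// take_side = [true, false, true], the fronts are [0, 5], the backs are [9],
+// and the returned vector is [0, 5, 9].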
+#[cfg(not(miri))] +pub(crate) fn double_ended_take<I, J>( +    mut iter: I, +    take_side: J, +) -> alloc::vec::Vec<I::Item> +where +    I: DoubleEndedIterator, +    J: Iterator<Item = bool>, +{ +    let mut found_front = alloc::vec![]; +    let mut found_back = alloc::vec![]; + +    for take_front in take_side { +        if take_front { +            if let Some(pos) = iter.next() { +                found_front.push(pos); +            } else { +                break; +            } +        } else { +            if let Some(pos) = iter.next_back() { +                found_back.push(pos); +            } else { +                break; +            } +        }; +    } + +    let mut all_found = found_front; +    all_found.extend(found_back.into_iter().rev()); +    all_found +} + +// return an iterator of the 0-based indices of haystack that match the needle +#[cfg(not(miri))] +pub(crate) fn naive1_iter<'a>( +    n1: u8, +    haystack: &'a [u8], +) -> impl DoubleEndedIterator<Item = usize> + 'a { +    haystack.iter().enumerate().filter(move |&(_, &b)| b == n1).map(|t| t.0) +} + +#[cfg(not(miri))] +pub(crate) fn naive2_iter<'a>( +    n1: u8, +    n2: u8, +    haystack: &'a [u8], +) -> impl DoubleEndedIterator<Item = usize> + 'a { +    haystack +        .iter() +        .enumerate() +        .filter(move |&(_, &b)| b == n1 || b == n2) +        .map(|t| t.0) +} + +#[cfg(not(miri))] +pub(crate) fn naive3_iter<'a>( +    n1: u8, +    n2: u8, +    n3: u8, +    haystack: &'a [u8], +) -> impl DoubleEndedIterator<Item = usize> + 'a { +    haystack +        .iter() +        .enumerate() +        .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3) +        .map(|t| t.0) +} diff --git a/vendor/memchr/src/tests/mod.rs b/vendor/memchr/src/tests/mod.rs new file mode 100644 index 0000000..259b678 --- /dev/null +++ b/vendor/memchr/src/tests/mod.rs @@ -0,0 +1,15 @@ +#[macro_use] +pub(crate) mod memchr; +pub(crate) mod packedpair; +#[macro_use] +pub(crate) mod substring; + +// For debugging, particularly in CI, print out the byte order of the current +// target. +#[test] +fn byte_order() { +    #[cfg(target_endian = "little")] +    std::eprintln!("LITTLE ENDIAN"); +    #[cfg(target_endian = "big")] +    std::eprintln!("BIG ENDIAN"); +} diff --git a/vendor/memchr/src/tests/packedpair.rs b/vendor/memchr/src/tests/packedpair.rs new file mode 100644 index 0000000..204635b --- /dev/null +++ b/vendor/memchr/src/tests/packedpair.rs @@ -0,0 +1,216 @@ +use alloc::{boxed::Box, vec, vec::Vec}; + +/// A set of "packed pair" test seeds. Each seed serves as the base for the +/// generation of many other tests. In essence, the seed captures the pair of +/// bytes we used for a predicate and first byte among our needle. The tests +/// generated from each seed essentially vary the length of the needle and +/// haystack, while using the rare/first byte configuration from the seed. +/// +/// The purpose of this is to test many different needle/haystack lengths. +/// In particular, some of the vector optimizations might only have bugs +/// in haystacks of a certain size. +const SEEDS: &[Seed] = &[ +    // Why not use different 'first' bytes? It seemed like a good idea to be +    // able to configure it, but when I wrote the test generator below, it +    // didn't seem necessary to use for reasons that I forget. 
+    Seed { first: b'x', index1: b'y', index2: b'z' }, +    Seed { first: b'x', index1: b'x', index2: b'z' }, +    Seed { first: b'x', index1: b'y', index2: b'x' }, +    Seed { first: b'x', index1: b'x', index2: b'x' }, +    Seed { first: b'x', index1: b'y', index2: b'y' }, +]; + +/// Runs a host of "packed pair" search tests. +/// +/// These tests specifically look for the occurrence of a possible substring +/// match based on a pair of bytes matching at the right offsets. +pub(crate) struct Runner { +    fwd: Option< +        Box< +            dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, +        >, +    >, +} + +impl Runner { +    /// Create a new test runner for "packed pair" substring search. +    pub(crate) fn new() -> Runner { +        Runner { fwd: None } +    } + +    /// Run all tests. This panics on the first failure. +    /// +    /// If the implementation being tested returns `None` for a particular +    /// haystack/needle combination, then that test is skipped. +    /// +    /// This runs tests on both the forward and reverse implementations given. +    /// If either (or both) are missing, then tests for that implementation are +    /// skipped. +    pub(crate) fn run(self) { +        if let Some(mut fwd) = self.fwd { +            for seed in SEEDS.iter() { +                for t in seed.generate() { +                    match fwd(&t.haystack, &t.needle, t.index1, t.index2) { +                        None => continue, +                        Some(result) => { +                            assert_eq!( +                                t.fwd, result, +                                "FORWARD, needle: {:?}, haystack: {:?}, \ +                                 index1: {:?}, index2: {:?}", +                                t.needle, t.haystack, t.index1, t.index2, +                            ) +                        } +                    } +                } +            } +        } +    } + +    /// Set the implementation for forward "packed pair" substring search. +    /// +    /// If the closure returns `None`, then it is assumed that the given +    /// test cannot be applied to the particular implementation and it is +    /// skipped. For example, if a particular implementation only supports +    /// needles or haystacks for some minimum length. +    /// +    /// If this is not set, then forward "packed pair" search is not tested. +    pub(crate) fn fwd( +        mut self, +        search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, +    ) -> Runner { +        self.fwd = Some(Box::new(search)); +        self +    } +} + +/// A test that represents the input and expected output to a "packed pair" +/// search function. The test should be able to run with any "packed pair" +/// implementation and get the expected output. +struct Test { +    haystack: Vec<u8>, +    needle: Vec<u8>, +    index1: u8, +    index2: u8, +    fwd: Option<usize>, +} + +impl Test { +    /// Create a new "packed pair" test from a seed and some given offsets to +    /// the pair of bytes to use as a predicate in the seed's needle. +    /// +    /// If a valid test could not be constructed, then None is returned. +    /// (Currently, we take the approach of massaging tests to be valid +    /// instead of rejecting them outright.) 
+    fn new( +        seed: Seed, +        index1: usize, +        index2: usize, +        haystack_len: usize, +        needle_len: usize, +        fwd: Option<usize>, +    ) -> Option<Test> { +        let mut index1: u8 = index1.try_into().unwrap(); +        let mut index2: u8 = index2.try_into().unwrap(); +        // The '#' byte is never used in a haystack (unless we're expecting +        // a match), while the '@' byte is never used in a needle. +        let mut haystack = vec![b'@'; haystack_len]; +        let mut needle = vec![b'#'; needle_len]; +        needle[0] = seed.first; +        needle[index1 as usize] = seed.index1; +        needle[index2 as usize] = seed.index2; +        // If we're expecting a match, then make sure the needle occurs +        // in the haystack at the expected position. +        if let Some(i) = fwd { +            haystack[i..i + needle.len()].copy_from_slice(&needle); +        } +        // If the operations above lead to rare offsets pointing to the +        // non-first occurrence of a byte, then adjust it. This might lead +        // to redundant tests, but it's simpler than trying to change the +        // generation process I think. +        if let Some(i) = crate::memchr(seed.index1, &needle) { +            index1 = u8::try_from(i).unwrap(); +        } +        if let Some(i) = crate::memchr(seed.index2, &needle) { +            index2 = u8::try_from(i).unwrap(); +        } +        Some(Test { haystack, needle, index1, index2, fwd }) +    } +} + +/// Data that describes a single prefilter test seed. +#[derive(Clone, Copy)] +struct Seed { +    first: u8, +    index1: u8, +    index2: u8, +} + +impl Seed { +    const NEEDLE_LENGTH_LIMIT: usize = { +        #[cfg(not(miri))] +        { +            33 +        } +        #[cfg(miri)] +        { +            5 +        } +    }; + +    const HAYSTACK_LENGTH_LIMIT: usize = { +        #[cfg(not(miri))] +        { +            65 +        } +        #[cfg(miri)] +        { +            8 +        } +    }; + +    /// Generate a series of prefilter tests from this seed. +    fn generate(self) -> impl Iterator<Item = Test> { +        let len_start = 2; +        // The iterator below generates *a lot* of tests. The number of +        // tests was chosen somewhat empirically to be "bearable" when +        // running the test suite. +        // +        // We use an iterator here because the collective haystacks of all +        // these test cases add up to enough memory to OOM a conservative +        // sandbox or a small laptop. 
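+        // Concretely: for every needle length, every in-bounds offset pair
+        // (index1 <= index2) and every haystack length, this emits one test
+        // that expects no match plus one test for each possible match offset.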
+        (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| { +            let index_start = len_start - 1; +            (index_start..needle_len).flat_map(move |index1| { +                (index1..needle_len).flat_map(move |index2| { +                    (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map( +                        move |haystack_len| { +                            Test::new( +                                self, +                                index1, +                                index2, +                                haystack_len, +                                needle_len, +                                None, +                            ) +                            .into_iter() +                            .chain( +                                (0..=(haystack_len - needle_len)).flat_map( +                                    move |output| { +                                        Test::new( +                                            self, +                                            index1, +                                            index2, +                                            haystack_len, +                                            needle_len, +                                            Some(output), +                                        ) +                                    }, +                                ), +                            ) +                        }, +                    ) +                }) +            }) +        }) +    } +} diff --git a/vendor/memchr/src/tests/substring/mod.rs b/vendor/memchr/src/tests/substring/mod.rs new file mode 100644 index 0000000..dd10cbd --- /dev/null +++ b/vendor/memchr/src/tests/substring/mod.rs @@ -0,0 +1,232 @@ +/*! +This module defines tests and test helpers for substring implementations. 
+*/
+
+use alloc::{
+    boxed::Box,
+    format,
+    string::{String, ToString},
+};
+
+pub(crate) mod naive;
+#[macro_use]
+pub(crate) mod prop;
+
+const SEEDS: &'static [Seed] = &[
+    Seed::new("", "", Some(0), Some(0)),
+    Seed::new("", "a", Some(0), Some(1)),
+    Seed::new("", "ab", Some(0), Some(2)),
+    Seed::new("", "abc", Some(0), Some(3)),
+    Seed::new("a", "", None, None),
+    Seed::new("a", "a", Some(0), Some(0)),
+    Seed::new("a", "aa", Some(0), Some(1)),
+    Seed::new("a", "ba", Some(1), Some(1)),
+    Seed::new("a", "bba", Some(2), Some(2)),
+    Seed::new("a", "bbba", Some(3), Some(3)),
+    Seed::new("a", "bbbab", Some(3), Some(3)),
+    Seed::new("a", "bbbabb", Some(3), Some(3)),
+    Seed::new("a", "bbbabbb", Some(3), Some(3)),
+    Seed::new("a", "bbbbbb", None, None),
+    Seed::new("ab", "", None, None),
+    Seed::new("ab", "a", None, None),
+    Seed::new("ab", "b", None, None),
+    Seed::new("ab", "ab", Some(0), Some(0)),
+    Seed::new("ab", "aab", Some(1), Some(1)),
+    Seed::new("ab", "aaab", Some(2), Some(2)),
+    Seed::new("ab", "abaab", Some(0), Some(3)),
+    Seed::new("ab", "baaab", Some(3), Some(3)),
+    Seed::new("ab", "acb", None, None),
+    Seed::new("ab", "abba", Some(0), Some(0)),
+    Seed::new("abc", "ab", None, None),
+    Seed::new("abc", "abc", Some(0), Some(0)),
+    Seed::new("abc", "abcz", Some(0), Some(0)),
+    Seed::new("abc", "abczz", Some(0), Some(0)),
+    Seed::new("abc", "zabc", Some(1), Some(1)),
+    Seed::new("abc", "zzabc", Some(2), Some(2)),
+    Seed::new("abc", "azbc", None, None),
+    Seed::new("abc", "abzc", None, None),
+    Seed::new("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)),
+    Seed::new("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)),
+    Seed::new(
+        "xyz",
+        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz",
+        Some(32),
+        Some(32),
+    ),
+    Seed::new("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)),
+    Seed::new("\u{0}\u{1e}", "\u{1e}\u{0}", None, None),
+];
+
+/// Runs a host of substring search tests.
+///
+/// This has support for "partial" substring search implementations that only
+/// work for a subset of needles/haystacks. For example, the "packed pair"
+/// substring search implementation only works for haystacks of some minimum
+/// length based on the pair of bytes selected and the size of the vector used.
+pub(crate) struct Runner {
+    fwd: Option<
+        Box<dyn FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static>,
+    >,
+    rev: Option<
+        Box<dyn FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static>,
+    >,
+}
+
+impl Runner {
+    /// Create a new test runner for forward and reverse substring search
+    /// implementations.
+    pub(crate) fn new() -> Runner {
+        Runner { fwd: None, rev: None }
+    }
+
+    /// Run all tests. This panics on the first failure.
+    ///
+    /// If the implementation being tested returns `None` for a particular
+    /// haystack/needle combination, then that test is skipped.
+    ///
+    /// This runs tests on both the forward and reverse implementations given.
+    /// If either (or both) are missing, then tests for that implementation are
+    /// skipped.
+    pub(crate) fn run(self) { +        if let Some(mut fwd) = self.fwd { +            for seed in SEEDS.iter() { +                for t in seed.generate() { +                    match fwd(t.haystack.as_bytes(), t.needle.as_bytes()) { +                        None => continue, +                        Some(result) => { +                            assert_eq!( +                                t.fwd, result, +                                "FORWARD, needle: {:?}, haystack: {:?}", +                                t.needle, t.haystack, +                            ); +                        } +                    } +                } +            } +        } +        if let Some(mut rev) = self.rev { +            for seed in SEEDS.iter() { +                for t in seed.generate() { +                    match rev(t.haystack.as_bytes(), t.needle.as_bytes()) { +                        None => continue, +                        Some(result) => { +                            assert_eq!( +                                t.rev, result, +                                "REVERSE, needle: {:?}, haystack: {:?}", +                                t.needle, t.haystack, +                            ); +                        } +                    } +                } +            } +        } +    } + +    /// Set the implementation for forward substring search. +    /// +    /// If the closure returns `None`, then it is assumed that the given +    /// test cannot be applied to the particular implementation and it is +    /// skipped. For example, if a particular implementation only supports +    /// needles or haystacks for some minimum length. +    /// +    /// If this is not set, then forward substring search is not tested. +    pub(crate) fn fwd( +        mut self, +        search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, +    ) -> Runner { +        self.fwd = Some(Box::new(search)); +        self +    } + +    /// Set the implementation for reverse substring search. +    /// +    /// If the closure returns `None`, then it is assumed that the given +    /// test cannot be applied to the particular implementation and it is +    /// skipped. For example, if a particular implementation only supports +    /// needles or haystacks for some minimum length. +    /// +    /// If this is not set, then reverse substring search is not tested. +    pub(crate) fn rev( +        mut self, +        search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, +    ) -> Runner { +        self.rev = Some(Box::new(search)); +        self +    } +} + +/// A single substring test for forward and reverse searches. +#[derive(Clone, Debug)] +struct Test { +    needle: String, +    haystack: String, +    fwd: Option<usize>, +    rev: Option<usize>, +} + +/// A single substring test for forward and reverse searches. +/// +/// Each seed is valid on its own, but it also serves as a starting point +/// to generate more tests. Namely, we pad out the haystacks with other +/// characters so that we get more complete coverage. This is especially useful +/// for testing vector algorithms that tend to have weird special cases for +/// alignment and loop unrolling. +/// +/// Padding works by assuming certain characters never otherwise appear in a +/// needle or a haystack. Neither should contain a `#` character. 
+#[derive(Clone, Copy, Debug)] +struct Seed { +    needle: &'static str, +    haystack: &'static str, +    fwd: Option<usize>, +    rev: Option<usize>, +} + +impl Seed { +    const MAX_PAD: usize = 34; + +    const fn new( +        needle: &'static str, +        haystack: &'static str, +        fwd: Option<usize>, +        rev: Option<usize>, +    ) -> Seed { +        Seed { needle, haystack, fwd, rev } +    } + +    fn generate(self) -> impl Iterator<Item = Test> { +        assert!(!self.needle.contains('#'), "needle must not contain '#'"); +        assert!(!self.haystack.contains('#'), "haystack must not contain '#'"); +        (0..=Seed::MAX_PAD) +            // Generate tests for padding at the beginning of haystack. +            .map(move |pad| { +                let needle = self.needle.to_string(); +                let prefix = "#".repeat(pad); +                let haystack = format!("{}{}", prefix, self.haystack); +                let fwd = if needle.is_empty() { +                    Some(0) +                } else { +                    self.fwd.map(|i| pad + i) +                }; +                let rev = if needle.is_empty() { +                    Some(haystack.len()) +                } else { +                    self.rev.map(|i| pad + i) +                }; +                Test { needle, haystack, fwd, rev } +            }) +            // Generate tests for padding at the end of haystack. +            .chain((1..=Seed::MAX_PAD).map(move |pad| { +                let needle = self.needle.to_string(); +                let suffix = "#".repeat(pad); +                let haystack = format!("{}{}", self.haystack, suffix); +                let fwd = if needle.is_empty() { Some(0) } else { self.fwd }; +                let rev = if needle.is_empty() { +                    Some(haystack.len()) +                } else { +                    self.rev +                }; +                Test { needle, haystack, fwd, rev } +            })) +    } +} diff --git a/vendor/memchr/src/tests/substring/naive.rs b/vendor/memchr/src/tests/substring/naive.rs new file mode 100644 index 0000000..1bc6009 --- /dev/null +++ b/vendor/memchr/src/tests/substring/naive.rs @@ -0,0 +1,45 @@ +/*! +This module defines "naive" implementations of substring search. + +These are sometimes useful to compare with "real" substring implementations. +The idea is that they are so simple that they are unlikely to be incorrect. +*/ + +/// Naively search forwards for the given needle in the given haystack. +pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { +    let end = haystack.len().checked_sub(needle.len()).map_or(0, |i| i + 1); +    for i in 0..end { +        if needle == &haystack[i..i + needle.len()] { +            return Some(i); +        } +    } +    None +} + +/// Naively search in reverse for the given needle in the given haystack. 
+pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { +    let end = haystack.len().checked_sub(needle.len()).map_or(0, |i| i + 1); +    for i in (0..end).rev() { +        if needle == &haystack[i..i + needle.len()] { +            return Some(i); +        } +    } +    None +} + +#[cfg(test)] +mod tests { +    use crate::tests::substring; + +    use super::*; + +    #[test] +    fn forward() { +        substring::Runner::new().fwd(|h, n| Some(find(h, n))).run() +    } + +    #[test] +    fn reverse() { +        substring::Runner::new().rev(|h, n| Some(rfind(h, n))).run() +    } +} diff --git a/vendor/memchr/src/tests/substring/prop.rs b/vendor/memchr/src/tests/substring/prop.rs new file mode 100644 index 0000000..a8352ec --- /dev/null +++ b/vendor/memchr/src/tests/substring/prop.rs @@ -0,0 +1,126 @@ +/*! +This module defines a few quickcheck properties for substring search. + +It also provides a forward and reverse macro for conveniently defining +quickcheck tests that run these properties over any substring search +implementation. +*/ + +use crate::tests::substring::naive; + +/// $fwd is a `impl FnMut(haystack, needle) -> Option<Option<usize>>`. When the +/// routine returns `None`, then it's skipped, which is useful for substring +/// implementations that don't work for all inputs. +#[macro_export] +macro_rules! define_substring_forward_quickcheck { +    ($fwd:expr) => { +        #[cfg(not(miri))] +        quickcheck::quickcheck! { +            fn qc_fwd_prefix_is_substring(bs: alloc::vec::Vec<u8>) -> bool { +                crate::tests::substring::prop::prefix_is_substring(&bs, $fwd) +            } + +            fn qc_fwd_suffix_is_substring(bs: alloc::vec::Vec<u8>) -> bool { +                crate::tests::substring::prop::suffix_is_substring(&bs, $fwd) +            } + +            fn qc_fwd_matches_naive( +                haystack: alloc::vec::Vec<u8>, +                needle: alloc::vec::Vec<u8> +            ) -> bool { +                crate::tests::substring::prop::same_as_naive( +                    false, +                    &haystack, +                    &needle, +                    $fwd, +                ) +            } +        } +    }; +} + +/// $rev is a `impl FnMut(haystack, needle) -> Option<Option<usize>>`. When the +/// routine returns `None`, then it's skipped, which is useful for substring +/// implementations that don't work for all inputs. +#[macro_export] +macro_rules! define_substring_reverse_quickcheck { +    ($rev:expr) => { +        #[cfg(not(miri))] +        quickcheck::quickcheck! { +            fn qc_rev_prefix_is_substring(bs: alloc::vec::Vec<u8>) -> bool { +                crate::tests::substring::prop::prefix_is_substring(&bs, $rev) +            } + +            fn qc_rev_suffix_is_substring(bs: alloc::vec::Vec<u8>) -> bool { +                crate::tests::substring::prop::suffix_is_substring(&bs, $rev) +            } + +            fn qc_rev_matches_naive( +                haystack: alloc::vec::Vec<u8>, +                needle: alloc::vec::Vec<u8> +            ) -> bool { +                crate::tests::substring::prop::same_as_naive( +                    true, +                    &haystack, +                    &needle, +                    $rev, +                ) +            } +        } +    }; +} + +/// Check that every prefix of the given byte string is a substring. 
+pub(crate) fn prefix_is_substring( +    bs: &[u8], +    mut search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>>, +) -> bool { +    for i in 0..bs.len().saturating_sub(1) { +        let prefix = &bs[..i]; +        let result = match search(bs, prefix) { +            None => continue, +            Some(result) => result, +        }; +        if !result.is_some() { +            return false; +        } +    } +    true +} + +/// Check that every suffix of the given byte string is a substring. +pub(crate) fn suffix_is_substring( +    bs: &[u8], +    mut search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>>, +) -> bool { +    for i in 0..bs.len().saturating_sub(1) { +        let suffix = &bs[i..]; +        let result = match search(bs, suffix) { +            None => continue, +            Some(result) => result, +        }; +        if !result.is_some() { +            return false; +        } +    } +    true +} + +/// Check that naive substring search matches the result of the given search +/// algorithm. +pub(crate) fn same_as_naive( +    reverse: bool, +    haystack: &[u8], +    needle: &[u8], +    mut search: impl FnMut(&[u8], &[u8]) -> Option<Option<usize>>, +) -> bool { +    let result = match search(haystack, needle) { +        None => return true, +        Some(result) => result, +    }; +    if reverse { +        result == naive::rfind(haystack, needle) +    } else { +        result == naive::find(haystack, needle) +    } +} diff --git a/vendor/memchr/src/tests/x86_64-soft_float.json b/vendor/memchr/src/tests/x86_64-soft_float.json new file mode 100644 index 0000000..b77649e --- /dev/null +++ b/vendor/memchr/src/tests/x86_64-soft_float.json @@ -0,0 +1,15 @@ +{ +    "llvm-target": "x86_64-unknown-none", +    "target-endian": "little", +    "target-pointer-width": "64", +    "target-c-int-width": "32", +    "os": "none", +    "arch": "x86_64", +    "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", +    "linker-flavor": "ld.lld", +    "linker": "rust-lld", +    "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-3dnow,-3dnowa,-avx,-avx2,+soft-float", +    "executables": true, +    "disable-redzone": true, +    "panic-strategy": "abort" +} diff --git a/vendor/memchr/src/vector.rs b/vendor/memchr/src/vector.rs new file mode 100644 index 0000000..f360176 --- /dev/null +++ b/vendor/memchr/src/vector.rs @@ -0,0 +1,515 @@ +/// A trait for describing vector operations used by vectorized searchers. +/// +/// The trait is highly constrained to low level vector operations needed. +/// In general, it was invented mostly to be generic over x86's __m128i and +/// __m256i types. At time of writing, it also supports wasm and aarch64 +/// 128-bit vector types as well. +/// +/// # Safety +/// +/// All methods are not safe since they are intended to be implemented using +/// vendor intrinsics, which are also not safe. Callers must ensure that the +/// appropriate target features are enabled in the calling function, and that +/// the current CPU supports them. All implementations should avoid marking the +/// routines with #[target_feature] and instead mark them as #[inline(always)] +/// to ensure they get appropriately inlined. (inline(always) cannot be used +/// with target_feature.) +pub(crate) trait Vector: Copy + core::fmt::Debug { +    /// The number of bits in the vector. +    const BITS: usize; +    /// The number of bytes in the vector. That is, this is the size of the +    /// vector in memory. 
+    const BYTES: usize;
+    /// The bits that must be zero in order for a `*const u8` pointer to be
+    /// correctly aligned to read vector values.
+    const ALIGN: usize;
+
+    /// The type of the value returned by `Vector::movemask`.
+    ///
+    /// This supports abstracting over the specific representation used in
+    /// order to accommodate different representations in different ISAs.
+    type Mask: MoveMask;
+
+    /// Create a vector with 8-bit lanes with the given byte repeated into each
+    /// lane.
+    unsafe fn splat(byte: u8) -> Self;
+
+    /// Read a vector-size number of bytes from the given pointer. The pointer
+    /// must be aligned to the size of the vector.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that at least `BYTES` bytes are readable from
+    /// `data` and that `data` is aligned to a `BYTES` boundary.
+    unsafe fn load_aligned(data: *const u8) -> Self;
+
+    /// Read a vector-size number of bytes from the given pointer. The pointer
+    /// does not need to be aligned.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that at least `BYTES` bytes are readable from
+    /// `data`.
+    unsafe fn load_unaligned(data: *const u8) -> Self;
+
+    /// _mm_movemask_epi8 or _mm256_movemask_epi8
+    unsafe fn movemask(self) -> Self::Mask;
+    /// _mm_cmpeq_epi8 or _mm256_cmpeq_epi8
+    unsafe fn cmpeq(self, vector2: Self) -> Self;
+    /// _mm_and_si128 or _mm256_and_si256
+    unsafe fn and(self, vector2: Self) -> Self;
+    /// _mm_or_si128 or _mm256_or_si256
+    unsafe fn or(self, vector2: Self) -> Self;
+    /// Returns true if and only if `Self::movemask` would return a mask that
+    /// contains at least one non-zero bit.
+    unsafe fn movemask_will_have_non_zero(self) -> bool {
+        self.movemask().has_non_zero()
+    }
+}
+
+/// A trait that abstracts over a vector-to-scalar operation called
+/// "move mask."
+///
+/// On x86-64, this is `_mm_movemask_epi8` for SSE2 and `_mm256_movemask_epi8`
+/// for AVX2. It takes a vector of `u8` lanes and returns a scalar where the
+/// `i`th bit is set if and only if the most significant bit in the `i`th lane
+/// of the vector is set. The simd128 ISA for wasm32 also supports this
+/// exact same operation natively.
+///
+/// ... But aarch64 doesn't. So we have to fake it with more instructions and
+/// a slightly different representation. We could do extra work to unify the
+/// representations, but that would require additional costs in the hot path
+/// for `memchr` and `packedpair`. So instead, we abstract over the specific
+/// representation with this trait and define the operations we actually need.
+pub(crate) trait MoveMask: Copy + core::fmt::Debug {
+    /// Return a mask that is all zeros except for the least significant `n`
+    /// lanes in a corresponding vector.
+    fn all_zeros_except_least_significant(n: usize) -> Self;
+
+    /// Returns true if and only if this mask has a non-zero bit anywhere.
+    fn has_non_zero(self) -> bool;
+
+    /// Returns the number of bits set to 1 in this mask.
+    fn count_ones(self) -> usize;
+
+    /// Does a bitwise `and` operation between `self` and `other`.
+    fn and(self, other: Self) -> Self;
+
+    /// Does a bitwise `or` operation between `self` and `other`.
+    fn or(self, other: Self) -> Self;
+
+    /// Returns a mask that is equivalent to `self` but with the least
+    /// significant 1-bit set to 0.
+    fn clear_least_significant_bit(self) -> Self; + +    /// Returns the offset of the first non-zero lane this mask represents. +    fn first_offset(self) -> usize; + +    /// Returns the offset of the last non-zero lane this mask represents. +    fn last_offset(self) -> usize; +} + +/// This is a "sensible" movemask implementation where each bit represents +/// whether the most significant bit is set in each corresponding lane of a +/// vector. This is used on x86-64 and wasm, but such a mask is more expensive +/// to get on aarch64 so we use something a little different. +/// +/// We call this "sensible" because this is what we get using native sse/avx +/// movemask instructions. But neon has no such native equivalent. +#[derive(Clone, Copy, Debug)] +pub(crate) struct SensibleMoveMask(u32); + +impl SensibleMoveMask { +    /// Get the mask in a form suitable for computing offsets. +    /// +    /// Basically, this normalizes to little endian. On big endian, this swaps +    /// the bytes. +    #[inline(always)] +    fn get_for_offset(self) -> u32 { +        #[cfg(target_endian = "big")] +        { +            self.0.swap_bytes() +        } +        #[cfg(target_endian = "little")] +        { +            self.0 +        } +    } +} + +impl MoveMask for SensibleMoveMask { +    #[inline(always)] +    fn all_zeros_except_least_significant(n: usize) -> SensibleMoveMask { +        debug_assert!(n < 32); +        SensibleMoveMask(!((1 << n) - 1)) +    } + +    #[inline(always)] +    fn has_non_zero(self) -> bool { +        self.0 != 0 +    } + +    #[inline(always)] +    fn count_ones(self) -> usize { +        self.0.count_ones() as usize +    } + +    #[inline(always)] +    fn and(self, other: SensibleMoveMask) -> SensibleMoveMask { +        SensibleMoveMask(self.0 & other.0) +    } + +    #[inline(always)] +    fn or(self, other: SensibleMoveMask) -> SensibleMoveMask { +        SensibleMoveMask(self.0 | other.0) +    } + +    #[inline(always)] +    fn clear_least_significant_bit(self) -> SensibleMoveMask { +        SensibleMoveMask(self.0 & (self.0 - 1)) +    } + +    #[inline(always)] +    fn first_offset(self) -> usize { +        // We are dealing with little endian here (and if we aren't, we swap +        // the bytes so we are in practice), where the most significant byte +        // is at a higher address. That means the least significant bit that +        // is set corresponds to the position of our first matching byte. +        // That position corresponds to the number of zeros after the least +        // significant bit. +        self.get_for_offset().trailing_zeros() as usize +    } + +    #[inline(always)] +    fn last_offset(self) -> usize { +        // We are dealing with little endian here (and if we aren't, we swap +        // the bytes so we are in practice), where the most significant byte is +        // at a higher address. That means the most significant bit that is set +        // corresponds to the position of our last matching byte. The position +        // from the end of the mask is therefore the number of leading zeros +        // in a 32 bit integer, and the position from the start of the mask is +        // therefore 32 - (leading zeros) - 1. 
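+        //
+        // For example, the 16-lane mask 0b10010 (lanes 1 and 4 matched) has
+        // 27 leading zeros, so the last offset is 32 - 27 - 1 = 4.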
+        32 - self.get_for_offset().leading_zeros() as usize - 1 +    } +} + +#[cfg(target_arch = "x86_64")] +mod x86sse2 { +    use core::arch::x86_64::*; + +    use super::{SensibleMoveMask, Vector}; + +    impl Vector for __m128i { +        const BITS: usize = 128; +        const BYTES: usize = 16; +        const ALIGN: usize = Self::BYTES - 1; + +        type Mask = SensibleMoveMask; + +        #[inline(always)] +        unsafe fn splat(byte: u8) -> __m128i { +            _mm_set1_epi8(byte as i8) +        } + +        #[inline(always)] +        unsafe fn load_aligned(data: *const u8) -> __m128i { +            _mm_load_si128(data as *const __m128i) +        } + +        #[inline(always)] +        unsafe fn load_unaligned(data: *const u8) -> __m128i { +            _mm_loadu_si128(data as *const __m128i) +        } + +        #[inline(always)] +        unsafe fn movemask(self) -> SensibleMoveMask { +            SensibleMoveMask(_mm_movemask_epi8(self) as u32) +        } + +        #[inline(always)] +        unsafe fn cmpeq(self, vector2: Self) -> __m128i { +            _mm_cmpeq_epi8(self, vector2) +        } + +        #[inline(always)] +        unsafe fn and(self, vector2: Self) -> __m128i { +            _mm_and_si128(self, vector2) +        } + +        #[inline(always)] +        unsafe fn or(self, vector2: Self) -> __m128i { +            _mm_or_si128(self, vector2) +        } +    } +} + +#[cfg(target_arch = "x86_64")] +mod x86avx2 { +    use core::arch::x86_64::*; + +    use super::{SensibleMoveMask, Vector}; + +    impl Vector for __m256i { +        const BITS: usize = 256; +        const BYTES: usize = 32; +        const ALIGN: usize = Self::BYTES - 1; + +        type Mask = SensibleMoveMask; + +        #[inline(always)] +        unsafe fn splat(byte: u8) -> __m256i { +            _mm256_set1_epi8(byte as i8) +        } + +        #[inline(always)] +        unsafe fn load_aligned(data: *const u8) -> __m256i { +            _mm256_load_si256(data as *const __m256i) +        } + +        #[inline(always)] +        unsafe fn load_unaligned(data: *const u8) -> __m256i { +            _mm256_loadu_si256(data as *const __m256i) +        } + +        #[inline(always)] +        unsafe fn movemask(self) -> SensibleMoveMask { +            SensibleMoveMask(_mm256_movemask_epi8(self) as u32) +        } + +        #[inline(always)] +        unsafe fn cmpeq(self, vector2: Self) -> __m256i { +            _mm256_cmpeq_epi8(self, vector2) +        } + +        #[inline(always)] +        unsafe fn and(self, vector2: Self) -> __m256i { +            _mm256_and_si256(self, vector2) +        } + +        #[inline(always)] +        unsafe fn or(self, vector2: Self) -> __m256i { +            _mm256_or_si256(self, vector2) +        } +    } +} + +#[cfg(target_arch = "aarch64")] +mod aarch64neon { +    use core::arch::aarch64::*; + +    use super::{MoveMask, Vector}; + +    impl Vector for uint8x16_t { +        const BITS: usize = 128; +        const BYTES: usize = 16; +        const ALIGN: usize = Self::BYTES - 1; + +        type Mask = NeonMoveMask; + +        #[inline(always)] +        unsafe fn splat(byte: u8) -> uint8x16_t { +            vdupq_n_u8(byte) +        } + +        #[inline(always)] +        unsafe fn load_aligned(data: *const u8) -> uint8x16_t { +            // I've tried `data.cast::<uint8x16_t>().read()` instead, but +            // couldn't observe any benchmark differences. 
+            Self::load_unaligned(data)
+        }
+
+        #[inline(always)]
+        unsafe fn load_unaligned(data: *const u8) -> uint8x16_t {
+            vld1q_u8(data)
+        }
+
+        #[inline(always)]
+        unsafe fn movemask(self) -> NeonMoveMask {
+            let asu16s = vreinterpretq_u16_u8(self);
+            let mask = vshrn_n_u16(asu16s, 4);
+            let asu64 = vreinterpret_u64_u8(mask);
+            let scalar64 = vget_lane_u64(asu64, 0);
+            NeonMoveMask(scalar64 & 0x8888888888888888)
+        }
+
+        #[inline(always)]
+        unsafe fn cmpeq(self, vector2: Self) -> uint8x16_t {
+            vceqq_u8(self, vector2)
+        }
+
+        #[inline(always)]
+        unsafe fn and(self, vector2: Self) -> uint8x16_t {
+            vandq_u8(self, vector2)
+        }
+
+        #[inline(always)]
+        unsafe fn or(self, vector2: Self) -> uint8x16_t {
+            vorrq_u8(self, vector2)
+        }
+
+        /// This is the only interesting implementation of this routine.
+        /// Basically, instead of doing the "shift right narrow" dance, we use
+        /// adjacent folding max to determine whether there are any non-zero
+        /// bytes in our mask. If there are, *then* we'll do the "shift right
+        /// narrow" dance. In benchmarks, this does lead to slightly better
+        /// throughput, but the win doesn't appear huge.
+        #[inline(always)]
+        unsafe fn movemask_will_have_non_zero(self) -> bool {
+            let low = vreinterpretq_u64_u8(vpmaxq_u8(self, self));
+            vgetq_lane_u64(low, 0) != 0
+        }
+    }
+
+    /// Neon doesn't have a `movemask` that works like the one in x86-64, so we
+    /// wind up using a different method[1]. The different method also produces
+    /// a mask, but 4 bits are set in the neon case instead of a single bit set
+    /// in the x86-64 case. We do an extra step to zero out 3 of the 4 bits,
+    /// but we still wind up with at least 3 zeroes between each set bit. This
+    /// generally means that we need to do some division by 4 before extracting
+    /// offsets.
+    ///
+    /// In fact, the existence of this type is the entire reason that we have
+    /// the `MoveMask` trait in the first place. This basically lets us keep
+    /// the different representations of masks without being forced to unify
+    /// them into a single representation, which could result in extra and
+    /// unnecessary work.
+    ///
+    /// [1]: https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
+    #[derive(Clone, Copy, Debug)]
+    pub(crate) struct NeonMoveMask(u64);
+
+    impl NeonMoveMask {
+        /// Get the mask in a form suitable for computing offsets.
+        ///
+        /// Basically, this normalizes to little endian. On big endian, this
+        /// swaps the bytes.
+        #[inline(always)]
+        fn get_for_offset(self) -> u64 {
+            #[cfg(target_endian = "big")]
+            {
+                self.0.swap_bytes()
+            }
+            #[cfg(target_endian = "little")]
+            {
+                self.0
+            }
+        }
+    }
+
+    impl MoveMask for NeonMoveMask {
+        #[inline(always)]
+        fn all_zeros_except_least_significant(n: usize) -> NeonMoveMask {
+            debug_assert!(n < 16);
+            NeonMoveMask(!(((1 << n) << 2) - 1))
+        }
+
+        #[inline(always)]
+        fn has_non_zero(self) -> bool {
+            self.0 != 0
+        }
+
+        #[inline(always)]
+        fn count_ones(self) -> usize {
+            self.0.count_ones() as usize
+        }
+
+        #[inline(always)]
+        fn and(self, other: NeonMoveMask) -> NeonMoveMask {
+            NeonMoveMask(self.0 & other.0)
+        }
+
+        #[inline(always)]
+        fn or(self, other: NeonMoveMask) -> NeonMoveMask {
+            NeonMoveMask(self.0 | other.0)
+        }
+
+        #[inline(always)]
+        fn clear_least_significant_bit(self) -> NeonMoveMask {
+            NeonMoveMask(self.0 & (self.0 - 1))
+        }
+
+        #[inline(always)]
+        fn first_offset(self) -> usize {
+            // We are dealing with little endian here (and if we aren't,
+            // we swap the bytes so we are in practice), where the most
+            // significant byte is at a higher address. That means the least
+            // significant bit that is set corresponds to the position of our
+            // first matching byte. That position corresponds to the number of
+            // zeros below the least significant set bit.
+            //
+            // Note that unlike `SensibleMoveMask`, this mask has its bits
+            // spread out over 64 bits instead of 16 bits (for a 128 bit
+            // vector). Namely, whereas x86-64 will turn
+            //
+            //   0x00 0xFF 0x00 0x00 0xFF
+            //
+            // into 10010, our neon approach will turn it into
+            //
+            //   10000000000010000000
+            //
+            // And this happens because neon doesn't have a native `movemask`
+            // instruction, so we kind of fake it[1]. Thus, we divide the
+            // number of trailing zeros by 4 to get the "real" offset.
+            //
+            // [1]: https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
+            (self.get_for_offset().trailing_zeros() >> 2) as usize
+        }
+
+        #[inline(always)]
+        fn last_offset(self) -> usize {
+            // See comment in `first_offset` above. This is basically the same,
+            // but coming from the other direction.
+            16 - (self.get_for_offset().leading_zeros() >> 2) as usize - 1
+        }
+    }
+}
+
+#[cfg(target_arch = "wasm32")]
+mod wasm_simd128 {
+    use core::arch::wasm32::*;
+
+    use super::{SensibleMoveMask, Vector};
+
+    impl Vector for v128 {
+        const BITS: usize = 128;
+        const BYTES: usize = 16;
+        const ALIGN: usize = Self::BYTES - 1;
+
+        type Mask = SensibleMoveMask;
+
+        #[inline(always)]
+        unsafe fn splat(byte: u8) -> v128 {
+            u8x16_splat(byte)
+        }
+
+        #[inline(always)]
+        unsafe fn load_aligned(data: *const u8) -> v128 {
+            *data.cast()
+        }
+
+        #[inline(always)]
+        unsafe fn load_unaligned(data: *const u8) -> v128 {
+            v128_load(data.cast())
+        }
+
+        #[inline(always)]
+        unsafe fn movemask(self) -> SensibleMoveMask {
+            SensibleMoveMask(u8x16_bitmask(self).into())
+        }
+
+        #[inline(always)]
+        unsafe fn cmpeq(self, vector2: Self) -> v128 {
+            u8x16_eq(self, vector2)
+        }
+
+        #[inline(always)]
+        unsafe fn and(self, vector2: Self) -> v128 {
+            v128_and(self, vector2)
+        }
+
+        #[inline(always)]
+        unsafe fn or(self, vector2: Self) -> v128 {
+            v128_or(self, vector2)
+        }
+    }
+}
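A standalone sketch (not part of this patch) that models the two mask layouts described in the comments above with plain integers: it assumes matching bytes at offsets 1 and 4 of a 16-byte chunk, and shows why the neon offset computations shift right by 2 (divide by 4) while the x86-64 style ones do not.

fn main() {
    // x86-64 style movemask: bit i corresponds to byte i.
    let sensible: u32 = (1 << 1) | (1 << 4); // 0b10010
    assert_eq!(sensible.trailing_zeros() as usize, 1); // first_offset
    assert_eq!(32 - sensible.leading_zeros() as usize - 1, 4); // last_offset

    // Neon "shift right narrow" mask (after `& 0x8888888888888888`):
    // byte i contributes bit 4*i + 3, so offsets must be divided by 4.
    let neon: u64 = (0x8u64 << (4 * 1)) | (0x8u64 << (4 * 4));
    assert_eq!((neon.trailing_zeros() >> 2) as usize, 1); // first_offset
    assert_eq!(16 - (neon.leading_zeros() >> 2) as usize - 1, 4); // last_offset
}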
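And a minimal sketch, also not part of this patch, of how the splat/cmpeq/movemask steps implemented above combine to find the first occurrence of a byte in a single 16-byte chunk on aarch64. The function name and shape here are illustrative assumptions; the crate's real searchers additionally handle longer haystacks and unaligned heads and tails.

#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
fn find_in_chunk(chunk: &[u8; 16], needle: u8) -> Option<usize> {
    use core::arch::aarch64::*;
    // SAFETY: the intrinsics only read the 16 bytes of `chunk`, and neon is
    // statically enabled by the cfg above.
    unsafe {
        let hay = vld1q_u8(chunk.as_ptr()); // load_unaligned
        let eq = vceqq_u8(hay, vdupq_n_u8(needle)); // cmpeq against splat: 0xFF where equal
        // movemask_will_have_non_zero: adjacent folding max collapses the
        // comparison result; if everything is zero there is no match and the
        // "shift right narrow" work can be skipped entirely.
        if vgetq_lane_u64(vreinterpretq_u64_u8(vpmaxq_u8(eq, eq)), 0) == 0 {
            return None;
        }
        // movemask via "shift right narrow": keep the high bit of each nibble,
        // yielding one set bit per matching byte, spread four positions apart.
        let nibbles = vshrn_n_u16(vreinterpretq_u16_u8(eq), 4);
        let mask = vget_lane_u64(vreinterpret_u64_u8(nibbles), 0)
            & 0x8888888888888888;
        // first_offset: divide the trailing zero count by 4 (see comments above).
        Some((mask.trailing_zeros() >> 2) as usize)
    }
}

// e.g. find_in_chunk(b"hello, world!!!!", b'w') == Some(7)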
