Initial vendor packages

Signed-off-by: Valentin Popov <valentin@popov.link>
author: Valentin Popov <valentin@popov.link> 2024-01-08 00:21:28 +0300
committer: Valentin Popov <valentin@popov.link> 2024-01-08 00:21:28 +0300
commit: 1b6a04ca5504955c571d1c97504fb45ea0befee4 (patch)
tree: 7579f518b23313e8a9748a88ab6173d5e030b227 /vendor/memchr/src/arch/x86_64
parent: 5ecd8cf2cba827454317368b68571df0d13d7842 (diff)
download: fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.tar.xz
fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.zip
8 files changed, 3288 insertions, 0 deletions
diff --git a/vendor/memchr/src/arch/x86_64/avx2/memchr.rs b/vendor/memchr/src/arch/x86_64/avx2/memchr.rs
new file mode 100644
index 0000000..59f8c7f
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/avx2/memchr.rs
@@ -0,0 +1,1352 @@
+/*!
+This module defines 256-bit vector implementations of `memchr` and friends.
+
+The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
+searching for one, two or three distinct bytes, respectively, in a haystack.
+Each type also has corresponding double ended iterators. These searchers are
+typically much faster than scalar routines accomplishing the same task.
+
+The `One` searcher also provides a [`One::count`] routine for efficiently
+counting the number of times a single byte occurs in a haystack. This is
+useful, for example, for counting the number of lines in a haystack. This
+routine exists because it is usually faster, especially with a high match
+count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its
+`Iterator::count` implementation to use this routine.)
+
+Only one, two and three bytes are supported because three bytes is about
+the point where one sees diminishing returns. Beyond this point and it's
+probably (but not necessarily) better to just use a simple `[bool; 256]` array
+or similar. However, it depends mightily on the specific work-load and the
+expected match frequency.
+*/
+
+use core::arch::x86_64::{__m128i, __m256i};
+
+use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector};
+
+/// Finds all occurrences of a single byte in a haystack.
+#[derive(Clone, Copy, Debug)]
+pub struct One {
+    /// Used for haystacks less than 32 bytes.
+    sse2: generic::One<__m128i>,
+    /// Used for haystacks bigger than 32 bytes.
+    avx2: generic::One<__m256i>,
+}
+
+impl One {
+    /// Create a new searcher that finds occurrences of the needle byte given.
+    ///
+    /// This particular searcher is specialized to use AVX2 vector instructions
+    /// that typically make it quite fast. (SSE2 is used for haystacks that
+    /// are too short to accommodate an AVX2 vector.)
+    ///
+    /// If either SSE2 or AVX2 is unavailable in the current environment, then
+    /// `None` is returned.
+    #[inline]
+    pub fn new(needle: u8) -> Option<One> {
+        if One::is_available() {
+            // SAFETY: we check that sse2 and avx2 are available above.
+            unsafe { Some(One::new_unchecked(needle)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to AVX2 vectors and routines without
+    /// checking that either SSE2 or AVX2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute both `sse2` and
+    /// `avx2` instructions in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to SSE2
+    /// instructions. While this is almost always the case, it isn't true in
+    /// 100% of cases.
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    pub unsafe fn new_unchecked(needle: u8) -> One {
+        One {
+            sse2: generic::One::new(needle),
+            avx2: generic::One::new(needle),
+        }
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`One::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `One::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+        #[cfg(target_feature = "sse2")]
+        {
+            #[cfg(target_feature = "avx2")]
+            {
+                true
+            }
+            #[cfg(not(target_feature = "avx2"))]
+            {
+                #[cfg(feature = "std")]
+                {
+                    std::is_x86_feature_detected!("avx2")
+                }
+                #[cfg(not(feature = "std"))]
+                {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Counts all occurrences of this byte in the given haystack.
+    #[inline]
+    pub fn count(&self, haystack: &[u8]) -> usize {
+        // SAFETY: All of our pointers are derived directly from a borrowed
+        // slice, which is guaranteed to be valid.
+        unsafe {
+            let start = haystack.as_ptr();
+            let end = start.add(haystack.len());
+            self.count_raw(start, end)
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::fwd_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.find_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `One` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        //
+        // Note that we could call `self.avx2.find_raw` directly here. But that
+        // means we'd have to annotate this routine with `target_feature`.
+        // Which is fine, because this routine is `unsafe` anyway and the
+        // `target_feature` obligation is met by virtue of building a `One`.
+        // The real problem is that a routine with a `target_feature`
+        // annotation generally can't be inlined into caller code unless
+        // the caller code has the same target feature annotations. Namely,
+        // the common case (at time of writing) is for calling code to not
+        // have the `avx2` target feature enabled *at compile time*. Without
+        // `target_feature` on this routine, it can be inlined which will
+        // handle some of the short-haystack cases above without touching the
+        // architecture specific code.
+        self.find_raw_avx2(start, end)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::rev_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.rfind_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `One` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        //
+        // See note in forward routine above for why we don't just call
+        // `self.avx2.rfind_raw` directly here.
+        self.rfind_raw_avx2(start, end)
+    }
+
+    /// Counts all occurrences of this byte in the given haystack represented
+    /// by raw pointers.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `0` will always be returned.
+    #[inline]
+    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
+        if start >= end {
+            return 0;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::count_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.count_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `One` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        self.count_raw_avx2(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn find_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.sse2.find_raw(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn rfind_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.sse2.rfind_raw(start, end)
+    }
+
+    /// Execute a count using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::count_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn count_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> usize {
+        self.sse2.count_raw(start, end)
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn find_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.avx2.find_raw(start, end)
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn rfind_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.avx2.rfind_raw(start, end)
+    }
+
+    /// Execute a count using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::count_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn count_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> usize {
+        self.avx2.count_raw(start, end)
+    }
+
+    /// Returns an iterator over all occurrences of the needle byte in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
+        OneIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+}
+
+/// An iterator over all occurrences of a single byte in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`One::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`One`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct OneIter<'a, 'h> {
+    searcher: &'a One,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for OneIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.it.count(|s, e| {
+            // SAFETY: We rely on our generic iterator to return valid start
+            // and end pointers.
+            unsafe { self.searcher.count_raw(s, e) }
+        })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {}
+
+/// Finds all occurrences of two bytes in a haystack.
+///
+/// That is, this reports matches of one of two possible bytes. For example,
+/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
+/// `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub struct Two {
+    /// Used for haystacks less than 32 bytes.
+    sse2: generic::Two<__m128i>,
+    /// Used for haystacks bigger than 32 bytes.
+    avx2: generic::Two<__m256i>,
+}
+
+impl Two {
+    /// Create a new searcher that finds occurrences of the needle bytes given.
+    ///
+    /// This particular searcher is specialized to use AVX2 vector instructions
+    /// that typically make it quite fast. (SSE2 is used for haystacks that
+    /// are too short to accommodate an AVX2 vector.)
+    ///
+    /// If either SSE2 or AVX2 is unavailable in the current environment, then
+    /// `None` is returned.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8) -> Option<Two> {
+        if Two::is_available() {
+            // SAFETY: we check that sse2 and avx2 are available above.
+            unsafe { Some(Two::new_unchecked(needle1, needle2)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to AVX2 vectors and routines without
+    /// checking that either SSE2 or AVX2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute both `sse2` and
+    /// `avx2` instructions in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to SSE2
+    /// instructions. While this is almost always the case, it isn't true in
+    /// 100% of cases.
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two {
+        Two {
+            sse2: generic::Two::new(needle1, needle2),
+            avx2: generic::Two::new(needle1, needle2),
+        }
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Two::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `Two::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+        #[cfg(target_feature = "sse2")]
+        {
+            #[cfg(target_feature = "avx2")]
+            {
+                true
+            }
+            #[cfg(not(target_feature = "avx2"))]
+            {
+                #[cfg(feature = "std")]
+                {
+                    std::is_x86_feature_detected!("avx2")
+                }
+                #[cfg(not(feature = "std"))]
+                {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::fwd_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1() || b == self.sse2.needle2()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.find_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `Two` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        //
+        // Note that we could call `self.avx2.find_raw` directly here. But that
+        // means we'd have to annotate this routine with `target_feature`.
+        // Which is fine, because this routine is `unsafe` anyway and the
+        // `target_feature` obligation is met by virtue of building a `Two`.
+        // The real problem is that a routine with a `target_feature`
+        // annotation generally can't be inlined into caller code unless
+        // the caller code has the same target feature annotations. Namely,
+        // the common case (at time of writing) is for calling code to not
+        // have the `avx2` target feature enabled *at compile time*. Without
+        // `target_feature` on this routine, it can be inlined which will
+        // handle some of the short-haystack cases above without touching the
+        // architecture specific code.
+        self.find_raw_avx2(start, end)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::rev_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1() || b == self.sse2.needle2()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.rfind_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `Two` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        //
+        // See note in forward routine above for why we don't just call
+        // `self.avx2.rfind_raw` directly here.
+        self.rfind_raw_avx2(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn find_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.sse2.find_raw(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn rfind_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.sse2.rfind_raw(start, end)
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn find_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.avx2.find_raw(start, end)
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn rfind_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.avx2.rfind_raw(start, end)
+    }
+
+    /// Returns an iterator over all occurrences of the needle bytes in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
+        TwoIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+}
+
+/// An iterator over all occurrences of two possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`Two::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct TwoIter<'a, 'h> {
+    searcher: &'a Two,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {}
+
+/// Finds all occurrences of three bytes in a haystack.
+///
+/// That is, this reports matches of one of three possible bytes. For example,
+/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
+/// `0`, `2`, `3`, `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub struct Three {
+    /// Used for haystacks less than 32 bytes.
+    sse2: generic::Three<__m128i>,
+    /// Used for haystacks bigger than 32 bytes.
+    avx2: generic::Three<__m256i>,
+}
+
+impl Three {
+    /// Create a new searcher that finds occurrences of the needle bytes given.
+    ///
+    /// This particular searcher is specialized to use AVX2 vector instructions
+    /// that typically make it quite fast. (SSE2 is used for haystacks that
+    /// are too short to accommodate an AVX2 vector.)
+    ///
+    /// If either SSE2 or AVX2 is unavailable in the current environment, then
+    /// `None` is returned.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> {
+        if Three::is_available() {
+            // SAFETY: we check that sse2 and avx2 are available above.
+            unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to AVX2 vectors and routines without
+    /// checking that either SSE2 or AVX2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute both `sse2` and
+    /// `avx2` instructions in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to SSE2
+    /// instructions. While this is almost always the case, it isn't true in
+    /// 100% of cases.
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    pub unsafe fn new_unchecked(
+        needle1: u8,
+        needle2: u8,
+        needle3: u8,
+    ) -> Three {
+        Three {
+            sse2: generic::Three::new(needle1, needle2, needle3),
+            avx2: generic::Three::new(needle1, needle2, needle3),
+        }
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Three::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `Three::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+        #[cfg(target_feature = "sse2")]
+        {
+            #[cfg(target_feature = "avx2")]
+            {
+                true
+            }
+            #[cfg(not(target_feature = "avx2"))]
+            {
+                #[cfg(feature = "std")]
+                {
+                    std::is_x86_feature_detected!("avx2")
+                }
+                #[cfg(not(feature = "std"))]
+                {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::fwd_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1()
+                        || b == self.sse2.needle2()
+                        || b == self.sse2.needle3()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.find_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `Three` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        //
+        // Note that we could call `self.avx2.find_raw` directly here. But that
+        // means we'd have to annotate this routine with `target_feature`.
+        // Which is fine, because this routine is `unsafe` anyway and the
+        // `target_feature` obligation is met by virtue of building a `Three`.
+        // The real problem is that a routine with a `target_feature`
+        // annotation generally can't be inlined into caller code unless
+        // the caller code has the same target feature annotations. Namely,
+        // the common case (at time of writing) is for calling code to not
+        // have the `avx2` target feature enabled *at compile time*. Without
+        // `target_feature` on this routine, it can be inlined which will
+        // handle some of the short-haystack cases above without touching the
+        // architecture specific code.
+        self.find_raw_avx2(start, end)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        let len = end.distance(start);
+        if len < __m256i::BYTES {
+            return if len < __m128i::BYTES {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                generic::rev_byte_by_byte(start, end, |b| {
+                    b == self.sse2.needle1()
+                        || b == self.sse2.needle2()
+                        || b == self.sse2.needle3()
+                })
+            } else {
+                // SAFETY: We require the caller to pass valid start/end
+                // pointers.
+                self.rfind_raw_sse2(start, end)
+            };
+        }
+        // SAFETY: Building a `Three` means it's safe to call both 'sse2' and
+        // 'avx2' routines. Also, we've checked that our haystack is big
+        // enough to run on the vector routine. Pointer validity is caller's
+        // responsibility.
+        //
+        // See note in forward routine above for why we don't just call
+        // `self.avx2.rfind_raw` directly here.
+        self.rfind_raw_avx2(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Three::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Three`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn find_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.sse2.find_raw(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Three::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Three`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn rfind_raw_sse2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.sse2.rfind_raw(start, end)
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Three::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Three`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn find_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.avx2.find_raw(start, end)
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Three::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an AVX2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Three`, which can only be constructed
+    /// when it is safe to call `sse2`/`avx2` routines.)
+    #[target_feature(enable = "avx2")]
+    #[inline]
+    unsafe fn rfind_raw_avx2(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.avx2.rfind_raw(start, end)
+    }
+
+    /// Returns an iterator over all occurrences of the needle bytes in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
+        ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+}
+
+/// An iterator over all occurrences of three possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`Three::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct ThreeIter<'a, 'h> {
+    searcher: &'a Three,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    define_memchr_quickcheck!(super);
+
+    #[test]
+    fn forward_one() {
+        crate::tests::memchr::Runner::new(1).forward_iter(
+            |haystack, needles| {
+                Some(One::new(needles[0])?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_one() {
+        crate::tests::memchr::Runner::new(1).reverse_iter(
+            |haystack, needles| {
+                Some(One::new(needles[0])?.iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    #[test]
+    fn count_one() {
+        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
+            Some(One::new(needles[0])?.iter(haystack).count())
+        })
+    }
+
+    #[test]
+    fn forward_two() {
+        crate::tests::memchr::Runner::new(2).forward_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                Some(Two::new(n1, n2)?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_two() {
+        crate::tests::memchr::Runner::new(2).reverse_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                Some(Two::new(n1, n2)?.iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    #[test]
+    fn forward_three() {
+        crate::tests::memchr::Runner::new(3).forward_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                let n3 = needles.get(2).copied()?;
+                Some(Three::new(n1, n2, n3)?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_three() {
+        crate::tests::memchr::Runner::new(3).reverse_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                let n3 = needles.get(2).copied()?;
+                Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect())
+            },
+        )
+    }
+}
diff --git a/vendor/memchr/src/arch/x86_64/avx2/mod.rs b/vendor/memchr/src/arch/x86_64/avx2/mod.rs
new file mode 100644
index 0000000..ee4097d
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/avx2/mod.rs
@@ -0,0 +1,6 @@
+/*!
+Algorithms for the `x86_64` target using 256-bit vectors via AVX2.
+*/
+
+pub mod memchr;
+pub mod packedpair;
diff --git a/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs b/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs
new file mode 100644
index 0000000..efae7b6
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs
@@ -0,0 +1,272 @@
+/*!
+A 256-bit vector implementation of the "packed pair" SIMD algorithm.
+
+The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
+difference is that it (by default) uses a background distribution of byte
+frequencies to heuristically select the pair of bytes to search for.
+
+[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
+*/
+
+use core::arch::x86_64::{__m128i, __m256i};
+
+use crate::arch::{all::packedpair::Pair, generic::packedpair};
+
+/// A "packed pair" finder that uses 256-bit vector operations.
+///
+/// This finder picks two bytes that it believes have high predictive power
+/// for indicating an overall match of a needle. Depending on whether
+/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
+/// where the needle matches or could match. In the prefilter case, candidates
+/// are reported whenever the [`Pair`] of bytes given matches.
+#[derive(Clone, Copy, Debug)]
+pub struct Finder {
+    sse2: packedpair::Finder<__m128i>,
+    avx2: packedpair::Finder<__m256i>,
+}
+
+impl Finder {
+    /// Create a new pair searcher. The searcher returned can either report
+    /// exact matches of `needle` or act as a prefilter and report candidate
+    /// positions of `needle`.
+    ///
+    /// If AVX2 is unavailable in the current environment or if a [`Pair`]
+    /// could not be constructed from the needle given, then `None` is
+    /// returned.
+    #[inline]
+    pub fn new(needle: &[u8]) -> Option<Finder> {
+        Finder::with_pair(needle, Pair::new(needle)?)
+    }
+
+    /// Create a new "packed pair" finder using the pair of bytes given.
+    ///
+    /// This constructor permits callers to control precisely which pair of
+    /// bytes is used as a predicate.
+    ///
+    /// If AVX2 is unavailable in the current environment, then `None` is
+    /// returned.
+    #[inline]
+    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
+        if Finder::is_available() {
+            // SAFETY: we check that sse2/avx2 is available above. We are also
+            // guaranteed to have needle.len() > 1 because we have a valid
+            // Pair.
+            unsafe { Some(Finder::with_pair_impl(needle, pair)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new `Finder` specific to SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as the safety for `packedpair::Finder::new`, and callers must also
+    /// ensure that both SSE2 and AVX2 are available.
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
+        let sse2 = packedpair::Finder::<__m128i>::new(needle, pair);
+        let avx2 = packedpair::Finder::<__m256i>::new(needle, pair);
+        Finder { sse2, avx2 }
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Finder::with_pair`] will
+    /// return a `Some` value. Similarly, when it is false, it is guaranteed
+    /// that `Finder::with_pair` will return a `None` value. Notice that this
+    /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely,
+    /// even when `Finder::is_available` is true, it is not guaranteed that a
+    /// valid [`Pair`] can be found from the needle given.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+        #[cfg(target_feature = "sse2")]
+        {
+            #[cfg(target_feature = "avx2")]
+            {
+                true
+            }
+            #[cfg(not(target_feature = "avx2"))]
+            {
+                #[cfg(feature = "std")]
+                {
+                    std::is_x86_feature_detected!("avx2")
+                }
+                #[cfg(not(feature = "std"))]
+                {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    #[inline]
+    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
+        // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
+        unsafe { self.find_impl(haystack, needle) }
+    }
+
+    /// Run this finder on the given haystack as a prefilter.
+    ///
+    /// If a candidate match is found, then an offset where the needle *could*
+    /// begin in the haystack is returned.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    #[inline]
+    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
+        unsafe { self.find_prefilter_impl(haystack) }
+    }
+
+    /// Execute a search using AVX2 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    ///
+    /// # Safety
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Finder`, which can only be constructed
+    /// when it is safe to call `sse2` and `avx2` routines.)
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    unsafe fn find_impl(
+        &self,
+        haystack: &[u8],
+        needle: &[u8],
+    ) -> Option<usize> {
+        if haystack.len() < self.avx2.min_haystack_len() {
+            self.sse2.find(haystack, needle)
+        } else {
+            self.avx2.find(haystack, needle)
+        }
+    }
+
+    /// Execute a prefilter search using AVX2 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    ///
+    /// # Safety
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Finder`, which can only be constructed
+    /// when it is safe to call `sse2` and `avx2` routines.)
+    #[target_feature(enable = "sse2", enable = "avx2")]
+    #[inline]
+    unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
+        if haystack.len() < self.avx2.min_haystack_len() {
+            self.sse2.find_prefilter(haystack)
+        } else {
+            self.avx2.find_prefilter(haystack)
+        }
+    }
+
+    /// Returns the pair of offsets (into the needle) used to check as a
+    /// predicate before confirming whether a needle exists at a particular
+    /// position.
+    #[inline]
+    pub fn pair(&self) -> &Pair {
+        self.avx2.pair()
+    }
+
+    /// Returns the minimum haystack length that this `Finder` can search.
+    ///
+    /// Using a haystack with length smaller than this in a search will result
+    /// in a panic. The reason for this restriction is that this finder is
+    /// meant to be a low-level component that is part of a larger substring
+    /// strategy. In that sense, it avoids trying to handle all cases and
+    /// instead only handles the cases that it can handle very well.
+    #[inline]
+    pub fn min_haystack_len(&self) -> usize {
+        // The caller doesn't need to care about AVX2's min_haystack_len
+        // since this implementation will automatically switch to the SSE2
+        // implementation if the haystack is too short for AVX2. Therefore, the
+        // caller only needs to care about SSE2's min_haystack_len.
+        //
+        // This does assume that SSE2's min_haystack_len is less than or
+        // equal to AVX2's min_haystack_len. In practice, this is true and
+        // there is no way it could be false based on how this Finder is
+        // implemented. Namely, both SSE2 and AVX2 use the same `Pair`. If
+        // they used different pairs, then it's possible (although perhaps
+        // pathological) for SSE2's min_haystack_len to be bigger than AVX2's.
+        self.sse2.min_haystack_len()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
+        let f = Finder::new(needle)?;
+        if haystack.len() < f.min_haystack_len() {
+            return None;
+        }
+        Some(f.find(haystack, needle))
+    }
+
+    define_substring_forward_quickcheck!(find);
+
+    #[test]
+    fn forward_substring() {
+        crate::tests::substring::Runner::new().fwd(find).run()
+    }
+
+    #[test]
+    fn forward_packedpair() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            index1: u8,
+            index2: u8,
+        ) -> Option<Option<usize>> {
+            let pair = Pair::with_indices(needle, index1, index2)?;
+            let f = Finder::with_pair(needle, pair)?;
+            if haystack.len() < f.min_haystack_len() {
+                return None;
+            }
+            Some(f.find(haystack, needle))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+
+    #[test]
+    fn forward_packedpair_prefilter() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            index1: u8,
+            index2: u8,
+        ) -> Option<Option<usize>> {
+            if !cfg!(target_feature = "sse2") {
+                return None;
+            }
+            let pair = Pair::with_indices(needle, index1, index2)?;
+            let f = Finder::with_pair(needle, pair)?;
+            if haystack.len() < f.min_haystack_len() {
+                return None;
+            }
+            Some(f.find_prefilter(haystack))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+}
diff --git a/vendor/memchr/src/arch/x86_64/memchr.rs b/vendor/memchr/src/arch/x86_64/memchr.rs
new file mode 100644
index 0000000..fcb1399
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/memchr.rs
@@ -0,0 +1,335 @@
+/*!
+Wrapper routines for `memchr` and friends.
+
+These routines efficiently dispatch to the best implementation based on what
+the CPU supports.
+*/
+
+/// Provides a way to run a memchr-like function while amortizing the cost of
+/// runtime CPU feature detection.
+///
+/// This works by loading a function pointer from an atomic global. Initially,
+/// this global is set to a function that does CPU feature detection. For
+/// example, if AVX2 is enabled, then the AVX2 implementation is used.
+/// Otherwise, at least on x86_64, the SSE2 implementation is used. (And
+/// in some niche cases, if SSE2 isn't available, then the architecture
+/// independent fallback implementation is used.)
+///
+/// After the first call to this function, the atomic global is replaced with
+/// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then
+/// will directly call the chosen routine instead of needing to go through the
+/// CPU feature detection branching again.
+///
+/// This particular macro is specifically written to provide the implementation
+/// of functions with the following signature:
+///
+/// ```ignore
+/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<usize>;
+/// ```
+///
+/// Where you can also have `memchr2` and `memchr3`, but with `needle2` and
+/// `needle3`, respectively. The `start` and `end` parameters correspond to the
+/// start and end of the haystack, respectively.
+///
+/// We use raw pointers here instead of the more obvious `haystack: &[u8]` so
+/// that the function is compatible with our lower level iterator logic that
+/// operates on raw pointers. We use this macro to implement "raw" memchr
+/// routines with the signature above, and then define memchr routines using
+/// regular slices on top of them.
+///
+/// Note that we use `#[cfg(target_feature = "sse2")]` below even though
+/// it shouldn't be strictly necessary because without it, it seems to
+/// cause the compiler to blow up. I guess it can't handle a function
+/// pointer being created with a sse target feature? Dunno. See the
+/// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with
+/// this.
+///
+/// # Safety
+///
+/// Primarily callers must that `$fnty` is a correct function pointer type and
+/// not something else.
+///
+/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a
+/// routine that returns a valid function pointer when a match is found. That
+/// is, a pointer that is `>= start` and `< end`.
+///
+/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers
+/// correspond to valid pointers.
+macro_rules! unsafe_ifunc {
+    (
+        $memchrty:ident,
+        $memchrfind:ident,
+        $fnty:ty,
+        $retty:ty,
+        $hay_start:ident,
+        $hay_end:ident,
+        $($needle:ident),+
+    ) => {{
+        #![allow(unused_unsafe)]
+
+        use core::sync::atomic::{AtomicPtr, Ordering};
+
+        type Fn = *mut ();
+        type RealFn = $fnty;
+        static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);
+
+        #[cfg(target_feature = "sse2")]
+        #[target_feature(enable = "sse2", enable = "avx2")]
+        unsafe fn find_avx2(
+            $($needle: u8),+,
+            $hay_start: *const u8,
+            $hay_end: *const u8,
+        ) -> $retty {
+            use crate::arch::x86_64::avx2::memchr::$memchrty;
+            $memchrty::new_unchecked($($needle),+)
+                .$memchrfind($hay_start, $hay_end)
+        }
+
+        #[cfg(target_feature = "sse2")]
+        #[target_feature(enable = "sse2")]
+        unsafe fn find_sse2(
+            $($needle: u8),+,
+            $hay_start: *const u8,
+            $hay_end: *const u8,
+        ) -> $retty {
+            use crate::arch::x86_64::sse2::memchr::$memchrty;
+            $memchrty::new_unchecked($($needle),+)
+                .$memchrfind($hay_start, $hay_end)
+        }
+
+        unsafe fn find_fallback(
+            $($needle: u8),+,
+            $hay_start: *const u8,
+            $hay_end: *const u8,
+        ) -> $retty {
+            use crate::arch::all::memchr::$memchrty;
+            $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end)
+        }
+
+        unsafe fn detect(
+            $($needle: u8),+,
+            $hay_start: *const u8,
+            $hay_end: *const u8,
+        ) -> $retty {
+            let fun = {
+                #[cfg(not(target_feature = "sse2"))]
+                {
+                    debug!(
+                        "no sse2 feature available, using fallback for {}",
+                        stringify!($memchrty),
+                    );
+                    find_fallback as RealFn
+                }
+                #[cfg(target_feature = "sse2")]
+                {
+                    use crate::arch::x86_64::{sse2, avx2};
+                    if avx2::memchr::$memchrty::is_available() {
+                        debug!("chose AVX2 for {}", stringify!($memchrty));
+                        find_avx2 as RealFn
+                    } else if sse2::memchr::$memchrty::is_available() {
+                        debug!("chose SSE2 for {}", stringify!($memchrty));
+                        find_sse2 as RealFn
+                    } else {
+                        debug!("chose fallback for {}", stringify!($memchrty));
+                        find_fallback as RealFn
+                    }
+                }
+            };
+            FN.store(fun as Fn, Ordering::Relaxed);
+            // SAFETY: The only thing we need to uphold here is the
+            // `#[target_feature]` requirements. Since we check is_available
+            // above before using the corresponding implementation, we are
+            // guaranteed to only call code that is supported on the current
+            // CPU.
+            fun($($needle),+, $hay_start, $hay_end)
+        }
+
+        // SAFETY: By virtue of the caller contract, RealFn is a function
+        // pointer, which is always safe to transmute with a *mut (). Also,
+        // since we use $memchrty::is_available, it is guaranteed to be safe
+        // to call $memchrty::$memchrfind.
+        unsafe {
+            let fun = FN.load(Ordering::Relaxed);
+            core::mem::transmute::<Fn, RealFn>(fun)(
+                $($needle),+,
+                $hay_start,
+                $hay_end,
+            )
+        }
+    }};
+}
+
+// The routines below dispatch to AVX2, SSE2 or a fallback routine based on
+// what's available in the current environment. The secret sauce here is that
+// we only check for which one to use approximately once, and then "cache" that
+// choice into a global function pointer. Subsequent invocations then just call
+// the appropriate function directly.
+
+/// memchr, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `One::find_raw`.
+#[inline(always)]
+pub(crate) fn memchr_raw(
+    n1: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        One,
+        find_raw,
+        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1
+    )
+}
+
+/// memrchr, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `One::rfind_raw`.
+#[inline(always)]
+pub(crate) fn memrchr_raw(
+    n1: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        One,
+        rfind_raw,
+        unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1
+    )
+}
+
+/// memchr2, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `Two::find_raw`.
+#[inline(always)]
+pub(crate) fn memchr2_raw(
+    n1: u8,
+    n2: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        Two,
+        find_raw,
+        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1,
+        n2
+    )
+}
+
+/// memrchr2, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `Two::rfind_raw`.
+#[inline(always)]
+pub(crate) fn memrchr2_raw(
+    n1: u8,
+    n2: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        Two,
+        rfind_raw,
+        unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1,
+        n2
+    )
+}
+
+/// memchr3, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `Three::find_raw`.
+#[inline(always)]
+pub(crate) fn memchr3_raw(
+    n1: u8,
+    n2: u8,
+    n3: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        Three,
+        find_raw,
+        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1,
+        n2,
+        n3
+    )
+}
+
+/// memrchr3, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `Three::rfind_raw`.
+#[inline(always)]
+pub(crate) fn memrchr3_raw(
+    n1: u8,
+    n2: u8,
+    n3: u8,
+    start: *const u8,
+    end: *const u8,
+) -> Option<*const u8> {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        Three,
+        rfind_raw,
+        unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
+        Option<*const u8>,
+        start,
+        end,
+        n1,
+        n2,
+        n3
+    )
+}
+
+/// Count all matching bytes, but using raw pointers to represent the haystack.
+///
+/// # Safety
+///
+/// Pointers must be valid. See `One::count_raw`.
+#[inline(always)]
+pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize {
+    // SAFETY: We provide a valid function pointer type.
+    unsafe_ifunc!(
+        One,
+        count_raw,
+        unsafe fn(u8, *const u8, *const u8) -> usize,
+        usize,
+        start,
+        end,
+        n1
+    )
+}
diff --git a/vendor/memchr/src/arch/x86_64/mod.rs b/vendor/memchr/src/arch/x86_64/mod.rs
new file mode 100644
index 0000000..5dad721
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/mod.rs
@@ -0,0 +1,8 @@
+/*!
+Vector algorithms for the `x86_64` target.
+*/
+
+pub mod avx2;
+pub mod sse2;
+
+pub(crate) mod memchr;
diff --git a/vendor/memchr/src/arch/x86_64/sse2/memchr.rs b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs
new file mode 100644
index 0000000..c6f75df
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs
@@ -0,0 +1,1077 @@
+/*!
+This module defines 128-bit vector implementations of `memchr` and friends.
+
+The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
+searching for one, two or three distinct bytes, respectively, in a haystack.
+Each type also has corresponding double ended iterators. These searchers are
+typically much faster than scalar routines accomplishing the same task.
+
+The `One` searcher also provides a [`One::count`] routine for efficiently
+counting the number of times a single byte occurs in a haystack. This is
+useful, for example, for counting the number of lines in a haystack. This
+routine exists because it is usually faster, especially with a high match
+count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its
+`Iterator::count` implementation to use this routine.)
+
+Only one, two and three bytes are supported because three bytes is about
+the point where one sees diminishing returns. Beyond this point and it's
+probably (but not necessarily) better to just use a simple `[bool; 256]` array
+or similar. However, it depends mightily on the specific work-load and the
+expected match frequency.
+*/
+
+use core::arch::x86_64::__m128i;
+
+use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector};
+
+/// Finds all occurrences of a single byte in a haystack.
+#[derive(Clone, Copy, Debug)]
+pub struct One(generic::One<__m128i>);
+
+impl One {
+    /// Create a new searcher that finds occurrences of the needle byte given.
+    ///
+    /// This particular searcher is specialized to use SSE2 vector instructions
+    /// that typically make it quite fast.
+    ///
+    /// If SSE2 is unavailable in the current environment, then `None` is
+    /// returned.
+    #[inline]
+    pub fn new(needle: u8) -> Option<One> {
+        if One::is_available() {
+            // SAFETY: we check that sse2 is available above.
+            unsafe { Some(One::new_unchecked(needle)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to SSE2 vectors and routines without
+    /// checking that SSE2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute `sse2` instructions
+    /// in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to SSE2
+    /// instructions. While this is almost always the case, it isn't true in
+    /// 100% of cases.
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    pub unsafe fn new_unchecked(needle: u8) -> One {
+        One(generic::One::new(needle))
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`One::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `One::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(target_feature = "sse2")]
+        {
+            true
+        }
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Counts all occurrences of this byte in the given haystack.
+    #[inline]
+    pub fn count(&self, haystack: &[u8]) -> usize {
+        // SAFETY: All of our pointers are derived directly from a borrowed
+        // slice, which is guaranteed to be valid.
+        unsafe {
+            let start = haystack.as_ptr();
+            let end = start.add(haystack.len());
+            self.count_raw(start, end)
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        if end.distance(start) < __m128i::BYTES {
+            // SAFETY: We require the caller to pass valid start/end pointers.
+            return generic::fwd_byte_by_byte(start, end, |b| {
+                b == self.0.needle1()
+            });
+        }
+        // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
+        // Also, we've checked that our haystack is big enough to run on the
+        // vector routine. Pointer validity is caller's responsibility.
+        //
+        // Note that we could call `self.0.find_raw` directly here. But that
+        // means we'd have to annotate this routine with `target_feature`.
+        // Which is fine, because this routine is `unsafe` anyway and the
+        // `target_feature` obligation is met by virtue of building a `One`.
+        // The real problem is that a routine with a `target_feature`
+        // annotation generally can't be inlined into caller code unless the
+        // caller code has the same target feature annotations. Which is maybe
+        // okay for SSE2, but we do the same thing for AVX2 where caller code
+        // probably usually doesn't have AVX2 enabled. That means that this
+        // routine can be inlined which will handle some of the short-haystack
+        // cases above without touching the architecture specific code.
+        self.find_raw_impl(start, end)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        if end.distance(start) < __m128i::BYTES {
+            // SAFETY: We require the caller to pass valid start/end pointers.
+            return generic::rev_byte_by_byte(start, end, |b| {
+                b == self.0.needle1()
+            });
+        }
+        // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
+        // Also, we've checked that our haystack is big enough to run on the
+        // vector routine. Pointer validity is caller's responsibility.
+        //
+        // See note in forward routine above for why we don't just call
+        // `self.0.rfind_raw` directly here.
+        self.rfind_raw_impl(start, end)
+    }
+
+    /// Counts all occurrences of this byte in the given haystack represented
+    /// by raw pointers.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `0` will always be returned.
+    #[inline]
+    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
+        if start >= end {
+            return 0;
+        }
+        if end.distance(start) < __m128i::BYTES {
+            // SAFETY: We require the caller to pass valid start/end pointers.
+            return generic::count_byte_by_byte(start, end, |b| {
+                b == self.0.needle1()
+            });
+        }
+        // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
+        // Also, we've checked that our haystack is big enough to run on the
+        // vector routine. Pointer validity is caller's responsibility.
+        self.count_raw_impl(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn find_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.0.find_raw(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn rfind_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.0.rfind_raw(start, end)
+    }
+
+    /// Execute a count using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`One::count_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `One`, which can only be constructed
+    /// when it is safe to call `sse2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn count_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> usize {
+        self.0.count_raw(start, end)
+    }
+
+    /// Returns an iterator over all occurrences of the needle byte in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
+        OneIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+}
+
+/// An iterator over all occurrences of a single byte in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`One::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`One`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct OneIter<'a, 'h> {
+    searcher: &'a One,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for OneIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.it.count(|s, e| {
+            // SAFETY: We rely on our generic iterator to return valid start
+            // and end pointers.
+            unsafe { self.searcher.count_raw(s, e) }
+        })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {}
+
+/// Finds all occurrences of two bytes in a haystack.
+///
+/// That is, this reports matches of one of two possible bytes. For example,
+/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
+/// `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub struct Two(generic::Two<__m128i>);
+
+impl Two {
+    /// Create a new searcher that finds occurrences of the needle bytes given.
+    ///
+    /// This particular searcher is specialized to use SSE2 vector instructions
+    /// that typically make it quite fast.
+    ///
+    /// If SSE2 is unavailable in the current environment, then `None` is
+    /// returned.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8) -> Option<Two> {
+        if Two::is_available() {
+            // SAFETY: we check that sse2 is available above.
+            unsafe { Some(Two::new_unchecked(needle1, needle2)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to SSE2 vectors and routines without
+    /// checking that SSE2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute `sse2` instructions
+    /// in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to SSE2
+    /// instructions. While this is almost always the case, it isn't true in
+    /// 100% of cases.
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two {
+        Two(generic::Two::new(needle1, needle2))
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Two::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `Two::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(target_feature = "sse2")]
+        {
+            true
+        }
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        if end.distance(start) < __m128i::BYTES {
+            // SAFETY: We require the caller to pass valid start/end pointers.
+            return generic::fwd_byte_by_byte(start, end, |b| {
+                b == self.0.needle1() || b == self.0.needle2()
+            });
+        }
+        // SAFETY: Building a `Two` means it's safe to call 'sse2' routines.
+        // Also, we've checked that our haystack is big enough to run on the
+        // vector routine. Pointer validity is caller's responsibility.
+        //
+        // Note that we could call `self.0.find_raw` directly here. But that
+        // means we'd have to annotate this routine with `target_feature`.
+        // Which is fine, because this routine is `unsafe` anyway and the
+        // `target_feature` obligation is met by virtue of building a `Two`.
+        // The real problem is that a routine with a `target_feature`
+        // annotation generally can't be inlined into caller code unless the
+        // caller code has the same target feature annotations. Which is maybe
+        // okay for SSE2, but we do the same thing for AVX2 where caller code
+        // probably usually doesn't have AVX2 enabled. That means that this
+        // routine can be inlined which will handle some of the short-haystack
+        // cases above without touching the architecture specific code.
+        self.find_raw_impl(start, end)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        if end.distance(start) < __m128i::BYTES {
+            // SAFETY: We require the caller to pass valid start/end pointers.
+            return generic::rev_byte_by_byte(start, end, |b| {
+                b == self.0.needle1() || b == self.0.needle2()
+            });
+        }
+        // SAFETY: Building a `Two` means it's safe to call 'sse2' routines.
+        // Also, we've checked that our haystack is big enough to run on the
+        // vector routine. Pointer validity is caller's responsibility.
+        //
+        // See note in forward routine above for why we don't just call
+        // `self.0.rfind_raw` directly here.
+        self.rfind_raw_impl(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `sse2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn find_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.0.find_raw(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Two::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Two`, which can only be constructed
+    /// when it is safe to call `sse2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn rfind_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.0.rfind_raw(start, end)
+    }
+
+    /// Returns an iterator over all occurrences of the needle bytes in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
+        TwoIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+}
+
+/// An iterator over all occurrences of two possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`Two::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct TwoIter<'a, 'h> {
+    searcher: &'a Two,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {}
+
+/// Finds all occurrences of three bytes in a haystack.
+///
+/// That is, this reports matches of one of three possible bytes. For example,
+/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
+/// `0`, `2`, `3`, `4` and `5`.
+#[derive(Clone, Copy, Debug)]
+pub struct Three(generic::Three<__m128i>);
+
+impl Three {
+    /// Create a new searcher that finds occurrences of the needle bytes given.
+    ///
+    /// This particular searcher is specialized to use SSE2 vector instructions
+    /// that typically make it quite fast.
+    ///
+    /// If SSE2 is unavailable in the current environment, then `None` is
+    /// returned.
+    #[inline]
+    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> {
+        if Three::is_available() {
+            // SAFETY: we check that sse2 is available above.
+            unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new finder specific to SSE2 vectors and routines without
+    /// checking that SSE2 is available.
+    ///
+    /// # Safety
+    ///
+    /// Callers must guarantee that it is safe to execute `sse2` instructions
+    /// in the current environment.
+    ///
+    /// Note that it is a common misconception that if one compiles for an
+    /// `x86_64` target, then they therefore automatically have access to SSE2
+    /// instructions. While this is almost always the case, it isn't true in
+    /// 100% of cases.
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    pub unsafe fn new_unchecked(
+        needle1: u8,
+        needle2: u8,
+        needle3: u8,
+    ) -> Three {
+        Three(generic::Three::new(needle1, needle2, needle3))
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Three::new`] will return
+    /// a `Some` value. Similarly, when it is false, it is guaranteed that
+    /// `Three::new` will return a `None` value.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(target_feature = "sse2")]
+        {
+            true
+        }
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+    }
+
+    /// Return the first occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.find_raw(s, e)
+            })
+        }
+    }
+
+    /// Return the last occurrence of one of the needle bytes in the given
+    /// haystack. If no such occurrence exists, then `None` is returned.
+    ///
+    /// The occurrence is reported as an offset into `haystack`. Its maximum
+    /// value is `haystack.len() - 1`.
+    #[inline]
+    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
+        // falls within the bounds of the start and end pointers.
+        unsafe {
+            generic::search_slice_with_raw(haystack, |s, e| {
+                self.rfind_raw(s, e)
+            })
+        }
+    }
+
+    /// Like `find`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn find_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        if end.distance(start) < __m128i::BYTES {
+            // SAFETY: We require the caller to pass valid start/end pointers.
+            return generic::fwd_byte_by_byte(start, end, |b| {
+                b == self.0.needle1()
+                    || b == self.0.needle2()
+                    || b == self.0.needle3()
+            });
+        }
+        // SAFETY: Building a `Three` means it's safe to call 'sse2' routines.
+        // Also, we've checked that our haystack is big enough to run on the
+        // vector routine. Pointer validity is caller's responsibility.
+        //
+        // Note that we could call `self.0.find_raw` directly here. But that
+        // means we'd have to annotate this routine with `target_feature`.
+        // Which is fine, because this routine is `unsafe` anyway and the
+        // `target_feature` obligation is met by virtue of building a `Three`.
+        // The real problem is that a routine with a `target_feature`
+        // annotation generally can't be inlined into caller code unless the
+        // caller code has the same target feature annotations. Which is maybe
+        // okay for SSE2, but we do the same thing for AVX2 where caller code
+        // probably usually doesn't have AVX2 enabled. That means that this
+        // routine can be inlined which will handle some of the short-haystack
+        // cases above without touching the architecture specific code.
+        self.find_raw_impl(start, end)
+    }
+
+    /// Like `rfind`, but accepts and returns raw pointers.
+    ///
+    /// When a match is found, the pointer returned is guaranteed to be
+    /// `>= start` and `< end`.
+    ///
+    /// This routine is useful if you're already using raw pointers and would
+    /// like to avoid converting back to a slice before executing a search.
+    ///
+    /// # Safety
+    ///
+    /// * Both `start` and `end` must be valid for reads.
+    /// * Both `start` and `end` must point to an initialized value.
+    /// * Both `start` and `end` must point to the same allocated object and
+    /// must either be in bounds or at most one byte past the end of the
+    /// allocated object.
+    /// * Both `start` and `end` must be _derived from_ a pointer to the same
+    /// object.
+    /// * The distance between `start` and `end` must not overflow `isize`.
+    /// * The distance being in bounds must not rely on "wrapping around" the
+    /// address space.
+    ///
+    /// Note that callers may pass a pair of pointers such that `start >= end`.
+    /// In that case, `None` will always be returned.
+    #[inline]
+    pub unsafe fn rfind_raw(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        if start >= end {
+            return None;
+        }
+        if end.distance(start) < __m128i::BYTES {
+            // SAFETY: We require the caller to pass valid start/end pointers.
+            return generic::rev_byte_by_byte(start, end, |b| {
+                b == self.0.needle1()
+                    || b == self.0.needle2()
+                    || b == self.0.needle3()
+            });
+        }
+        // SAFETY: Building a `Three` means it's safe to call 'sse2' routines.
+        // Also, we've checked that our haystack is big enough to run on the
+        // vector routine. Pointer validity is caller's responsibility.
+        //
+        // See note in forward routine above for why we don't just call
+        // `self.0.rfind_raw` directly here.
+        self.rfind_raw_impl(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Three::find_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Three`, which can only be constructed
+    /// when it is safe to call `sse2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn find_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.0.find_raw(start, end)
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as [`Three::rfind_raw`], except the distance between `start` and
+    /// `end` must be at least the size of an SSE2 vector (in bytes).
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Three`, which can only be constructed
+    /// when it is safe to call `sse2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn rfind_raw_impl(
+        &self,
+        start: *const u8,
+        end: *const u8,
+    ) -> Option<*const u8> {
+        self.0.rfind_raw(start, end)
+    }
+
+    /// Returns an iterator over all occurrences of the needle byte in the
+    /// given haystack.
+    ///
+    /// The iterator returned implements `DoubleEndedIterator`. This means it
+    /// can also be used to find occurrences in reverse order.
+    #[inline]
+    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
+        ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
+    }
+}
+
+/// An iterator over all occurrences of three possible bytes in a haystack.
+///
+/// This iterator implements `DoubleEndedIterator`, which means it can also be
+/// used to find occurrences in reverse order.
+///
+/// This iterator is created by the [`Three::iter`] method.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
+/// * `'h` refers to the lifetime of the haystack being searched.
+#[derive(Clone, Debug)]
+pub struct ThreeIter<'a, 'h> {
+    searcher: &'a Three,
+    it: generic::Iter<'h>,
+}
+
+impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
+    type Item = usize;
+
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'find_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
+    #[inline]
+    fn next_back(&mut self) -> Option<usize> {
+        // SAFETY: We rely on the generic iterator to provide valid start
+        // and end pointers, but we guarantee that any pointer returned by
+        // 'rfind_raw' falls within the bounds of the start and end pointer.
+        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
+    }
+}
+
+impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    define_memchr_quickcheck!(super);
+
+    #[test]
+    fn forward_one() {
+        crate::tests::memchr::Runner::new(1).forward_iter(
+            |haystack, needles| {
+                Some(One::new(needles[0])?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_one() {
+        crate::tests::memchr::Runner::new(1).reverse_iter(
+            |haystack, needles| {
+                Some(One::new(needles[0])?.iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    #[test]
+    fn count_one() {
+        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
+            Some(One::new(needles[0])?.iter(haystack).count())
+        })
+    }
+
+    #[test]
+    fn forward_two() {
+        crate::tests::memchr::Runner::new(2).forward_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                Some(Two::new(n1, n2)?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_two() {
+        crate::tests::memchr::Runner::new(2).reverse_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                Some(Two::new(n1, n2)?.iter(haystack).rev().collect())
+            },
+        )
+    }
+
+    #[test]
+    fn forward_three() {
+        crate::tests::memchr::Runner::new(3).forward_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                let n3 = needles.get(2).copied()?;
+                Some(Three::new(n1, n2, n3)?.iter(haystack).collect())
+            },
+        )
+    }
+
+    #[test]
+    fn reverse_three() {
+        crate::tests::memchr::Runner::new(3).reverse_iter(
+            |haystack, needles| {
+                let n1 = needles.get(0).copied()?;
+                let n2 = needles.get(1).copied()?;
+                let n3 = needles.get(2).copied()?;
+                Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect())
+            },
+        )
+    }
+}
diff --git a/vendor/memchr/src/arch/x86_64/sse2/mod.rs b/vendor/memchr/src/arch/x86_64/sse2/mod.rs
new file mode 100644
index 0000000..bcb8307
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/sse2/mod.rs
@@ -0,0 +1,6 @@
+/*!
+Algorithms for the `x86_64` target using 128-bit vectors via SSE2.
+*/
+
+pub mod memchr;
+pub mod packedpair;
diff --git a/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs
new file mode 100644
index 0000000..c8b5b99
--- /dev/null
+++ b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs
@@ -0,0 +1,232 @@
+/*!
+A 128-bit vector implementation of the "packed pair" SIMD algorithm.
+
+The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
+difference is that it (by default) uses a background distribution of byte
+frequencies to heuristically select the pair of bytes to search for.
+
+[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
+*/
+
+use core::arch::x86_64::__m128i;
+
+use crate::arch::{all::packedpair::Pair, generic::packedpair};
+
+/// A "packed pair" finder that uses 128-bit vector operations.
+///
+/// This finder picks two bytes that it believes have high predictive power
+/// for indicating an overall match of a needle. Depending on whether
+/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
+/// where the needle matches or could match. In the prefilter case, candidates
+/// are reported whenever the [`Pair`] of bytes given matches.
+#[derive(Clone, Copy, Debug)]
+pub struct Finder(packedpair::Finder<__m128i>);
+
+impl Finder {
+    /// Create a new pair searcher. The searcher returned can either report
+    /// exact matches of `needle` or act as a prefilter and report candidate
+    /// positions of `needle`.
+    ///
+    /// If SSE2 is unavailable in the current environment or if a [`Pair`]
+    /// could not be constructed from the needle given, then `None` is
+    /// returned.
+    #[inline]
+    pub fn new(needle: &[u8]) -> Option<Finder> {
+        Finder::with_pair(needle, Pair::new(needle)?)
+    }
+
+    /// Create a new "packed pair" finder using the pair of bytes given.
+    ///
+    /// This constructor permits callers to control precisely which pair of
+    /// bytes is used as a predicate.
+    ///
+    /// If SSE2 is unavailable in the current environment, then `None` is
+    /// returned.
+    #[inline]
+    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
+        if Finder::is_available() {
+            // SAFETY: we check that sse2 is available above. We are also
+            // guaranteed to have needle.len() > 1 because we have a valid
+            // Pair.
+            unsafe { Some(Finder::with_pair_impl(needle, pair)) }
+        } else {
+            None
+        }
+    }
+
+    /// Create a new `Finder` specific to SSE2 vectors and routines.
+    ///
+    /// # Safety
+    ///
+    /// Same as the safety for `packedpair::Finder::new`, and callers must also
+    /// ensure that SSE2 is available.
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
+        let finder = packedpair::Finder::<__m128i>::new(needle, pair);
+        Finder(finder)
+    }
+
+    /// Returns true when this implementation is available in the current
+    /// environment.
+    ///
+    /// When this is true, it is guaranteed that [`Finder::with_pair`] will
+    /// return a `Some` value. Similarly, when it is false, it is guaranteed
+    /// that `Finder::with_pair` will return a `None` value. Notice that this
+    /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely,
+    /// even when `Finder::is_available` is true, it is not guaranteed that a
+    /// valid [`Pair`] can be found from the needle given.
+    ///
+    /// Note also that for the lifetime of a single program, if this returns
+    /// true then it will always return true.
+    #[inline]
+    pub fn is_available() -> bool {
+        #[cfg(not(target_feature = "sse2"))]
+        {
+            false
+        }
+        #[cfg(target_feature = "sse2")]
+        {
+            true
+        }
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    #[inline]
+    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
+        // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
+        unsafe { self.find_impl(haystack, needle) }
+    }
+
+    /// Run this finder on the given haystack as a prefilter.
+    ///
+    /// If a candidate match is found, then an offset where the needle *could*
+    /// begin in the haystack is returned.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    #[inline]
+    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
+        // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
+        unsafe { self.find_prefilter_impl(haystack) }
+    }
+
+    /// Execute a search using SSE2 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    ///
+    /// # Safety
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Finder`, which can only be constructed
+    /// when it is safe to call `sse2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn find_impl(
+        &self,
+        haystack: &[u8],
+        needle: &[u8],
+    ) -> Option<usize> {
+        self.0.find(haystack, needle)
+    }
+
+    /// Execute a prefilter search using SSE2 vectors and routines.
+    ///
+    /// # Panics
+    ///
+    /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
+    ///
+    /// # Safety
+    ///
+    /// (The target feature safety obligation is automatically fulfilled by
+    /// virtue of being a method on `Finder`, which can only be constructed
+    /// when it is safe to call `sse2` routines.)
+    #[target_feature(enable = "sse2")]
+    #[inline]
+    unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
+        self.0.find_prefilter(haystack)
+    }
+
+    /// Returns the pair of offsets (into the needle) used to check as a
+    /// predicate before confirming whether a needle exists at a particular
+    /// position.
+    #[inline]
+    pub fn pair(&self) -> &Pair {
+        self.0.pair()
+    }
+
+    /// Returns the minimum haystack length that this `Finder` can search.
+    ///
+    /// Using a haystack with length smaller than this in a search will result
+    /// in a panic. The reason for this restriction is that this finder is
+    /// meant to be a low-level component that is part of a larger substring
+    /// strategy. In that sense, it avoids trying to handle all cases and
+    /// instead only handles the cases that it can handle very well.
+    #[inline]
+    pub fn min_haystack_len(&self) -> usize {
+        self.0.min_haystack_len()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
+        let f = Finder::new(needle)?;
+        if haystack.len() < f.min_haystack_len() {
+            return None;
+        }
+        Some(f.find(haystack, needle))
+    }
+
+    define_substring_forward_quickcheck!(find);
+
+    #[test]
+    fn forward_substring() {
+        crate::tests::substring::Runner::new().fwd(find).run()
+    }
+
+    #[test]
+    fn forward_packedpair() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            index1: u8,
+            index2: u8,
+        ) -> Option<Option<usize>> {
+            let pair = Pair::with_indices(needle, index1, index2)?;
+            let f = Finder::with_pair(needle, pair)?;
+            if haystack.len() < f.min_haystack_len() {
+                return None;
+            }
+            Some(f.find(haystack, needle))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+
+    #[test]
+    fn forward_packedpair_prefilter() {
+        fn find(
+            haystack: &[u8],
+            needle: &[u8],
+            index1: u8,
+            index2: u8,
+        ) -> Option<Option<usize>> {
+            let pair = Pair::with_indices(needle, index1, index2)?;
+            let f = Finder::with_pair(needle, pair)?;
+            if haystack.len() < f.min_haystack_len() {
+                return None;
+            }
+            Some(f.find_prefilter(haystack))
+        }
+        crate::tests::packedpair::Runner::new().fwd(find).run()
+    }
+}
author	Valentin Popov <valentin@popov.link>	2024-01-08 00:21:28 +0300
committer	Valentin Popov <valentin@popov.link>	2024-01-08 00:21:28 +0300
commit	1b6a04ca5504955c571d1c97504fb45ea0befee4 (patch)
tree	7579f518b23313e8a9748a88ab6173d5e030b227 /vendor/memchr/src/arch/x86_64
parent	5ecd8cf2cba827454317368b68571df0d13d7842 (diff)
download	fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.tar.xz fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.zip