aboutsummaryrefslogtreecommitdiff
path: root/vendor/memchr/src/arch/x86_64/sse2
diff options
context:
space:
mode:
author Valentin Popov <valentin@popov.link> 2024-07-19 15:37:58 +0300
committer Valentin Popov <valentin@popov.link> 2024-07-19 15:37:58 +0300
commit a990de90fe41456a23e58bd087d2f107d321f3a1 (patch)
tree 15afc392522a9e85dc3332235e311b7d39352ea9 /vendor/memchr/src/arch/x86_64/sse2
parent 3d48cd3f81164bbfc1a755dc1d4a9a02f98c8ddd (diff)
download fparkan-a990de90fe41456a23e58bd087d2f107d321f3a1.tar.xz
fparkan-a990de90fe41456a23e58bd087d2f107d321f3a1.zip
Deleted vendor folder
Diffstat (limited to 'vendor/memchr/src/arch/x86_64/sse2')
-rw-r--r-- vendor/memchr/src/arch/x86_64/sse2/memchr.rs 1077
-rw-r--r-- vendor/memchr/src/arch/x86_64/sse2/mod.rs 6
-rw-r--r-- vendor/memchr/src/arch/x86_64/sse2/packedpair.rs 232
3 files changed, 0 insertions, 1315 deletions
diff --git a/vendor/memchr/src/arch/x86_64/sse2/memchr.rs b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs
deleted file mode 100644
index c6f75df..0000000
--- a/vendor/memchr/src/arch/x86_64/sse2/memchr.rs
+++ /dev/null
@@ -1,1077 +0,0 @@
-/*!
-This module defines 128-bit vector implementations of `memchr` and friends.
-
-The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
-searching for one, two or three distinct bytes, respectively, in a haystack.
-Each type also has corresponding double ended iterators. These searchers are
-typically much faster than scalar routines accomplishing the same task.
-
-The `One` searcher also provides a [`One::count`] routine for efficiently
-counting the number of times a single byte occurs in a haystack. This is
-useful, for example, for counting the number of lines in a haystack. This
-routine exists because it is usually faster, especially with a high match
-count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its
-`Iterator::count` implementation to use this routine.)
-
-Only one, two and three bytes are supported because three bytes is about
-the point where one sees diminishing returns. Beyond this point and it's
-probably (but not necessarily) better to just use a simple `[bool; 256]` array
-or similar. However, it depends mightily on the specific work-load and the
-expected match frequency.
-*/
-
-use core::arch::x86_64::__m128i;
-
-use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector};
-
-/// Finds all occurrences of a single byte in a haystack.
-#[derive(Clone, Copy, Debug)]
-pub struct One(generic::One<__m128i>);
-
-impl One {
- /// Create a new searcher that finds occurrences of the needle byte given.
- ///
- /// This particular searcher is specialized to use SSE2 vector instructions
- /// that typically make it quite fast.
- ///
- /// If SSE2 is unavailable in the current environment, then `None` is
- /// returned.
- #[inline]
- pub fn new(needle: u8) -> Option<One> {
- if One::is_available() {
- // SAFETY: we check that sse2 is available above.
- unsafe { Some(One::new_unchecked(needle)) }
- } else {
- None
- }
- }
-
- /// Create a new finder specific to SSE2 vectors and routines without
- /// checking that SSE2 is available.
- ///
- /// # Safety
- ///
- /// Callers must guarantee that it is safe to execute `sse2` instructions
- /// in the current environment.
- ///
- /// Note that it is a common misconception that if one compiles for an
- /// `x86_64` target, then they therefore automatically have access to SSE2
- /// instructions. While this is almost always the case, it isn't true in
- /// 100% of cases.
- #[target_feature(enable = "sse2")]
- #[inline]
- pub unsafe fn new_unchecked(needle: u8) -> One {
- One(generic::One::new(needle))
- }
-
- /// Returns true when this implementation is available in the current
- /// environment.
- ///
- /// When this is true, it is guaranteed that [`One::new`] will return
- /// a `Some` value. Similarly, when it is false, it is guaranteed that
- /// `One::new` will return a `None` value.
- ///
- /// Note also that for the lifetime of a single program, if this returns
- /// true then it will always return true.
- #[inline]
- pub fn is_available() -> bool {
- #[cfg(target_feature = "sse2")]
- {
- true
- }
- #[cfg(not(target_feature = "sse2"))]
- {
- false
- }
- }
-
- /// Return the first occurrence of one of the needle bytes in the given
- /// haystack. If no such occurrence exists, then `None` is returned.
- ///
- /// The occurrence is reported as an offset into `haystack`. Its maximum
- /// value is `haystack.len() - 1`.
- #[inline]
- pub fn find(&self, haystack: &[u8]) -> Option<usize> {
- // SAFETY: `find_raw` guarantees that if a pointer is returned, it
- // falls within the bounds of the start and end pointers.
- unsafe {
- generic::search_slice_with_raw(haystack, |s, e| {
- self.find_raw(s, e)
- })
- }
- }
-
- /// Return the last occurrence of one of the needle bytes in the given
- /// haystack. If no such occurrence exists, then `None` is returned.
- ///
- /// The occurrence is reported as an offset into `haystack`. Its maximum
- /// value is `haystack.len() - 1`.
- #[inline]
- pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
- // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
- // falls within the bounds of the start and end pointers.
- unsafe {
- generic::search_slice_with_raw(haystack, |s, e| {
- self.rfind_raw(s, e)
- })
- }
- }
-
- /// Counts all occurrences of this byte in the given haystack.
- #[inline]
- pub fn count(&self, haystack: &[u8]) -> usize {
- // SAFETY: All of our pointers are derived directly from a borrowed
- // slice, which is guaranteed to be valid.
- unsafe {
- let start = haystack.as_ptr();
- let end = start.add(haystack.len());
- self.count_raw(start, end)
- }
- }
-
- /// Like `find`, but accepts and returns raw pointers.
- ///
- /// When a match is found, the pointer returned is guaranteed to be
- /// `>= start` and `< end`.
- ///
- /// This routine is useful if you're already using raw pointers and would
- /// like to avoid converting back to a slice before executing a search.
- ///
- /// # Safety
- ///
- /// * Both `start` and `end` must be valid for reads.
- /// * Both `start` and `end` must point to an initialized value.
- /// * Both `start` and `end` must point to the same allocated object and
- /// must either be in bounds or at most one byte past the end of the
- /// allocated object.
- /// * Both `start` and `end` must be _derived from_ a pointer to the same
- /// object.
- /// * The distance between `start` and `end` must not overflow `isize`.
- /// * The distance being in bounds must not rely on "wrapping around" the
- /// address space.
- ///
- /// Note that callers may pass a pair of pointers such that `start >= end`.
- /// In that case, `None` will always be returned.
- #[inline]
- pub unsafe fn find_raw(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- if start >= end {
- return None;
- }
- if end.distance(start) < __m128i::BYTES {
- // SAFETY: We require the caller to pass valid start/end pointers.
- return generic::fwd_byte_by_byte(start, end, |b| {
- b == self.0.needle1()
- });
- }
- // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
- // Also, we've checked that our haystack is big enough to run on the
- // vector routine. Pointer validity is caller's responsibility.
- //
- // Note that we could call `self.0.find_raw` directly here. But that
- // means we'd have to annotate this routine with `target_feature`.
- // Which is fine, because this routine is `unsafe` anyway and the
- // `target_feature` obligation is met by virtue of building a `One`.
- // The real problem is that a routine with a `target_feature`
- // annotation generally can't be inlined into caller code unless the
- // caller code has the same target feature annotations. Which is maybe
- // okay for SSE2, but we do the same thing for AVX2 where caller code
- // probably usually doesn't have AVX2 enabled. That means that this
- // routine can be inlined which will handle some of the short-haystack
- // cases above without touching the architecture specific code.
- self.find_raw_impl(start, end)
- }
-
- /// Like `rfind`, but accepts and returns raw pointers.
- ///
- /// When a match is found, the pointer returned is guaranteed to be
- /// `>= start` and `< end`.
- ///
- /// This routine is useful if you're already using raw pointers and would
- /// like to avoid converting back to a slice before executing a search.
- ///
- /// # Safety
- ///
- /// * Both `start` and `end` must be valid for reads.
- /// * Both `start` and `end` must point to an initialized value.
- /// * Both `start` and `end` must point to the same allocated object and
- /// must either be in bounds or at most one byte past the end of the
- /// allocated object.
- /// * Both `start` and `end` must be _derived from_ a pointer to the same
- /// object.
- /// * The distance between `start` and `end` must not overflow `isize`.
- /// * The distance being in bounds must not rely on "wrapping around" the
- /// address space.
- ///
- /// Note that callers may pass a pair of pointers such that `start >= end`.
- /// In that case, `None` will always be returned.
- #[inline]
- pub unsafe fn rfind_raw(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- if start >= end {
- return None;
- }
- if end.distance(start) < __m128i::BYTES {
- // SAFETY: We require the caller to pass valid start/end pointers.
- return generic::rev_byte_by_byte(start, end, |b| {
- b == self.0.needle1()
- });
- }
- // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
- // Also, we've checked that our haystack is big enough to run on the
- // vector routine. Pointer validity is caller's responsibility.
- //
- // See note in forward routine above for why we don't just call
- // `self.0.rfind_raw` directly here.
- self.rfind_raw_impl(start, end)
- }
-
- /// Counts all occurrences of this byte in the given haystack represented
- /// by raw pointers.
- ///
- /// This routine is useful if you're already using raw pointers and would
- /// like to avoid converting back to a slice before executing a search.
- ///
- /// # Safety
- ///
- /// * Both `start` and `end` must be valid for reads.
- /// * Both `start` and `end` must point to an initialized value.
- /// * Both `start` and `end` must point to the same allocated object and
- /// must either be in bounds or at most one byte past the end of the
- /// allocated object.
- /// * Both `start` and `end` must be _derived from_ a pointer to the same
- /// object.
- /// * The distance between `start` and `end` must not overflow `isize`.
- /// * The distance being in bounds must not rely on "wrapping around" the
- /// address space.
- ///
- /// Note that callers may pass a pair of pointers such that `start >= end`.
- /// In that case, `0` will always be returned.
- #[inline]
- pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
- if start >= end {
- return 0;
- }
- if end.distance(start) < __m128i::BYTES {
- // SAFETY: We require the caller to pass valid start/end pointers.
- return generic::count_byte_by_byte(start, end, |b| {
- b == self.0.needle1()
- });
- }
- // SAFETY: Building a `One` means it's safe to call 'sse2' routines.
- // Also, we've checked that our haystack is big enough to run on the
- // vector routine. Pointer validity is caller's responsibility.
- self.count_raw_impl(start, end)
- }
-
- /// Execute a search using SSE2 vectors and routines.
- ///
- /// # Safety
- ///
- /// Same as [`One::find_raw`], except the distance between `start` and
- /// `end` must be at least the size of an SSE2 vector (in bytes).
- ///
- /// (The target feature safety obligation is automatically fulfilled by
- /// virtue of being a method on `One`, which can only be constructed
- /// when it is safe to call `sse2` routines.)
- #[target_feature(enable = "sse2")]
- #[inline]
- unsafe fn find_raw_impl(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- self.0.find_raw(start, end)
- }
-
- /// Execute a search using SSE2 vectors and routines.
- ///
- /// # Safety
- ///
- /// Same as [`One::rfind_raw`], except the distance between `start` and
- /// `end` must be at least the size of an SSE2 vector (in bytes).
- ///
- /// (The target feature safety obligation is automatically fulfilled by
- /// virtue of being a method on `One`, which can only be constructed
- /// when it is safe to call `sse2` routines.)
- #[target_feature(enable = "sse2")]
- #[inline]
- unsafe fn rfind_raw_impl(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- self.0.rfind_raw(start, end)
- }
-
- /// Execute a count using SSE2 vectors and routines.
- ///
- /// # Safety
- ///
- /// Same as [`One::count_raw`], except the distance between `start` and
- /// `end` must be at least the size of an SSE2 vector (in bytes).
- ///
- /// (The target feature safety obligation is automatically fulfilled by
- /// virtue of being a method on `One`, which can only be constructed
- /// when it is safe to call `sse2` routines.)
- #[target_feature(enable = "sse2")]
- #[inline]
- unsafe fn count_raw_impl(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> usize {
- self.0.count_raw(start, end)
- }
-
- /// Returns an iterator over all occurrences of the needle byte in the
- /// given haystack.
- ///
- /// The iterator returned implements `DoubleEndedIterator`. This means it
- /// can also be used to find occurrences in reverse order.
- #[inline]
- pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
- OneIter { searcher: self, it: generic::Iter::new(haystack) }
- }
-}
-
-/// An iterator over all occurrences of a single byte in a haystack.
-///
-/// This iterator implements `DoubleEndedIterator`, which means it can also be
-/// used to find occurrences in reverse order.
-///
-/// This iterator is created by the [`One::iter`] method.
-///
-/// The lifetime parameters are as follows:
-///
-/// * `'a` refers to the lifetime of the underlying [`One`] searcher.
-/// * `'h` refers to the lifetime of the haystack being searched.
-#[derive(Clone, Debug)]
-pub struct OneIter<'a, 'h> {
- searcher: &'a One,
- it: generic::Iter<'h>,
-}
-
-impl<'a, 'h> Iterator for OneIter<'a, 'h> {
- type Item = usize;
-
- #[inline]
- fn next(&mut self) -> Option<usize> {
- // SAFETY: We rely on the generic iterator to provide valid start
- // and end pointers, but we guarantee that any pointer returned by
- // 'find_raw' falls within the bounds of the start and end pointer.
- unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
- }
-
- #[inline]
- fn count(self) -> usize {
- self.it.count(|s, e| {
- // SAFETY: We rely on our generic iterator to return valid start
- // and end pointers.
- unsafe { self.searcher.count_raw(s, e) }
- })
- }
-
- #[inline]
- fn size_hint(&self) -> (usize, Option<usize>) {
- self.it.size_hint()
- }
-}
-
-impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
- #[inline]
- fn next_back(&mut self) -> Option<usize> {
- // SAFETY: We rely on the generic iterator to provide valid start
- // and end pointers, but we guarantee that any pointer returned by
- // 'rfind_raw' falls within the bounds of the start and end pointer.
- unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
- }
-}
-
-impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {}
-
-/// Finds all occurrences of two bytes in a haystack.
-///
-/// That is, this reports matches of one of two possible bytes. For example,
-/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
-/// `4` and `5`.
-#[derive(Clone, Copy, Debug)]
-pub struct Two(generic::Two<__m128i>);
-
-impl Two {
- /// Create a new searcher that finds occurrences of the needle bytes given.
- ///
- /// This particular searcher is specialized to use SSE2 vector instructions
- /// that typically make it quite fast.
- ///
- /// If SSE2 is unavailable in the current environment, then `None` is
- /// returned.
- #[inline]
- pub fn new(needle1: u8, needle2: u8) -> Option<Two> {
- if Two::is_available() {
- // SAFETY: we check that sse2 is available above.
- unsafe { Some(Two::new_unchecked(needle1, needle2)) }
- } else {
- None
- }
- }
-
- /// Create a new finder specific to SSE2 vectors and routines without
- /// checking that SSE2 is available.
- ///
- /// # Safety
- ///
- /// Callers must guarantee that it is safe to execute `sse2` instructions
- /// in the current environment.
- ///
- /// Note that it is a common misconception that if one compiles for an
- /// `x86_64` target, then they therefore automatically have access to SSE2
- /// instructions. While this is almost always the case, it isn't true in
- /// 100% of cases.
- #[target_feature(enable = "sse2")]
- #[inline]
- pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two {
- Two(generic::Two::new(needle1, needle2))
- }
-
- /// Returns true when this implementation is available in the current
- /// environment.
- ///
- /// When this is true, it is guaranteed that [`Two::new`] will return
- /// a `Some` value. Similarly, when it is false, it is guaranteed that
- /// `Two::new` will return a `None` value.
- ///
- /// Note also that for the lifetime of a single program, if this returns
- /// true then it will always return true.
- #[inline]
- pub fn is_available() -> bool {
- #[cfg(target_feature = "sse2")]
- {
- true
- }
- #[cfg(not(target_feature = "sse2"))]
- {
- false
- }
- }
-
- /// Return the first occurrence of one of the needle bytes in the given
- /// haystack. If no such occurrence exists, then `None` is returned.
- ///
- /// The occurrence is reported as an offset into `haystack`. Its maximum
- /// value is `haystack.len() - 1`.
- #[inline]
- pub fn find(&self, haystack: &[u8]) -> Option<usize> {
- // SAFETY: `find_raw` guarantees that if a pointer is returned, it
- // falls within the bounds of the start and end pointers.
- unsafe {
- generic::search_slice_with_raw(haystack, |s, e| {
- self.find_raw(s, e)
- })
- }
- }
-
- /// Return the last occurrence of one of the needle bytes in the given
- /// haystack. If no such occurrence exists, then `None` is returned.
- ///
- /// The occurrence is reported as an offset into `haystack`. Its maximum
- /// value is `haystack.len() - 1`.
- #[inline]
- pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
- // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
- // falls within the bounds of the start and end pointers.
- unsafe {
- generic::search_slice_with_raw(haystack, |s, e| {
- self.rfind_raw(s, e)
- })
- }
- }
-
- /// Like `find`, but accepts and returns raw pointers.
- ///
- /// When a match is found, the pointer returned is guaranteed to be
- /// `>= start` and `< end`.
- ///
- /// This routine is useful if you're already using raw pointers and would
- /// like to avoid converting back to a slice before executing a search.
- ///
- /// # Safety
- ///
- /// * Both `start` and `end` must be valid for reads.
- /// * Both `start` and `end` must point to an initialized value.
- /// * Both `start` and `end` must point to the same allocated object and
- /// must either be in bounds or at most one byte past the end of the
- /// allocated object.
- /// * Both `start` and `end` must be _derived from_ a pointer to the same
- /// object.
- /// * The distance between `start` and `end` must not overflow `isize`.
- /// * The distance being in bounds must not rely on "wrapping around" the
- /// address space.
- ///
- /// Note that callers may pass a pair of pointers such that `start >= end`.
- /// In that case, `None` will always be returned.
- #[inline]
- pub unsafe fn find_raw(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- if start >= end {
- return None;
- }
- if end.distance(start) < __m128i::BYTES {
- // SAFETY: We require the caller to pass valid start/end pointers.
- return generic::fwd_byte_by_byte(start, end, |b| {
- b == self.0.needle1() || b == self.0.needle2()
- });
- }
- // SAFETY: Building a `Two` means it's safe to call 'sse2' routines.
- // Also, we've checked that our haystack is big enough to run on the
- // vector routine. Pointer validity is caller's responsibility.
- //
- // Note that we could call `self.0.find_raw` directly here. But that
- // means we'd have to annotate this routine with `target_feature`.
- // Which is fine, because this routine is `unsafe` anyway and the
- // `target_feature` obligation is met by virtue of building a `Two`.
- // The real problem is that a routine with a `target_feature`
- // annotation generally can't be inlined into caller code unless the
- // caller code has the same target feature annotations. Which is maybe
- // okay for SSE2, but we do the same thing for AVX2 where caller code
- // probably usually doesn't have AVX2 enabled. That means that this
- // routine can be inlined which will handle some of the short-haystack
- // cases above without touching the architecture specific code.
- self.find_raw_impl(start, end)
- }
-
- /// Like `rfind`, but accepts and returns raw pointers.
- ///
- /// When a match is found, the pointer returned is guaranteed to be
- /// `>= start` and `< end`.
- ///
- /// This routine is useful if you're already using raw pointers and would
- /// like to avoid converting back to a slice before executing a search.
- ///
- /// # Safety
- ///
- /// * Both `start` and `end` must be valid for reads.
- /// * Both `start` and `end` must point to an initialized value.
- /// * Both `start` and `end` must point to the same allocated object and
- /// must either be in bounds or at most one byte past the end of the
- /// allocated object.
- /// * Both `start` and `end` must be _derived from_ a pointer to the same
- /// object.
- /// * The distance between `start` and `end` must not overflow `isize`.
- /// * The distance being in bounds must not rely on "wrapping around" the
- /// address space.
- ///
- /// Note that callers may pass a pair of pointers such that `start >= end`.
- /// In that case, `None` will always be returned.
- #[inline]
- pub unsafe fn rfind_raw(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- if start >= end {
- return None;
- }
- if end.distance(start) < __m128i::BYTES {
- // SAFETY: We require the caller to pass valid start/end pointers.
- return generic::rev_byte_by_byte(start, end, |b| {
- b == self.0.needle1() || b == self.0.needle2()
- });
- }
- // SAFETY: Building a `Two` means it's safe to call 'sse2' routines.
- // Also, we've checked that our haystack is big enough to run on the
- // vector routine. Pointer validity is caller's responsibility.
- //
- // See note in forward routine above for why we don't just call
- // `self.0.rfind_raw` directly here.
- self.rfind_raw_impl(start, end)
- }
-
- /// Execute a search using SSE2 vectors and routines.
- ///
- /// # Safety
- ///
- /// Same as [`Two::find_raw`], except the distance between `start` and
- /// `end` must be at least the size of an SSE2 vector (in bytes).
- ///
- /// (The target feature safety obligation is automatically fulfilled by
- /// virtue of being a method on `Two`, which can only be constructed
- /// when it is safe to call `sse2` routines.)
- #[target_feature(enable = "sse2")]
- #[inline]
- unsafe fn find_raw_impl(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- self.0.find_raw(start, end)
- }
-
- /// Execute a search using SSE2 vectors and routines.
- ///
- /// # Safety
- ///
- /// Same as [`Two::rfind_raw`], except the distance between `start` and
- /// `end` must be at least the size of an SSE2 vector (in bytes).
- ///
- /// (The target feature safety obligation is automatically fulfilled by
- /// virtue of being a method on `Two`, which can only be constructed
- /// when it is safe to call `sse2` routines.)
- #[target_feature(enable = "sse2")]
- #[inline]
- unsafe fn rfind_raw_impl(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- self.0.rfind_raw(start, end)
- }
-
- /// Returns an iterator over all occurrences of the needle bytes in the
- /// given haystack.
- ///
- /// The iterator returned implements `DoubleEndedIterator`. This means it
- /// can also be used to find occurrences in reverse order.
- #[inline]
- pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
- TwoIter { searcher: self, it: generic::Iter::new(haystack) }
- }
-}
-
-/// An iterator over all occurrences of two possible bytes in a haystack.
-///
-/// This iterator implements `DoubleEndedIterator`, which means it can also be
-/// used to find occurrences in reverse order.
-///
-/// This iterator is created by the [`Two::iter`] method.
-///
-/// The lifetime parameters are as follows:
-///
-/// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
-/// * `'h` refers to the lifetime of the haystack being searched.
-#[derive(Clone, Debug)]
-pub struct TwoIter<'a, 'h> {
- searcher: &'a Two,
- it: generic::Iter<'h>,
-}
-
-impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
- type Item = usize;
-
- #[inline]
- fn next(&mut self) -> Option<usize> {
- // SAFETY: We rely on the generic iterator to provide valid start
- // and end pointers, but we guarantee that any pointer returned by
- // 'find_raw' falls within the bounds of the start and end pointer.
- unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
- }
-
- #[inline]
- fn size_hint(&self) -> (usize, Option<usize>) {
- self.it.size_hint()
- }
-}
-
-impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
- #[inline]
- fn next_back(&mut self) -> Option<usize> {
- // SAFETY: We rely on the generic iterator to provide valid start
- // and end pointers, but we guarantee that any pointer returned by
- // 'rfind_raw' falls within the bounds of the start and end pointer.
- unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
- }
-}
-
-impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {}
-
-/// Finds all occurrences of three bytes in a haystack.
-///
-/// That is, this reports matches of one of three possible bytes. For example,
-/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
-/// `0`, `2`, `3`, `4` and `5`.
-#[derive(Clone, Copy, Debug)]
-pub struct Three(generic::Three<__m128i>);
-
-impl Three {
- /// Create a new searcher that finds occurrences of the needle bytes given.
- ///
- /// This particular searcher is specialized to use SSE2 vector instructions
- /// that typically make it quite fast.
- ///
- /// If SSE2 is unavailable in the current environment, then `None` is
- /// returned.
- #[inline]
- pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option<Three> {
- if Three::is_available() {
- // SAFETY: we check that sse2 is available above.
- unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) }
- } else {
- None
- }
- }
-
- /// Create a new finder specific to SSE2 vectors and routines without
- /// checking that SSE2 is available.
- ///
- /// # Safety
- ///
- /// Callers must guarantee that it is safe to execute `sse2` instructions
- /// in the current environment.
- ///
- /// Note that it is a common misconception that if one compiles for an
- /// `x86_64` target, then they therefore automatically have access to SSE2
- /// instructions. While this is almost always the case, it isn't true in
- /// 100% of cases.
- #[target_feature(enable = "sse2")]
- #[inline]
- pub unsafe fn new_unchecked(
- needle1: u8,
- needle2: u8,
- needle3: u8,
- ) -> Three {
- Three(generic::Three::new(needle1, needle2, needle3))
- }
-
- /// Returns true when this implementation is available in the current
- /// environment.
- ///
- /// When this is true, it is guaranteed that [`Three::new`] will return
- /// a `Some` value. Similarly, when it is false, it is guaranteed that
- /// `Three::new` will return a `None` value.
- ///
- /// Note also that for the lifetime of a single program, if this returns
- /// true then it will always return true.
- #[inline]
- pub fn is_available() -> bool {
- #[cfg(target_feature = "sse2")]
- {
- true
- }
- #[cfg(not(target_feature = "sse2"))]
- {
- false
- }
- }
-
- /// Return the first occurrence of one of the needle bytes in the given
- /// haystack. If no such occurrence exists, then `None` is returned.
- ///
- /// The occurrence is reported as an offset into `haystack`. Its maximum
- /// value is `haystack.len() - 1`.
- #[inline]
- pub fn find(&self, haystack: &[u8]) -> Option<usize> {
- // SAFETY: `find_raw` guarantees that if a pointer is returned, it
- // falls within the bounds of the start and end pointers.
- unsafe {
- generic::search_slice_with_raw(haystack, |s, e| {
- self.find_raw(s, e)
- })
- }
- }
-
- /// Return the last occurrence of one of the needle bytes in the given
- /// haystack. If no such occurrence exists, then `None` is returned.
- ///
- /// The occurrence is reported as an offset into `haystack`. Its maximum
- /// value is `haystack.len() - 1`.
- #[inline]
- pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
- // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
- // falls within the bounds of the start and end pointers.
- unsafe {
- generic::search_slice_with_raw(haystack, |s, e| {
- self.rfind_raw(s, e)
- })
- }
- }
-
- /// Like `find`, but accepts and returns raw pointers.
- ///
- /// When a match is found, the pointer returned is guaranteed to be
- /// `>= start` and `< end`.
- ///
- /// This routine is useful if you're already using raw pointers and would
- /// like to avoid converting back to a slice before executing a search.
- ///
- /// # Safety
- ///
- /// * Both `start` and `end` must be valid for reads.
- /// * Both `start` and `end` must point to an initialized value.
- /// * Both `start` and `end` must point to the same allocated object and
- /// must either be in bounds or at most one byte past the end of the
- /// allocated object.
- /// * Both `start` and `end` must be _derived from_ a pointer to the same
- /// object.
- /// * The distance between `start` and `end` must not overflow `isize`.
- /// * The distance being in bounds must not rely on "wrapping around" the
- /// address space.
- ///
- /// Note that callers may pass a pair of pointers such that `start >= end`.
- /// In that case, `None` will always be returned.
- #[inline]
- pub unsafe fn find_raw(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- if start >= end {
- return None;
- }
- if end.distance(start) < __m128i::BYTES {
- // SAFETY: We require the caller to pass valid start/end pointers.
- return generic::fwd_byte_by_byte(start, end, |b| {
- b == self.0.needle1()
- || b == self.0.needle2()
- || b == self.0.needle3()
- });
- }
- // SAFETY: Building a `Three` means it's safe to call 'sse2' routines.
- // Also, we've checked that our haystack is big enough to run on the
- // vector routine. Pointer validity is caller's responsibility.
- //
- // Note that we could call `self.0.find_raw` directly here. But that
- // means we'd have to annotate this routine with `target_feature`.
- // Which is fine, because this routine is `unsafe` anyway and the
- // `target_feature` obligation is met by virtue of building a `Three`.
- // The real problem is that a routine with a `target_feature`
- // annotation generally can't be inlined into caller code unless the
- // caller code has the same target feature annotations. Which is maybe
- // okay for SSE2, but we do the same thing for AVX2 where caller code
- // probably usually doesn't have AVX2 enabled. That means that this
- // routine can be inlined which will handle some of the short-haystack
- // cases above without touching the architecture specific code.
- self.find_raw_impl(start, end)
- }
-
- /// Like `rfind`, but accepts and returns raw pointers.
- ///
- /// When a match is found, the pointer returned is guaranteed to be
- /// `>= start` and `< end`.
- ///
- /// This routine is useful if you're already using raw pointers and would
- /// like to avoid converting back to a slice before executing a search.
- ///
- /// # Safety
- ///
- /// * Both `start` and `end` must be valid for reads.
- /// * Both `start` and `end` must point to an initialized value.
- /// * Both `start` and `end` must point to the same allocated object and
- /// must either be in bounds or at most one byte past the end of the
- /// allocated object.
- /// * Both `start` and `end` must be _derived from_ a pointer to the same
- /// object.
- /// * The distance between `start` and `end` must not overflow `isize`.
- /// * The distance being in bounds must not rely on "wrapping around" the
- /// address space.
- ///
- /// Note that callers may pass a pair of pointers such that `start >= end`.
- /// In that case, `None` will always be returned.
- #[inline]
- pub unsafe fn rfind_raw(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- if start >= end {
- return None;
- }
- if end.distance(start) < __m128i::BYTES {
- // SAFETY: We require the caller to pass valid start/end pointers.
- return generic::rev_byte_by_byte(start, end, |b| {
- b == self.0.needle1()
- || b == self.0.needle2()
- || b == self.0.needle3()
- });
- }
- // SAFETY: Building a `Three` means it's safe to call 'sse2' routines.
- // Also, we've checked that our haystack is big enough to run on the
- // vector routine. Pointer validity is caller's responsibility.
- //
- // See note in forward routine above for why we don't just call
- // `self.0.rfind_raw` directly here.
- self.rfind_raw_impl(start, end)
- }
-
- /// Execute a search using SSE2 vectors and routines.
- ///
- /// # Safety
- ///
- /// Same as [`Three::find_raw`], except the distance between `start` and
- /// `end` must be at least the size of an SSE2 vector (in bytes).
- ///
- /// (The target feature safety obligation is automatically fulfilled by
- /// virtue of being a method on `Three`, which can only be constructed
- /// when it is safe to call `sse2` routines.)
- #[target_feature(enable = "sse2")]
- #[inline]
- unsafe fn find_raw_impl(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- self.0.find_raw(start, end)
- }
-
- /// Execute a reverse search using SSE2 vectors and routines.
- ///
- /// # Safety
- ///
- /// Same as [`Three::rfind_raw`], except the distance between `start` and
- /// `end` must be at least the size of an SSE2 vector (in bytes).
- ///
- /// (The target feature safety obligation is automatically fulfilled by
- /// virtue of being a method on `Three`, which can only be constructed
- /// when it is safe to call `sse2` routines.)
- #[target_feature(enable = "sse2")]
- #[inline]
- unsafe fn rfind_raw_impl(
- &self,
- start: *const u8,
- end: *const u8,
- ) -> Option<*const u8> {
- self.0.rfind_raw(start, end)
- }
-
- /// Returns an iterator over all occurrences of any of the three needle
- /// bytes in the given haystack.
- ///
- /// The iterator returned implements `DoubleEndedIterator`. This means it
- /// can also be used to find occurrences in reverse order.
- #[inline]
- pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
- ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
- }
-}
-
-/// An iterator over all occurrences of three possible bytes in a haystack.
-///
-/// This iterator implements `DoubleEndedIterator`, which means it can also be
-/// used to find occurrences in reverse order.
-///
-/// This iterator is created by the [`Three::iter`] method.
-///
-/// The lifetime parameters are as follows:
-///
-/// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
-/// * `'h` refers to the lifetime of the haystack being searched.
-#[derive(Clone, Debug)]
-pub struct ThreeIter<'a, 'h> {
- searcher: &'a Three,
- it: generic::Iter<'h>,
-}
-
-impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
- type Item = usize;
-
- #[inline]
- fn next(&mut self) -> Option<usize> {
- // SAFETY: We rely on the generic iterator to provide valid start
- // and end pointers, but we guarantee that any pointer returned by
- // 'find_raw' falls within the bounds of the start and end pointer.
- unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
- }
-
- #[inline]
- fn size_hint(&self) -> (usize, Option<usize>) {
- self.it.size_hint()
- }
-}
-
-impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
- #[inline]
- fn next_back(&mut self) -> Option<usize> {
- // SAFETY: We rely on the generic iterator to provide valid start
- // and end pointers, but we guarantee that any pointer returned by
- // 'rfind_raw' falls within the bounds of the start and end pointer.
- unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
- }
-}
-
-impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- define_memchr_quickcheck!(super);
-
- #[test]
- fn forward_one() {
- crate::tests::memchr::Runner::new(1).forward_iter(
- |haystack, needles| {
- Some(One::new(needles[0])?.iter(haystack).collect())
- },
- )
- }
-
- #[test]
- fn reverse_one() {
- crate::tests::memchr::Runner::new(1).reverse_iter(
- |haystack, needles| {
- Some(One::new(needles[0])?.iter(haystack).rev().collect())
- },
- )
- }
-
- #[test]
- fn count_one() {
- crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
- Some(One::new(needles[0])?.iter(haystack).count())
- })
- }
-
- #[test]
- fn forward_two() {
- crate::tests::memchr::Runner::new(2).forward_iter(
- |haystack, needles| {
- let n1 = needles.get(0).copied()?;
- let n2 = needles.get(1).copied()?;
- Some(Two::new(n1, n2)?.iter(haystack).collect())
- },
- )
- }
-
- #[test]
- fn reverse_two() {
- crate::tests::memchr::Runner::new(2).reverse_iter(
- |haystack, needles| {
- let n1 = needles.get(0).copied()?;
- let n2 = needles.get(1).copied()?;
- Some(Two::new(n1, n2)?.iter(haystack).rev().collect())
- },
- )
- }
-
- #[test]
- fn forward_three() {
- crate::tests::memchr::Runner::new(3).forward_iter(
- |haystack, needles| {
- let n1 = needles.get(0).copied()?;
- let n2 = needles.get(1).copied()?;
- let n3 = needles.get(2).copied()?;
- Some(Three::new(n1, n2, n3)?.iter(haystack).collect())
- },
- )
- }
-
- #[test]
- fn reverse_three() {
- crate::tests::memchr::Runner::new(3).reverse_iter(
- |haystack, needles| {
- let n1 = needles.get(0).copied()?;
- let n2 = needles.get(1).copied()?;
- let n3 = needles.get(2).copied()?;
- Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect())
- },
- )
- }
-}
diff --git a/vendor/memchr/src/arch/x86_64/sse2/mod.rs b/vendor/memchr/src/arch/x86_64/sse2/mod.rs
deleted file mode 100644
index bcb8307..0000000
--- a/vendor/memchr/src/arch/x86_64/sse2/mod.rs
+++ /dev/null
@@ -1,6 +0,0 @@
-/*!
-Algorithms for the `x86_64` target using 128-bit vectors via SSE2.
-*/
-
-pub mod memchr;
-pub mod packedpair;
diff --git a/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs
deleted file mode 100644
index c8b5b99..0000000
--- a/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs
+++ /dev/null
@@ -1,232 +0,0 @@
-/*!
-A 128-bit vector implementation of the "packed pair" SIMD algorithm.
-
-The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
-difference is that it (by default) uses a background distribution of byte
-frequencies to heuristically select the pair of bytes to search for.
-
-[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
-*/
-
-use core::arch::x86_64::__m128i;
-
-use crate::arch::{all::packedpair::Pair, generic::packedpair};
-
-/// A "packed pair" finder that uses 128-bit vector operations.
-///
-/// This finder picks two bytes that it believes have high predictive power
-/// for indicating an overall match of a needle. Depending on whether
-/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
-/// where the needle matches or could match. In the prefilter case, candidates
-/// are reported whenever the [`Pair`] of bytes given matches.
-#[derive(Clone, Copy, Debug)]
-pub struct Finder(packedpair::Finder<__m128i>);
-
-impl Finder {
- /// Create a new pair searcher. The searcher returned can either report
- /// exact matches of `needle` or act as a prefilter and report candidate
- /// positions of `needle`.
- ///
- /// If SSE2 is unavailable in the current environment or if a [`Pair`]
- /// could not be constructed from the needle given, then `None` is
- /// returned.
- #[inline]
- pub fn new(needle: &[u8]) -> Option<Finder> {
- Finder::with_pair(needle, Pair::new(needle)?)
- }
-
- /// Create a new "packed pair" finder using the pair of bytes given.
- ///
- /// This constructor permits callers to control precisely which pair of
- /// bytes is used as a predicate.
- ///
- /// If SSE2 is unavailable in the current environment, then `None` is
- /// returned.
- #[inline]
- pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
- if Finder::is_available() {
- // SAFETY: we check that sse2 is available above. We are also
- // guaranteed to have needle.len() > 1 because we have a valid
- // Pair.
- unsafe { Some(Finder::with_pair_impl(needle, pair)) }
- } else {
- None
- }
- }
-
- /// Create a new `Finder` specific to SSE2 vectors and routines.
- ///
- /// # Safety
- ///
- /// Same as the safety for `packedpair::Finder::new`, and callers must also
- /// ensure that SSE2 is available.
- #[target_feature(enable = "sse2")]
- #[inline]
- unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
- let finder = packedpair::Finder::<__m128i>::new(needle, pair);
- Finder(finder)
- }
-
- /// Returns true when this implementation is available in the current
- /// environment.
- ///
- /// When this is true, it is guaranteed that [`Finder::with_pair`] will
- /// return a `Some` value. Similarly, when it is false, it is guaranteed
- /// that `Finder::with_pair` will return a `None` value. Notice that this
- /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely,
- /// even when `Finder::is_available` is true, it is not guaranteed that a
- /// valid [`Pair`] can be found from the needle given.
- ///
- /// Note also that for the lifetime of a single program, if this returns
- /// true then it will always return true.
- #[inline]
- pub fn is_available() -> bool {
- #[cfg(not(target_feature = "sse2"))]
- {
- false
- }
- #[cfg(target_feature = "sse2")]
- {
- true
- }
- }
-
- /// Execute a search using SSE2 vectors and routines.
- ///
- /// # Panics
- ///
- /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
- #[inline]
- pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
- // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
- unsafe { self.find_impl(haystack, needle) }
- }
-
- /// Run this finder on the given haystack as a prefilter.
- ///
- /// If a candidate match is found, then an offset where the needle *could*
- /// begin in the haystack is returned.
- ///
- /// # Panics
- ///
- /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
- #[inline]
- pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
- // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
- unsafe { self.find_prefilter_impl(haystack) }
- }
-
- /// Execute a search using SSE2 vectors and routines.
- ///
- /// # Panics
- ///
- /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
- ///
- /// # Safety
- ///
- /// (The target feature safety obligation is automatically fulfilled by
- /// virtue of being a method on `Finder`, which can only be constructed
- /// when it is safe to call `sse2` routines.)
- #[target_feature(enable = "sse2")]
- #[inline]
- unsafe fn find_impl(
- &self,
- haystack: &[u8],
- needle: &[u8],
- ) -> Option<usize> {
- self.0.find(haystack, needle)
- }
-
- /// Execute a prefilter search using SSE2 vectors and routines.
- ///
- /// # Panics
- ///
- /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
- ///
- /// # Safety
- ///
- /// (The target feature safety obligation is automatically fulfilled by
- /// virtue of being a method on `Finder`, which can only be constructed
- /// when it is safe to call `sse2` routines.)
- #[target_feature(enable = "sse2")]
- #[inline]
- unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
- self.0.find_prefilter(haystack)
- }
-
- /// Returns the pair of offsets (into the needle) whose bytes are checked
- /// as a predicate before confirming whether the needle exists at a
- /// particular position.
- #[inline]
- pub fn pair(&self) -> &Pair {
- self.0.pair()
- }
-
- /// Returns the minimum haystack length that this `Finder` can search.
- ///
- /// Using a haystack with length smaller than this in a search will result
- /// in a panic. The reason for this restriction is that this finder is
- /// meant to be a low-level component that is part of a larger substring
- /// strategy. In that sense, it avoids trying to handle all cases and
- /// instead only handles the cases that it can handle very well.
- #[inline]
- pub fn min_haystack_len(&self) -> usize {
- self.0.min_haystack_len()
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
- let f = Finder::new(needle)?;
- if haystack.len() < f.min_haystack_len() {
- return None;
- }
- Some(f.find(haystack, needle))
- }
-
- define_substring_forward_quickcheck!(find);
-
- #[test]
- fn forward_substring() {
- crate::tests::substring::Runner::new().fwd(find).run()
- }
-
- #[test]
- fn forward_packedpair() {
- fn find(
- haystack: &[u8],
- needle: &[u8],
- index1: u8,
- index2: u8,
- ) -> Option<Option<usize>> {
- let pair = Pair::with_indices(needle, index1, index2)?;
- let f = Finder::with_pair(needle, pair)?;
- if haystack.len() < f.min_haystack_len() {
- return None;
- }
- Some(f.find(haystack, needle))
- }
- crate::tests::packedpair::Runner::new().fwd(find).run()
- }
-
- #[test]
- fn forward_packedpair_prefilter() {
- fn find(
- haystack: &[u8],
- needle: &[u8],
- index1: u8,
- index2: u8,
- ) -> Option<Option<usize>> {
- let pair = Pair::with_indices(needle, index1, index2)?;
- let f = Finder::with_pair(needle, pair)?;
- if haystack.len() < f.min_haystack_len() {
- return None;
- }
- Some(f.find_prefilter(haystack))
- }
- crate::tests::packedpair::Runner::new().fwd(find).run()
- }
-}