Diffstat (limited to 'vendor/memchr/src/memmem')
-rw-r--r-- | vendor/memchr/src/memmem/mod.rs | 737
-rw-r--r-- | vendor/memchr/src/memmem/searcher.rs | 1030
2 files changed, 1767 insertions, 0 deletions
diff --git a/vendor/memchr/src/memmem/mod.rs b/vendor/memchr/src/memmem/mod.rs new file mode 100644 index 0000000..4f04943 --- /dev/null +++ b/vendor/memchr/src/memmem/mod.rs @@ -0,0 +1,737 @@ +/*! +This module provides forward and reverse substring search routines. + +Unlike the standard library's substring search routines, these work on +arbitrary bytes. For all non-empty needles, these routines will report exactly +the same values as the corresponding routines in the standard library. For +the empty needle, the standard library reports matches only at valid UTF-8 +boundaries, whereas these routines will report matches at every position. + +Other than being able to work on arbitrary bytes, the primary reason to prefer +these routines over the standard library routines is that these will generally +be faster. In some cases, significantly so. + +# Example: iterating over substring matches + +This example shows how to use [`find_iter`] to find occurrences of a substring +in a haystack. + +``` +use memchr::memmem; + +let haystack = b"foo bar foo baz foo"; + +let mut it = memmem::find_iter(haystack, "foo"); +assert_eq!(Some(0), it.next()); +assert_eq!(Some(8), it.next()); +assert_eq!(Some(16), it.next()); +assert_eq!(None, it.next()); +``` + +# Example: iterating over substring matches in reverse + +This example shows how to use [`rfind_iter`] to find occurrences of a substring +in a haystack starting from the end of the haystack. + +**NOTE:** This module does not implement double-ended iterators, so reverse +searches aren't done by calling `rev` on a forward iterator. + +``` +use memchr::memmem; + +let haystack = b"foo bar foo baz foo"; + +let mut it = memmem::rfind_iter(haystack, "foo"); +assert_eq!(Some(16), it.next()); +assert_eq!(Some(8), it.next()); +assert_eq!(Some(0), it.next()); +assert_eq!(None, it.next()); +``` + +# Example: repeating a search for the same needle + +It may be possible for the overhead of constructing a substring searcher to be +measurable in some workloads. In cases where the same needle is used to search +many haystacks, it is possible to do construction once and thus to avoid it for +subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for +reverse searches). + +``` +use memchr::memmem; + +let finder = memmem::Finder::new("foo"); + +assert_eq!(Some(4), finder.find(b"baz foo quux")); +assert_eq!(None, finder.find(b"quux baz bar")); +``` +*/ + +pub use crate::memmem::searcher::PrefilterConfig as Prefilter; + +// This is exported here for use in the crate::arch::all::twoway +// implementation. This is essentially an abstraction breaker. Namely, the +// public API of twoway doesn't support providing a prefilter, but its crate +// internal API does. The main reason for this is that I didn't want to do the +// API design required to support it without a concrete use case. +pub(crate) use crate::memmem::searcher::Pre; + +use crate::{ + arch::all::{ + packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank}, + rabinkarp, + }, + cow::CowBytes, + memmem::searcher::{PrefilterState, Searcher, SearcherRev}, +}; + +mod searcher; + +/// Returns an iterator over all non-overlapping occurrences of a substring in +/// a haystack. +/// +/// # Complexity +/// +/// This routine is guaranteed to have worst case linear time complexity +/// with respect to both the needle and the haystack. That is, this runs +/// in `O(needle.len() + haystack.len())` time. +/// +/// This routine is also guaranteed to have worst case constant space +/// complexity.
+/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use memchr::memmem; +/// +/// let haystack = b"foo bar foo baz foo"; +/// let mut it = memmem::find_iter(haystack, b"foo"); +/// assert_eq!(Some(0), it.next()); +/// assert_eq!(Some(8), it.next()); +/// assert_eq!(Some(16), it.next()); +/// assert_eq!(None, it.next()); +/// ``` +#[inline] +pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( + haystack: &'h [u8], + needle: &'n N, +) -> FindIter<'h, 'n> { + FindIter::new(haystack, Finder::new(needle)) +} + +/// Returns a reverse iterator over all non-overlapping occurrences of a +/// substring in a haystack. +/// +/// # Complexity +/// +/// This routine is guaranteed to have worst case linear time complexity +/// with respect to both the needle and the haystack. That is, this runs +/// in `O(needle.len() + haystack.len())` time. +/// +/// This routine is also guaranteed to have worst case constant space +/// complexity. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use memchr::memmem; +/// +/// let haystack = b"foo bar foo baz foo"; +/// let mut it = memmem::rfind_iter(haystack, b"foo"); +/// assert_eq!(Some(16), it.next()); +/// assert_eq!(Some(8), it.next()); +/// assert_eq!(Some(0), it.next()); +/// assert_eq!(None, it.next()); +/// ``` +#[inline] +pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( + haystack: &'h [u8], + needle: &'n N, +) -> FindRevIter<'h, 'n> { + FindRevIter::new(haystack, FinderRev::new(needle)) +} + +/// Returns the index of the first occurrence of the given needle. +/// +/// Note that if you're searching for the same needle in many different +/// small haystacks, it may be faster to initialize a [`Finder`] once, +/// and reuse it for each search. +/// +/// # Complexity +/// +/// This routine is guaranteed to have worst case linear time complexity +/// with respect to both the needle and the haystack. That is, this runs +/// in `O(needle.len() + haystack.len())` time. +/// +/// This routine is also guaranteed to have worst case constant space +/// complexity. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use memchr::memmem; +/// +/// let haystack = b"foo bar baz"; +/// assert_eq!(Some(0), memmem::find(haystack, b"foo")); +/// assert_eq!(Some(4), memmem::find(haystack, b"bar")); +/// assert_eq!(None, memmem::find(haystack, b"quux")); +/// ``` +#[inline] +pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> { + if haystack.len() < 64 { + rabinkarp::Finder::new(needle).find(haystack, needle) + } else { + Finder::new(needle).find(haystack) + } +} + +/// Returns the index of the last occurrence of the given needle. +/// +/// Note that if you're searching for the same needle in many different +/// small haystacks, it may be faster to initialize a [`FinderRev`] once, +/// and reuse it for each search. +/// +/// # Complexity +/// +/// This routine is guaranteed to have worst case linear time complexity +/// with respect to both the needle and the haystack. That is, this runs +/// in `O(needle.len() + haystack.len())` time. +/// +/// This routine is also guaranteed to have worst case constant space +/// complexity.
+/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use memchr::memmem; +/// +/// let haystack = b"foo bar baz"; +/// assert_eq!(Some(0), memmem::rfind(haystack, b"foo")); +/// assert_eq!(Some(4), memmem::rfind(haystack, b"bar")); +/// assert_eq!(Some(8), memmem::rfind(haystack, b"ba")); +/// assert_eq!(None, memmem::rfind(haystack, b"quux")); +/// ``` +#[inline] +pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> { + if haystack.len() < 64 { + rabinkarp::FinderRev::new(needle).rfind(haystack, needle) + } else { + FinderRev::new(needle).rfind(haystack) + } +} + +/// An iterator over non-overlapping substring matches. +/// +/// Matches are reported by the byte offset at which they begin. +/// +/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the +/// needle. +#[derive(Debug, Clone)] +pub struct FindIter<'h, 'n> { + haystack: &'h [u8], + prestate: PrefilterState, + finder: Finder<'n>, + pos: usize, +} + +impl<'h, 'n> FindIter<'h, 'n> { + #[inline(always)] + pub(crate) fn new( + haystack: &'h [u8], + finder: Finder<'n>, + ) -> FindIter<'h, 'n> { + let prestate = PrefilterState::new(); + FindIter { haystack, prestate, finder, pos: 0 } + } + + /// Convert this iterator into its owned variant, such that it no longer + /// borrows the finder and needle. + /// + /// If this is already an owned iterator, then this is a no-op. Otherwise, + /// this copies the needle. + /// + /// This is only available when the `alloc` feature is enabled. + #[cfg(feature = "alloc")] + #[inline] + pub fn into_owned(self) -> FindIter<'h, 'static> { + FindIter { + haystack: self.haystack, + prestate: self.prestate, + finder: self.finder.into_owned(), + pos: self.pos, + } + } +} + +impl<'h, 'n> Iterator for FindIter<'h, 'n> { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + let needle = self.finder.needle(); + let haystack = self.haystack.get(self.pos..)?; + let idx = + self.finder.searcher.find(&mut self.prestate, haystack, needle)?; + + let pos = self.pos + idx; + self.pos = pos + needle.len().max(1); + + Some(pos) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + // The largest possible number of non-overlapping matches is the + // quotient of the haystack and the needle (or the length of the + // haystack, if the needle is empty) + match self.haystack.len().checked_sub(self.pos) { + None => (0, Some(0)), + Some(haystack_len) => match self.finder.needle().len() { + // Empty needles always succeed and match at every point + // (including the very end) + 0 => ( + haystack_len.saturating_add(1), + haystack_len.checked_add(1), + ), + needle_len => (0, Some(haystack_len / needle_len)), + }, + } + } +} + +/// An iterator over non-overlapping substring matches in reverse. +/// +/// Matches are reported by the byte offset at which they begin. +/// +/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the +/// needle. +#[derive(Clone, Debug)] +pub struct FindRevIter<'h, 'n> { + haystack: &'h [u8], + finder: FinderRev<'n>, + /// When searching with an empty needle, this gets set to `None` after + /// we've yielded the last element at `0`. + pos: Option<usize>, +} + +impl<'h, 'n> FindRevIter<'h, 'n> { + #[inline(always)] + pub(crate) fn new( + haystack: &'h [u8], + finder: FinderRev<'n>, + ) -> FindRevIter<'h, 'n> { + let pos = Some(haystack.len()); + FindRevIter { haystack, finder, pos } + } + + /// Convert this iterator into its owned variant, such that it no longer + /// borrows the finder and needle. 
+ /// + /// If this is already an owned iterator, then this is a no-op. Otherwise, + /// this copies the needle. + /// + /// This is only available when the `std` feature is enabled. + #[cfg(feature = "alloc")] + #[inline] + pub fn into_owned(self) -> FindRevIter<'h, 'static> { + FindRevIter { + haystack: self.haystack, + finder: self.finder.into_owned(), + pos: self.pos, + } + } +} + +impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + let pos = match self.pos { + None => return None, + Some(pos) => pos, + }; + let result = self.finder.rfind(&self.haystack[..pos]); + match result { + None => None, + Some(i) => { + if pos == i { + self.pos = pos.checked_sub(1); + } else { + self.pos = Some(i); + } + Some(i) + } + } + } +} + +/// A single substring searcher fixed to a particular needle. +/// +/// The purpose of this type is to permit callers to construct a substring +/// searcher that can be used to search haystacks without the overhead of +/// constructing the searcher in the first place. This is a somewhat niche +/// concern when it's necessary to re-use the same needle to search multiple +/// different haystacks with as little overhead as possible. In general, using +/// [`find`] is good enough, but `Finder` is useful when you can meaningfully +/// observe searcher construction time in a profile. +/// +/// When the `std` feature is enabled, then this type has an `into_owned` +/// version which permits building a `Finder` that is not connected to +/// the lifetime of its needle. +#[derive(Clone, Debug)] +pub struct Finder<'n> { + needle: CowBytes<'n>, + searcher: Searcher, +} + +impl<'n> Finder<'n> { + /// Create a new finder for the given needle. + #[inline] + pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> { + FinderBuilder::new().build_forward(needle) + } + + /// Returns the index of the first occurrence of this needle in the given + /// haystack. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use memchr::memmem::Finder; + /// + /// let haystack = b"foo bar baz"; + /// assert_eq!(Some(0), Finder::new("foo").find(haystack)); + /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); + /// assert_eq!(None, Finder::new("quux").find(haystack)); + /// ``` + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + let mut prestate = PrefilterState::new(); + let needle = self.needle.as_slice(); + self.searcher.find(&mut prestate, haystack, needle) + } + + /// Returns an iterator over all occurrences of a substring in a haystack. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use memchr::memmem::Finder; + /// + /// let haystack = b"foo bar foo baz foo"; + /// let finder = Finder::new(b"foo"); + /// let mut it = finder.find_iter(haystack); + /// assert_eq!(Some(0), it.next()); + /// assert_eq!(Some(8), it.next()); + /// assert_eq!(Some(16), it.next()); + /// assert_eq!(None, it.next()); + /// ``` + #[inline] + pub fn find_iter<'a, 'h>( + &'a self, + haystack: &'h [u8], + ) -> FindIter<'h, 'a> { + FindIter::new(haystack, self.as_ref()) + } + + /// Convert this finder into its owned variant, such that it no longer + /// borrows the needle. + /// + /// If this is already an owned finder, then this is a no-op. Otherwise, + /// this copies the needle. + /// + /// This is only available when the `alloc` feature is enabled. + #[cfg(feature = "alloc")] + #[inline] + pub fn into_owned(self) -> Finder<'static> { + Finder { + needle: self.needle.into_owned(), + searcher: self.searcher.clone(), + } + } + + /// Convert this finder into its borrowed variant. + /// + /// This is primarily useful if your finder is owned and you'd like to + /// store its borrowed variant in some intermediate data structure. + /// + /// Note that the lifetime parameter of the returned finder is tied to the + /// lifetime of `self`, and may be shorter than the `'n` lifetime of the + /// needle itself. Namely, a finder's needle can be either borrowed or + /// owned, so the lifetime of the needle returned must necessarily be the + /// shorter of the two. + #[inline] + pub fn as_ref(&self) -> Finder<'_> { + Finder { + needle: CowBytes::new(self.needle()), + searcher: self.searcher.clone(), + } + } + + /// Returns the needle that this finder searches for. + /// + /// Note that the lifetime of the needle returned is tied to the lifetime + /// of the finder, and may be shorter than the `'n` lifetime. Namely, a + /// finder's needle can be either borrowed or owned, so the lifetime of the + /// needle returned must necessarily be the shorter of the two. + #[inline] + pub fn needle(&self) -> &[u8] { + self.needle.as_slice() + } +} + +/// A single substring reverse searcher fixed to a particular needle. +/// +/// The purpose of this type is to permit callers to construct a substring +/// searcher that can be used to search haystacks without the overhead of +/// constructing the searcher in the first place. This is a somewhat niche +/// concern when it's necessary to re-use the same needle to search multiple +/// different haystacks with as little overhead as possible. In general, +/// using [`rfind`] is good enough, but `FinderRev` is useful when you can +/// meaningfully observe searcher construction time in a profile. +/// +/// When the `std` feature is enabled, then this type has an `into_owned` +/// version which permits building a `FinderRev` that is not connected to +/// the lifetime of its needle. +#[derive(Clone, Debug)] +pub struct FinderRev<'n> { + needle: CowBytes<'n>, + searcher: SearcherRev, +} + +impl<'n> FinderRev<'n> { + /// Create a new reverse finder for the given needle. + #[inline] + pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> { + FinderBuilder::new().build_reverse(needle) + } + + /// Returns the index of the last occurrence of this needle in the given + /// haystack. + /// + /// The haystack may be any type that can be cheaply converted into a + /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. 
+ /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use memchr::memmem::FinderRev; + /// + /// let haystack = b"foo bar baz"; + /// assert_eq!(Some(0), FinderRev::new("foo").rfind(haystack)); + /// assert_eq!(Some(4), FinderRev::new("bar").rfind(haystack)); + /// assert_eq!(None, FinderRev::new("quux").rfind(haystack)); + /// ``` + pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { + self.searcher.rfind(haystack.as_ref(), self.needle.as_slice()) + } + + /// Returns a reverse iterator over all occurrences of a substring in a + /// haystack. + /// + /// # Complexity + /// + /// This routine is guaranteed to have worst case linear time complexity + /// with respect to both the needle and the haystack. That is, this runs + /// in `O(needle.len() + haystack.len())` time. + /// + /// This routine is also guaranteed to have worst case constant space + /// complexity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use memchr::memmem::FinderRev; + /// + /// let haystack = b"foo bar foo baz foo"; + /// let finder = FinderRev::new(b"foo"); + /// let mut it = finder.rfind_iter(haystack); + /// assert_eq!(Some(16), it.next()); + /// assert_eq!(Some(8), it.next()); + /// assert_eq!(Some(0), it.next()); + /// assert_eq!(None, it.next()); + /// ``` + #[inline] + pub fn rfind_iter<'a, 'h>( + &'a self, + haystack: &'h [u8], + ) -> FindRevIter<'h, 'a> { + FindRevIter::new(haystack, self.as_ref()) + } + + /// Convert this finder into its owned variant, such that it no longer + /// borrows the needle. + /// + /// If this is already an owned finder, then this is a no-op. Otherwise, + /// this copies the needle. + /// + /// This is only available when the `std` feature is enabled. + #[cfg(feature = "alloc")] + #[inline] + pub fn into_owned(self) -> FinderRev<'static> { + FinderRev { + needle: self.needle.into_owned(), + searcher: self.searcher.clone(), + } + } + + /// Convert this finder into its borrowed variant. + /// + /// This is primarily useful if your finder is owned and you'd like to + /// store its borrowed variant in some intermediate data structure. + /// + /// Note that the lifetime parameter of the returned finder is tied to the + /// lifetime of `self`, and may be shorter than the `'n` lifetime of the + /// needle itself. Namely, a finder's needle can be either borrowed or + /// owned, so the lifetime of the needle returned must necessarily be the + /// shorter of the two. + #[inline] + pub fn as_ref(&self) -> FinderRev<'_> { + FinderRev { + needle: CowBytes::new(self.needle()), + searcher: self.searcher.clone(), + } + } + + /// Returns the needle that this finder searches for. + /// + /// Note that the lifetime of the needle returned is tied to the lifetime + /// of the finder, and may be shorter than the `'n` lifetime. Namely, a + /// finder's needle can be either borrowed or owned, so the lifetime of the + /// needle returned must necessarily be the shorter of the two. + #[inline] + pub fn needle(&self) -> &[u8] { + self.needle.as_slice() + } +} + +/// A builder for constructing non-default forward or reverse memmem finders. +/// +/// A builder is primarily useful for configuring a substring searcher. 
+/// Currently, the only configuration exposed is the ability to disable +/// heuristic prefilters used to speed up certain searches. +#[derive(Clone, Debug, Default)] +pub struct FinderBuilder { + prefilter: Prefilter, +} + +impl FinderBuilder { + /// Create a new finder builder with default settings. + pub fn new() -> FinderBuilder { + FinderBuilder::default() + } + + /// Build a forward finder using the given needle from the current + /// settings. + pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>( + &self, + needle: &'n B, + ) -> Finder<'n> { + self.build_forward_with_ranker(DefaultFrequencyRank, needle) + } + + /// Build a forward finder using the given needle and a custom heuristic for + /// determining the frequency of a given byte in the dataset. + /// See [`HeuristicFrequencyRank`] for more details. + pub fn build_forward_with_ranker< + 'n, + R: HeuristicFrequencyRank, + B: ?Sized + AsRef<[u8]>, + >( + &self, + ranker: R, + needle: &'n B, + ) -> Finder<'n> { + let needle = needle.as_ref(); + Finder { + needle: CowBytes::new(needle), + searcher: Searcher::new(self.prefilter, ranker, needle), + } + } + + /// Build a reverse finder using the given needle from the current + /// settings. + pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>( + &self, + needle: &'n B, + ) -> FinderRev<'n> { + let needle = needle.as_ref(); + FinderRev { + needle: CowBytes::new(needle), + searcher: SearcherRev::new(needle), + } + } + + /// Configure the prefilter setting for the finder. + /// + /// See the documentation for [`Prefilter`] for more discussion on why + /// you might want to configure this. + pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { + self.prefilter = prefilter; + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h))); + define_substring_reverse_quickcheck!(|h, n| Some( + FinderRev::new(n).rfind(h) + )); + + #[test] + fn forward() { + crate::tests::substring::Runner::new() + .fwd(|h, n| Some(Finder::new(n).find(h))) + .run(); + } + + #[test] + fn reverse() { + crate::tests::substring::Runner::new() + .rev(|h, n| Some(FinderRev::new(n).rfind(h))) + .run(); + } +} diff --git a/vendor/memchr/src/memmem/searcher.rs b/vendor/memchr/src/memmem/searcher.rs new file mode 100644 index 0000000..98b9bd6 --- /dev/null +++ b/vendor/memchr/src/memmem/searcher.rs @@ -0,0 +1,1030 @@ +use crate::arch::all::{ + packedpair::{HeuristicFrequencyRank, Pair}, + rabinkarp, twoway, +}; + +#[cfg(target_arch = "aarch64")] +use crate::arch::aarch64::neon::packedpair as neon; +#[cfg(target_arch = "wasm32")] +use crate::arch::wasm32::simd128::packedpair as simd128; +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +use crate::arch::x86_64::{ + avx2::packedpair as avx2, sse2::packedpair as sse2, +}; + +/// A "meta" substring searcher. +/// +/// To a first approximation, this chooses what it believes to be the "best" +/// substring search implementation based on the needle at construction time. +/// Then, every call to `find` will execute that particular implementation. To +/// a second approximation, multiple substring search algorithms may be used, +/// depending on the haystack. For example, for supremely short haystacks, +/// Rabin-Karp is typically used. +/// +/// See the documentation on `Prefilter` for an explanation of the dispatching +/// mechanism.
The quick summary is that an enum has too much overhead and +/// we can't use dynamic dispatch via traits because we need to work in a +/// core-only environment. (Dynamic dispatch works in core-only, but you +/// need `&dyn Trait` and we really need a `Box<dyn Trait>` here. The latter +/// requires `alloc`.) So instead, we use a union and an appropriately paired +/// free function to read from the correct field on the union and execute the +/// chosen substring search implementation. +#[derive(Clone)] +pub(crate) struct Searcher { + call: SearcherKindFn, + kind: SearcherKind, + rabinkarp: rabinkarp::Finder, +} + +impl Searcher { + /// Creates a new "meta" substring searcher that attempts to choose the + /// best algorithm based on the needle, heuristics and what the current + /// target supports. + #[inline] + pub(crate) fn new<R: HeuristicFrequencyRank>( + prefilter: PrefilterConfig, + ranker: R, + needle: &[u8], + ) -> Searcher { + let rabinkarp = rabinkarp::Finder::new(needle); + if needle.len() <= 1 { + return if needle.is_empty() { + trace!("building empty substring searcher"); + Searcher { + call: searcher_kind_empty, + kind: SearcherKind { empty: () }, + rabinkarp, + } + } else { + trace!("building one-byte substring searcher"); + debug_assert_eq!(1, needle.len()); + Searcher { + call: searcher_kind_one_byte, + kind: SearcherKind { one_byte: needle[0] }, + rabinkarp, + } + }; + } + let pair = match Pair::with_ranker(needle, &ranker) { + Some(pair) => pair, + None => return Searcher::twoway(needle, rabinkarp, None), + }; + debug_assert_ne!( + pair.index1(), + pair.index2(), + "pair offsets should not be equivalent" + ); + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + { + if let Some(pp) = avx2::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building x86_64 AVX2 substring searcher"); + let kind = SearcherKind { avx2: pp }; + Searcher { call: searcher_kind_avx2, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::avx2(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building x86_64 SSE2 substring searcher"); + let kind = SearcherKind { sse2: pp }; + Searcher { call: searcher_kind_sse2, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::sse2(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + // We're pretty unlikely to get to this point, but it is + // possible to be running on x86_64 without SSE2. Namely, it's + // really up to the OS whether it wants to support vector + // registers or not. 
+ let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + #[cfg(target_arch = "wasm32")] + { + if let Some(pp) = simd128::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building wasm32 simd128 substring searcher"); + let kind = SearcherKind { simd128: pp }; + Searcher { call: searcher_kind_simd128, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::simd128(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + #[cfg(target_arch = "aarch64")] + { + if let Some(pp) = neon::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building aarch64 neon substring searcher"); + let kind = SearcherKind { neon: pp }; + Searcher { call: searcher_kind_neon, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::neon(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + #[cfg(not(any( + all(target_arch = "x86_64", target_feature = "sse2"), + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + } + + /// Creates a new searcher that always uses the Two-Way algorithm. This is + /// typically used when vector algorithms are unavailable or inappropriate. + /// (For example, when the needle is "too long.") + /// + /// If a prefilter is given, then the searcher returned will be accelerated + /// by the prefilter. + #[inline] + fn twoway( + needle: &[u8], + rabinkarp: rabinkarp::Finder, + prestrat: Option<Prefilter>, + ) -> Searcher { + let finder = twoway::Finder::new(needle); + match prestrat { + None => { + trace!("building scalar two-way substring searcher"); + let kind = SearcherKind { two_way: finder }; + Searcher { call: searcher_kind_two_way, kind, rabinkarp } + } + Some(prestrat) => { + trace!( + "building scalar two-way \ + substring searcher with a prefilter" + ); + let two_way_with_prefilter = + TwoWayWithPrefilter { finder, prestrat }; + let kind = SearcherKind { two_way_with_prefilter }; + Searcher { + call: searcher_kind_two_way_with_prefilter, + kind, + rabinkarp, + } + } + } + } + + /// Searches the given haystack for the given needle. The needle given + /// should be the same as the needle that this finder was initialized + /// with. + /// + /// Inlining this can lead to big wins for latency, and #[inline] doesn't + /// seem to be enough in some cases. + #[inline(always)] + pub(crate) fn find( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + if haystack.len() < needle.len() { + None + } else { + // SAFETY: By construction, we've ensured that the function + // in `self.call` is properly paired with the union used in + // `self.kind`. 
+ unsafe { (self.call)(self, prestate, haystack, needle) } + } + } +} + +impl core::fmt::Debug for Searcher { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("Searcher") + .field("call", &"<searcher function>") + .field("kind", &"<searcher kind union>") + .field("rabinkarp", &self.rabinkarp) + .finish() + } +} + +/// A union indicating one of several possible substring search implementations +/// that are in active use. +/// +/// This union should only be read by one of the functions prefixed with +/// `searcher_kind_`. Namely, the correct function is meant to be paired with +/// the union by the caller, such that the function always reads from the +/// designated union field. +#[derive(Clone, Copy)] +union SearcherKind { + empty: (), + one_byte: u8, + two_way: twoway::Finder, + two_way_with_prefilter: TwoWayWithPrefilter, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + sse2: crate::arch::x86_64::sse2::packedpair::Finder, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + avx2: crate::arch::x86_64::avx2::packedpair::Finder, + #[cfg(target_arch = "wasm32")] + simd128: crate::arch::wasm32::simd128::packedpair::Finder, + #[cfg(target_arch = "aarch64")] + neon: crate::arch::aarch64::neon::packedpair::Finder, +} + +/// A two-way substring searcher with a prefilter. +#[derive(Copy, Clone, Debug)] +struct TwoWayWithPrefilter { + finder: twoway::Finder, + prestrat: Prefilter, +} + +/// The type of a substring search function. +/// +/// # Safety +/// +/// When using a function of this type, callers must ensure that the correct +/// function is paired with the value populated in `SearcherKind` union. +type SearcherKindFn = unsafe fn( + searcher: &Searcher, + prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize>; + +/// Reads from the `empty` field of `SearcherKind` to handle the case of +/// searching for the empty needle. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.empty` union field is set. +unsafe fn searcher_kind_empty( + _searcher: &Searcher, + _prestate: &mut PrefilterState, + _haystack: &[u8], + _needle: &[u8], +) -> Option<usize> { + Some(0) +} + +/// Reads from the `one_byte` field of `SearcherKind` to handle the case of +/// searching for a single byte needle. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.one_byte` union field is set. +unsafe fn searcher_kind_one_byte( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + _needle: &[u8], +) -> Option<usize> { + let needle = searcher.kind.one_byte; + crate::memchr(needle, haystack) +} + +/// Reads from the `two_way` field of `SearcherKind` to handle the case of +/// searching for an arbitrary needle without prefilter acceleration. Works on +/// all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.two_way` union field is set. +unsafe fn searcher_kind_two_way( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + if rabinkarp::is_fast(haystack, needle) { + searcher.rabinkarp.find(haystack, needle) + } else { + searcher.kind.two_way.find(haystack, needle) + } +} + +/// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle +/// the case of searching for an arbitrary needle with prefilter acceleration. +/// Works on all platforms. 
+/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union +/// field is set. +unsafe fn searcher_kind_two_way_with_prefilter( + searcher: &Searcher, + prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + if rabinkarp::is_fast(haystack, needle) { + searcher.rabinkarp.find(haystack, needle) + } else { + let TwoWayWithPrefilter { ref finder, ref prestrat } = + searcher.kind.two_way_with_prefilter; + let pre = Pre { prestate, prestrat }; + finder.find_with_prefilter(Some(pre), haystack, needle) + } +} + +/// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2 +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.sse2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn searcher_kind_sse2( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + let finder = &searcher.kind.sse2; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2 +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.avx2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn searcher_kind_avx2( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + let finder = &searcher.kind.avx2; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// Reads from the `simd128` field of `SearcherKind` to execute the wasm32 +/// simd128 vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.simd128` union field is set. +#[cfg(target_arch = "wasm32")] +unsafe fn searcher_kind_simd128( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + let finder = &searcher.kind.simd128; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// Reads from the `neon` field of `SearcherKind` to execute the aarch64 neon +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.neon` union field is set. +#[cfg(target_arch = "aarch64")] +unsafe fn searcher_kind_neon( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option<usize> { + let finder = &searcher.kind.neon; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// A reverse substring searcher. +#[derive(Clone, Debug)] +pub(crate) struct SearcherRev { + kind: SearcherRevKind, + rabinkarp: rabinkarp::FinderRev, +} + +/// The kind of the reverse searcher. +/// +/// For the reverse case, we don't do any SIMD acceleration or prefilters. +/// There is no specific technical reason why we don't, but rather don't do it +/// because it's not clear it's worth the extra code to do so. If you have a +/// use case for it, please file an issue. 
+/// +/// We also don't do the union trick as we do with the forward case and +/// prefilters. Basically for the same reason we don't have prefilters or +/// vector algorithms for reverse searching: it's not clear it's worth doing. +/// Please file an issue if you have a compelling use case for fast reverse +/// substring search. +#[derive(Clone, Debug)] +enum SearcherRevKind { + Empty, + OneByte { needle: u8 }, + TwoWay { finder: twoway::FinderRev }, +} + +impl SearcherRev { + /// Creates a new searcher for finding occurrences of the given needle in + /// reverse. That is, it reports the last (instead of the first) occurrence + /// of a needle in a haystack. + #[inline] + pub(crate) fn new(needle: &[u8]) -> SearcherRev { + let kind = if needle.len() <= 1 { + if needle.is_empty() { + trace!("building empty reverse substring searcher"); + SearcherRevKind::Empty + } else { + trace!("building one-byte reverse substring searcher"); + debug_assert_eq!(1, needle.len()); + SearcherRevKind::OneByte { needle: needle[0] } + } + } else { + trace!("building scalar two-way reverse substring searcher"); + let finder = twoway::FinderRev::new(needle); + SearcherRevKind::TwoWay { finder } + }; + let rabinkarp = rabinkarp::FinderRev::new(needle); + SearcherRev { kind, rabinkarp } + } + + /// Searches the given haystack for the last occurrence of the given + /// needle. The needle given should be the same as the needle that this + /// finder was initialized with. + #[inline] + pub(crate) fn rfind( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option<usize> { + if haystack.len() < needle.len() { + return None; + } + match self.kind { + SearcherRevKind::Empty => Some(haystack.len()), + SearcherRevKind::OneByte { needle } => { + crate::memrchr(needle, haystack) + } + SearcherRevKind::TwoWay { ref finder } => { + if rabinkarp::is_fast(haystack, needle) { + self.rabinkarp.rfind(haystack, needle) + } else { + finder.rfind(haystack, needle) + } + } + } + } +} + +/// Prefilter controls whether heuristics are used to accelerate searching. +/// +/// A prefilter refers to the idea of detecting candidate matches very quickly, +/// and then confirming whether those candidates are full matches. This +/// idea can be quite effective since it's often the case that looking for +/// candidates can be a lot faster than running a complete substring search +/// over the entire input. Namely, looking for candidates can be done with +/// extremely fast vectorized code. +/// +/// The downside of a prefilter is that it assumes false positives (which are +/// candidates generated by a prefilter that aren't matches) are somewhat rare +/// relative to the frequency of full matches. That is, if a lot of false +/// positives are generated, then it's possible for search time to be worse +/// than if the prefilter wasn't enabled in the first place. +/// +/// Another downside of a prefilter is that it can result in highly variable +/// performance, where some cases are extraordinarily fast and others aren't. +/// Typically, variable performance isn't a problem, but it may be for your use +/// case. +/// +/// The use of prefilters in this implementation does use a heuristic to detect +/// when a prefilter might not be carrying its weight, and will dynamically +/// disable its use. Nevertheless, this configuration option gives callers +/// the ability to disable prefilters if you have knowledge that they won't be +/// useful. 
+#[derive(Clone, Copy, Debug)] +#[non_exhaustive] +pub enum PrefilterConfig { + /// Never use a prefilter in substring search. + None, + /// Automatically detect whether a heuristic prefilter should be used. If + /// it is used, then heuristics will be used to dynamically disable the + /// prefilter if it is believed to not be carrying its weight. + Auto, +} + +impl Default for PrefilterConfig { + fn default() -> PrefilterConfig { + PrefilterConfig::Auto + } +} + +impl PrefilterConfig { + /// Returns true when this prefilter is set to the `None` variant. + fn is_none(&self) -> bool { + matches!(*self, PrefilterConfig::None) + } +} + +/// The implementation of a prefilter. +/// +/// This type encapsulates dispatch to one of several possible choices for a +/// prefilter. Generally speaking, all prefilters have the same approximate +/// algorithm: they choose a couple of bytes from the needle that are believed +/// to be rare, use a fast vector algorithm to look for those bytes and return +/// positions as candidates for some substring search algorithm (currently only +/// Two-Way) to confirm as a match or not. +/// +/// The differences between the algorithms are actually at the vector +/// implementation level. Namely, we need different routines based on both +/// which target architecture we're on and what CPU features are supported. +/// +/// The straightforwardly obvious approach here is to use an enum, and make +/// `Prefilter::find` do case analysis to determine which algorithm was +/// selected and invoke it. However, I've observed that this leads to poor +/// codegen in some cases, especially in latency sensitive benchmarks. That is, +/// this approach comes with overhead that I wasn't able to eliminate. +/// +/// The second obvious approach is to use dynamic dispatch with traits. Doing +/// that in this context where `Prefilter` owns the selection generally +/// requires heap allocation, and this code is designed to run in core-only +/// environments. +/// +/// So we settle on using a union (that's `PrefilterKind`) and a function +/// pointer (that's `PrefilterKindFn`). We select the right function pointer +/// based on which field in the union we set, and that function in turn +/// knows which field of the union to access. The downside of this approach +/// is that it forces us to think about safety, but the upside is that +/// there are some nice latency improvements to benchmarks. (Especially the +/// `memmem/sliceslice/short` benchmark.) +/// +/// In cases where we've selected a vector algorithm and the haystack given +/// is too short, we fall back to the scalar version of `memchr` on the +/// `rarest_byte`. (The scalar version of `memchr` is still better than a naive +/// byte-at-a-time loop because it will read in `usize`-sized chunks at a +/// time.) +#[derive(Clone, Copy)] +struct Prefilter { + call: PrefilterKindFn, + kind: PrefilterKind, + rarest_byte: u8, + rarest_offset: u8, +} + +impl Prefilter { + /// Return a "fallback" prefilter, but only if it is believed to be + /// effective. + #[inline] + fn fallback<R: HeuristicFrequencyRank>( + ranker: R, + pair: Pair, + needle: &[u8], + ) -> Option<Prefilter> { + /// The maximum frequency rank permitted for the fallback prefilter. + /// If the rarest byte in the needle has a frequency rank above this + /// value, then no prefilter is used if the fallback prefilter would + /// otherwise be selected.
+ const MAX_FALLBACK_RANK: u8 = 250; + + trace!("building fallback prefilter"); + let rarest_offset = pair.index1(); + let rarest_byte = needle[usize::from(rarest_offset)]; + let rarest_rank = ranker.rank(rarest_byte); + if rarest_rank > MAX_FALLBACK_RANK { + None + } else { + let finder = crate::arch::all::packedpair::Finder::with_pair( + needle, + pair.clone(), + )?; + let call = prefilter_kind_fallback; + let kind = PrefilterKind { fallback: finder }; + Some(Prefilter { call, kind, rarest_byte, rarest_offset }) + } + } + + /// Return a prefilter using a x86_64 SSE2 vector algorithm. + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + #[inline] + fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter { + trace!("building x86_64 SSE2 prefilter"); + let rarest_offset = finder.pair().index1(); + let rarest_byte = needle[usize::from(rarest_offset)]; + Prefilter { + call: prefilter_kind_sse2, + kind: PrefilterKind { sse2: finder }, + rarest_byte, + rarest_offset, + } + } + + /// Return a prefilter using a x86_64 AVX2 vector algorithm. + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + #[inline] + fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter { + trace!("building x86_64 AVX2 prefilter"); + let rarest_offset = finder.pair().index1(); + let rarest_byte = needle[usize::from(rarest_offset)]; + Prefilter { + call: prefilter_kind_avx2, + kind: PrefilterKind { avx2: finder }, + rarest_byte, + rarest_offset, + } + } + + /// Return a prefilter using a wasm32 simd128 vector algorithm. + #[cfg(target_arch = "wasm32")] + #[inline] + fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter { + trace!("building wasm32 simd128 prefilter"); + let rarest_offset = finder.pair().index1(); + let rarest_byte = needle[usize::from(rarest_offset)]; + Prefilter { + call: prefilter_kind_simd128, + kind: PrefilterKind { simd128: finder }, + rarest_byte, + rarest_offset, + } + } + + /// Return a prefilter using a aarch64 neon vector algorithm. + #[cfg(target_arch = "aarch64")] + #[inline] + fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter { + trace!("building aarch64 neon prefilter"); + let rarest_offset = finder.pair().index1(); + let rarest_byte = needle[usize::from(rarest_offset)]; + Prefilter { + call: prefilter_kind_neon, + kind: PrefilterKind { neon: finder }, + rarest_byte, + rarest_offset, + } + } + + /// Return a *candidate* position for a match. + /// + /// When this returns an offset, it implies that a match could begin at + /// that offset, but it may not. That is, it is possible for a false + /// positive to be returned. + /// + /// When `None` is returned, then it is guaranteed that there are no + /// matches for the needle in the given haystack. That is, it is impossible + /// for a false negative to be returned. + /// + /// The purpose of this routine is to look for candidate matching positions + /// as quickly as possible before running a (likely) slower confirmation + /// step. + #[inline] + fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: By construction, we've ensured that the function in + // `self.call` is properly paired with the union used in `self.kind`. + unsafe { (self.call)(self, haystack) } + } + + /// A "simple" prefilter that just looks for the occurrence of the rarest + /// byte from the needle. This is generally only used for very small + /// haystacks. 
+ #[inline] + fn find_simple(&self, haystack: &[u8]) -> Option<usize> { + // We don't use crate::memchr here because the haystack should be small + // enough that memchr won't be able to use vector routines anyway. So + // we just skip straight to the fallback implementation which is likely + // faster. (A byte-at-a-time loop is only used when the haystack is + // smaller than `size_of::<usize>()`.) + crate::arch::all::memchr::One::new(self.rarest_byte) + .find(haystack) + .map(|i| i.saturating_sub(usize::from(self.rarest_offset))) + } +} + +impl core::fmt::Debug for Prefilter { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("Prefilter") + .field("call", &"<prefilter function>") + .field("kind", &"<prefilter kind union>") + .field("rarest_byte", &self.rarest_byte) + .field("rarest_offset", &self.rarest_offset) + .finish() + } +} + +/// A union indicating one of several possible prefilters that are in active +/// use. +/// +/// This union should only be read by one of the functions prefixed with +/// `prefilter_kind_`. Namely, the correct function is meant to be paired with +/// the union by the caller, such that the function always reads from the +/// designated union field. +#[derive(Clone, Copy)] +union PrefilterKind { + fallback: crate::arch::all::packedpair::Finder, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + sse2: crate::arch::x86_64::sse2::packedpair::Finder, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + avx2: crate::arch::x86_64::avx2::packedpair::Finder, + #[cfg(target_arch = "wasm32")] + simd128: crate::arch::wasm32::simd128::packedpair::Finder, + #[cfg(target_arch = "aarch64")] + neon: crate::arch::aarch64::neon::packedpair::Finder, +} + +/// The type of a prefilter function. +/// +/// # Safety +/// +/// When using a function of this type, callers must ensure that the correct +/// function is paired with the value populated in `PrefilterKind` union. +type PrefilterKindFn = + unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option<usize>; + +/// Reads from the `fallback` field of `PrefilterKind` to execute the fallback +/// prefilter. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.fallback` union field is set. +unsafe fn prefilter_kind_fallback( + strat: &Prefilter, + haystack: &[u8], +) -> Option<usize> { + strat.kind.fallback.find_prefilter(haystack) +} + +/// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2 +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.sse2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn prefilter_kind_sse2( + strat: &Prefilter, + haystack: &[u8], +) -> Option<usize> { + let finder = &strat.kind.sse2; + if haystack.len() < finder.min_haystack_len() { + strat.find_simple(haystack) + } else { + finder.find_prefilter(haystack) + } +} + +/// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2 +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.avx2` union field is set. 
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn prefilter_kind_avx2( + strat: &Prefilter, + haystack: &[u8], +) -> Option<usize> { + let finder = &strat.kind.avx2; + if haystack.len() < finder.min_haystack_len() { + strat.find_simple(haystack) + } else { + finder.find_prefilter(haystack) + } +} + +/// Reads from the `simd128` field of `PrefilterKind` to execute the wasm32 +/// simd128 prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.simd128` union field is set. +#[cfg(target_arch = "wasm32")] +unsafe fn prefilter_kind_simd128( + strat: &Prefilter, + haystack: &[u8], +) -> Option<usize> { + let finder = &strat.kind.simd128; + if haystack.len() < finder.min_haystack_len() { + strat.find_simple(haystack) + } else { + finder.find_prefilter(haystack) + } +} + +/// Reads from the `neon` field of `PrefilterKind` to execute the aarch64 neon +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.neon` union field is set. +#[cfg(target_arch = "aarch64")] +unsafe fn prefilter_kind_neon( + strat: &Prefilter, + haystack: &[u8], +) -> Option<usize> { + let finder = &strat.kind.neon; + if haystack.len() < finder.min_haystack_len() { + strat.find_simple(haystack) + } else { + finder.find_prefilter(haystack) + } +} + +/// PrefilterState tracks state associated with the effectiveness of a +/// prefilter. It is used to track how many bytes, on average, are skipped by +/// the prefilter. If this average dips below a certain threshold over time, +/// then the state renders the prefilter inert and stops using it. +/// +/// A prefilter state should be created for each search. (Where creating an +/// iterator is treated as a single search.) A prefilter state should only be +/// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert +/// `PrefilterState`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct PrefilterState { + /// The number of skips that has been executed. This is always 1 greater + /// than the actual number of skips. The special sentinel value of 0 + /// indicates that the prefilter is inert. This is useful to avoid + /// additional checks to determine whether the prefilter is still + /// "effective." Once a prefilter becomes inert, it should no longer be + /// used (according to our heuristics). + skips: u32, + /// The total number of bytes that have been skipped. + skipped: u32, +} + +impl PrefilterState { + /// The minimum number of skip attempts to try before considering whether + /// a prefilter is effective or not. + const MIN_SKIPS: u32 = 50; + + /// The minimum amount of bytes that skipping must average. + /// + /// This value was chosen based on varying it and checking + /// the microbenchmarks. In particular, this can impact the + /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set + /// too low. + const MIN_SKIP_BYTES: u32 = 8; + + /// Create a fresh prefilter state. + #[inline] + pub(crate) fn new() -> PrefilterState { + PrefilterState { skips: 1, skipped: 0 } + } + + /// Update this state with the number of bytes skipped on the last + /// invocation of the prefilter. + #[inline] + fn update(&mut self, skipped: usize) { + self.skips = self.skips.saturating_add(1); + // We need to do this dance since it's technically possible for + // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the + // size of a prefilter state.) 
+ self.skipped = match u32::try_from(skipped) { + Err(_) => core::u32::MAX, + Ok(skipped) => self.skipped.saturating_add(skipped), + }; + } + + /// Return true if and only if this state indicates that a prefilter is + /// still effective. + #[inline] + fn is_effective(&mut self) -> bool { + if self.is_inert() { + return false; + } + if self.skips() < PrefilterState::MIN_SKIPS { + return true; + } + if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() { + return true; + } + + // We're inert. + self.skips = 0; + false + } + + /// Returns true if the prefilter this state represents should no longer + /// be used. + #[inline] + fn is_inert(&self) -> bool { + self.skips == 0 + } + + /// Returns the total number of times the prefilter has been used. + #[inline] + fn skips(&self) -> u32 { + // Remember, `0` is a sentinel value indicating inertness, so we + // always need to subtract `1` to get our actual number of skips. + self.skips.saturating_sub(1) + } +} + +/// A combination of prefilter effectiveness state and the prefilter itself. +#[derive(Debug)] +pub(crate) struct Pre<'a> { + /// State that tracks the effectiveness of a prefilter. + prestate: &'a mut PrefilterState, + /// The actual prefilter. + prestrat: &'a Prefilter, +} + +impl<'a> Pre<'a> { + /// Call this prefilter on the given haystack with the given needle. + #[inline] + pub(crate) fn find(&mut self, haystack: &[u8]) -> Option<usize> { + let result = self.prestrat.find(haystack); + self.prestate.update(result.unwrap_or(haystack.len())); + result + } + + /// Return true if and only if this prefilter should be used. + #[inline] + pub(crate) fn is_effective(&mut self) -> bool { + self.prestate.is_effective() + } +} + +/// Returns true if the needle has the right characteristics for a vector +/// algorithm to handle the entirety of substring search. +/// +/// Vector algorithms can be used for prefilters for other substring search +/// algorithms (like Two-Way), but they can also be used for substring search +/// on their own. When used for substring search, vector algorithms will +/// quickly identify candidate match positions (just like in the prefilter +/// case), but instead of returning the candidate position they will try to +/// confirm the match themselves. Confirmation happens via `memcmp`. This +/// works well for short needles, but can break down when many false candidate +/// positions are generated for large needles. Thus, we only permit vector +/// algorithms to own substring search when the needle is of a certain length. +#[inline] +fn do_packed_search(needle: &[u8]) -> bool { + /// The minimum length of a needle required for this algorithm. The minimum + /// is 2 since a length of 1 should just use memchr and a length of 0 isn't + /// a case handled by this searcher. + const MIN_LEN: usize = 2; + + /// The maximum length of a needle required for this algorithm. + /// + /// In reality, there is no hard max here. The code below can handle any + /// length needle. (Perhaps that suggests there are missing optimizations.) + /// Instead, this is a heuristic and a bound guaranteeing our linear time + /// complexity. + /// + /// It is a heuristic because when a candidate match is found, memcmp is + /// run. For very large needles with lots of false positives, memcmp can + /// make the code run quite slow. + /// + /// It is a bound because the worst case behavior with memcmp is + /// multiplicative in the size of the needle and haystack, and we want + /// to keep that additive. 
This bound ensures we still meet that bound +/// theoretically, since it's just a constant. We aren't acting in bad +/// faith here; memcmp on tiny needles is so fast that even in pathological +/// cases (see pathological vector benchmarks), this is still just as fast +/// or faster in practice. +/// +/// This specific number was chosen by tweaking a bit and running +/// benchmarks. The rare-medium-needle, for example, gets about 5% faster +/// by using this algorithm instead of a prefilter-accelerated Two-Way. +/// There's also a theoretical desire to keep this number reasonably +/// low, to mitigate the impact of pathological cases. I did try 64, and +/// some benchmarks got a little better, and others (particularly the +/// pathological ones), got a lot worse. So... 32 it is? + const MAX_LEN: usize = 32; + MIN_LEN <= needle.len() && needle.len() <= MAX_LEN +}
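
The module documentation in mod.rs above notes that, unlike the standard library, these routines report empty-needle matches at every byte position rather than only at valid UTF-8 boundaries. A small sketch illustrating that documented behavior through the public API added by this diff; the haystack value is illustrative:

use memchr::memmem;

fn main() {
    // '☃' is three bytes in UTF-8, so the standard library would report
    // empty-needle matches only at the char boundaries 0, 1, 4 and 5.
    // memmem reports a match at every byte offset, including the two
    // offsets inside '☃'.
    let haystack = "a☃z";
    let offsets: Vec<usize> =
        memmem::find_iter(haystack.as_bytes(), "").collect();
    assert_eq!(vec![0, 1, 2, 3, 4, 5], offsets);
}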
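
`FinderBuilder` is the only way to reach the `PrefilterConfig` knob described in searcher.rs, but none of its doc comments carry an example. A minimal usage sketch, assuming only the API introduced by this diff (`FinderBuilder::prefilter` plus the `PrefilterConfig` re-export named `memmem::Prefilter`); the needle and haystacks are illustrative:

use memchr::memmem::{FinderBuilder, Prefilter};

fn main() {
    // Disable the heuristic prefilter. Per the PrefilterConfig docs above,
    // this gives up potentially large speedups in exchange for more
    // predictable, less variable search latency.
    let finder = FinderBuilder::new()
        .prefilter(Prefilter::None)
        .build_forward("needle");

    assert_eq!(Some(4), finder.find(b"foo needle bar"));
    assert_eq!(None, finder.find(b"no match here"));
}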
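
Both `Searcher` and `Prefilter` in searcher.rs justify pairing a `union` with a matching function pointer instead of using an enum (case-analysis overhead in hot paths) or `Box<dyn Trait>` (requires `alloc`, unavailable in core-only builds). A stripped-down, self-contained sketch of that dispatch pattern; the toy `Kind`/`KindFn` types and the naive search bodies are invented for illustration and are not memchr's actual internals:

// A toy version of the SearcherKind union: exactly one field is ever
// initialized, and no discriminant is stored alongside it.
#[derive(Clone, Copy)]
union Kind {
    one_byte: u8,
    two_bytes: [u8; 2],
}

// The function pointer plays the role of the missing discriminant: it is
// chosen together with the union field at construction time.
type KindFn = unsafe fn(kind: &Kind, haystack: &[u8]) -> Option<usize>;

// SAFETY: callers must guarantee that `kind.one_byte` is the initialized field.
unsafe fn find_one_byte(kind: &Kind, haystack: &[u8]) -> Option<usize> {
    let b = kind.one_byte;
    haystack.iter().position(|&x| x == b)
}

// SAFETY: callers must guarantee that `kind.two_bytes` is the initialized field.
unsafe fn find_two_bytes(kind: &Kind, haystack: &[u8]) -> Option<usize> {
    let [b1, b2] = kind.two_bytes;
    haystack.windows(2).position(|w| w[0] == b1 && w[1] == b2)
}

struct Searcher {
    call: KindFn,
    kind: Kind,
}

impl Searcher {
    fn new(needle: &[u8]) -> Searcher {
        // The field and the function are selected together; this pairing is
        // the invariant that makes the unsafe call in `find` sound.
        match needle {
            &[b] => Searcher { call: find_one_byte, kind: Kind { one_byte: b } },
            &[b1, b2] => Searcher {
                call: find_two_bytes,
                kind: Kind { two_bytes: [b1, b2] },
            },
            _ => unimplemented!("sketch only covers 1- and 2-byte needles"),
        }
    }

    fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: `call` was paired with `kind` in `new`, so the function
        // reads the union field that was actually written. There is no enum
        // tag to branch on and no heap allocation, which is the point of
        // the pattern in a core-only crate.
        unsafe { (self.call)(&self.kind, haystack) }
    }
}

fn main() {
    assert_eq!(Some(2), Searcher::new(b"c").find(b"abcd"));
    assert_eq!(Some(1), Searcher::new(b"bc").find(b"abcd"));
    assert_eq!(None, Searcher::new(b"bc").find(b"xyz"));
}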