aboutsummaryrefslogtreecommitdiff
path: root/vendor/memchr/src/memmem/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/memchr/src/memmem/mod.rs')
-rw-r--r--vendor/memchr/src/memmem/mod.rs737
1 files changed, 0 insertions, 737 deletions
diff --git a/vendor/memchr/src/memmem/mod.rs b/vendor/memchr/src/memmem/mod.rs
deleted file mode 100644
index 4f04943..0000000
--- a/vendor/memchr/src/memmem/mod.rs
+++ /dev/null
@@ -1,737 +0,0 @@
-/*!
-This module provides forward and reverse substring search routines.
-
-Unlike the standard library's substring search routines, these work on
-arbitrary bytes. For all non-empty needles, these routines will report exactly
-the same values as the corresponding routines in the standard library. For
-the empty needle, the standard library reports matches only at valid UTF-8
-boundaries, where as these routines will report matches at every position.
-
-Other than being able to work on arbitrary bytes, the primary reason to prefer
-these routines over the standard library routines is that these will generally
-be faster. In some cases, significantly so.
-
-# Example: iterating over substring matches
-
-This example shows how to use [`find_iter`] to find occurrences of a substring
-in a haystack.
-
-```
-use memchr::memmem;
-
-let haystack = b"foo bar foo baz foo";
-
-let mut it = memmem::find_iter(haystack, "foo");
-assert_eq!(Some(0), it.next());
-assert_eq!(Some(8), it.next());
-assert_eq!(Some(16), it.next());
-assert_eq!(None, it.next());
-```
-
-# Example: iterating over substring matches in reverse
-
-This example shows how to use [`rfind_iter`] to find occurrences of a substring
-in a haystack starting from the end of the haystack.
-
-**NOTE:** This module does not implement double ended iterators, so reverse
-searches aren't done by calling `rev` on a forward iterator.
-
-```
-use memchr::memmem;
-
-let haystack = b"foo bar foo baz foo";
-
-let mut it = memmem::rfind_iter(haystack, "foo");
-assert_eq!(Some(16), it.next());
-assert_eq!(Some(8), it.next());
-assert_eq!(Some(0), it.next());
-assert_eq!(None, it.next());
-```
-
-# Example: repeating a search for the same needle
-
-It may be possible for the overhead of constructing a substring searcher to be
-measurable in some workloads. In cases where the same needle is used to search
-many haystacks, it is possible to do construction once and thus to avoid it for
-subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for
-reverse searches).
-
-```
-use memchr::memmem;
-
-let finder = memmem::Finder::new("foo");
-
-assert_eq!(Some(4), finder.find(b"baz foo quux"));
-assert_eq!(None, finder.find(b"quux baz bar"));
-```
-*/
-
-pub use crate::memmem::searcher::PrefilterConfig as Prefilter;
-
-// This is exported here for use in the crate::arch::all::twoway
-// implementation. This is essentially an abstraction breaker. Namely, the
-// public API of twoway doesn't support providing a prefilter, but its crate
-// internal API does. The main reason for this is that I didn't want to do the
-// API design required to support it without a concrete use case.
-pub(crate) use crate::memmem::searcher::Pre;
-
-use crate::{
- arch::all::{
- packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank},
- rabinkarp,
- },
- cow::CowBytes,
- memmem::searcher::{PrefilterState, Searcher, SearcherRev},
-};
-
-mod searcher;
-
-/// Returns an iterator over all non-overlapping occurrences of a substring in
-/// a haystack.
-///
-/// # Complexity
-///
-/// This routine is guaranteed to have worst case linear time complexity
-/// with respect to both the needle and the haystack. That is, this runs
-/// in `O(needle.len() + haystack.len())` time.
-///
-/// This routine is also guaranteed to have worst case constant space
-/// complexity.
-///
-/// # Examples
-///
-/// Basic usage:
-///
-/// ```
-/// use memchr::memmem;
-///
-/// let haystack = b"foo bar foo baz foo";
-/// let mut it = memmem::find_iter(haystack, b"foo");
-/// assert_eq!(Some(0), it.next());
-/// assert_eq!(Some(8), it.next());
-/// assert_eq!(Some(16), it.next());
-/// assert_eq!(None, it.next());
-/// ```
-#[inline]
-pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(
- haystack: &'h [u8],
- needle: &'n N,
-) -> FindIter<'h, 'n> {
- FindIter::new(haystack, Finder::new(needle))
-}
-
-/// Returns a reverse iterator over all non-overlapping occurrences of a
-/// substring in a haystack.
-///
-/// # Complexity
-///
-/// This routine is guaranteed to have worst case linear time complexity
-/// with respect to both the needle and the haystack. That is, this runs
-/// in `O(needle.len() + haystack.len())` time.
-///
-/// This routine is also guaranteed to have worst case constant space
-/// complexity.
-///
-/// # Examples
-///
-/// Basic usage:
-///
-/// ```
-/// use memchr::memmem;
-///
-/// let haystack = b"foo bar foo baz foo";
-/// let mut it = memmem::rfind_iter(haystack, b"foo");
-/// assert_eq!(Some(16), it.next());
-/// assert_eq!(Some(8), it.next());
-/// assert_eq!(Some(0), it.next());
-/// assert_eq!(None, it.next());
-/// ```
-#[inline]
-pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(
- haystack: &'h [u8],
- needle: &'n N,
-) -> FindRevIter<'h, 'n> {
- FindRevIter::new(haystack, FinderRev::new(needle))
-}
-
-/// Returns the index of the first occurrence of the given needle.
-///
-/// Note that if you're are searching for the same needle in many different
-/// small haystacks, it may be faster to initialize a [`Finder`] once,
-/// and reuse it for each search.
-///
-/// # Complexity
-///
-/// This routine is guaranteed to have worst case linear time complexity
-/// with respect to both the needle and the haystack. That is, this runs
-/// in `O(needle.len() + haystack.len())` time.
-///
-/// This routine is also guaranteed to have worst case constant space
-/// complexity.
-///
-/// # Examples
-///
-/// Basic usage:
-///
-/// ```
-/// use memchr::memmem;
-///
-/// let haystack = b"foo bar baz";
-/// assert_eq!(Some(0), memmem::find(haystack, b"foo"));
-/// assert_eq!(Some(4), memmem::find(haystack, b"bar"));
-/// assert_eq!(None, memmem::find(haystack, b"quux"));
-/// ```
-#[inline]
-pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
- if haystack.len() < 64 {
- rabinkarp::Finder::new(needle).find(haystack, needle)
- } else {
- Finder::new(needle).find(haystack)
- }
-}
-
-/// Returns the index of the last occurrence of the given needle.
-///
-/// Note that if you're are searching for the same needle in many different
-/// small haystacks, it may be faster to initialize a [`FinderRev`] once,
-/// and reuse it for each search.
-///
-/// # Complexity
-///
-/// This routine is guaranteed to have worst case linear time complexity
-/// with respect to both the needle and the haystack. That is, this runs
-/// in `O(needle.len() + haystack.len())` time.
-///
-/// This routine is also guaranteed to have worst case constant space
-/// complexity.
-///
-/// # Examples
-///
-/// Basic usage:
-///
-/// ```
-/// use memchr::memmem;
-///
-/// let haystack = b"foo bar baz";
-/// assert_eq!(Some(0), memmem::rfind(haystack, b"foo"));
-/// assert_eq!(Some(4), memmem::rfind(haystack, b"bar"));
-/// assert_eq!(Some(8), memmem::rfind(haystack, b"ba"));
-/// assert_eq!(None, memmem::rfind(haystack, b"quux"));
-/// ```
-#[inline]
-pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> {
- if haystack.len() < 64 {
- rabinkarp::FinderRev::new(needle).rfind(haystack, needle)
- } else {
- FinderRev::new(needle).rfind(haystack)
- }
-}
-
-/// An iterator over non-overlapping substring matches.
-///
-/// Matches are reported by the byte offset at which they begin.
-///
-/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
-/// needle.
-#[derive(Debug, Clone)]
-pub struct FindIter<'h, 'n> {
- haystack: &'h [u8],
- prestate: PrefilterState,
- finder: Finder<'n>,
- pos: usize,
-}
-
-impl<'h, 'n> FindIter<'h, 'n> {
- #[inline(always)]
- pub(crate) fn new(
- haystack: &'h [u8],
- finder: Finder<'n>,
- ) -> FindIter<'h, 'n> {
- let prestate = PrefilterState::new();
- FindIter { haystack, prestate, finder, pos: 0 }
- }
-
- /// Convert this iterator into its owned variant, such that it no longer
- /// borrows the finder and needle.
- ///
- /// If this is already an owned iterator, then this is a no-op. Otherwise,
- /// this copies the needle.
- ///
- /// This is only available when the `alloc` feature is enabled.
- #[cfg(feature = "alloc")]
- #[inline]
- pub fn into_owned(self) -> FindIter<'h, 'static> {
- FindIter {
- haystack: self.haystack,
- prestate: self.prestate,
- finder: self.finder.into_owned(),
- pos: self.pos,
- }
- }
-}
-
-impl<'h, 'n> Iterator for FindIter<'h, 'n> {
- type Item = usize;
-
- fn next(&mut self) -> Option<usize> {
- let needle = self.finder.needle();
- let haystack = self.haystack.get(self.pos..)?;
- let idx =
- self.finder.searcher.find(&mut self.prestate, haystack, needle)?;
-
- let pos = self.pos + idx;
- self.pos = pos + needle.len().max(1);
-
- Some(pos)
- }
-
- fn size_hint(&self) -> (usize, Option<usize>) {
- // The largest possible number of non-overlapping matches is the
- // quotient of the haystack and the needle (or the length of the
- // haystack, if the needle is empty)
- match self.haystack.len().checked_sub(self.pos) {
- None => (0, Some(0)),
- Some(haystack_len) => match self.finder.needle().len() {
- // Empty needles always succeed and match at every point
- // (including the very end)
- 0 => (
- haystack_len.saturating_add(1),
- haystack_len.checked_add(1),
- ),
- needle_len => (0, Some(haystack_len / needle_len)),
- },
- }
- }
-}
-
-/// An iterator over non-overlapping substring matches in reverse.
-///
-/// Matches are reported by the byte offset at which they begin.
-///
-/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
-/// needle.
-#[derive(Clone, Debug)]
-pub struct FindRevIter<'h, 'n> {
- haystack: &'h [u8],
- finder: FinderRev<'n>,
- /// When searching with an empty needle, this gets set to `None` after
- /// we've yielded the last element at `0`.
- pos: Option<usize>,
-}
-
-impl<'h, 'n> FindRevIter<'h, 'n> {
- #[inline(always)]
- pub(crate) fn new(
- haystack: &'h [u8],
- finder: FinderRev<'n>,
- ) -> FindRevIter<'h, 'n> {
- let pos = Some(haystack.len());
- FindRevIter { haystack, finder, pos }
- }
-
- /// Convert this iterator into its owned variant, such that it no longer
- /// borrows the finder and needle.
- ///
- /// If this is already an owned iterator, then this is a no-op. Otherwise,
- /// this copies the needle.
- ///
- /// This is only available when the `std` feature is enabled.
- #[cfg(feature = "alloc")]
- #[inline]
- pub fn into_owned(self) -> FindRevIter<'h, 'static> {
- FindRevIter {
- haystack: self.haystack,
- finder: self.finder.into_owned(),
- pos: self.pos,
- }
- }
-}
-
-impl<'h, 'n> Iterator for FindRevIter<'h, 'n> {
- type Item = usize;
-
- fn next(&mut self) -> Option<usize> {
- let pos = match self.pos {
- None => return None,
- Some(pos) => pos,
- };
- let result = self.finder.rfind(&self.haystack[..pos]);
- match result {
- None => None,
- Some(i) => {
- if pos == i {
- self.pos = pos.checked_sub(1);
- } else {
- self.pos = Some(i);
- }
- Some(i)
- }
- }
- }
-}
-
-/// A single substring searcher fixed to a particular needle.
-///
-/// The purpose of this type is to permit callers to construct a substring
-/// searcher that can be used to search haystacks without the overhead of
-/// constructing the searcher in the first place. This is a somewhat niche
-/// concern when it's necessary to re-use the same needle to search multiple
-/// different haystacks with as little overhead as possible. In general, using
-/// [`find`] is good enough, but `Finder` is useful when you can meaningfully
-/// observe searcher construction time in a profile.
-///
-/// When the `std` feature is enabled, then this type has an `into_owned`
-/// version which permits building a `Finder` that is not connected to
-/// the lifetime of its needle.
-#[derive(Clone, Debug)]
-pub struct Finder<'n> {
- needle: CowBytes<'n>,
- searcher: Searcher,
-}
-
-impl<'n> Finder<'n> {
- /// Create a new finder for the given needle.
- #[inline]
- pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> {
- FinderBuilder::new().build_forward(needle)
- }
-
- /// Returns the index of the first occurrence of this needle in the given
- /// haystack.
- ///
- /// # Complexity
- ///
- /// This routine is guaranteed to have worst case linear time complexity
- /// with respect to both the needle and the haystack. That is, this runs
- /// in `O(needle.len() + haystack.len())` time.
- ///
- /// This routine is also guaranteed to have worst case constant space
- /// complexity.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// use memchr::memmem::Finder;
- ///
- /// let haystack = b"foo bar baz";
- /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
- /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
- /// assert_eq!(None, Finder::new("quux").find(haystack));
- /// ```
- #[inline]
- pub fn find(&self, haystack: &[u8]) -> Option<usize> {
- let mut prestate = PrefilterState::new();
- let needle = self.needle.as_slice();
- self.searcher.find(&mut prestate, haystack, needle)
- }
-
- /// Returns an iterator over all occurrences of a substring in a haystack.
- ///
- /// # Complexity
- ///
- /// This routine is guaranteed to have worst case linear time complexity
- /// with respect to both the needle and the haystack. That is, this runs
- /// in `O(needle.len() + haystack.len())` time.
- ///
- /// This routine is also guaranteed to have worst case constant space
- /// complexity.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// use memchr::memmem::Finder;
- ///
- /// let haystack = b"foo bar foo baz foo";
- /// let finder = Finder::new(b"foo");
- /// let mut it = finder.find_iter(haystack);
- /// assert_eq!(Some(0), it.next());
- /// assert_eq!(Some(8), it.next());
- /// assert_eq!(Some(16), it.next());
- /// assert_eq!(None, it.next());
- /// ```
- #[inline]
- pub fn find_iter<'a, 'h>(
- &'a self,
- haystack: &'h [u8],
- ) -> FindIter<'h, 'a> {
- FindIter::new(haystack, self.as_ref())
- }
-
- /// Convert this finder into its owned variant, such that it no longer
- /// borrows the needle.
- ///
- /// If this is already an owned finder, then this is a no-op. Otherwise,
- /// this copies the needle.
- ///
- /// This is only available when the `alloc` feature is enabled.
- #[cfg(feature = "alloc")]
- #[inline]
- pub fn into_owned(self) -> Finder<'static> {
- Finder {
- needle: self.needle.into_owned(),
- searcher: self.searcher.clone(),
- }
- }
-
- /// Convert this finder into its borrowed variant.
- ///
- /// This is primarily useful if your finder is owned and you'd like to
- /// store its borrowed variant in some intermediate data structure.
- ///
- /// Note that the lifetime parameter of the returned finder is tied to the
- /// lifetime of `self`, and may be shorter than the `'n` lifetime of the
- /// needle itself. Namely, a finder's needle can be either borrowed or
- /// owned, so the lifetime of the needle returned must necessarily be the
- /// shorter of the two.
- #[inline]
- pub fn as_ref(&self) -> Finder<'_> {
- Finder {
- needle: CowBytes::new(self.needle()),
- searcher: self.searcher.clone(),
- }
- }
-
- /// Returns the needle that this finder searches for.
- ///
- /// Note that the lifetime of the needle returned is tied to the lifetime
- /// of the finder, and may be shorter than the `'n` lifetime. Namely, a
- /// finder's needle can be either borrowed or owned, so the lifetime of the
- /// needle returned must necessarily be the shorter of the two.
- #[inline]
- pub fn needle(&self) -> &[u8] {
- self.needle.as_slice()
- }
-}
-
-/// A single substring reverse searcher fixed to a particular needle.
-///
-/// The purpose of this type is to permit callers to construct a substring
-/// searcher that can be used to search haystacks without the overhead of
-/// constructing the searcher in the first place. This is a somewhat niche
-/// concern when it's necessary to re-use the same needle to search multiple
-/// different haystacks with as little overhead as possible. In general,
-/// using [`rfind`] is good enough, but `FinderRev` is useful when you can
-/// meaningfully observe searcher construction time in a profile.
-///
-/// When the `std` feature is enabled, then this type has an `into_owned`
-/// version which permits building a `FinderRev` that is not connected to
-/// the lifetime of its needle.
-#[derive(Clone, Debug)]
-pub struct FinderRev<'n> {
- needle: CowBytes<'n>,
- searcher: SearcherRev,
-}
-
-impl<'n> FinderRev<'n> {
- /// Create a new reverse finder for the given needle.
- #[inline]
- pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> {
- FinderBuilder::new().build_reverse(needle)
- }
-
- /// Returns the index of the last occurrence of this needle in the given
- /// haystack.
- ///
- /// The haystack may be any type that can be cheaply converted into a
- /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
- ///
- /// # Complexity
- ///
- /// This routine is guaranteed to have worst case linear time complexity
- /// with respect to both the needle and the haystack. That is, this runs
- /// in `O(needle.len() + haystack.len())` time.
- ///
- /// This routine is also guaranteed to have worst case constant space
- /// complexity.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// use memchr::memmem::FinderRev;
- ///
- /// let haystack = b"foo bar baz";
- /// assert_eq!(Some(0), FinderRev::new("foo").rfind(haystack));
- /// assert_eq!(Some(4), FinderRev::new("bar").rfind(haystack));
- /// assert_eq!(None, FinderRev::new("quux").rfind(haystack));
- /// ```
- pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
- self.searcher.rfind(haystack.as_ref(), self.needle.as_slice())
- }
-
- /// Returns a reverse iterator over all occurrences of a substring in a
- /// haystack.
- ///
- /// # Complexity
- ///
- /// This routine is guaranteed to have worst case linear time complexity
- /// with respect to both the needle and the haystack. That is, this runs
- /// in `O(needle.len() + haystack.len())` time.
- ///
- /// This routine is also guaranteed to have worst case constant space
- /// complexity.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// use memchr::memmem::FinderRev;
- ///
- /// let haystack = b"foo bar foo baz foo";
- /// let finder = FinderRev::new(b"foo");
- /// let mut it = finder.rfind_iter(haystack);
- /// assert_eq!(Some(16), it.next());
- /// assert_eq!(Some(8), it.next());
- /// assert_eq!(Some(0), it.next());
- /// assert_eq!(None, it.next());
- /// ```
- #[inline]
- pub fn rfind_iter<'a, 'h>(
- &'a self,
- haystack: &'h [u8],
- ) -> FindRevIter<'h, 'a> {
- FindRevIter::new(haystack, self.as_ref())
- }
-
- /// Convert this finder into its owned variant, such that it no longer
- /// borrows the needle.
- ///
- /// If this is already an owned finder, then this is a no-op. Otherwise,
- /// this copies the needle.
- ///
- /// This is only available when the `std` feature is enabled.
- #[cfg(feature = "alloc")]
- #[inline]
- pub fn into_owned(self) -> FinderRev<'static> {
- FinderRev {
- needle: self.needle.into_owned(),
- searcher: self.searcher.clone(),
- }
- }
-
- /// Convert this finder into its borrowed variant.
- ///
- /// This is primarily useful if your finder is owned and you'd like to
- /// store its borrowed variant in some intermediate data structure.
- ///
- /// Note that the lifetime parameter of the returned finder is tied to the
- /// lifetime of `self`, and may be shorter than the `'n` lifetime of the
- /// needle itself. Namely, a finder's needle can be either borrowed or
- /// owned, so the lifetime of the needle returned must necessarily be the
- /// shorter of the two.
- #[inline]
- pub fn as_ref(&self) -> FinderRev<'_> {
- FinderRev {
- needle: CowBytes::new(self.needle()),
- searcher: self.searcher.clone(),
- }
- }
-
- /// Returns the needle that this finder searches for.
- ///
- /// Note that the lifetime of the needle returned is tied to the lifetime
- /// of the finder, and may be shorter than the `'n` lifetime. Namely, a
- /// finder's needle can be either borrowed or owned, so the lifetime of the
- /// needle returned must necessarily be the shorter of the two.
- #[inline]
- pub fn needle(&self) -> &[u8] {
- self.needle.as_slice()
- }
-}
-
-/// A builder for constructing non-default forward or reverse memmem finders.
-///
-/// A builder is primarily useful for configuring a substring searcher.
-/// Currently, the only configuration exposed is the ability to disable
-/// heuristic prefilters used to speed up certain searches.
-#[derive(Clone, Debug, Default)]
-pub struct FinderBuilder {
- prefilter: Prefilter,
-}
-
-impl FinderBuilder {
- /// Create a new finder builder with default settings.
- pub fn new() -> FinderBuilder {
- FinderBuilder::default()
- }
-
- /// Build a forward finder using the given needle from the current
- /// settings.
- pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>(
- &self,
- needle: &'n B,
- ) -> Finder<'n> {
- self.build_forward_with_ranker(DefaultFrequencyRank, needle)
- }
-
- /// Build a forward finder using the given needle and a custom heuristic for
- /// determining the frequency of a given byte in the dataset.
- /// See [`HeuristicFrequencyRank`] for more details.
- pub fn build_forward_with_ranker<
- 'n,
- R: HeuristicFrequencyRank,
- B: ?Sized + AsRef<[u8]>,
- >(
- &self,
- ranker: R,
- needle: &'n B,
- ) -> Finder<'n> {
- let needle = needle.as_ref();
- Finder {
- needle: CowBytes::new(needle),
- searcher: Searcher::new(self.prefilter, ranker, needle),
- }
- }
-
- /// Build a reverse finder using the given needle from the current
- /// settings.
- pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>(
- &self,
- needle: &'n B,
- ) -> FinderRev<'n> {
- let needle = needle.as_ref();
- FinderRev {
- needle: CowBytes::new(needle),
- searcher: SearcherRev::new(needle),
- }
- }
-
- /// Configure the prefilter setting for the finder.
- ///
- /// See the documentation for [`Prefilter`] for more discussion on why
- /// you might want to configure this.
- pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder {
- self.prefilter = prefilter;
- self
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h)));
- define_substring_reverse_quickcheck!(|h, n| Some(
- FinderRev::new(n).rfind(h)
- ));
-
- #[test]
- fn forward() {
- crate::tests::substring::Runner::new()
- .fwd(|h, n| Some(Finder::new(n).find(h)))
- .run();
- }
-
- #[test]
- fn reverse() {
- crate::tests::substring::Runner::new()
- .rev(|h, n| Some(FinderRev::new(n).rfind(h)))
- .run();
- }
-}