From 1b6a04ca5504955c571d1c97504fb45ea0befee4 Mon Sep 17 00:00:00 2001 From: Valentin Popov Date: Mon, 8 Jan 2024 01:21:28 +0400 Subject: Initial vendor packages Signed-off-by: Valentin Popov --- vendor/memchr/src/arch/all/shiftor.rs | 89 +++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 vendor/memchr/src/arch/all/shiftor.rs (limited to 'vendor/memchr/src/arch/all/shiftor.rs') diff --git a/vendor/memchr/src/arch/all/shiftor.rs b/vendor/memchr/src/arch/all/shiftor.rs new file mode 100644 index 0000000..b690564 --- /dev/null +++ b/vendor/memchr/src/arch/all/shiftor.rs @@ -0,0 +1,89 @@ +/*! +An implementation of the [Shift-Or substring search algorithm][shiftor]. + +[shiftor]: https://en.wikipedia.org/wiki/Bitap_algorithm +*/ + +use alloc::boxed::Box; + +/// The type of our mask. +/// +/// While we don't expose anyway to configure this in the public API, if one +/// really needs less memory usage or support for longer needles, then it is +/// suggested to copy the code from this module and modify it to fit your +/// needs. The code below is written to be correct regardless of whether Mask +/// is a u8, u16, u32, u64 or u128. +type Mask = u16; + +/// A forward substring searcher using the Shift-Or algorithm. +#[derive(Debug)] +pub struct Finder { + masks: Box<[Mask; 256]>, + needle_len: usize, +} + +impl Finder { + const MAX_NEEDLE_LEN: usize = (Mask::BITS - 1) as usize; + + /// Create a new Shift-Or forward searcher for the given `needle`. + /// + /// The needle may be empty. The empty needle matches at every byte offset. + #[inline] + pub fn new(needle: &[u8]) -> Option { + let needle_len = needle.len(); + if needle_len > Finder::MAX_NEEDLE_LEN { + // A match is found when bit 7 is set in 'result' in the search + // routine below. So our needle can't be bigger than 7. We could + // permit bigger needles by using u16, u32 or u64 for our mask + // entries. But this is all we need for this example. + return None; + } + let mut searcher = Finder { masks: Box::from([!0; 256]), needle_len }; + for (i, &byte) in needle.iter().enumerate() { + searcher.masks[usize::from(byte)] &= !(1 << i); + } + Some(searcher) + } + + /// Return the first occurrence of the needle given to `Finder::new` in + /// the `haystack` given. If no such occurrence exists, then `None` is + /// returned. + /// + /// Unlike most other substring search implementations in this crate, this + /// finder does not require passing the needle at search time. A match can + /// be determined without the needle at all since the required information + /// is already encoded into this finder at construction time. + /// + /// The maximum value this can return is `haystack.len()`, which can only + /// occur when the needle and haystack both have length zero. Otherwise, + /// for non-empty haystacks, the maximum value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + if self.needle_len == 0 { + return Some(0); + } + let mut result = !1; + for (i, &byte) in haystack.iter().enumerate() { + result |= self.masks[usize::from(byte)]; + result <<= 1; + if result & (1 << self.needle_len) == 0 { + return Some(i + 1 - self.needle_len); + } + } + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n)?.find(h))); + + #[test] + fn forward() { + crate::tests::substring::Runner::new() + .fwd(|h, n| Some(Finder::new(n)?.find(h))) + .run(); + } +} -- cgit v1.2.3