/*! A 128-bit vector implementation of the "packed pair" SIMD algorithm. The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main difference is that it (by default) uses a background distribution of byte frequencies to heuristically select the pair of bytes to search for. [generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last */ use core::arch::aarch64::uint8x16_t; use crate::arch::{all::packedpair::Pair, generic::packedpair}; /// A "packed pair" finder that uses 128-bit vector operations. /// /// This finder picks two bytes that it believes have high predictive power /// for indicating an overall match of a needle. Depending on whether /// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets /// where the needle matches or could match. In the prefilter case, candidates /// are reported whenever the [`Pair`] of bytes given matches. #[derive(Clone, Copy, Debug)] pub struct Finder(packedpair::Finder); /// A "packed pair" finder that uses 128-bit vector operations. /// /// This finder picks two bytes that it believes have high predictive power /// for indicating an overall match of a needle. Depending on whether /// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets /// where the needle matches or could match. In the prefilter case, candidates /// are reported whenever the [`Pair`] of bytes given matches. impl Finder { /// Create a new pair searcher. The searcher returned can either report /// exact matches of `needle` or act as a prefilter and report candidate /// positions of `needle`. /// /// If neon is unavailable in the current environment or if a [`Pair`] /// could not be constructed from the needle given, then `None` is /// returned. #[inline] pub fn new(needle: &[u8]) -> Option { Finder::with_pair(needle, Pair::new(needle)?) } /// Create a new "packed pair" finder using the pair of bytes given. /// /// This constructor permits callers to control precisely which pair of /// bytes is used as a predicate. /// /// If neon is unavailable in the current environment, then `None` is /// returned. #[inline] pub fn with_pair(needle: &[u8], pair: Pair) -> Option { if Finder::is_available() { // SAFETY: we check that sse2 is available above. We are also // guaranteed to have needle.len() > 1 because we have a valid // Pair. unsafe { Some(Finder::with_pair_impl(needle, pair)) } } else { None } } /// Create a new `Finder` specific to neon vectors and routines. /// /// # Safety /// /// Same as the safety for `packedpair::Finder::new`, and callers must also /// ensure that neon is available. #[target_feature(enable = "neon")] #[inline] unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { let finder = packedpair::Finder::::new(needle, pair); Finder(finder) } /// Returns true when this implementation is available in the current /// environment. /// /// When this is true, it is guaranteed that [`Finder::with_pair`] will /// return a `Some` value. Similarly, when it is false, it is guaranteed /// that `Finder::with_pair` will return a `None` value. Notice that this /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, /// even when `Finder::is_available` is true, it is not guaranteed that a /// valid [`Pair`] can be found from the needle given. /// /// Note also that for the lifetime of a single program, if this returns /// true then it will always return true. #[inline] pub fn is_available() -> bool { #[cfg(target_feature = "neon")] { true } #[cfg(not(target_feature = "neon"))] { false } } /// Execute a search using neon vectors and routines. /// /// # Panics /// /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. #[inline] pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option { // SAFETY: Building a `Finder` means it's safe to call 'neon' routines. unsafe { self.find_impl(haystack, needle) } } /// Execute a search using neon vectors and routines. /// /// # Panics /// /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. #[inline] pub fn find_prefilter(&self, haystack: &[u8]) -> Option { // SAFETY: Building a `Finder` means it's safe to call 'neon' routines. unsafe { self.find_prefilter_impl(haystack) } } /// Execute a search using neon vectors and routines. /// /// # Panics /// /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. /// /// # Safety /// /// (The target feature safety obligation is automatically fulfilled by /// virtue of being a method on `Finder`, which can only be constructed /// when it is safe to call `neon` routines.) #[target_feature(enable = "neon")] #[inline] unsafe fn find_impl( &self, haystack: &[u8], needle: &[u8], ) -> Option { self.0.find(haystack, needle) } /// Execute a prefilter search using neon vectors and routines. /// /// # Panics /// /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. /// /// # Safety /// /// (The target feature safety obligation is automatically fulfilled by /// virtue of being a method on `Finder`, which can only be constructed /// when it is safe to call `neon` routines.) #[target_feature(enable = "neon")] #[inline] unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option { self.0.find_prefilter(haystack) } /// Returns the pair of offsets (into the needle) used to check as a /// predicate before confirming whether a needle exists at a particular /// position. #[inline] pub fn pair(&self) -> &Pair { self.0.pair() } /// Returns the minimum haystack length that this `Finder` can search. /// /// Using a haystack with length smaller than this in a search will result /// in a panic. The reason for this restriction is that this finder is /// meant to be a low-level component that is part of a larger substring /// strategy. In that sense, it avoids trying to handle all cases and /// instead only handles the cases that it can handle very well. #[inline] pub fn min_haystack_len(&self) -> usize { self.0.min_haystack_len() } } #[cfg(test)] mod tests { use super::*; fn find(haystack: &[u8], needle: &[u8]) -> Option> { let f = Finder::new(needle)?; if haystack.len() < f.min_haystack_len() { return None; } Some(f.find(haystack, needle)) } define_substring_forward_quickcheck!(find); #[test] fn forward_substring() { crate::tests::substring::Runner::new().fwd(find).run() } #[test] fn forward_packedpair() { fn find( haystack: &[u8], needle: &[u8], index1: u8, index2: u8, ) -> Option> { let pair = Pair::with_indices(needle, index1, index2)?; let f = Finder::with_pair(needle, pair)?; if haystack.len() < f.min_haystack_len() { return None; } Some(f.find(haystack, needle)) } crate::tests::packedpair::Runner::new().fwd(find).run() } #[test] fn forward_packedpair_prefilter() { fn find( haystack: &[u8], needle: &[u8], index1: u8, index2: u8, ) -> Option> { let pair = Pair::with_indices(needle, index1, index2)?; let f = Finder::with_pair(needle, pair)?; if haystack.len() < f.min_haystack_len() { return None; } Some(f.find_prefilter(haystack)) } crate::tests::packedpair::Runner::new().fwd(find).run() } }