diff options
author | Valentin Popov <valentin@popov.link> | 2024-07-19 15:37:58 +0300 |
---|---|---|
committer | Valentin Popov <valentin@popov.link> | 2024-07-19 15:37:58 +0300 |
commit | a990de90fe41456a23e58bd087d2f107d321f3a1 (patch) | |
tree | 15afc392522a9e85dc3332235e311b7d39352ea9 /vendor/memchr/src/arch/all/packedpair | |
parent | 3d48cd3f81164bbfc1a755dc1d4a9a02f98c8ddd (diff) | |
download | fparkan-a990de90fe41456a23e58bd087d2f107d321f3a1.tar.xz fparkan-a990de90fe41456a23e58bd087d2f107d321f3a1.zip |
Deleted vendor folder
Diffstat (limited to 'vendor/memchr/src/arch/all/packedpair')
-rw-r--r-- | vendor/memchr/src/arch/all/packedpair/default_rank.rs | 258 | ||||
-rw-r--r-- | vendor/memchr/src/arch/all/packedpair/mod.rs | 359 |
2 files changed, 0 insertions, 617 deletions
diff --git a/vendor/memchr/src/arch/all/packedpair/default_rank.rs b/vendor/memchr/src/arch/all/packedpair/default_rank.rs deleted file mode 100644 index 6aa3895..0000000 --- a/vendor/memchr/src/arch/all/packedpair/default_rank.rs +++ /dev/null @@ -1,258 +0,0 @@ -pub(crate) const RANK: [u8; 256] = [ - 55, // '\x00' - 52, // '\x01' - 51, // '\x02' - 50, // '\x03' - 49, // '\x04' - 48, // '\x05' - 47, // '\x06' - 46, // '\x07' - 45, // '\x08' - 103, // '\t' - 242, // '\n' - 66, // '\x0b' - 67, // '\x0c' - 229, // '\r' - 44, // '\x0e' - 43, // '\x0f' - 42, // '\x10' - 41, // '\x11' - 40, // '\x12' - 39, // '\x13' - 38, // '\x14' - 37, // '\x15' - 36, // '\x16' - 35, // '\x17' - 34, // '\x18' - 33, // '\x19' - 56, // '\x1a' - 32, // '\x1b' - 31, // '\x1c' - 30, // '\x1d' - 29, // '\x1e' - 28, // '\x1f' - 255, // ' ' - 148, // '!' - 164, // '"' - 149, // '#' - 136, // '$' - 160, // '%' - 155, // '&' - 173, // "'" - 221, // '(' - 222, // ')' - 134, // '*' - 122, // '+' - 232, // ',' - 202, // '-' - 215, // '.' - 224, // '/' - 208, // '0' - 220, // '1' - 204, // '2' - 187, // '3' - 183, // '4' - 179, // '5' - 177, // '6' - 168, // '7' - 178, // '8' - 200, // '9' - 226, // ':' - 195, // ';' - 154, // '<' - 184, // '=' - 174, // '>' - 126, // '?' - 120, // '@' - 191, // 'A' - 157, // 'B' - 194, // 'C' - 170, // 'D' - 189, // 'E' - 162, // 'F' - 161, // 'G' - 150, // 'H' - 193, // 'I' - 142, // 'J' - 137, // 'K' - 171, // 'L' - 176, // 'M' - 185, // 'N' - 167, // 'O' - 186, // 'P' - 112, // 'Q' - 175, // 'R' - 192, // 'S' - 188, // 'T' - 156, // 'U' - 140, // 'V' - 143, // 'W' - 123, // 'X' - 133, // 'Y' - 128, // 'Z' - 147, // '[' - 138, // '\\' - 146, // ']' - 114, // '^' - 223, // '_' - 151, // '`' - 249, // 'a' - 216, // 'b' - 238, // 'c' - 236, // 'd' - 253, // 'e' - 227, // 'f' - 218, // 'g' - 230, // 'h' - 247, // 'i' - 135, // 'j' - 180, // 'k' - 241, // 'l' - 233, // 'm' - 246, // 'n' - 244, // 'o' - 231, // 'p' - 139, // 'q' - 245, // 'r' - 243, // 's' - 251, // 't' - 235, // 'u' - 201, // 'v' - 196, // 'w' - 240, // 'x' - 214, // 'y' - 152, // 'z' - 182, // '{' - 205, // '|' - 181, // '}' - 127, // '~' - 27, // '\x7f' - 212, // '\x80' - 211, // '\x81' - 210, // '\x82' - 213, // '\x83' - 228, // '\x84' - 197, // '\x85' - 169, // '\x86' - 159, // '\x87' - 131, // '\x88' - 172, // '\x89' - 105, // '\x8a' - 80, // '\x8b' - 98, // '\x8c' - 96, // '\x8d' - 97, // '\x8e' - 81, // '\x8f' - 207, // '\x90' - 145, // '\x91' - 116, // '\x92' - 115, // '\x93' - 144, // '\x94' - 130, // '\x95' - 153, // '\x96' - 121, // '\x97' - 107, // '\x98' - 132, // '\x99' - 109, // '\x9a' - 110, // '\x9b' - 124, // '\x9c' - 111, // '\x9d' - 82, // '\x9e' - 108, // '\x9f' - 118, // '\xa0' - 141, // '¡' - 113, // '¢' - 129, // '£' - 119, // '¤' - 125, // '¥' - 165, // '¦' - 117, // '§' - 92, // '¨' - 106, // '©' - 83, // 'ª' - 72, // '«' - 99, // '¬' - 93, // '\xad' - 65, // '®' - 79, // '¯' - 166, // '°' - 237, // '±' - 163, // '²' - 199, // '³' - 190, // '´' - 225, // 'µ' - 209, // '¶' - 203, // '·' - 198, // '¸' - 217, // '¹' - 219, // 'º' - 206, // '»' - 234, // '¼' - 248, // '½' - 158, // '¾' - 239, // '¿' - 255, // 'À' - 255, // 'Á' - 255, // 'Â' - 255, // 'Ã' - 255, // 'Ä' - 255, // 'Å' - 255, // 'Æ' - 255, // 'Ç' - 255, // 'È' - 255, // 'É' - 255, // 'Ê' - 255, // 'Ë' - 255, // 'Ì' - 255, // 'Í' - 255, // 'Î' - 255, // 'Ï' - 255, // 'Ð' - 255, // 'Ñ' - 255, // 'Ò' - 255, // 'Ó' - 255, // 'Ô' - 255, // 'Õ' - 255, // 'Ö' - 255, // '×' - 255, // 'Ø' - 255, // 'Ù' - 255, // 'Ú' - 255, // 'Û' - 255, // 'Ü' - 255, // 'Ý' - 255, // 'Þ' - 255, // 'ß' - 255, // 'à' - 255, // 'á' - 255, // 'â' - 255, // 'ã' - 255, // 'ä' - 255, // 'å' - 255, // 'æ' - 255, // 'ç' - 255, // 'è' - 255, // 'é' - 255, // 'ê' - 255, // 'ë' - 255, // 'ì' - 255, // 'í' - 255, // 'î' - 255, // 'ï' - 255, // 'ð' - 255, // 'ñ' - 255, // 'ò' - 255, // 'ó' - 255, // 'ô' - 255, // 'õ' - 255, // 'ö' - 255, // '÷' - 255, // 'ø' - 255, // 'ù' - 255, // 'ú' - 255, // 'û' - 255, // 'ü' - 255, // 'ý' - 255, // 'þ' - 255, // 'ÿ' -]; diff --git a/vendor/memchr/src/arch/all/packedpair/mod.rs b/vendor/memchr/src/arch/all/packedpair/mod.rs deleted file mode 100644 index 148a985..0000000 --- a/vendor/memchr/src/arch/all/packedpair/mod.rs +++ /dev/null @@ -1,359 +0,0 @@ -/*! -Provides an architecture independent implementation of the "packed pair" -algorithm. - -The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main -difference is that it (by default) uses a background distribution of byte -frequencies to heuristically select the pair of bytes to search for. Note that -this module provides an architecture independent version that doesn't do as -good of a job keeping the search for candidates inside a SIMD hot path. It -however can be good enough in many circumstances. - -[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last -*/ - -use crate::memchr; - -mod default_rank; - -/// An architecture independent "packed pair" finder. -/// -/// This finder picks two bytes that it believes have high predictive power for -/// indicating an overall match of a needle. At search time, it reports offsets -/// where the needle could match based on whether the pair of bytes it chose -/// match. -/// -/// This is architecture independent because it utilizes `memchr` to find the -/// occurrence of one of the bytes in the pair, and then checks whether the -/// second byte matches. If it does, in the case of [`Finder::find_prefilter`], -/// the location at which the needle could match is returned. -/// -/// It is generally preferred to use architecture specific routines for a -/// "packed pair" prefilter, but this can be a useful fallback when the -/// architecture independent routines are unavailable. -#[derive(Clone, Copy, Debug)] -pub struct Finder { - pair: Pair, - byte1: u8, - byte2: u8, -} - -impl Finder { - /// Create a new prefilter that reports possible locations where the given - /// needle matches. - #[inline] - pub fn new(needle: &[u8]) -> Option<Finder> { - Finder::with_pair(needle, Pair::new(needle)?) - } - - /// Create a new prefilter using the pair given. - /// - /// If the prefilter could not be constructed, then `None` is returned. - /// - /// This constructor permits callers to control precisely which pair of - /// bytes is used as a predicate. - #[inline] - pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { - let byte1 = needle[usize::from(pair.index1())]; - let byte2 = needle[usize::from(pair.index2())]; - // Currently this can never fail so we could just return a Finder, - // but it's conceivable this could change. - Some(Finder { pair, byte1, byte2 }) - } - - /// Run this finder on the given haystack as a prefilter. - /// - /// If a candidate match is found, then an offset where the needle *could* - /// begin in the haystack is returned. - #[inline] - pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { - let mut i = 0; - let index1 = usize::from(self.pair.index1()); - let index2 = usize::from(self.pair.index2()); - loop { - // Use a fast vectorized implementation to skip to the next - // occurrence of the rarest byte (heuristically chosen) in the - // needle. - i += memchr(self.byte1, &haystack[i..])?; - let found = i; - i += 1; - - // If we can't align our first byte match with the haystack, then a - // match is impossible. - let aligned1 = match found.checked_sub(index1) { - None => continue, - Some(aligned1) => aligned1, - }; - - // Now align the second byte match with the haystack. A mismatch - // means that a match is impossible. - let aligned2 = match aligned1.checked_add(index2) { - None => continue, - Some(aligned_index2) => aligned_index2, - }; - if haystack.get(aligned2).map_or(true, |&b| b != self.byte2) { - continue; - } - - // We've done what we can. There might be a match here. - return Some(aligned1); - } - } - - /// Returns the pair of offsets (into the needle) used to check as a - /// predicate before confirming whether a needle exists at a particular - /// position. - #[inline] - pub fn pair(&self) -> &Pair { - &self.pair - } -} - -/// A pair of byte offsets into a needle to use as a predicate. -/// -/// This pair is used as a predicate to quickly filter out positions in a -/// haystack in which a needle cannot match. In some cases, this pair can even -/// be used in vector algorithms such that the vector algorithm only switches -/// over to scalar code once this pair has been found. -/// -/// A pair of offsets can be used in both substring search implementations and -/// in prefilters. The former will report matches of a needle in a haystack -/// where as the latter will only report possible matches of a needle. -/// -/// The offsets are limited each to a maximum of 255 to keep memory usage low. -/// Moreover, it's rarely advantageous to create a predicate using offsets -/// greater than 255 anyway. -/// -/// The only guarantee enforced on the pair of offsets is that they are not -/// equivalent. It is not necessarily the case that `index1 < index2` for -/// example. By convention, `index1` corresponds to the byte in the needle -/// that is believed to be most the predictive. Note also that because of the -/// requirement that the indices be both valid for the needle used to build -/// the pair and not equal, it follows that a pair can only be constructed for -/// needles with length at least 2. -#[derive(Clone, Copy, Debug)] -pub struct Pair { - index1: u8, - index2: u8, -} - -impl Pair { - /// Create a new pair of offsets from the given needle. - /// - /// If a pair could not be created (for example, if the needle is too - /// short), then `None` is returned. - /// - /// This chooses the pair in the needle that is believed to be as - /// predictive of an overall match of the needle as possible. - #[inline] - pub fn new(needle: &[u8]) -> Option<Pair> { - Pair::with_ranker(needle, DefaultFrequencyRank) - } - - /// Create a new pair of offsets from the given needle and ranker. - /// - /// This permits the caller to choose a background frequency distribution - /// with which bytes are selected. The idea is to select a pair of bytes - /// that is believed to strongly predict a match in the haystack. This - /// usually means selecting bytes that occur rarely in a haystack. - /// - /// If a pair could not be created (for example, if the needle is too - /// short), then `None` is returned. - #[inline] - pub fn with_ranker<R: HeuristicFrequencyRank>( - needle: &[u8], - ranker: R, - ) -> Option<Pair> { - if needle.len() <= 1 { - return None; - } - // Find the rarest two bytes. We make them distinct indices by - // construction. (The actual byte value may be the same in degenerate - // cases, but that's OK.) - let (mut rare1, mut index1) = (needle[0], 0); - let (mut rare2, mut index2) = (needle[1], 1); - if ranker.rank(rare2) < ranker.rank(rare1) { - core::mem::swap(&mut rare1, &mut rare2); - core::mem::swap(&mut index1, &mut index2); - } - let max = usize::from(core::u8::MAX); - for (i, &b) in needle.iter().enumerate().take(max).skip(2) { - if ranker.rank(b) < ranker.rank(rare1) { - rare2 = rare1; - index2 = index1; - rare1 = b; - index1 = u8::try_from(i).unwrap(); - } else if b != rare1 && ranker.rank(b) < ranker.rank(rare2) { - rare2 = b; - index2 = u8::try_from(i).unwrap(); - } - } - // While not strictly required for how a Pair is normally used, we - // really don't want these to be equivalent. If they were, it would - // reduce the effectiveness of candidate searching using these rare - // bytes by increasing the rate of false positives. - assert_ne!(index1, index2); - Some(Pair { index1, index2 }) - } - - /// Create a new pair using the offsets given for the needle given. - /// - /// This bypasses any sort of heuristic process for choosing the offsets - /// and permits the caller to choose the offsets themselves. - /// - /// Indices are limited to valid `u8` values so that a `Pair` uses less - /// memory. It is not possible to create a `Pair` with offsets bigger than - /// `u8::MAX`. It's likely that such a thing is not needed, but if it is, - /// it's suggested to build your own bespoke algorithm because you're - /// likely working on a very niche case. (File an issue if this suggestion - /// does not make sense to you.) - /// - /// If a pair could not be created (for example, if the needle is too - /// short), then `None` is returned. - #[inline] - pub fn with_indices( - needle: &[u8], - index1: u8, - index2: u8, - ) -> Option<Pair> { - // While not strictly required for how a Pair is normally used, we - // really don't want these to be equivalent. If they were, it would - // reduce the effectiveness of candidate searching using these rare - // bytes by increasing the rate of false positives. - if index1 == index2 { - return None; - } - // Similarly, invalid indices means the Pair is invalid too. - if usize::from(index1) >= needle.len() { - return None; - } - if usize::from(index2) >= needle.len() { - return None; - } - Some(Pair { index1, index2 }) - } - - /// Returns the first offset of the pair. - #[inline] - pub fn index1(&self) -> u8 { - self.index1 - } - - /// Returns the second offset of the pair. - #[inline] - pub fn index2(&self) -> u8 { - self.index2 - } -} - -/// This trait allows the user to customize the heuristic used to determine the -/// relative frequency of a given byte in the dataset being searched. -/// -/// The use of this trait can have a dramatic impact on performance depending -/// on the type of data being searched. The details of why are explained in the -/// docs of [`crate::memmem::Prefilter`]. To summarize, the core algorithm uses -/// a prefilter to quickly identify candidate matches that are later verified -/// more slowly. This prefilter is implemented in terms of trying to find -/// `rare` bytes at specific offsets that will occur less frequently in the -/// dataset. While the concept of a `rare` byte is similar for most datasets, -/// there are some specific datasets (like binary executables) that have -/// dramatically different byte distributions. For these datasets customizing -/// the byte frequency heuristic can have a massive impact on performance, and -/// might even need to be done at runtime. -/// -/// The default implementation of `HeuristicFrequencyRank` reads from the -/// static frequency table defined in `src/memmem/byte_frequencies.rs`. This -/// is optimal for most inputs, so if you are unsure of the impact of using a -/// custom `HeuristicFrequencyRank` you should probably just use the default. -/// -/// # Example -/// -/// ``` -/// use memchr::{ -/// arch::all::packedpair::HeuristicFrequencyRank, -/// memmem::FinderBuilder, -/// }; -/// -/// /// A byte-frequency table that is good for scanning binary executables. -/// struct Binary; -/// -/// impl HeuristicFrequencyRank for Binary { -/// fn rank(&self, byte: u8) -> u8 { -/// const TABLE: [u8; 256] = [ -/// 255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, -/// 89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16, -/// 68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11, -/// 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8, -/// 48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27, -/// 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19, -/// 19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24, -/// 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5, -/// 51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15, -/// 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, -/// 12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0, -/// 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5, -/// 46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13, -/// 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2, -/// 16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5, -/// 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175, -/// ]; -/// TABLE[byte as usize] -/// } -/// } -/// // Create a new finder with the custom heuristic. -/// let finder = FinderBuilder::new() -/// .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd"); -/// // Find needle with custom heuristic. -/// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some()); -/// ``` -pub trait HeuristicFrequencyRank { - /// Return the heuristic frequency rank of the given byte. A lower rank - /// means the byte is believed to occur less frequently in the haystack. - /// - /// Some uses of this heuristic may treat arbitrary absolute rank values as - /// significant. For example, an implementation detail in this crate may - /// determine that heuristic prefilters are inappropriate if every byte in - /// the needle has a "high" rank. - fn rank(&self, byte: u8) -> u8; -} - -/// The default byte frequency heuristic that is good for most haystacks. -pub(crate) struct DefaultFrequencyRank; - -impl HeuristicFrequencyRank for DefaultFrequencyRank { - fn rank(&self, byte: u8) -> u8 { - self::default_rank::RANK[usize::from(byte)] - } -} - -/// This permits passing any implementation of `HeuristicFrequencyRank` as a -/// borrowed version of itself. -impl<'a, R> HeuristicFrequencyRank for &'a R -where - R: HeuristicFrequencyRank, -{ - fn rank(&self, byte: u8) -> u8 { - (**self).rank(byte) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn forward_packedpair() { - fn find( - haystack: &[u8], - needle: &[u8], - _index1: u8, - _index2: u8, - ) -> Option<Option<usize>> { - // We ignore the index positions requested since it winds up making - // this test too slow overall. - let f = Finder::new(needle)?; - Some(f.find_prefilter(haystack)) - } - crate::tests::packedpair::Runner::new().fwd(find).run() - } -} |