aboutsummaryrefslogtreecommitdiff
path: root/vendor/memchr/src/arch/all/packedpair
diff options
context:
space:
mode:
authorValentin Popov <valentin@popov.link>2024-07-19 15:37:58 +0300
committerValentin Popov <valentin@popov.link>2024-07-19 15:37:58 +0300
commita990de90fe41456a23e58bd087d2f107d321f3a1 (patch)
tree15afc392522a9e85dc3332235e311b7d39352ea9 /vendor/memchr/src/arch/all/packedpair
parent3d48cd3f81164bbfc1a755dc1d4a9a02f98c8ddd (diff)
downloadfparkan-a990de90fe41456a23e58bd087d2f107d321f3a1.tar.xz
fparkan-a990de90fe41456a23e58bd087d2f107d321f3a1.zip
Deleted vendor folder
Diffstat (limited to 'vendor/memchr/src/arch/all/packedpair')
-rw-r--r--vendor/memchr/src/arch/all/packedpair/default_rank.rs258
-rw-r--r--vendor/memchr/src/arch/all/packedpair/mod.rs359
2 files changed, 0 insertions, 617 deletions
diff --git a/vendor/memchr/src/arch/all/packedpair/default_rank.rs b/vendor/memchr/src/arch/all/packedpair/default_rank.rs
deleted file mode 100644
index 6aa3895..0000000
--- a/vendor/memchr/src/arch/all/packedpair/default_rank.rs
+++ /dev/null
@@ -1,258 +0,0 @@
-pub(crate) const RANK: [u8; 256] = [
- 55, // '\x00'
- 52, // '\x01'
- 51, // '\x02'
- 50, // '\x03'
- 49, // '\x04'
- 48, // '\x05'
- 47, // '\x06'
- 46, // '\x07'
- 45, // '\x08'
- 103, // '\t'
- 242, // '\n'
- 66, // '\x0b'
- 67, // '\x0c'
- 229, // '\r'
- 44, // '\x0e'
- 43, // '\x0f'
- 42, // '\x10'
- 41, // '\x11'
- 40, // '\x12'
- 39, // '\x13'
- 38, // '\x14'
- 37, // '\x15'
- 36, // '\x16'
- 35, // '\x17'
- 34, // '\x18'
- 33, // '\x19'
- 56, // '\x1a'
- 32, // '\x1b'
- 31, // '\x1c'
- 30, // '\x1d'
- 29, // '\x1e'
- 28, // '\x1f'
- 255, // ' '
- 148, // '!'
- 164, // '"'
- 149, // '#'
- 136, // '$'
- 160, // '%'
- 155, // '&'
- 173, // "'"
- 221, // '('
- 222, // ')'
- 134, // '*'
- 122, // '+'
- 232, // ','
- 202, // '-'
- 215, // '.'
- 224, // '/'
- 208, // '0'
- 220, // '1'
- 204, // '2'
- 187, // '3'
- 183, // '4'
- 179, // '5'
- 177, // '6'
- 168, // '7'
- 178, // '8'
- 200, // '9'
- 226, // ':'
- 195, // ';'
- 154, // '<'
- 184, // '='
- 174, // '>'
- 126, // '?'
- 120, // '@'
- 191, // 'A'
- 157, // 'B'
- 194, // 'C'
- 170, // 'D'
- 189, // 'E'
- 162, // 'F'
- 161, // 'G'
- 150, // 'H'
- 193, // 'I'
- 142, // 'J'
- 137, // 'K'
- 171, // 'L'
- 176, // 'M'
- 185, // 'N'
- 167, // 'O'
- 186, // 'P'
- 112, // 'Q'
- 175, // 'R'
- 192, // 'S'
- 188, // 'T'
- 156, // 'U'
- 140, // 'V'
- 143, // 'W'
- 123, // 'X'
- 133, // 'Y'
- 128, // 'Z'
- 147, // '['
- 138, // '\\'
- 146, // ']'
- 114, // '^'
- 223, // '_'
- 151, // '`'
- 249, // 'a'
- 216, // 'b'
- 238, // 'c'
- 236, // 'd'
- 253, // 'e'
- 227, // 'f'
- 218, // 'g'
- 230, // 'h'
- 247, // 'i'
- 135, // 'j'
- 180, // 'k'
- 241, // 'l'
- 233, // 'm'
- 246, // 'n'
- 244, // 'o'
- 231, // 'p'
- 139, // 'q'
- 245, // 'r'
- 243, // 's'
- 251, // 't'
- 235, // 'u'
- 201, // 'v'
- 196, // 'w'
- 240, // 'x'
- 214, // 'y'
- 152, // 'z'
- 182, // '{'
- 205, // '|'
- 181, // '}'
- 127, // '~'
- 27, // '\x7f'
- 212, // '\x80'
- 211, // '\x81'
- 210, // '\x82'
- 213, // '\x83'
- 228, // '\x84'
- 197, // '\x85'
- 169, // '\x86'
- 159, // '\x87'
- 131, // '\x88'
- 172, // '\x89'
- 105, // '\x8a'
- 80, // '\x8b'
- 98, // '\x8c'
- 96, // '\x8d'
- 97, // '\x8e'
- 81, // '\x8f'
- 207, // '\x90'
- 145, // '\x91'
- 116, // '\x92'
- 115, // '\x93'
- 144, // '\x94'
- 130, // '\x95'
- 153, // '\x96'
- 121, // '\x97'
- 107, // '\x98'
- 132, // '\x99'
- 109, // '\x9a'
- 110, // '\x9b'
- 124, // '\x9c'
- 111, // '\x9d'
- 82, // '\x9e'
- 108, // '\x9f'
- 118, // '\xa0'
- 141, // '¡'
- 113, // '¢'
- 129, // '£'
- 119, // '¤'
- 125, // '¥'
- 165, // '¦'
- 117, // '§'
- 92, // '¨'
- 106, // '©'
- 83, // 'ª'
- 72, // '«'
- 99, // '¬'
- 93, // '\xad'
- 65, // '®'
- 79, // '¯'
- 166, // '°'
- 237, // '±'
- 163, // '²'
- 199, // '³'
- 190, // '´'
- 225, // 'µ'
- 209, // '¶'
- 203, // '·'
- 198, // '¸'
- 217, // '¹'
- 219, // 'º'
- 206, // '»'
- 234, // '¼'
- 248, // '½'
- 158, // '¾'
- 239, // '¿'
- 255, // 'À'
- 255, // 'Á'
- 255, // 'Â'
- 255, // 'Ã'
- 255, // 'Ä'
- 255, // 'Å'
- 255, // 'Æ'
- 255, // 'Ç'
- 255, // 'È'
- 255, // 'É'
- 255, // 'Ê'
- 255, // 'Ë'
- 255, // 'Ì'
- 255, // 'Í'
- 255, // 'Î'
- 255, // 'Ï'
- 255, // 'Ð'
- 255, // 'Ñ'
- 255, // 'Ò'
- 255, // 'Ó'
- 255, // 'Ô'
- 255, // 'Õ'
- 255, // 'Ö'
- 255, // '×'
- 255, // 'Ø'
- 255, // 'Ù'
- 255, // 'Ú'
- 255, // 'Û'
- 255, // 'Ü'
- 255, // 'Ý'
- 255, // 'Þ'
- 255, // 'ß'
- 255, // 'à'
- 255, // 'á'
- 255, // 'â'
- 255, // 'ã'
- 255, // 'ä'
- 255, // 'å'
- 255, // 'æ'
- 255, // 'ç'
- 255, // 'è'
- 255, // 'é'
- 255, // 'ê'
- 255, // 'ë'
- 255, // 'ì'
- 255, // 'í'
- 255, // 'î'
- 255, // 'ï'
- 255, // 'ð'
- 255, // 'ñ'
- 255, // 'ò'
- 255, // 'ó'
- 255, // 'ô'
- 255, // 'õ'
- 255, // 'ö'
- 255, // '÷'
- 255, // 'ø'
- 255, // 'ù'
- 255, // 'ú'
- 255, // 'û'
- 255, // 'ü'
- 255, // 'ý'
- 255, // 'þ'
- 255, // 'ÿ'
-];
diff --git a/vendor/memchr/src/arch/all/packedpair/mod.rs b/vendor/memchr/src/arch/all/packedpair/mod.rs
deleted file mode 100644
index 148a985..0000000
--- a/vendor/memchr/src/arch/all/packedpair/mod.rs
+++ /dev/null
@@ -1,359 +0,0 @@
-/*!
-Provides an architecture independent implementation of the "packed pair"
-algorithm.
-
-The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
-difference is that it (by default) uses a background distribution of byte
-frequencies to heuristically select the pair of bytes to search for. Note that
-this module provides an architecture independent version that doesn't do as
-good of a job keeping the search for candidates inside a SIMD hot path. It
-however can be good enough in many circumstances.
-
-[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
-*/
-
-use crate::memchr;
-
-mod default_rank;
-
-/// An architecture independent "packed pair" finder.
-///
-/// This finder picks two bytes that it believes have high predictive power for
-/// indicating an overall match of a needle. At search time, it reports offsets
-/// where the needle could match based on whether the pair of bytes it chose
-/// match.
-///
-/// This is architecture independent because it utilizes `memchr` to find the
-/// occurrence of one of the bytes in the pair, and then checks whether the
-/// second byte matches. If it does, in the case of [`Finder::find_prefilter`],
-/// the location at which the needle could match is returned.
-///
-/// It is generally preferred to use architecture specific routines for a
-/// "packed pair" prefilter, but this can be a useful fallback when the
-/// architecture independent routines are unavailable.
-#[derive(Clone, Copy, Debug)]
-pub struct Finder {
- pair: Pair,
- byte1: u8,
- byte2: u8,
-}
-
-impl Finder {
- /// Create a new prefilter that reports possible locations where the given
- /// needle matches.
- #[inline]
- pub fn new(needle: &[u8]) -> Option<Finder> {
- Finder::with_pair(needle, Pair::new(needle)?)
- }
-
- /// Create a new prefilter using the pair given.
- ///
- /// If the prefilter could not be constructed, then `None` is returned.
- ///
- /// This constructor permits callers to control precisely which pair of
- /// bytes is used as a predicate.
- #[inline]
- pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
- let byte1 = needle[usize::from(pair.index1())];
- let byte2 = needle[usize::from(pair.index2())];
- // Currently this can never fail so we could just return a Finder,
- // but it's conceivable this could change.
- Some(Finder { pair, byte1, byte2 })
- }
-
- /// Run this finder on the given haystack as a prefilter.
- ///
- /// If a candidate match is found, then an offset where the needle *could*
- /// begin in the haystack is returned.
- #[inline]
- pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
- let mut i = 0;
- let index1 = usize::from(self.pair.index1());
- let index2 = usize::from(self.pair.index2());
- loop {
- // Use a fast vectorized implementation to skip to the next
- // occurrence of the rarest byte (heuristically chosen) in the
- // needle.
- i += memchr(self.byte1, &haystack[i..])?;
- let found = i;
- i += 1;
-
- // If we can't align our first byte match with the haystack, then a
- // match is impossible.
- let aligned1 = match found.checked_sub(index1) {
- None => continue,
- Some(aligned1) => aligned1,
- };
-
- // Now align the second byte match with the haystack. A mismatch
- // means that a match is impossible.
- let aligned2 = match aligned1.checked_add(index2) {
- None => continue,
- Some(aligned_index2) => aligned_index2,
- };
- if haystack.get(aligned2).map_or(true, |&b| b != self.byte2) {
- continue;
- }
-
- // We've done what we can. There might be a match here.
- return Some(aligned1);
- }
- }
-
- /// Returns the pair of offsets (into the needle) used to check as a
- /// predicate before confirming whether a needle exists at a particular
- /// position.
- #[inline]
- pub fn pair(&self) -> &Pair {
- &self.pair
- }
-}
-
-/// A pair of byte offsets into a needle to use as a predicate.
-///
-/// This pair is used as a predicate to quickly filter out positions in a
-/// haystack in which a needle cannot match. In some cases, this pair can even
-/// be used in vector algorithms such that the vector algorithm only switches
-/// over to scalar code once this pair has been found.
-///
-/// A pair of offsets can be used in both substring search implementations and
-/// in prefilters. The former will report matches of a needle in a haystack
-/// where as the latter will only report possible matches of a needle.
-///
-/// The offsets are limited each to a maximum of 255 to keep memory usage low.
-/// Moreover, it's rarely advantageous to create a predicate using offsets
-/// greater than 255 anyway.
-///
-/// The only guarantee enforced on the pair of offsets is that they are not
-/// equivalent. It is not necessarily the case that `index1 < index2` for
-/// example. By convention, `index1` corresponds to the byte in the needle
-/// that is believed to be most the predictive. Note also that because of the
-/// requirement that the indices be both valid for the needle used to build
-/// the pair and not equal, it follows that a pair can only be constructed for
-/// needles with length at least 2.
-#[derive(Clone, Copy, Debug)]
-pub struct Pair {
- index1: u8,
- index2: u8,
-}
-
-impl Pair {
- /// Create a new pair of offsets from the given needle.
- ///
- /// If a pair could not be created (for example, if the needle is too
- /// short), then `None` is returned.
- ///
- /// This chooses the pair in the needle that is believed to be as
- /// predictive of an overall match of the needle as possible.
- #[inline]
- pub fn new(needle: &[u8]) -> Option<Pair> {
- Pair::with_ranker(needle, DefaultFrequencyRank)
- }
-
- /// Create a new pair of offsets from the given needle and ranker.
- ///
- /// This permits the caller to choose a background frequency distribution
- /// with which bytes are selected. The idea is to select a pair of bytes
- /// that is believed to strongly predict a match in the haystack. This
- /// usually means selecting bytes that occur rarely in a haystack.
- ///
- /// If a pair could not be created (for example, if the needle is too
- /// short), then `None` is returned.
- #[inline]
- pub fn with_ranker<R: HeuristicFrequencyRank>(
- needle: &[u8],
- ranker: R,
- ) -> Option<Pair> {
- if needle.len() <= 1 {
- return None;
- }
- // Find the rarest two bytes. We make them distinct indices by
- // construction. (The actual byte value may be the same in degenerate
- // cases, but that's OK.)
- let (mut rare1, mut index1) = (needle[0], 0);
- let (mut rare2, mut index2) = (needle[1], 1);
- if ranker.rank(rare2) < ranker.rank(rare1) {
- core::mem::swap(&mut rare1, &mut rare2);
- core::mem::swap(&mut index1, &mut index2);
- }
- let max = usize::from(core::u8::MAX);
- for (i, &b) in needle.iter().enumerate().take(max).skip(2) {
- if ranker.rank(b) < ranker.rank(rare1) {
- rare2 = rare1;
- index2 = index1;
- rare1 = b;
- index1 = u8::try_from(i).unwrap();
- } else if b != rare1 && ranker.rank(b) < ranker.rank(rare2) {
- rare2 = b;
- index2 = u8::try_from(i).unwrap();
- }
- }
- // While not strictly required for how a Pair is normally used, we
- // really don't want these to be equivalent. If they were, it would
- // reduce the effectiveness of candidate searching using these rare
- // bytes by increasing the rate of false positives.
- assert_ne!(index1, index2);
- Some(Pair { index1, index2 })
- }
-
- /// Create a new pair using the offsets given for the needle given.
- ///
- /// This bypasses any sort of heuristic process for choosing the offsets
- /// and permits the caller to choose the offsets themselves.
- ///
- /// Indices are limited to valid `u8` values so that a `Pair` uses less
- /// memory. It is not possible to create a `Pair` with offsets bigger than
- /// `u8::MAX`. It's likely that such a thing is not needed, but if it is,
- /// it's suggested to build your own bespoke algorithm because you're
- /// likely working on a very niche case. (File an issue if this suggestion
- /// does not make sense to you.)
- ///
- /// If a pair could not be created (for example, if the needle is too
- /// short), then `None` is returned.
- #[inline]
- pub fn with_indices(
- needle: &[u8],
- index1: u8,
- index2: u8,
- ) -> Option<Pair> {
- // While not strictly required for how a Pair is normally used, we
- // really don't want these to be equivalent. If they were, it would
- // reduce the effectiveness of candidate searching using these rare
- // bytes by increasing the rate of false positives.
- if index1 == index2 {
- return None;
- }
- // Similarly, invalid indices means the Pair is invalid too.
- if usize::from(index1) >= needle.len() {
- return None;
- }
- if usize::from(index2) >= needle.len() {
- return None;
- }
- Some(Pair { index1, index2 })
- }
-
- /// Returns the first offset of the pair.
- #[inline]
- pub fn index1(&self) -> u8 {
- self.index1
- }
-
- /// Returns the second offset of the pair.
- #[inline]
- pub fn index2(&self) -> u8 {
- self.index2
- }
-}
-
-/// This trait allows the user to customize the heuristic used to determine the
-/// relative frequency of a given byte in the dataset being searched.
-///
-/// The use of this trait can have a dramatic impact on performance depending
-/// on the type of data being searched. The details of why are explained in the
-/// docs of [`crate::memmem::Prefilter`]. To summarize, the core algorithm uses
-/// a prefilter to quickly identify candidate matches that are later verified
-/// more slowly. This prefilter is implemented in terms of trying to find
-/// `rare` bytes at specific offsets that will occur less frequently in the
-/// dataset. While the concept of a `rare` byte is similar for most datasets,
-/// there are some specific datasets (like binary executables) that have
-/// dramatically different byte distributions. For these datasets customizing
-/// the byte frequency heuristic can have a massive impact on performance, and
-/// might even need to be done at runtime.
-///
-/// The default implementation of `HeuristicFrequencyRank` reads from the
-/// static frequency table defined in `src/memmem/byte_frequencies.rs`. This
-/// is optimal for most inputs, so if you are unsure of the impact of using a
-/// custom `HeuristicFrequencyRank` you should probably just use the default.
-///
-/// # Example
-///
-/// ```
-/// use memchr::{
-/// arch::all::packedpair::HeuristicFrequencyRank,
-/// memmem::FinderBuilder,
-/// };
-///
-/// /// A byte-frequency table that is good for scanning binary executables.
-/// struct Binary;
-///
-/// impl HeuristicFrequencyRank for Binary {
-/// fn rank(&self, byte: u8) -> u8 {
-/// const TABLE: [u8; 256] = [
-/// 255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17,
-/// 89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16,
-/// 68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11,
-/// 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8,
-/// 48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27,
-/// 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19,
-/// 19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24,
-/// 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
-/// 51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15,
-/// 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0,
-/// 12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0,
-/// 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5,
-/// 46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13,
-/// 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2,
-/// 16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5,
-/// 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175,
-/// ];
-/// TABLE[byte as usize]
-/// }
-/// }
-/// // Create a new finder with the custom heuristic.
-/// let finder = FinderBuilder::new()
-/// .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd");
-/// // Find needle with custom heuristic.
-/// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some());
-/// ```
-pub trait HeuristicFrequencyRank {
- /// Return the heuristic frequency rank of the given byte. A lower rank
- /// means the byte is believed to occur less frequently in the haystack.
- ///
- /// Some uses of this heuristic may treat arbitrary absolute rank values as
- /// significant. For example, an implementation detail in this crate may
- /// determine that heuristic prefilters are inappropriate if every byte in
- /// the needle has a "high" rank.
- fn rank(&self, byte: u8) -> u8;
-}
-
-/// The default byte frequency heuristic that is good for most haystacks.
-pub(crate) struct DefaultFrequencyRank;
-
-impl HeuristicFrequencyRank for DefaultFrequencyRank {
- fn rank(&self, byte: u8) -> u8 {
- self::default_rank::RANK[usize::from(byte)]
- }
-}
-
-/// This permits passing any implementation of `HeuristicFrequencyRank` as a
-/// borrowed version of itself.
-impl<'a, R> HeuristicFrequencyRank for &'a R
-where
- R: HeuristicFrequencyRank,
-{
- fn rank(&self, byte: u8) -> u8 {
- (**self).rank(byte)
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn forward_packedpair() {
- fn find(
- haystack: &[u8],
- needle: &[u8],
- _index1: u8,
- _index2: u8,
- ) -> Option<Option<usize>> {
- // We ignore the index positions requested since it winds up making
- // this test too slow overall.
- let f = Finder::new(needle)?;
- Some(f.find_prefilter(haystack))
- }
- crate::tests::packedpair::Runner::new().fwd(find).run()
- }
-}