From 1b6a04ca5504955c571d1c97504fb45ea0befee4 Mon Sep 17 00:00:00 2001 From: Valentin Popov Date: Mon, 8 Jan 2024 01:21:28 +0400 Subject: Initial vendor packages Signed-off-by: Valentin Popov --- vendor/memchr/src/tests/memchr/mod.rs | 307 +++++++++++++++++++++++ vendor/memchr/src/tests/memchr/naive.rs | 33 +++ vendor/memchr/src/tests/memchr/prop.rs | 321 +++++++++++++++++++++++++ vendor/memchr/src/tests/mod.rs | 15 ++ vendor/memchr/src/tests/packedpair.rs | 216 +++++++++++++++++ vendor/memchr/src/tests/substring/mod.rs | 232 ++++++++++++++++++ vendor/memchr/src/tests/substring/naive.rs | 45 ++++ vendor/memchr/src/tests/substring/prop.rs | 126 ++++++++++ vendor/memchr/src/tests/x86_64-soft_float.json | 15 ++ 9 files changed, 1310 insertions(+) create mode 100644 vendor/memchr/src/tests/memchr/mod.rs create mode 100644 vendor/memchr/src/tests/memchr/naive.rs create mode 100644 vendor/memchr/src/tests/memchr/prop.rs create mode 100644 vendor/memchr/src/tests/mod.rs create mode 100644 vendor/memchr/src/tests/packedpair.rs create mode 100644 vendor/memchr/src/tests/substring/mod.rs create mode 100644 vendor/memchr/src/tests/substring/naive.rs create mode 100644 vendor/memchr/src/tests/substring/prop.rs create mode 100644 vendor/memchr/src/tests/x86_64-soft_float.json (limited to 'vendor/memchr/src/tests') diff --git a/vendor/memchr/src/tests/memchr/mod.rs b/vendor/memchr/src/tests/memchr/mod.rs new file mode 100644 index 0000000..0564ad4 --- /dev/null +++ b/vendor/memchr/src/tests/memchr/mod.rs @@ -0,0 +1,307 @@ +use alloc::{ + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::ext::Byte; + +pub(crate) mod naive; +#[macro_use] +pub(crate) mod prop; + +const SEEDS: &'static [Seed] = &[ + Seed { haystack: "a", needles: &[b'a'], positions: &[0] }, + Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] }, + Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] }, + Seed { haystack: "", needles: &[b'a'], positions: &[] }, + Seed { haystack: "z", needles: &[b'a'], positions: &[] }, + Seed { haystack: "zz", needles: &[b'a'], positions: &[] }, + Seed { haystack: "zza", needles: &[b'a'], positions: &[2] }, + Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] }, + Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] }, + Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] }, + Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] }, + Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] }, + Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] }, + Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] }, + Seed { + haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza", + needles: &[b'a'], + positions: &[32], + }, + // two needles (applied to memchr2 + memchr3) + Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] }, + Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] }, + Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] }, + Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] }, + Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] }, + Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] }, + Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] }, + // three needles (applied to memchr3) + Seed { + haystack: "xyz", + needles: &[b'x', b'y', b'z'], + positions: &[0, 1, 2], + }, + Seed { + haystack: "zxy", + needles: &[b'x', b'y', b'z'], + positions: &[0, 1, 2], + }, + Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] }, + Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] }, + Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] }, +]; + +/// Runs a host of substring search tests. +/// +/// This has support for "partial" substring search implementations only work +/// for a subset of needles/haystacks. For example, the "packed pair" substring +/// search implementation only works for haystacks of some minimum length based +/// of the pair of bytes selected and the size of the vector used. +pub(crate) struct Runner { + needle_len: usize, +} + +impl Runner { + /// Create a new test runner for forward and reverse byte search + /// implementations. + /// + /// The `needle_len` given must be at most `3` and at least `1`. It + /// corresponds to the number of needle bytes to search for. + pub(crate) fn new(needle_len: usize) -> Runner { + assert!(needle_len >= 1, "needle_len must be at least 1"); + assert!(needle_len <= 3, "needle_len must be at most 3"); + Runner { needle_len } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + pub(crate) fn forward_iter(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let results = match test(t.haystack.as_bytes(), &t.needles) { + None => continue, + Some(results) => results, + }; + assert_eq!( + t.expected, + results, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::>(), + t.haystack, + ); + } + } + } + + /// Run all tests in the reverse direction. This panics on the first + /// failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + pub(crate) fn reverse_iter(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let mut results = match test(t.haystack.as_bytes(), &t.needles) + { + None => continue, + Some(results) => results, + }; + results.reverse(); + assert_eq!( + t.expected, + results, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::>(), + t.haystack, + ); + } + } + } + + /// Run all tests as counting tests. This panics on the first failure. + /// + /// That is, this only checks that the number of matches is correct and + /// not whether the offsets of each match are. + pub(crate) fn count_iter(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let got = match test(t.haystack.as_bytes(), &t.needles) { + None => continue, + Some(got) => got, + }; + assert_eq!( + t.expected.len(), + got, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::>(), + t.haystack, + ); + } + } + } + + /// Like `Runner::forward`, but for a function that returns only the next + /// match and not all matches. + /// + /// If the function returns `None`, then it is skipped. + pub(crate) fn forward_oneshot(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + self.forward_iter(move |haystack, needles| { + let mut start = 0; + let mut results = vec![]; + while let Some(i) = test(&haystack[start..], needles)? { + results.push(start + i); + start += i + 1; + } + Some(results) + }) + } + + /// Like `Runner::reverse`, but for a function that returns only the last + /// match and not all matches. + /// + /// If the function returns `None`, then it is skipped. + pub(crate) fn reverse_oneshot(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + self.reverse_iter(move |haystack, needles| { + let mut end = haystack.len(); + let mut results = vec![]; + while let Some(i) = test(&haystack[..end], needles)? { + results.push(i); + end = i; + } + Some(results) + }) + } +} + +/// A single test for memr?chr{,2,3}. +#[derive(Clone, Debug)] +struct Test { + /// The string to search in. + haystack: String, + /// The needles to look for. + needles: Vec, + /// The offsets that are expected to be found for all needles in the + /// forward direction. + expected: Vec, +} + +impl Test { + fn new(seed: &Seed) -> Test { + Test { + haystack: seed.haystack.to_string(), + needles: seed.needles.to_vec(), + expected: seed.positions.to_vec(), + } + } +} + +/// Data that can be expanded into many memchr tests by padding out the corpus. +#[derive(Clone, Debug)] +struct Seed { + /// The thing to search. We use `&str` instead of `&[u8]` because they + /// are nicer to write in tests, and we don't miss much since memchr + /// doesn't care about UTF-8. + /// + /// Corpora cannot contain either '%' or '#'. We use these bytes when + /// expanding test cases into many test cases, and we assume they are not + /// used. If they are used, `memchr_tests` will panic. + haystack: &'static str, + /// The needles to search for. This is intended to be an alternation of + /// needles. The number of needles may cause this test to be skipped for + /// some memchr variants. For example, a test with 2 needles cannot be used + /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. + /// However, a test with only 1 needle can be used to test all of `memchr`, + /// `memchr2` and `memchr3`. We achieve this by filling in the needles with + /// bytes that we never used in the corpus (such as '#'). + needles: &'static [u8], + /// The positions expected to match for all of the needles. + positions: &'static [usize], +} + +impl Seed { + /// Controls how much we expand the haystack on either side for each test. + /// We lower this on Miri because otherwise running the tests would take + /// forever. + const EXPAND_LEN: usize = { + #[cfg(not(miri))] + { + 515 + } + #[cfg(miri)] + { + 6 + } + }; + + /// Expand this test into many variations of the same test. + /// + /// In particular, this will generate more tests with larger corpus sizes. + /// The expected positions are updated to maintain the integrity of the + /// test. + /// + /// This is important in testing a memchr implementation, because there are + /// often different cases depending on the length of the corpus. + /// + /// Note that we extend the corpus by adding `%` bytes, which we + /// don't otherwise use as a needle. + fn generate(&self) -> impl Iterator { + let mut more = vec![]; + + // Add bytes to the start of the corpus. + for i in 0..Seed::EXPAND_LEN { + let mut t = Test::new(self); + let mut new: String = core::iter::repeat('%').take(i).collect(); + new.push_str(&t.haystack); + t.haystack = new; + t.expected = t.expected.into_iter().map(|p| p + i).collect(); + more.push(t); + } + // Add bytes to the end of the corpus. + for i in 1..Seed::EXPAND_LEN { + let mut t = Test::new(self); + let padding: String = core::iter::repeat('%').take(i).collect(); + t.haystack.push_str(&padding); + more.push(t); + } + + more.into_iter() + } +} diff --git a/vendor/memchr/src/tests/memchr/naive.rs b/vendor/memchr/src/tests/memchr/naive.rs new file mode 100644 index 0000000..6ebcdae --- /dev/null +++ b/vendor/memchr/src/tests/memchr/naive.rs @@ -0,0 +1,33 @@ +pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option { + haystack.iter().position(|&b| b == n1) +} + +pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { + haystack.iter().position(|&b| b == n1 || b == n2) +} + +pub(crate) fn memchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], +) -> Option { + haystack.iter().position(|&b| b == n1 || b == n2 || b == n3) +} + +pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option { + haystack.iter().rposition(|&b| b == n1) +} + +pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { + haystack.iter().rposition(|&b| b == n1 || b == n2) +} + +pub(crate) fn memrchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], +) -> Option { + haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3) +} diff --git a/vendor/memchr/src/tests/memchr/prop.rs b/vendor/memchr/src/tests/memchr/prop.rs new file mode 100644 index 0000000..b988260 --- /dev/null +++ b/vendor/memchr/src/tests/memchr/prop.rs @@ -0,0 +1,321 @@ +#[cfg(miri)] +#[macro_export] +macro_rules! define_memchr_quickcheck { + ($($tt:tt)*) => {}; +} + +#[cfg(not(miri))] +#[macro_export] +macro_rules! define_memchr_quickcheck { + ($mod:ident) => { + define_memchr_quickcheck!($mod, new); + }; + ($mod:ident, $cons:ident) => { + use alloc::vec::Vec; + + use quickcheck::TestResult; + + use crate::tests::memchr::{ + naive, + prop::{double_ended_take, naive1_iter, naive2_iter, naive3_iter}, + }; + + quickcheck::quickcheck! { + fn qc_memchr_matches_naive(n1: u8, corpus: Vec) -> TestResult { + let expected = naive::memchr(n1, &corpus); + let got = match $mod::One::$cons(n1) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr_matches_naive(n1: u8, corpus: Vec) -> TestResult { + let expected = naive::memrchr(n1, &corpus); + let got = match $mod::One::$cons(n1) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec) -> TestResult { + let expected = naive::memchr2(n1, n2, &corpus); + let got = match $mod::Two::$cons(n1, n2) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec) -> TestResult { + let expected = naive::memrchr2(n1, n2, &corpus); + let got = match $mod::Two::$cons(n1, n2) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr3_matches_naive( + n1: u8, n2: u8, n3: u8, + corpus: Vec + ) -> TestResult { + let expected = naive::memchr3(n1, n2, n3, &corpus); + let got = match $mod::Three::$cons(n1, n2, n3) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr3_matches_naive( + n1: u8, n2: u8, n3: u8, + corpus: Vec + ) -> TestResult { + let expected = naive::memrchr3(n1, n2, n3, &corpus); + let got = match $mod::Three::$cons(n1, n2, n3) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr_double_ended_iter( + needle: u8, data: Vec, take_side: Vec + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive1_iter(needle, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr2_double_ended_iter( + needle1: u8, needle2: u8, data: Vec, take_side: Vec + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive2_iter(needle1, needle2, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr3_double_ended_iter( + needle1: u8, needle2: u8, needle3: u8, + data: Vec, take_side: Vec + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive3_iter(needle1, needle2, needle3, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr1_iter(data: Vec) -> TestResult { + let needle = 0; + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive1_iter(needle, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr1_rev_iter(data: Vec) -> TestResult { + let needle = 0; + + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive1_iter(needle, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr2_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive2_iter(needle1, needle2, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr2_rev_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive2_iter(needle1, needle2, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr3_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + let needle3 = 2; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive3_iter(needle1, needle2, needle3, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr3_rev_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + let needle3 = 2; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive3_iter(needle1, needle2, needle3, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr1_iter_size_hint(data: Vec) -> TestResult { + // test that the size hint is within reasonable bounds + let needle = 0; + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let mut iter = finder.iter(&data); + let mut real_count = data + .iter() + .filter(|&&elt| elt == needle) + .count(); + + while let Some(index) = iter.next() { + real_count -= 1; + let (lower, upper) = iter.size_hint(); + assert!(lower <= real_count); + assert!(upper.unwrap() >= real_count); + assert!(upper.unwrap() <= data.len() - index); + } + TestResult::passed() + } + } + }; +} + +// take items from a DEI, taking front for each true and back for each false. +// Return a vector with the concatenation of the fronts and the reverse of the +// backs. +#[cfg(not(miri))] +pub(crate) fn double_ended_take( + mut iter: I, + take_side: J, +) -> alloc::vec::Vec +where + I: DoubleEndedIterator, + J: Iterator, +{ + let mut found_front = alloc::vec![]; + let mut found_back = alloc::vec![]; + + for take_front in take_side { + if take_front { + if let Some(pos) = iter.next() { + found_front.push(pos); + } else { + break; + } + } else { + if let Some(pos) = iter.next_back() { + found_back.push(pos); + } else { + break; + } + }; + } + + let mut all_found = found_front; + all_found.extend(found_back.into_iter().rev()); + all_found +} + +// return an iterator of the 0-based indices of haystack that match the needle +#[cfg(not(miri))] +pub(crate) fn naive1_iter<'a>( + n1: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator + 'a { + haystack.iter().enumerate().filter(move |&(_, &b)| b == n1).map(|t| t.0) +} + +#[cfg(not(miri))] +pub(crate) fn naive2_iter<'a>( + n1: u8, + n2: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator + 'a { + haystack + .iter() + .enumerate() + .filter(move |&(_, &b)| b == n1 || b == n2) + .map(|t| t.0) +} + +#[cfg(not(miri))] +pub(crate) fn naive3_iter<'a>( + n1: u8, + n2: u8, + n3: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator + 'a { + haystack + .iter() + .enumerate() + .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3) + .map(|t| t.0) +} diff --git a/vendor/memchr/src/tests/mod.rs b/vendor/memchr/src/tests/mod.rs new file mode 100644 index 0000000..259b678 --- /dev/null +++ b/vendor/memchr/src/tests/mod.rs @@ -0,0 +1,15 @@ +#[macro_use] +pub(crate) mod memchr; +pub(crate) mod packedpair; +#[macro_use] +pub(crate) mod substring; + +// For debugging, particularly in CI, print out the byte order of the current +// target. +#[test] +fn byte_order() { + #[cfg(target_endian = "little")] + std::eprintln!("LITTLE ENDIAN"); + #[cfg(target_endian = "big")] + std::eprintln!("BIG ENDIAN"); +} diff --git a/vendor/memchr/src/tests/packedpair.rs b/vendor/memchr/src/tests/packedpair.rs new file mode 100644 index 0000000..204635b --- /dev/null +++ b/vendor/memchr/src/tests/packedpair.rs @@ -0,0 +1,216 @@ +use alloc::{boxed::Box, vec, vec::Vec}; + +/// A set of "packed pair" test seeds. Each seed serves as the base for the +/// generation of many other tests. In essence, the seed captures the pair of +/// bytes we used for a predicate and first byte among our needle. The tests +/// generated from each seed essentially vary the length of the needle and +/// haystack, while using the rare/first byte configuration from the seed. +/// +/// The purpose of this is to test many different needle/haystack lengths. +/// In particular, some of the vector optimizations might only have bugs +/// in haystacks of a certain size. +const SEEDS: &[Seed] = &[ + // Why not use different 'first' bytes? It seemed like a good idea to be + // able to configure it, but when I wrote the test generator below, it + // didn't seem necessary to use for reasons that I forget. + Seed { first: b'x', index1: b'y', index2: b'z' }, + Seed { first: b'x', index1: b'x', index2: b'z' }, + Seed { first: b'x', index1: b'y', index2: b'x' }, + Seed { first: b'x', index1: b'x', index2: b'x' }, + Seed { first: b'x', index1: b'y', index2: b'y' }, +]; + +/// Runs a host of "packed pair" search tests. +/// +/// These tests specifically look for the occurrence of a possible substring +/// match based on a pair of bytes matching at the right offsets. +pub(crate) struct Runner { + fwd: Option< + Box< + dyn FnMut(&[u8], &[u8], u8, u8) -> Option> + 'static, + >, + >, +} + +impl Runner { + /// Create a new test runner for "packed pair" substring search. + pub(crate) fn new() -> Runner { + Runner { fwd: None } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + /// + /// This runs tests on both the forward and reverse implementations given. + /// If either (or both) are missing, then tests for that implementation are + /// skipped. + pub(crate) fn run(self) { + if let Some(mut fwd) = self.fwd { + for seed in SEEDS.iter() { + for t in seed.generate() { + match fwd(&t.haystack, &t.needle, t.index1, t.index2) { + None => continue, + Some(result) => { + assert_eq!( + t.fwd, result, + "FORWARD, needle: {:?}, haystack: {:?}, \ + index1: {:?}, index2: {:?}", + t.needle, t.haystack, t.index1, t.index2, + ) + } + } + } + } + } + } + + /// Set the implementation for forward "packed pair" substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then forward "packed pair" search is not tested. + pub(crate) fn fwd( + mut self, + search: impl FnMut(&[u8], &[u8], u8, u8) -> Option> + 'static, + ) -> Runner { + self.fwd = Some(Box::new(search)); + self + } +} + +/// A test that represents the input and expected output to a "packed pair" +/// search function. The test should be able to run with any "packed pair" +/// implementation and get the expected output. +struct Test { + haystack: Vec, + needle: Vec, + index1: u8, + index2: u8, + fwd: Option, +} + +impl Test { + /// Create a new "packed pair" test from a seed and some given offsets to + /// the pair of bytes to use as a predicate in the seed's needle. + /// + /// If a valid test could not be constructed, then None is returned. + /// (Currently, we take the approach of massaging tests to be valid + /// instead of rejecting them outright.) + fn new( + seed: Seed, + index1: usize, + index2: usize, + haystack_len: usize, + needle_len: usize, + fwd: Option, + ) -> Option { + let mut index1: u8 = index1.try_into().unwrap(); + let mut index2: u8 = index2.try_into().unwrap(); + // The '#' byte is never used in a haystack (unless we're expecting + // a match), while the '@' byte is never used in a needle. + let mut haystack = vec![b'@'; haystack_len]; + let mut needle = vec![b'#'; needle_len]; + needle[0] = seed.first; + needle[index1 as usize] = seed.index1; + needle[index2 as usize] = seed.index2; + // If we're expecting a match, then make sure the needle occurs + // in the haystack at the expected position. + if let Some(i) = fwd { + haystack[i..i + needle.len()].copy_from_slice(&needle); + } + // If the operations above lead to rare offsets pointing to the + // non-first occurrence of a byte, then adjust it. This might lead + // to redundant tests, but it's simpler than trying to change the + // generation process I think. + if let Some(i) = crate::memchr(seed.index1, &needle) { + index1 = u8::try_from(i).unwrap(); + } + if let Some(i) = crate::memchr(seed.index2, &needle) { + index2 = u8::try_from(i).unwrap(); + } + Some(Test { haystack, needle, index1, index2, fwd }) + } +} + +/// Data that describes a single prefilter test seed. +#[derive(Clone, Copy)] +struct Seed { + first: u8, + index1: u8, + index2: u8, +} + +impl Seed { + const NEEDLE_LENGTH_LIMIT: usize = { + #[cfg(not(miri))] + { + 33 + } + #[cfg(miri)] + { + 5 + } + }; + + const HAYSTACK_LENGTH_LIMIT: usize = { + #[cfg(not(miri))] + { + 65 + } + #[cfg(miri)] + { + 8 + } + }; + + /// Generate a series of prefilter tests from this seed. + fn generate(self) -> impl Iterator { + let len_start = 2; + // The iterator below generates *a lot* of tests. The number of + // tests was chosen somewhat empirically to be "bearable" when + // running the test suite. + // + // We use an iterator here because the collective haystacks of all + // these test cases add up to enough memory to OOM a conservative + // sandbox or a small laptop. + (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| { + let index_start = len_start - 1; + (index_start..needle_len).flat_map(move |index1| { + (index1..needle_len).flat_map(move |index2| { + (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map( + move |haystack_len| { + Test::new( + self, + index1, + index2, + haystack_len, + needle_len, + None, + ) + .into_iter() + .chain( + (0..=(haystack_len - needle_len)).flat_map( + move |output| { + Test::new( + self, + index1, + index2, + haystack_len, + needle_len, + Some(output), + ) + }, + ), + ) + }, + ) + }) + }) + }) + } +} diff --git a/vendor/memchr/src/tests/substring/mod.rs b/vendor/memchr/src/tests/substring/mod.rs new file mode 100644 index 0000000..dd10cbd --- /dev/null +++ b/vendor/memchr/src/tests/substring/mod.rs @@ -0,0 +1,232 @@ +/*! +This module defines tests and test helpers for substring implementations. +*/ + +use alloc::{ + boxed::Box, + format, + string::{String, ToString}, +}; + +pub(crate) mod naive; +#[macro_use] +pub(crate) mod prop; + +const SEEDS: &'static [Seed] = &[ + Seed::new("", "", Some(0), Some(0)), + Seed::new("", "a", Some(0), Some(1)), + Seed::new("", "ab", Some(0), Some(2)), + Seed::new("", "abc", Some(0), Some(3)), + Seed::new("a", "", None, None), + Seed::new("a", "a", Some(0), Some(0)), + Seed::new("a", "aa", Some(0), Some(1)), + Seed::new("a", "ba", Some(1), Some(1)), + Seed::new("a", "bba", Some(2), Some(2)), + Seed::new("a", "bbba", Some(3), Some(3)), + Seed::new("a", "bbbab", Some(3), Some(3)), + Seed::new("a", "bbbabb", Some(3), Some(3)), + Seed::new("a", "bbbabbb", Some(3), Some(3)), + Seed::new("a", "bbbbbb", None, None), + Seed::new("ab", "", None, None), + Seed::new("ab", "a", None, None), + Seed::new("ab", "b", None, None), + Seed::new("ab", "ab", Some(0), Some(0)), + Seed::new("ab", "aab", Some(1), Some(1)), + Seed::new("ab", "aaab", Some(2), Some(2)), + Seed::new("ab", "abaab", Some(0), Some(3)), + Seed::new("ab", "baaab", Some(3), Some(3)), + Seed::new("ab", "acb", None, None), + Seed::new("ab", "abba", Some(0), Some(0)), + Seed::new("abc", "ab", None, None), + Seed::new("abc", "abc", Some(0), Some(0)), + Seed::new("abc", "abcz", Some(0), Some(0)), + Seed::new("abc", "abczz", Some(0), Some(0)), + Seed::new("abc", "zabc", Some(1), Some(1)), + Seed::new("abc", "zzabc", Some(2), Some(2)), + Seed::new("abc", "azbc", None, None), + Seed::new("abc", "abzc", None, None), + Seed::new("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)), + Seed::new("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)), + Seed::new( + "xyz", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", + Some(32), + Some(32), + ), + Seed::new("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)), + Seed::new("\u{0}\u{1e}", "\u{1e}\u{0}", None, None), +]; + +/// Runs a host of substring search tests. +/// +/// This has support for "partial" substring search implementations only work +/// for a subset of needles/haystacks. For example, the "packed pair" substring +/// search implementation only works for haystacks of some minimum length based +/// of the pair of bytes selected and the size of the vector used. +pub(crate) struct Runner { + fwd: Option< + Box Option> + 'static>, + >, + rev: Option< + Box Option> + 'static>, + >, +} + +impl Runner { + /// Create a new test runner for forward and reverse substring search + /// implementations. + pub(crate) fn new() -> Runner { + Runner { fwd: None, rev: None } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + /// + /// This runs tests on both the forward and reverse implementations given. + /// If either (or both) are missing, then tests for that implementation are + /// skipped. + pub(crate) fn run(self) { + if let Some(mut fwd) = self.fwd { + for seed in SEEDS.iter() { + for t in seed.generate() { + match fwd(t.haystack.as_bytes(), t.needle.as_bytes()) { + None => continue, + Some(result) => { + assert_eq!( + t.fwd, result, + "FORWARD, needle: {:?}, haystack: {:?}", + t.needle, t.haystack, + ); + } + } + } + } + } + if let Some(mut rev) = self.rev { + for seed in SEEDS.iter() { + for t in seed.generate() { + match rev(t.haystack.as_bytes(), t.needle.as_bytes()) { + None => continue, + Some(result) => { + assert_eq!( + t.rev, result, + "REVERSE, needle: {:?}, haystack: {:?}", + t.needle, t.haystack, + ); + } + } + } + } + } + } + + /// Set the implementation for forward substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then forward substring search is not tested. + pub(crate) fn fwd( + mut self, + search: impl FnMut(&[u8], &[u8]) -> Option> + 'static, + ) -> Runner { + self.fwd = Some(Box::new(search)); + self + } + + /// Set the implementation for reverse substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then reverse substring search is not tested. + pub(crate) fn rev( + mut self, + search: impl FnMut(&[u8], &[u8]) -> Option> + 'static, + ) -> Runner { + self.rev = Some(Box::new(search)); + self + } +} + +/// A single substring test for forward and reverse searches. +#[derive(Clone, Debug)] +struct Test { + needle: String, + haystack: String, + fwd: Option, + rev: Option, +} + +/// A single substring test for forward and reverse searches. +/// +/// Each seed is valid on its own, but it also serves as a starting point +/// to generate more tests. Namely, we pad out the haystacks with other +/// characters so that we get more complete coverage. This is especially useful +/// for testing vector algorithms that tend to have weird special cases for +/// alignment and loop unrolling. +/// +/// Padding works by assuming certain characters never otherwise appear in a +/// needle or a haystack. Neither should contain a `#` character. +#[derive(Clone, Copy, Debug)] +struct Seed { + needle: &'static str, + haystack: &'static str, + fwd: Option, + rev: Option, +} + +impl Seed { + const MAX_PAD: usize = 34; + + const fn new( + needle: &'static str, + haystack: &'static str, + fwd: Option, + rev: Option, + ) -> Seed { + Seed { needle, haystack, fwd, rev } + } + + fn generate(self) -> impl Iterator { + assert!(!self.needle.contains('#'), "needle must not contain '#'"); + assert!(!self.haystack.contains('#'), "haystack must not contain '#'"); + (0..=Seed::MAX_PAD) + // Generate tests for padding at the beginning of haystack. + .map(move |pad| { + let needle = self.needle.to_string(); + let prefix = "#".repeat(pad); + let haystack = format!("{}{}", prefix, self.haystack); + let fwd = if needle.is_empty() { + Some(0) + } else { + self.fwd.map(|i| pad + i) + }; + let rev = if needle.is_empty() { + Some(haystack.len()) + } else { + self.rev.map(|i| pad + i) + }; + Test { needle, haystack, fwd, rev } + }) + // Generate tests for padding at the end of haystack. + .chain((1..=Seed::MAX_PAD).map(move |pad| { + let needle = self.needle.to_string(); + let suffix = "#".repeat(pad); + let haystack = format!("{}{}", self.haystack, suffix); + let fwd = if needle.is_empty() { Some(0) } else { self.fwd }; + let rev = if needle.is_empty() { + Some(haystack.len()) + } else { + self.rev + }; + Test { needle, haystack, fwd, rev } + })) + } +} diff --git a/vendor/memchr/src/tests/substring/naive.rs b/vendor/memchr/src/tests/substring/naive.rs new file mode 100644 index 0000000..1bc6009 --- /dev/null +++ b/vendor/memchr/src/tests/substring/naive.rs @@ -0,0 +1,45 @@ +/*! +This module defines "naive" implementations of substring search. + +These are sometimes useful to compare with "real" substring implementations. +The idea is that they are so simple that they are unlikely to be incorrect. +*/ + +/// Naively search forwards for the given needle in the given haystack. +pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option { + let end = haystack.len().checked_sub(needle.len()).map_or(0, |i| i + 1); + for i in 0..end { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None +} + +/// Naively search in reverse for the given needle in the given haystack. +pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option { + let end = haystack.len().checked_sub(needle.len()).map_or(0, |i| i + 1); + for i in (0..end).rev() { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None +} + +#[cfg(test)] +mod tests { + use crate::tests::substring; + + use super::*; + + #[test] + fn forward() { + substring::Runner::new().fwd(|h, n| Some(find(h, n))).run() + } + + #[test] + fn reverse() { + substring::Runner::new().rev(|h, n| Some(rfind(h, n))).run() + } +} diff --git a/vendor/memchr/src/tests/substring/prop.rs b/vendor/memchr/src/tests/substring/prop.rs new file mode 100644 index 0000000..a8352ec --- /dev/null +++ b/vendor/memchr/src/tests/substring/prop.rs @@ -0,0 +1,126 @@ +/*! +This module defines a few quickcheck properties for substring search. + +It also provides a forward and reverse macro for conveniently defining +quickcheck tests that run these properties over any substring search +implementation. +*/ + +use crate::tests::substring::naive; + +/// $fwd is a `impl FnMut(haystack, needle) -> Option>`. When the +/// routine returns `None`, then it's skipped, which is useful for substring +/// implementations that don't work for all inputs. +#[macro_export] +macro_rules! define_substring_forward_quickcheck { + ($fwd:expr) => { + #[cfg(not(miri))] + quickcheck::quickcheck! { + fn qc_fwd_prefix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::prefix_is_substring(&bs, $fwd) + } + + fn qc_fwd_suffix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::suffix_is_substring(&bs, $fwd) + } + + fn qc_fwd_matches_naive( + haystack: alloc::vec::Vec, + needle: alloc::vec::Vec + ) -> bool { + crate::tests::substring::prop::same_as_naive( + false, + &haystack, + &needle, + $fwd, + ) + } + } + }; +} + +/// $rev is a `impl FnMut(haystack, needle) -> Option>`. When the +/// routine returns `None`, then it's skipped, which is useful for substring +/// implementations that don't work for all inputs. +#[macro_export] +macro_rules! define_substring_reverse_quickcheck { + ($rev:expr) => { + #[cfg(not(miri))] + quickcheck::quickcheck! { + fn qc_rev_prefix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::prefix_is_substring(&bs, $rev) + } + + fn qc_rev_suffix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::suffix_is_substring(&bs, $rev) + } + + fn qc_rev_matches_naive( + haystack: alloc::vec::Vec, + needle: alloc::vec::Vec + ) -> bool { + crate::tests::substring::prop::same_as_naive( + true, + &haystack, + &needle, + $rev, + ) + } + } + }; +} + +/// Check that every prefix of the given byte string is a substring. +pub(crate) fn prefix_is_substring( + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option>, +) -> bool { + for i in 0..bs.len().saturating_sub(1) { + let prefix = &bs[..i]; + let result = match search(bs, prefix) { + None => continue, + Some(result) => result, + }; + if !result.is_some() { + return false; + } + } + true +} + +/// Check that every suffix of the given byte string is a substring. +pub(crate) fn suffix_is_substring( + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option>, +) -> bool { + for i in 0..bs.len().saturating_sub(1) { + let suffix = &bs[i..]; + let result = match search(bs, suffix) { + None => continue, + Some(result) => result, + }; + if !result.is_some() { + return false; + } + } + true +} + +/// Check that naive substring search matches the result of the given search +/// algorithm. +pub(crate) fn same_as_naive( + reverse: bool, + haystack: &[u8], + needle: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option>, +) -> bool { + let result = match search(haystack, needle) { + None => return true, + Some(result) => result, + }; + if reverse { + result == naive::rfind(haystack, needle) + } else { + result == naive::find(haystack, needle) + } +} diff --git a/vendor/memchr/src/tests/x86_64-soft_float.json b/vendor/memchr/src/tests/x86_64-soft_float.json new file mode 100644 index 0000000..b77649e --- /dev/null +++ b/vendor/memchr/src/tests/x86_64-soft_float.json @@ -0,0 +1,15 @@ +{ + "llvm-target": "x86_64-unknown-none", + "target-endian": "little", + "target-pointer-width": "64", + "target-c-int-width": "32", + "os": "none", + "arch": "x86_64", + "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", + "linker-flavor": "ld.lld", + "linker": "rust-lld", + "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-3dnow,-3dnowa,-avx,-avx2,+soft-float", + "executables": true, + "disable-redzone": true, + "panic-strategy": "abort" +} -- cgit v1.2.3