summaryrefslogtreecommitdiff
path: root/vendor/memchr/src/tests/packedpair.rs
diff options
context:
space:
mode:
authorValentin Popov <valentin@popov.link>2024-01-08 00:21:28 +0300
committerValentin Popov <valentin@popov.link>2024-01-08 00:21:28 +0300
commit1b6a04ca5504955c571d1c97504fb45ea0befee4 (patch)
tree7579f518b23313e8a9748a88ab6173d5e030b227 /vendor/memchr/src/tests/packedpair.rs
parent5ecd8cf2cba827454317368b68571df0d13d7842 (diff)
downloadfparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.tar.xz
fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.zip
Initial vendor packages
Signed-off-by: Valentin Popov <valentin@popov.link>
Diffstat (limited to 'vendor/memchr/src/tests/packedpair.rs')
-rw-r--r--vendor/memchr/src/tests/packedpair.rs216
1 files changed, 216 insertions, 0 deletions
diff --git a/vendor/memchr/src/tests/packedpair.rs b/vendor/memchr/src/tests/packedpair.rs
new file mode 100644
index 0000000..204635b
--- /dev/null
+++ b/vendor/memchr/src/tests/packedpair.rs
@@ -0,0 +1,216 @@
+use alloc::{boxed::Box, vec, vec::Vec};
+
+/// A set of "packed pair" test seeds. Each seed serves as the base for the
+/// generation of many other tests. In essence, the seed captures the pair of
+/// bytes we used for a predicate and first byte among our needle. The tests
+/// generated from each seed essentially vary the length of the needle and
+/// haystack, while using the rare/first byte configuration from the seed.
+///
+/// The purpose of this is to test many different needle/haystack lengths.
+/// In particular, some of the vector optimizations might only have bugs
+/// in haystacks of a certain size.
+const SEEDS: &[Seed] = &[
+ // Why not use different 'first' bytes? It seemed like a good idea to be
+ // able to configure it, but when I wrote the test generator below, it
+ // didn't seem necessary to use for reasons that I forget.
+ Seed { first: b'x', index1: b'y', index2: b'z' },
+ Seed { first: b'x', index1: b'x', index2: b'z' },
+ Seed { first: b'x', index1: b'y', index2: b'x' },
+ Seed { first: b'x', index1: b'x', index2: b'x' },
+ Seed { first: b'x', index1: b'y', index2: b'y' },
+];
+
+/// Runs a host of "packed pair" search tests.
+///
+/// These tests specifically look for the occurrence of a possible substring
+/// match based on a pair of bytes matching at the right offsets.
+pub(crate) struct Runner {
+ fwd: Option<
+ Box<
+ dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
+ >,
+ >,
+}
+
+impl Runner {
+ /// Create a new test runner for "packed pair" substring search.
+ pub(crate) fn new() -> Runner {
+ Runner { fwd: None }
+ }
+
+ /// Run all tests. This panics on the first failure.
+ ///
+ /// If the implementation being tested returns `None` for a particular
+ /// haystack/needle combination, then that test is skipped.
+ ///
+ /// This runs tests on both the forward and reverse implementations given.
+ /// If either (or both) are missing, then tests for that implementation are
+ /// skipped.
+ pub(crate) fn run(self) {
+ if let Some(mut fwd) = self.fwd {
+ for seed in SEEDS.iter() {
+ for t in seed.generate() {
+ match fwd(&t.haystack, &t.needle, t.index1, t.index2) {
+ None => continue,
+ Some(result) => {
+ assert_eq!(
+ t.fwd, result,
+ "FORWARD, needle: {:?}, haystack: {:?}, \
+ index1: {:?}, index2: {:?}",
+ t.needle, t.haystack, t.index1, t.index2,
+ )
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /// Set the implementation for forward "packed pair" substring search.
+ ///
+ /// If the closure returns `None`, then it is assumed that the given
+ /// test cannot be applied to the particular implementation and it is
+ /// skipped. For example, if a particular implementation only supports
+ /// needles or haystacks for some minimum length.
+ ///
+ /// If this is not set, then forward "packed pair" search is not tested.
+ pub(crate) fn fwd(
+ mut self,
+ search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
+ ) -> Runner {
+ self.fwd = Some(Box::new(search));
+ self
+ }
+}
+
+/// A test that represents the input and expected output to a "packed pair"
+/// search function. The test should be able to run with any "packed pair"
+/// implementation and get the expected output.
+struct Test {
+ haystack: Vec<u8>,
+ needle: Vec<u8>,
+ index1: u8,
+ index2: u8,
+ fwd: Option<usize>,
+}
+
+impl Test {
+ /// Create a new "packed pair" test from a seed and some given offsets to
+ /// the pair of bytes to use as a predicate in the seed's needle.
+ ///
+ /// If a valid test could not be constructed, then None is returned.
+ /// (Currently, we take the approach of massaging tests to be valid
+ /// instead of rejecting them outright.)
+ fn new(
+ seed: Seed,
+ index1: usize,
+ index2: usize,
+ haystack_len: usize,
+ needle_len: usize,
+ fwd: Option<usize>,
+ ) -> Option<Test> {
+ let mut index1: u8 = index1.try_into().unwrap();
+ let mut index2: u8 = index2.try_into().unwrap();
+ // The '#' byte is never used in a haystack (unless we're expecting
+ // a match), while the '@' byte is never used in a needle.
+ let mut haystack = vec![b'@'; haystack_len];
+ let mut needle = vec![b'#'; needle_len];
+ needle[0] = seed.first;
+ needle[index1 as usize] = seed.index1;
+ needle[index2 as usize] = seed.index2;
+ // If we're expecting a match, then make sure the needle occurs
+ // in the haystack at the expected position.
+ if let Some(i) = fwd {
+ haystack[i..i + needle.len()].copy_from_slice(&needle);
+ }
+ // If the operations above lead to rare offsets pointing to the
+ // non-first occurrence of a byte, then adjust it. This might lead
+ // to redundant tests, but it's simpler than trying to change the
+ // generation process I think.
+ if let Some(i) = crate::memchr(seed.index1, &needle) {
+ index1 = u8::try_from(i).unwrap();
+ }
+ if let Some(i) = crate::memchr(seed.index2, &needle) {
+ index2 = u8::try_from(i).unwrap();
+ }
+ Some(Test { haystack, needle, index1, index2, fwd })
+ }
+}
+
+/// Data that describes a single prefilter test seed.
+#[derive(Clone, Copy)]
+struct Seed {
+ first: u8,
+ index1: u8,
+ index2: u8,
+}
+
+impl Seed {
+ const NEEDLE_LENGTH_LIMIT: usize = {
+ #[cfg(not(miri))]
+ {
+ 33
+ }
+ #[cfg(miri)]
+ {
+ 5
+ }
+ };
+
+ const HAYSTACK_LENGTH_LIMIT: usize = {
+ #[cfg(not(miri))]
+ {
+ 65
+ }
+ #[cfg(miri)]
+ {
+ 8
+ }
+ };
+
+ /// Generate a series of prefilter tests from this seed.
+ fn generate(self) -> impl Iterator<Item = Test> {
+ let len_start = 2;
+ // The iterator below generates *a lot* of tests. The number of
+ // tests was chosen somewhat empirically to be "bearable" when
+ // running the test suite.
+ //
+ // We use an iterator here because the collective haystacks of all
+ // these test cases add up to enough memory to OOM a conservative
+ // sandbox or a small laptop.
+ (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| {
+ let index_start = len_start - 1;
+ (index_start..needle_len).flat_map(move |index1| {
+ (index1..needle_len).flat_map(move |index2| {
+ (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map(
+ move |haystack_len| {
+ Test::new(
+ self,
+ index1,
+ index2,
+ haystack_len,
+ needle_len,
+ None,
+ )
+ .into_iter()
+ .chain(
+ (0..=(haystack_len - needle_len)).flat_map(
+ move |output| {
+ Test::new(
+ self,
+ index1,
+ index2,
+ haystack_len,
+ needle_len,
+ Some(output),
+ )
+ },
+ ),
+ )
+ },
+ )
+ })
+ })
+ })
+ }
+}