aboutsummaryrefslogtreecommitdiff
path: root/vendor/strsim
diff options
context:
space:
mode:
authorValentin Popov <valentin@popov.link>2024-07-19 15:37:58 +0300
committerValentin Popov <valentin@popov.link>2024-07-19 15:37:58 +0300
commita990de90fe41456a23e58bd087d2f107d321f3a1 (patch)
tree15afc392522a9e85dc3332235e311b7d39352ea9 /vendor/strsim
parent3d48cd3f81164bbfc1a755dc1d4a9a02f98c8ddd (diff)
downloadfparkan-a990de90fe41456a23e58bd087d2f107d321f3a1.tar.xz
fparkan-a990de90fe41456a23e58bd087d2f107d321f3a1.zip
Deleted vendor folder
Diffstat (limited to 'vendor/strsim')
-rw-r--r--vendor/strsim/.cargo-checksum.json1
-rw-r--r--vendor/strsim/CHANGELOG.md226
-rw-r--r--vendor/strsim/Cargo.toml39
-rw-r--r--vendor/strsim/LICENSE23
-rw-r--r--vendor/strsim/README.md102
-rw-r--r--vendor/strsim/benches/benches.rs95
-rw-r--r--vendor/strsim/src/lib.rs1307
-rw-r--r--vendor/strsim/tests/lib.rs71
8 files changed, 0 insertions, 1864 deletions
diff --git a/vendor/strsim/.cargo-checksum.json b/vendor/strsim/.cargo-checksum.json
deleted file mode 100644
index 6e7d5ab..0000000
--- a/vendor/strsim/.cargo-checksum.json
+++ /dev/null
@@ -1 +0,0 @@
-{"files":{"CHANGELOG.md":"fef521b697f2b44b73bc64603538a9849cb7d707c5227063546d4bb5ca836f03","Cargo.toml":"7face49fe821809026b3a537695cdcb12a82a5ebbed20b73cfab69b06fa309fb","LICENSE":"1e697ce8d21401fbf1bddd9b5c3fd4c4c79ae1e3bdf51f81761c85e11d5a89cd","README.md":"70806b27bbb20acf1e47aabd68530062aab834b7efeced77fcb3126179b043d0","benches/benches.rs":"2f7fae162a517378b42af04b4b077ffd563171f7341cba55b4efca3b4c30426a","src/lib.rs":"2e340450050784ea8e6e19ea36cbedcf9084ecb7829fc206cacdca4f6f069784","tests/lib.rs":"4c8207a5728b82836795e2f87d7d7834db7276082f5ded640f34822feb750cb4"},"package":"ccbca6f34534eb78dbee83f6b2c9442fea7113f43d9e80ea320f0972ae5dc08d"} \ No newline at end of file
diff --git a/vendor/strsim/CHANGELOG.md b/vendor/strsim/CHANGELOG.md
deleted file mode 100644
index 558cbf9..0000000
--- a/vendor/strsim/CHANGELOG.md
+++ /dev/null
@@ -1,226 +0,0 @@
-# Change Log
-
-This project attempts to adhere to [Semantic Versioning](http://semver.org).
-
-## [Unreleased]
-
-## [0.10.1] - (2024-01-07)
-
-### Changed
-
-- improve OSA implementation
- - reduce runtime
- - reduce binary size by more than `25%`
-
-- reduce binary size of Levenshtein distance
-
-- improve Damerau-Levenshtein implementation
- - reduce memory usage from `O(N*M)` to `O(N+M)`
- - reduce runtime in our own benchmark by more than `70%`
- - reduce binary size by more than `25%`
-
-- only boost similarity in Jaro-Winkler once the Jaro similarity exceeds 0.7
-
-### Fixed
-
-- Fix transposition counting in Jaro and Jaro-Winkler.
-- Limit common prefix in Jaro-Winkler to 4 characters
-
-## [0.10.0] - (2020-01-31)
-
-### Added
-
-- Sørensen-Dice implementation (thanks [@robjtede](https://github.com/robjtede))
-
-## [0.9.3] - (2019-12-12)
-
-### Fixed
-
-- Fix Jaro and Jaro-Winkler when the arguments have lengths of 1 and are equal.
- Previously, the functions would erroneously return 0 instead of 1. Thanks to
- [@vvrably](https://github.com/vvrably) for pointing out the issue.
-
-## [0.9.2] - (2019-05-09)
-
-### Changed
-
-- Revert back to the standard library hashmap because it will use hashbrown very
- soon
-- Remove ndarray in favor of using a single vector to represent the 2d grid in
- Damerau-Levenshtein
-
-## [0.9.1] - (2019-04-08)
-
-### Changed
-
-- Faster Damerau-Levenshtein implementation (thanks [@lovasoa](https://github.com/lovasoa))
-
-## [0.9.0] - (2019-04-06)
-
-### Added
-
-- Generic distance functions (thanks [@lovasoa](https://github.com/lovasoa))
-
-## [0.8.0] - (2018-08-19)
-
-### Added
-
-- Normalized versions of Levenshtein and Damerau-Levenshtein (thanks [@gentoid](https://github.com/gentoid))
-
-## [0.7.0] - (2018-01-17)
-
-### Changed
-
-- Faster Levenshtein implementation (thanks [@wdv4758h](https://github.com/wdv4758h))
-
-### Removed
-
-- Remove the "against_vec" functions. They are one-liners now, so they don't
- seem to add enough value to justify making the API larger. I didn't find
- anybody using them when I skimmed through a GitHub search. If you do use them,
- you can change the calls to something like:
-```rust
-let distances = strings.iter().map(|a| jaro(target, a)).collect();
-```
-
-## [0.6.0] - (2016-12-26)
-
-### Added
-
-- Add optimal string alignment distance
-
-### Fixed
-
-- Fix Damerau-Levenshtein implementation (previous implementation was actually
- optimal string alignment; see this [Damerau-Levenshtein explanation])
-
-## [0.5.2] - (2016-11-21)
-
-### Changed
-
-- Remove Cargo generated documentation in favor of a [docs.rs] link
-
-## [0.5.1] - (2016-08-23)
-
-### Added
-
-- Add Cargo generated documentation
-
-### Fixed
-
-- Fix panic when Jaro or Jaro-Winkler are given strings both with a length of
- one
-
-## [0.5.0] - (2016-08-11)
-
-### Changed
-
-- Make Hamming faster (thanks @IBUzPE9) when the two strings have the same
- length but slower when they have different lengths
-
-## [0.4.1] - (2016-04-18)
-
-### Added
-
-- Add Vagrant setup for development
-- Add AppVeyor configuration for Windows CI
-
-### Fixed
-
-- Fix metrics when given strings with multibyte characters (thanks @WanzenBug)
-
-## [0.4.0] - (2015-06-10)
-
-### Added
-
-- For each metric, add a function that takes a vector of strings and returns a
-vector of results (thanks @ovarene)
-
-## [0.3.0] - (2015-04-30)
-
-### Changed
-
-- Remove usage of unstable Rust features
-
-## [0.2.5] - (2015-04-24)
-
-### Fixed
-
-- Remove unnecessary `Float` import from doc tests
-
-## [0.2.4] - (2015-04-15)
-
-### Fixed
-
-- Remove unused `core` feature flag
-
-## [0.2.3] - (2015-04-01)
-
-### Fixed
-
-- Remove now unnecessary `Float` import
-
-## [0.2.2] - (2015-03-29)
-
-### Fixed
-
-- Remove usage of `char_at` (marked as unstable)
-
-## [0.2.1] - (2015-02-20)
-
-### Fixed
-
-- Update bit vector import to match Rust update
-
-## [0.2.0] - (2015-02-19)
-
-### Added
-
-- Implement Damerau-Levenshtein
-- Add tests in docs
-
-## [0.1.1] - (2015-02-10)
-
-### Added
-
-- Configure Travis for CI
-- Add rustdoc comments
-
-### Fixed
-
-- Limit Jaro-Winkler return value to a maximum of 1.0
-- Fix float comparisons in tests
-
-## [0.1.0] - (2015-02-09)
-
-### Added
-
-- Implement Hamming, Jaro, Jaro-Winkler, and Levenshtein
-
-[Unreleased]: https://github.com/rapidfuzz/strsim-rs/compare/0.10.1...HEAD
-[0.10.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.10.0...0.10.1
-[0.10.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.9.3...0.10.0
-[0.9.3]: https://github.com/rapidfuzz/strsim-rs/compare/0.9.2...0.9.3
-[0.9.2]: https://github.com/rapidfuzz/strsim-rs/compare/0.9.1...0.9.2
-[0.9.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.9.0...0.9.1
-[0.9.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.8.0...0.9.0
-[0.8.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.7.0...0.8.0
-[0.7.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.6.0...0.7.0
-[0.6.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.5.2...0.6.0
-[0.5.2]: https://github.com/rapidfuzz/strsim-rs/compare/0.5.1...0.5.2
-[0.5.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.5.0...0.5.1
-[0.5.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.4.1...0.5.0
-[0.4.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.4.0...0.4.1
-[0.4.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.3.0...0.4.0
-[0.3.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.5...0.3.0
-[0.2.5]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.4...0.2.5
-[0.2.4]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.3...0.2.4
-[0.2.3]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.2...0.2.3
-[0.2.2]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.1...0.2.2
-[0.2.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.2.0...0.2.1
-[0.2.0]: https://github.com/rapidfuzz/strsim-rs/compare/0.1.1...0.2.0
-[0.1.1]: https://github.com/rapidfuzz/strsim-rs/compare/0.1.0...0.1.1
-[0.1.0]: https://github.com/rapidfuzz/strsim-rs/compare/fabad4...0.1.0
-[docs.rs]: https://docs.rs/strsim/
-[Damerau-Levenshtein explanation]:
-http://scarcitycomputing.blogspot.com/2013/04/damerau-levenshtein-edit-distance.html
diff --git a/vendor/strsim/Cargo.toml b/vendor/strsim/Cargo.toml
deleted file mode 100644
index 7011ac5..0000000
--- a/vendor/strsim/Cargo.toml
+++ /dev/null
@@ -1,39 +0,0 @@
-# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
-#
-# When uploading crates to the registry Cargo will automatically
-# "normalize" Cargo.toml files for maximal compatibility
-# with all versions of Cargo and also rewrite `path` dependencies
-# to registry (e.g., crates.io) dependencies.
-#
-# If you are reading this file be aware that the original Cargo.toml
-# will likely look very different (and much more reasonable).
-# See Cargo.toml.orig for the original contents.
-
-[package]
-name = "strsim"
-version = "0.10.1"
-authors = [
- "Danny Guo <danny@dannyguo.com>",
- "maxbachmann <oss@maxbachmann.de>",
-]
-exclude = [
- "/.github",
- "/dev",
-]
-description = """
-Implementations of string similarity metrics. Includes Hamming, Levenshtein,
-OSA, Damerau-Levenshtein, Jaro, Jaro-Winkler, and Sørensen-Dice.
-"""
-homepage = "https://github.com/rapidfuzz/strsim-rs"
-documentation = "https://docs.rs/strsim/"
-readme = "README.md"
-keywords = [
- "string",
- "similarity",
- "Hamming",
- "Levenshtein",
- "Jaro",
-]
-categories = ["text-processing"]
-license = "MIT"
-repository = "https://github.com/rapidfuzz/strsim-rs"
diff --git a/vendor/strsim/LICENSE b/vendor/strsim/LICENSE
deleted file mode 100644
index 8d1fbe1..0000000
--- a/vendor/strsim/LICENSE
+++ /dev/null
@@ -1,23 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2015 Danny Guo
-Copyright (c) 2016 Titus Wormer <tituswormer@gmail.com>
-Copyright (c) 2018 Akash Kurdekar
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/vendor/strsim/README.md b/vendor/strsim/README.md
deleted file mode 100644
index f3dcd96..0000000
--- a/vendor/strsim/README.md
+++ /dev/null
@@ -1,102 +0,0 @@
-# strsim-rs
-
-[![Crates.io](https://img.shields.io/crates/v/strsim.svg)](https://crates.io/crates/strsim)
-[![Crates.io](https://img.shields.io/crates/l/strsim.svg?maxAge=2592000)](https://github.com/rapidfuzz/strsim-rs/blob/main/LICENSE)
-[![CI status](https://github.com/rapidfuzz/strsim-rs/workflows/CI/badge.svg)](https://github.com/rapidfuzz/strsim-rs/actions?query=branch%3Amain)
-[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
-
-[Rust](https://www.rust-lang.org) implementations of [string similarity metrics]:
- - [Hamming]
- - [Levenshtein] - distance & normalized
- - [Optimal string alignment]
- - [Damerau-Levenshtein] - distance & normalized
- - [Jaro and Jaro-Winkler]
- - [Sørensen-Dice]
-
-The normalized versions return values between `0.0` and `1.0`, where `1.0` means
-an exact match.
-
-There are also generic versions of the functions for non-string inputs.
-
-## Installation
-
-`strsim` is available on [crates.io](https://crates.io/crates/strsim). Add it to
-your `Cargo.toml`:
-```toml
-[dependencies]
-strsim = "0.10.0"
-```
-
-## Usage
-
-Go to [Docs.rs](https://docs.rs/strsim/) for the full documentation. You can
-also clone the repo, and run `$ cargo doc --open`.
-
-### Examples
-
-```rust
-extern crate strsim;
-
-use strsim::{hamming, levenshtein, normalized_levenshtein, osa_distance,
- damerau_levenshtein, normalized_damerau_levenshtein, jaro,
- jaro_winkler, sorensen_dice};
-
-fn main() {
- match hamming("hamming", "hammers") {
- Ok(distance) => assert_eq!(3, distance),
- Err(why) => panic!("{:?}", why)
- }
-
- assert_eq!(levenshtein("kitten", "sitting"), 3);
-
- assert!((normalized_levenshtein("kitten", "sitting") - 0.571).abs() < 0.001);
-
- assert_eq!(osa_distance("ac", "cba"), 3);
-
- assert_eq!(damerau_levenshtein("ac", "cba"), 2);
-
- assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.272).abs() <
- 0.001);
-
- assert!((jaro("Friedrich Nietzsche", "Jean-Paul Sartre") - 0.392).abs() <
- 0.001);
-
- assert!((jaro_winkler("cheeseburger", "cheese fries") - 0.911).abs() <
- 0.001);
-
- assert_eq!(sorensen_dice("web applications", "applications of the web"),
- 0.7878787878787878);
-}
-```
-
-Using the generic versions of the functions:
-
-```rust
-extern crate strsim;
-
-use strsim::generic_levenshtein;
-
-fn main() {
- assert_eq!(2, generic_levenshtein(&[1, 2, 3], &[0, 2, 5]));
-}
-```
-
-## Contributing
-
-If you don't want to install Rust itself, you can run `$ ./dev` for a
-development CLI if you have [Docker] installed.
-
-Benchmarks require a Nightly toolchain. Run `$ cargo +nightly bench`.
-
-## License
-
-[MIT](https://github.com/rapidfuzz/strsim-rs/blob/main/LICENSE)
-
-[string similarity metrics]:http://en.wikipedia.org/wiki/String_metric
-[Damerau-Levenshtein]:http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
-[Jaro and Jaro-Winkler]:http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
-[Levenshtein]:http://en.wikipedia.org/wiki/Levenshtein_distance
-[Hamming]:http://en.wikipedia.org/wiki/Hamming_distance
-[Optimal string alignment]:https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance
-[Sørensen-Dice]:http://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
-[Docker]:https://docs.docker.com/engine/installation/
diff --git a/vendor/strsim/benches/benches.rs b/vendor/strsim/benches/benches.rs
deleted file mode 100644
index 15c7041..0000000
--- a/vendor/strsim/benches/benches.rs
+++ /dev/null
@@ -1,95 +0,0 @@
-//! Benchmarks for strsim.
-
-#![feature(test)]
-
-extern crate strsim;
-extern crate test;
-use self::test::Bencher;
-
-#[bench]
-fn bench_hamming(bencher: &mut Bencher) {
- let a = "ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGG";
- let b = "CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGC";
- bencher.iter(|| {
- strsim::hamming(a, b).unwrap();
- })
-}
-
-#[bench]
-fn bench_jaro(bencher: &mut Bencher) {
- let a = "Philosopher Friedrich Nietzsche";
- let b = "Philosopher Jean-Paul Sartre";
- bencher.iter(|| {
- strsim::jaro(a, b);
- })
-}
-
-#[bench]
-fn bench_jaro_winkler(bencher: &mut Bencher) {
- let a = "Philosopher Friedrich Nietzsche";
- let b = "Philosopher Jean-Paul Sartre";
- bencher.iter(|| {
- strsim::jaro_winkler(a, b);
- })
-}
-
-#[bench]
-fn bench_levenshtein(bencher: &mut Bencher) {
- let a = "Philosopher Friedrich Nietzsche";
- let b = "Philosopher Jean-Paul Sartre";
- bencher.iter(|| {
- strsim::levenshtein(a, b);
- })
-}
-
-#[bench]
-fn bench_levenshtein_on_u8(bencher: &mut Bencher) {
- bencher.iter(|| {
- strsim::generic_levenshtein(&vec![0u8; 30], &vec![7u8; 31]);
- })
-}
-
-#[bench]
-fn bench_normalized_levenshtein(bencher: &mut Bencher) {
- let a = "Philosopher Friedrich Nietzsche";
- let b = "Philosopher Jean-Paul Sartre";
- bencher.iter(|| {
- strsim::normalized_levenshtein(a, b);
- })
-}
-
-#[bench]
-fn bench_osa_distance(bencher: &mut Bencher) {
- let a = "Philosopher Friedrich Nietzsche";
- let b = "Philosopher Jean-Paul Sartre";
- bencher.iter(|| {
- strsim::osa_distance(a, b);
- })
-}
-
-#[bench]
-fn bench_damerau_levenshtein(bencher: &mut Bencher) {
- let a = "Philosopher Friedrich Nietzsche";
- let b = "Philosopher Jean-Paul Sartre";
- bencher.iter(|| {
- strsim::damerau_levenshtein(a, b);
- })
-}
-
-#[bench]
-fn bench_normalized_damerau_levenshtein(bencher: &mut Bencher) {
- let a = "Philosopher Friedrich Nietzsche";
- let b = "Philosopher Jean-Paul Sartre";
- bencher.iter(|| {
- strsim::normalized_damerau_levenshtein(a, b);
- })
-}
-
-#[bench]
-fn bench_sorensen_dice(bencher: &mut Bencher) {
- let a = "Philosopher Friedrich Nietzsche";
- let b = "Philosopher Jean-Paul Sartre";
- bencher.iter(|| {
- strsim::sorensen_dice(a, b);
- })
-}
diff --git a/vendor/strsim/src/lib.rs b/vendor/strsim/src/lib.rs
deleted file mode 100644
index 8118277..0000000
--- a/vendor/strsim/src/lib.rs
+++ /dev/null
@@ -1,1307 +0,0 @@
-//! This library implements string similarity metrics.
-
-#![forbid(unsafe_code)]
-#![allow(
- // these casts are sometimes needed. They restrict the length of input iterators
- // but there isn't really any way around this except for always working with
- // 128 bit types
- clippy::cast_possible_wrap,
- clippy::cast_sign_loss,
- clippy::cast_precision_loss,
- // not practical
- clippy::needless_pass_by_value,
- clippy::similar_names,
- // noisy
- clippy::missing_errors_doc,
- clippy::missing_panics_doc,
- clippy::must_use_candidate,
- // todo https://github.com/rapidfuzz/strsim-rs/issues/59
- clippy::range_plus_one
-)]
-
-use std::char;
-use std::cmp::{max, min};
-use std::collections::HashMap;
-use std::convert::TryFrom;
-use std::error::Error;
-use std::fmt::{self, Display, Formatter};
-use std::hash::Hash;
-use std::mem;
-use std::str::Chars;
-
-#[derive(Debug, PartialEq)]
-pub enum StrSimError {
- DifferentLengthArgs,
-}
-
-impl Display for StrSimError {
- fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> {
- let text = match self {
- StrSimError::DifferentLengthArgs => "Differing length arguments provided",
- };
-
- write!(fmt, "{text}")
- }
-}
-
-impl Error for StrSimError {}
-
-pub type HammingResult = Result<usize, StrSimError>;
-
-/// Calculates the number of positions in the two sequences where the elements
-/// differ. Returns an error if the sequences have different lengths.
-pub fn generic_hamming<Iter1, Iter2, Elem1, Elem2>(a: Iter1, b: Iter2) -> HammingResult
-where
- Iter1: IntoIterator<Item = Elem1>,
- Iter2: IntoIterator<Item = Elem2>,
- Elem1: PartialEq<Elem2>,
-{
- let (mut ita, mut itb) = (a.into_iter(), b.into_iter());
- let mut count = 0;
- loop {
- match (ita.next(), itb.next()) {
- (Some(x), Some(y)) => {
- if x != y {
- count += 1;
- }
- }
- (None, None) => return Ok(count),
- _ => return Err(StrSimError::DifferentLengthArgs),
- }
- }
-}
-
-/// Calculates the number of positions in the two strings where the characters
-/// differ. Returns an error if the strings have different lengths.
-///
-/// ```
-/// use strsim::{hamming, StrSimError::DifferentLengthArgs};
-///
-/// assert_eq!(Ok(3), hamming("hamming", "hammers"));
-///
-/// assert_eq!(Err(DifferentLengthArgs), hamming("hamming", "ham"));
-/// ```
-pub fn hamming(a: &str, b: &str) -> HammingResult {
- generic_hamming(a.chars(), b.chars())
-}
-
-/// Calculates the Jaro similarity between two sequences. The returned value
-/// is between 0.0 and 1.0 (higher value means more similar).
-pub fn generic_jaro<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> f64
-where
- &'a Iter1: IntoIterator<Item = Elem1>,
- &'b Iter2: IntoIterator<Item = Elem2>,
- Elem1: PartialEq<Elem2>,
-{
- let a_len = a.into_iter().count();
- let b_len = b.into_iter().count();
-
- if a_len == 0 && b_len == 0 {
- return 1.0;
- } else if a_len == 0 || b_len == 0 {
- return 0.0;
- }
-
- let mut search_range = max(a_len, b_len) / 2;
- search_range = search_range.saturating_sub(1);
-
- // combine memory allocations to reduce runtime
- let mut flags_memory = vec![false; a_len + b_len];
- let (a_flags, b_flags) = flags_memory.split_at_mut(a_len);
-
- let mut matches = 0_usize;
-
- for (i, a_elem) in a.into_iter().enumerate() {
- // prevent integer wrapping
- let min_bound = if i > search_range {
- i - search_range
- } else {
- 0
- };
-
- let max_bound = min(b_len, i + search_range + 1);
-
- for (j, b_elem) in b.into_iter().enumerate().take(max_bound) {
- if min_bound <= j && a_elem == b_elem && !b_flags[j] {
- a_flags[i] = true;
- b_flags[j] = true;
- matches += 1;
- break;
- }
- }
- }
-
- let mut transpositions = 0_usize;
- if matches != 0 {
- let mut b_iter = b_flags.iter().zip(b);
- for (a_flag, ch1) in a_flags.iter().zip(a) {
- if *a_flag {
- loop {
- if let Some((b_flag, ch2)) = b_iter.next() {
- if !*b_flag {
- continue;
- }
-
- if ch1 != ch2 {
- transpositions += 1;
- }
- break;
- }
- }
- }
- }
- }
- transpositions /= 2;
-
- if matches == 0 {
- 0.0
- } else {
- ((matches as f64 / a_len as f64)
- + (matches as f64 / b_len as f64)
- + ((matches - transpositions) as f64 / matches as f64))
- / 3.0
- }
-}
-
-struct StringWrapper<'a>(&'a str);
-
-impl<'a, 'b> IntoIterator for &'a StringWrapper<'b> {
- type Item = char;
- type IntoIter = Chars<'b>;
-
- fn into_iter(self) -> Self::IntoIter {
- self.0.chars()
- }
-}
-
-/// Calculates the Jaro similarity between two strings. The returned value
-/// is between 0.0 and 1.0 (higher value means more similar).
-///
-/// ```
-/// use strsim::jaro;
-///
-/// assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
-/// 0.001);
-/// ```
-pub fn jaro(a: &str, b: &str) -> f64 {
- generic_jaro(&StringWrapper(a), &StringWrapper(b))
-}
-
-/// Like Jaro but gives a boost to sequences that have a common prefix.
-pub fn generic_jaro_winkler<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> f64
-where
- &'a Iter1: IntoIterator<Item = Elem1>,
- &'b Iter2: IntoIterator<Item = Elem2>,
- Elem1: PartialEq<Elem2>,
-{
- let sim = generic_jaro(a, b);
-
- if sim > 0.7 {
- let prefix_length = a
- .into_iter()
- .take(4)
- .zip(b)
- .take_while(|(a_elem, b_elem)| a_elem == b_elem)
- .count();
-
- sim + 0.1 * prefix_length as f64 * (1.0 - sim)
- } else {
- sim
- }
-}
-
-/// Like Jaro but gives a boost to strings that have a common prefix.
-///
-/// ```
-/// use strsim::jaro_winkler;
-///
-/// assert!((0.866 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
-/// 0.001);
-/// ```
-pub fn jaro_winkler(a: &str, b: &str) -> f64 {
- generic_jaro_winkler(&StringWrapper(a), &StringWrapper(b))
-}
-
-/// Calculates the minimum number of insertions, deletions, and substitutions
-/// required to change one sequence into the other.
-///
-/// ```
-/// use strsim::generic_levenshtein;
-///
-/// assert_eq!(3, generic_levenshtein(&[1,2,3], &[1,2,3,4,5,6]));
-/// ```
-pub fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> usize
-where
- &'a Iter1: IntoIterator<Item = Elem1>,
- &'b Iter2: IntoIterator<Item = Elem2>,
- Elem1: PartialEq<Elem2>,
-{
- let b_len = b.into_iter().count();
-
- let mut cache: Vec<usize> = (1..b_len + 1).collect();
-
- let mut result = b_len;
-
- for (i, a_elem) in a.into_iter().enumerate() {
- result = i + 1;
- let mut distance_b = i;
-
- for (j, b_elem) in b.into_iter().enumerate() {
- let cost = usize::from(a_elem != b_elem);
- let distance_a = distance_b + cost;
- distance_b = cache[j];
- result = min(result + 1, min(distance_a, distance_b + 1));
- cache[j] = result;
- }
- }
-
- result
-}
-
-/// Calculates the minimum number of insertions, deletions, and substitutions
-/// required to change one string into the other.
-///
-/// ```
-/// use strsim::levenshtein;
-///
-/// assert_eq!(3, levenshtein("kitten", "sitting"));
-/// ```
-pub fn levenshtein(a: &str, b: &str) -> usize {
- generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
-}
-
-/// Calculates a normalized score of the Levenshtein algorithm between 0.0 and
-/// 1.0 (inclusive), where 1.0 means the strings are the same.
-///
-/// ```
-/// use strsim::normalized_levenshtein;
-///
-/// assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
-/// assert!((normalized_levenshtein("", "") - 1.0).abs() < 0.00001);
-/// assert!(normalized_levenshtein("", "second").abs() < 0.00001);
-/// assert!(normalized_levenshtein("first", "").abs() < 0.00001);
-/// assert!((normalized_levenshtein("string", "string") - 1.0).abs() < 0.00001);
-/// ```
-pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
- if a.is_empty() && b.is_empty() {
- return 1.0;
- }
- 1.0 - (levenshtein(a, b) as f64) / (a.chars().count().max(b.chars().count()) as f64)
-}
-
-/// Like Levenshtein but allows for adjacent transpositions. Each substring can
-/// only be edited once.
-///
-/// ```
-/// use strsim::osa_distance;
-///
-/// assert_eq!(3, osa_distance("ab", "bca"));
-/// ```
-pub fn osa_distance(a: &str, b: &str) -> usize {
- let b_len = b.chars().count();
- // 0..=b_len behaves like 0..b_len.saturating_add(1) which could be a different size
- // this leads to significantly worse code gen when swapping the vectors below
- let mut prev_two_distances: Vec<usize> = (0..b_len + 1).collect();
- let mut prev_distances: Vec<usize> = (0..b_len + 1).collect();
- let mut curr_distances: Vec<usize> = vec![0; b_len + 1];
-
- let mut prev_a_char = char::MAX;
- let mut prev_b_char = char::MAX;
-
- for (i, a_char) in a.chars().enumerate() {
- curr_distances[0] = i + 1;
-
- for (j, b_char) in b.chars().enumerate() {
- let cost = usize::from(a_char != b_char);
- curr_distances[j + 1] = min(
- curr_distances[j] + 1,
- min(prev_distances[j + 1] + 1, prev_distances[j] + cost),
- );
- if i > 0 && j > 0 && a_char != b_char && a_char == prev_b_char && b_char == prev_a_char
- {
- curr_distances[j + 1] = min(curr_distances[j + 1], prev_two_distances[j - 1] + 1);
- }
-
- prev_b_char = b_char;
- }
-
- mem::swap(&mut prev_two_distances, &mut prev_distances);
- mem::swap(&mut prev_distances, &mut curr_distances);
- prev_a_char = a_char;
- }
-
- // access prev_distances instead of curr_distances since we swapped
- // them above. In case a is empty this would still contain the correct value
- // from initializing the last element to b_len
- prev_distances[b_len]
-}
-
-/* Returns the final index for a value in a single vector that represents a fixed
-2d grid */
-fn flat_index(i: usize, j: usize, width: usize) -> usize {
- j * width + i
-}
-
-/// Like optimal string alignment, but substrings can be edited an unlimited
-/// number of times, and the triangle inequality holds.
-///
-/// ```
-/// use strsim::generic_damerau_levenshtein;
-///
-/// assert_eq!(2, generic_damerau_levenshtein(&[1,2], &[2,3,1]));
-/// ```
-pub fn generic_damerau_levenshtein<Elem>(a_elems: &[Elem], b_elems: &[Elem]) -> usize
-where
- Elem: Eq + Hash + Clone,
-{
- let a_len = a_elems.len();
- let b_len = b_elems.len();
-
- if a_len == 0 {
- return b_len;
- }
- if b_len == 0 {
- return a_len;
- }
-
- let width = a_len + 2;
- let mut distances = vec![0; (a_len + 2) * (b_len + 2)];
- let max_distance = a_len + b_len;
- distances[0] = max_distance;
-
- for i in 0..(a_len + 1) {
- distances[flat_index(i + 1, 0, width)] = max_distance;
- distances[flat_index(i + 1, 1, width)] = i;
- }
-
- for j in 0..(b_len + 1) {
- distances[flat_index(0, j + 1, width)] = max_distance;
- distances[flat_index(1, j + 1, width)] = j;
- }
-
- let mut elems: HashMap<Elem, usize> = HashMap::with_capacity(64);
-
- for i in 1..(a_len + 1) {
- let mut db = 0;
-
- for j in 1..(b_len + 1) {
- let k = match elems.get(&b_elems[j - 1]) {
- Some(&value) => value,
- None => 0,
- };
-
- let insertion_cost = distances[flat_index(i, j + 1, width)] + 1;
- let deletion_cost = distances[flat_index(i + 1, j, width)] + 1;
- let transposition_cost =
- distances[flat_index(k, db, width)] + (i - k - 1) + 1 + (j - db - 1);
-
- let mut substitution_cost = distances[flat_index(i, j, width)] + 1;
- if a_elems[i - 1] == b_elems[j - 1] {
- db = j;
- substitution_cost -= 1;
- }
-
- distances[flat_index(i + 1, j + 1, width)] = min(
- substitution_cost,
- min(insertion_cost, min(deletion_cost, transposition_cost)),
- );
- }
-
- elems.insert(a_elems[i - 1].clone(), i);
- }
-
- distances[flat_index(a_len + 1, b_len + 1, width)]
-}
-
-#[derive(Clone, Copy, PartialEq, Eq)]
-struct RowId {
- val: isize,
-}
-
-impl Default for RowId {
- fn default() -> Self {
- Self { val: -1 }
- }
-}
-
-#[derive(Default, Clone)]
-struct GrowingHashmapMapElemChar<ValueType> {
- key: u32,
- value: ValueType,
-}
-
-/// specialized hashmap to store user provided types
-/// this implementation relies on a couple of base assumptions in order to simplify the implementation
-/// - the hashmap does not have an upper limit of included items
-/// - the default value for the `ValueType` can be used as a dummy value to indicate an empty cell
-/// - elements can't be removed
-/// - only allocates memory on first write access.
-/// This improves performance for hashmaps that are never written to
-struct GrowingHashmapChar<ValueType> {
- used: i32,
- fill: i32,
- mask: i32,
- map: Option<Vec<GrowingHashmapMapElemChar<ValueType>>>,
-}
-
-impl<ValueType> Default for GrowingHashmapChar<ValueType>
-where
- ValueType: Default + Clone + Eq,
-{
- fn default() -> Self {
- Self {
- used: 0,
- fill: 0,
- mask: -1,
- map: None,
- }
- }
-}
-
-impl<ValueType> GrowingHashmapChar<ValueType>
-where
- ValueType: Default + Clone + Eq + Copy,
-{
- fn get(&self, key: u32) -> ValueType {
- self.map
- .as_ref()
- .map_or_else(|| Default::default(), |map| map[self.lookup(key)].value)
- }
-
- fn get_mut(&mut self, key: u32) -> &mut ValueType {
- if self.map.is_none() {
- self.allocate();
- }
-
- let mut i = self.lookup(key);
- if self
- .map
- .as_ref()
- .expect("map should have been created above")[i]
- .value
- == Default::default()
- {
- self.fill += 1;
- // resize when 2/3 full
- if self.fill * 3 >= (self.mask + 1) * 2 {
- self.grow((self.used + 1) * 2);
- i = self.lookup(key);
- }
-
- self.used += 1;
- }
-
- let elem = &mut self
- .map
- .as_mut()
- .expect("map should have been created above")[i];
- elem.key = key;
- &mut elem.value
- }
-
- fn allocate(&mut self) {
- self.mask = 8 - 1;
- self.map = Some(vec![GrowingHashmapMapElemChar::default(); 8]);
- }
-
- /// lookup key inside the hashmap using a similar collision resolution
- /// strategy to `CPython` and `Ruby`
- fn lookup(&self, key: u32) -> usize {
- let hash = key;
- let mut i = hash as usize & self.mask as usize;
-
- let map = self
- .map
- .as_ref()
- .expect("callers have to ensure map is allocated");
-
- if map[i].value == Default::default() || map[i].key == key {
- return i;
- }
-
- let mut perturb = key;
- loop {
- i = (i * 5 + perturb as usize + 1) & self.mask as usize;
-
- if map[i].value == Default::default() || map[i].key == key {
- return i;
- }
-
- perturb >>= 5;
- }
- }
-
- fn grow(&mut self, min_used: i32) {
- let mut new_size = self.mask + 1;
- while new_size <= min_used {
- new_size <<= 1;
- }
-
- self.fill = self.used;
- self.mask = new_size - 1;
-
- let old_map = std::mem::replace(
- self.map
- .as_mut()
- .expect("callers have to ensure map is allocated"),
- vec![GrowingHashmapMapElemChar::<ValueType>::default(); new_size as usize],
- );
-
- for elem in old_map {
- if elem.value != Default::default() {
- let j = self.lookup(elem.key);
- let new_elem = &mut self.map.as_mut().expect("map created above")[j];
- new_elem.key = elem.key;
- new_elem.value = elem.value;
- self.used -= 1;
- if self.used == 0 {
- break;
- }
- }
- }
-
- self.used = self.fill;
- }
-}
-
-struct HybridGrowingHashmapChar<ValueType> {
- map: GrowingHashmapChar<ValueType>,
- extended_ascii: [ValueType; 256],
-}
-
-impl<ValueType> HybridGrowingHashmapChar<ValueType>
-where
- ValueType: Default + Clone + Copy + Eq,
-{
- fn get(&self, key: char) -> ValueType {
- let value = key as u32;
- if value <= 255 {
- let val_u8 = u8::try_from(value).expect("we check the bounds above");
- self.extended_ascii[usize::from(val_u8)]
- } else {
- self.map.get(value)
- }
- }
-
- fn get_mut(&mut self, key: char) -> &mut ValueType {
- let value = key as u32;
- if value <= 255 {
- let val_u8 = u8::try_from(value).expect("we check the bounds above");
- &mut self.extended_ascii[usize::from(val_u8)]
- } else {
- self.map.get_mut(value)
- }
- }
-}
-
-impl<ValueType> Default for HybridGrowingHashmapChar<ValueType>
-where
- ValueType: Default + Clone + Copy + Eq,
-{
- fn default() -> Self {
- HybridGrowingHashmapChar {
- map: GrowingHashmapChar::default(),
- extended_ascii: [Default::default(); 256],
- }
- }
-}
-
-fn damerau_levenshtein_impl<Iter1, Iter2>(s1: Iter1, len1: usize, s2: Iter2, len2: usize) -> usize
-where
- Iter1: Iterator<Item = char> + Clone,
- Iter2: Iterator<Item = char> + Clone,
-{
- // The implementations is based on the paper
- // `Linear space string correction algorithm using the Damerau-Levenshtein distance`
- // from Chunchun Zhao and Sartaj Sahni
- //
- // It has a runtime complexity of `O(N*M)` and a memory usage of `O(N+M)`.
- let max_val = max(len1, len2) as isize + 1;
-
- let mut last_row_id = HybridGrowingHashmapChar::<RowId>::default();
-
- let size = len2 + 2;
- let mut fr = vec![max_val; size];
- let mut r1 = vec![max_val; size];
- let mut r: Vec<isize> = (max_val..max_val + 1)
- .chain(0..(size - 1) as isize)
- .collect();
-
- for (i, ch1) in s1.enumerate().map(|(i, ch1)| (i + 1, ch1)) {
- mem::swap(&mut r, &mut r1);
- let mut last_col_id: isize = -1;
- let mut last_i2l1 = r[1];
- r[1] = i as isize;
- let mut t = max_val;
-
- for (j, ch2) in s2.clone().enumerate().map(|(j, ch2)| (j + 1, ch2)) {
- let diag = r1[j] + isize::from(ch1 != ch2);
- let left = r[j] + 1;
- let up = r1[j + 1] + 1;
- let mut temp = min(diag, min(left, up));
-
- if ch1 == ch2 {
- last_col_id = j as isize; // last occurence of s1_i
- fr[j + 1] = r1[j - 1]; // save H_k-1,j-2
- t = last_i2l1; // save H_i-2,l-1
- } else {
- let k = last_row_id.get(ch2).val;
- let l = last_col_id;
-
- if j as isize - l == 1 {
- let transpose = fr[j + 1] + (i as isize - k);
- temp = min(temp, transpose);
- } else if i as isize - k == 1 {
- let transpose = t + (j as isize - l);
- temp = min(temp, transpose);
- }
- }
-
- last_i2l1 = r[j + 1];
- r[j + 1] = temp;
- }
- last_row_id.get_mut(ch1).val = i as isize;
- }
-
- r[len2 + 1] as usize
-}
-
-/// Like optimal string alignment, but substrings can be edited an unlimited
-/// number of times, and the triangle inequality holds.
-///
-/// ```
-/// use strsim::damerau_levenshtein;
-///
-/// assert_eq!(2, damerau_levenshtein("ab", "bca"));
-/// ```
-pub fn damerau_levenshtein(a: &str, b: &str) -> usize {
- damerau_levenshtein_impl(a.chars(), a.chars().count(), b.chars(), b.chars().count())
-}
-
-/// Calculates a normalized score of the Damerau–Levenshtein algorithm between
-/// 0.0 and 1.0 (inclusive), where 1.0 means the strings are the same.
-///
-/// ```
-/// use strsim::normalized_damerau_levenshtein;
-///
-/// assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.27272).abs() < 0.00001);
-/// assert!((normalized_damerau_levenshtein("", "") - 1.0).abs() < 0.00001);
-/// assert!(normalized_damerau_levenshtein("", "flower").abs() < 0.00001);
-/// assert!(normalized_damerau_levenshtein("tree", "").abs() < 0.00001);
-/// assert!((normalized_damerau_levenshtein("sunglasses", "sunglasses") - 1.0).abs() < 0.00001);
-/// ```
-pub fn normalized_damerau_levenshtein(a: &str, b: &str) -> f64 {
- if a.is_empty() && b.is_empty() {
- return 1.0;
- }
-
- let len1 = a.chars().count();
- let len2 = b.chars().count();
- let dist = damerau_levenshtein_impl(a.chars(), len1, b.chars(), len2);
- 1.0 - (dist as f64) / (max(len1, len2) as f64)
-}
-
-/// Returns an Iterator of char tuples.
-fn bigrams(s: &str) -> impl Iterator<Item = (char, char)> + '_ {
- s.chars().zip(s.chars().skip(1))
-}
-
-/// Calculates a Sørensen-Dice similarity distance using bigrams.
-/// See <https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient>.
-///
-/// ```
-/// use strsim::sorensen_dice;
-///
-/// assert_eq!(1.0, sorensen_dice("", ""));
-/// assert_eq!(0.0, sorensen_dice("", "a"));
-/// assert_eq!(0.0, sorensen_dice("french", "quebec"));
-/// assert_eq!(1.0, sorensen_dice("ferris", "ferris"));
-/// assert_eq!(0.8888888888888888, sorensen_dice("feris", "ferris"));
-/// ```
-pub fn sorensen_dice(a: &str, b: &str) -> f64 {
- // implementation guided by
- // https://github.com/aceakash/string-similarity/blob/f83ba3cd7bae874c20c429774e911ae8cff8bced/src/index.js#L6
-
- let a: String = a.chars().filter(|&x| !char::is_whitespace(x)).collect();
- let b: String = b.chars().filter(|&x| !char::is_whitespace(x)).collect();
-
- if a == b {
- return 1.0;
- }
-
- if a.len() < 2 || b.len() < 2 {
- return 0.0;
- }
-
- let mut a_bigrams: HashMap<(char, char), usize> = HashMap::new();
-
- for bigram in bigrams(&a) {
- *a_bigrams.entry(bigram).or_insert(0) += 1;
- }
-
- let mut intersection_size = 0_usize;
-
- for bigram in bigrams(&b) {
- a_bigrams.entry(bigram).and_modify(|bi| {
- if *bi > 0 {
- *bi -= 1;
- intersection_size += 1;
- }
- });
- }
-
- (2 * intersection_size) as f64 / (a.len() + b.len() - 2) as f64
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- macro_rules! assert_delta {
- ($x:expr, $y:expr) => {
- assert_delta!($x, $y, 1e-5);
- };
- ($x:expr, $y:expr, $d:expr) => {
- if ($x - $y).abs() > $d {
- panic!(
- "assertion failed: actual: `{}`, expected: `{}`: \
- actual not within < {} of expected",
- $x, $y, $d
- );
- }
- };
- }
-
- #[test]
- fn bigrams_iterator() {
- let mut bi = bigrams("abcde");
-
- assert_eq!(Some(('a', 'b')), bi.next());
- assert_eq!(Some(('b', 'c')), bi.next());
- assert_eq!(Some(('c', 'd')), bi.next());
- assert_eq!(Some(('d', 'e')), bi.next());
- assert_eq!(None, bi.next());
- }
-
- fn assert_hamming_dist(dist: usize, str1: &str, str2: &str) {
- assert_eq!(Ok(dist), hamming(str1, str2));
- }
-
- #[test]
- fn hamming_empty() {
- assert_hamming_dist(0, "", "")
- }
-
- #[test]
- fn hamming_same() {
- assert_hamming_dist(0, "hamming", "hamming")
- }
-
- #[test]
- fn hamming_numbers() {
- assert_eq!(Ok(1), generic_hamming(&[1, 2, 4], &[1, 2, 3]));
- }
-
- #[test]
- fn hamming_diff() {
- assert_hamming_dist(3, "hamming", "hammers")
- }
-
- #[test]
- fn hamming_diff_multibyte() {
- assert_hamming_dist(2, "hamming", "h香mmüng");
- }
-
- #[test]
- fn hamming_unequal_length() {
- assert_eq!(
- Err(StrSimError::DifferentLengthArgs),
- generic_hamming("ham".chars(), "hamming".chars())
- );
- }
-
- #[test]
- fn hamming_names() {
- assert_hamming_dist(14, "Friedrich Nietzs", "Jean-Paul Sartre")
- }
-
- #[test]
- fn jaro_both_empty() {
- assert_eq!(1.0, jaro("", ""));
- }
-
- #[test]
- fn jaro_first_empty() {
- assert_eq!(0.0, jaro("", "jaro"));
- }
-
- #[test]
- fn jaro_second_empty() {
- assert_eq!(0.0, jaro("distance", ""));
- }
-
- #[test]
- fn jaro_same() {
- assert_eq!(1.0, jaro("jaro", "jaro"));
- }
-
- #[test]
- fn jaro_multibyte() {
- assert_delta!(0.818, jaro("testabctest", "testöঙ香test"), 0.001);
- assert_delta!(0.818, jaro("testöঙ香test", "testabctest"), 0.001);
- }
-
- #[test]
- fn jaro_diff_short() {
- assert_delta!(0.767, jaro("dixon", "dicksonx"), 0.001);
- }
-
- #[test]
- fn jaro_diff_one_character() {
- assert_eq!(0.0, jaro("a", "b"));
- }
-
- #[test]
- fn jaro_same_one_character() {
- assert_eq!(1.0, jaro("a", "a"));
- }
-
- #[test]
- fn generic_jaro_diff() {
- assert_eq!(0.0, generic_jaro(&[1, 2], &[3, 4]));
- }
-
- #[test]
- fn jaro_diff_one_and_two() {
- assert_delta!(0.83, jaro("a", "ab"), 0.01);
- }
-
- #[test]
- fn jaro_diff_two_and_one() {
- assert_delta!(0.83, jaro("ab", "a"), 0.01);
- }
-
- #[test]
- fn jaro_diff_no_transposition() {
- assert_delta!(0.822, jaro("dwayne", "duane"), 0.001);
- }
-
- #[test]
- fn jaro_diff_with_transposition() {
- assert_delta!(0.944, jaro("martha", "marhta"), 0.001);
- assert_delta!(0.6, jaro("a jke", "jane a k"), 0.001);
- }
-
- #[test]
- fn jaro_names() {
- assert_delta!(
- 0.392,
- jaro("Friedrich Nietzsche", "Jean-Paul Sartre"),
- 0.001
- );
- }
-
- #[test]
- fn jaro_winkler_both_empty() {
- assert_eq!(1.0, jaro_winkler("", ""));
- }
-
- #[test]
- fn jaro_winkler_first_empty() {
- assert_eq!(0.0, jaro_winkler("", "jaro-winkler"));
- }
-
- #[test]
- fn jaro_winkler_second_empty() {
- assert_eq!(0.0, jaro_winkler("distance", ""));
- }
-
- #[test]
- fn jaro_winkler_same() {
- assert_eq!(1.0, jaro_winkler("Jaro-Winkler", "Jaro-Winkler"));
- }
-
- #[test]
- fn jaro_winkler_multibyte() {
- assert_delta!(0.89, jaro_winkler("testabctest", "testöঙ香test"), 0.001);
- assert_delta!(0.89, jaro_winkler("testöঙ香test", "testabctest"), 0.001);
- }
-
- #[test]
- fn jaro_winkler_diff_short() {
- assert_delta!(0.813, jaro_winkler("dixon", "dicksonx"), 0.001);
- assert_delta!(0.813, jaro_winkler("dicksonx", "dixon"), 0.001);
- }
-
- #[test]
- fn jaro_winkler_diff_one_character() {
- assert_eq!(0.0, jaro_winkler("a", "b"));
- }
-
- #[test]
- fn jaro_winkler_same_one_character() {
- assert_eq!(1.0, jaro_winkler("a", "a"));
- }
-
- #[test]
- fn jaro_winkler_diff_no_transposition() {
- assert_delta!(0.84, jaro_winkler("dwayne", "duane"), 0.001);
- }
-
- #[test]
- fn jaro_winkler_diff_with_transposition() {
- assert_delta!(0.961, jaro_winkler("martha", "marhta"), 0.001);
- assert_delta!(0.6, jaro_winkler("a jke", "jane a k"), 0.001);
- }
-
- #[test]
- fn jaro_winkler_names() {
- assert_delta!(
- 0.452,
- jaro_winkler("Friedrich Nietzsche", "Fran-Paul Sartre"),
- 0.001
- );
- }
-
- #[test]
- fn jaro_winkler_long_prefix() {
- assert_delta!(0.866, jaro_winkler("cheeseburger", "cheese fries"), 0.001);
- }
-
- #[test]
- fn jaro_winkler_more_names() {
- assert_delta!(0.868, jaro_winkler("Thorkel", "Thorgier"), 0.001);
- }
-
- #[test]
- fn jaro_winkler_length_of_one() {
- assert_delta!(0.738, jaro_winkler("Dinsdale", "D"), 0.001);
- }
-
- #[test]
- fn jaro_winkler_very_long_prefix() {
- assert_delta!(
- 0.98519,
- jaro_winkler("thequickbrownfoxjumpedoverx", "thequickbrownfoxjumpedovery")
- );
- }
-
- #[test]
- fn levenshtein_empty() {
- assert_eq!(0, levenshtein("", ""));
- }
-
- #[test]
- fn levenshtein_same() {
- assert_eq!(0, levenshtein("levenshtein", "levenshtein"));
- }
-
- #[test]
- fn levenshtein_diff_short() {
- assert_eq!(3, levenshtein("kitten", "sitting"));
- }
-
- #[test]
- fn levenshtein_diff_with_space() {
- assert_eq!(5, levenshtein("hello, world", "bye, world"));
- }
-
- #[test]
- fn levenshtein_diff_multibyte() {
- assert_eq!(3, levenshtein("öঙ香", "abc"));
- assert_eq!(3, levenshtein("abc", "öঙ香"));
- }
-
- #[test]
- fn levenshtein_diff_longer() {
- let a = "The quick brown fox jumped over the angry dog.";
- let b = "Lorem ipsum dolor sit amet, dicta latine an eam.";
- assert_eq!(37, levenshtein(a, b));
- }
-
- #[test]
- fn levenshtein_first_empty() {
- assert_eq!(7, levenshtein("", "sitting"));
- }
-
- #[test]
- fn levenshtein_second_empty() {
- assert_eq!(6, levenshtein("kitten", ""));
- }
-
- #[test]
- fn normalized_levenshtein_diff_short() {
- assert_delta!(0.57142, normalized_levenshtein("kitten", "sitting"));
- }
-
- #[test]
- fn normalized_levenshtein_for_empty_strings() {
- assert_delta!(1.0, normalized_levenshtein("", ""));
- }
-
- #[test]
- fn normalized_levenshtein_first_empty() {
- assert_delta!(0.0, normalized_levenshtein("", "second"));
- }
-
- #[test]
- fn normalized_levenshtein_second_empty() {
- assert_delta!(0.0, normalized_levenshtein("first", ""));
- }
-
- #[test]
- fn normalized_levenshtein_identical_strings() {
- assert_delta!(1.0, normalized_levenshtein("identical", "identical"));
- }
-
- #[test]
- fn osa_distance_empty() {
- assert_eq!(0, osa_distance("", ""));
- }
-
- #[test]
- fn osa_distance_same() {
- assert_eq!(0, osa_distance("damerau", "damerau"));
- }
-
- #[test]
- fn osa_distance_first_empty() {
- assert_eq!(7, osa_distance("", "damerau"));
- }
-
- #[test]
- fn osa_distance_second_empty() {
- assert_eq!(7, osa_distance("damerau", ""));
- }
-
- #[test]
- fn osa_distance_diff() {
- assert_eq!(3, osa_distance("ca", "abc"));
- }
-
- #[test]
- fn osa_distance_diff_short() {
- assert_eq!(3, osa_distance("damerau", "aderua"));
- }
-
- #[test]
- fn osa_distance_diff_reversed() {
- assert_eq!(3, osa_distance("aderua", "damerau"));
- }
-
- #[test]
- fn osa_distance_diff_multibyte() {
- assert_eq!(3, osa_distance("öঙ香", "abc"));
- assert_eq!(3, osa_distance("abc", "öঙ香"));
- }
-
- #[test]
- fn osa_distance_diff_unequal_length() {
- assert_eq!(6, osa_distance("damerau", "aderuaxyz"));
- }
-
- #[test]
- fn osa_distance_diff_unequal_length_reversed() {
- assert_eq!(6, osa_distance("aderuaxyz", "damerau"));
- }
-
- #[test]
- fn osa_distance_diff_comedians() {
- assert_eq!(5, osa_distance("Stewart", "Colbert"));
- }
-
- #[test]
- fn osa_distance_many_transpositions() {
- assert_eq!(4, osa_distance("abcdefghijkl", "bacedfgihjlk"));
- }
-
- #[test]
- fn osa_distance_diff_longer() {
- let a = "The quick brown fox jumped over the angry dog.";
- let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
- assert_eq!(36, osa_distance(a, b));
- }
-
- #[test]
- fn osa_distance_beginning_transposition() {
- assert_eq!(1, osa_distance("foobar", "ofobar"));
- }
-
- #[test]
- fn osa_distance_end_transposition() {
- assert_eq!(1, osa_distance("specter", "spectre"));
- }
-
- #[test]
- fn osa_distance_restricted_edit() {
- assert_eq!(4, osa_distance("a cat", "an abct"));
- }
-
- #[test]
- fn damerau_levenshtein_empty() {
- assert_eq!(0, damerau_levenshtein("", ""));
- }
-
- #[test]
- fn damerau_levenshtein_same() {
- assert_eq!(0, damerau_levenshtein("damerau", "damerau"));
- }
-
- #[test]
- fn damerau_levenshtein_first_empty() {
- assert_eq!(7, damerau_levenshtein("", "damerau"));
- }
-
- #[test]
- fn damerau_levenshtein_second_empty() {
- assert_eq!(7, damerau_levenshtein("damerau", ""));
- }
-
- #[test]
- fn damerau_levenshtein_diff() {
- assert_eq!(2, damerau_levenshtein("ca", "abc"));
- }
-
- #[test]
- fn damerau_levenshtein_diff_short() {
- assert_eq!(3, damerau_levenshtein("damerau", "aderua"));
- }
-
- #[test]
- fn damerau_levenshtein_diff_reversed() {
- assert_eq!(3, damerau_levenshtein("aderua", "damerau"));
- }
-
- #[test]
- fn damerau_levenshtein_diff_multibyte() {
- assert_eq!(3, damerau_levenshtein("öঙ香", "abc"));
- assert_eq!(3, damerau_levenshtein("abc", "öঙ香"));
- }
-
- #[test]
- fn damerau_levenshtein_diff_unequal_length() {
- assert_eq!(6, damerau_levenshtein("damerau", "aderuaxyz"));
- }
-
- #[test]
- fn damerau_levenshtein_diff_unequal_length_reversed() {
- assert_eq!(6, damerau_levenshtein("aderuaxyz", "damerau"));
- }
-
- #[test]
- fn damerau_levenshtein_diff_comedians() {
- assert_eq!(5, damerau_levenshtein("Stewart", "Colbert"));
- }
-
- #[test]
- fn damerau_levenshtein_many_transpositions() {
- assert_eq!(4, damerau_levenshtein("abcdefghijkl", "bacedfgihjlk"));
- }
-
- #[test]
- fn damerau_levenshtein_diff_longer() {
- let a = "The quick brown fox jumped over the angry dog.";
- let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
- assert_eq!(36, damerau_levenshtein(a, b));
- }
-
- #[test]
- fn damerau_levenshtein_beginning_transposition() {
- assert_eq!(1, damerau_levenshtein("foobar", "ofobar"));
- }
-
- #[test]
- fn damerau_levenshtein_end_transposition() {
- assert_eq!(1, damerau_levenshtein("specter", "spectre"));
- }
-
- #[test]
- fn damerau_levenshtein_unrestricted_edit() {
- assert_eq!(3, damerau_levenshtein("a cat", "an abct"));
- }
-
- #[test]
- fn normalized_damerau_levenshtein_diff_short() {
- assert_delta!(
- 0.27272,
- normalized_damerau_levenshtein("levenshtein", "löwenbräu")
- );
- }
-
- #[test]
- fn normalized_damerau_levenshtein_for_empty_strings() {
- assert_delta!(1.0, normalized_damerau_levenshtein("", ""));
- }
-
- #[test]
- fn normalized_damerau_levenshtein_first_empty() {
- assert_delta!(0.0, normalized_damerau_levenshtein("", "flower"));
- }
-
- #[test]
- fn normalized_damerau_levenshtein_second_empty() {
- assert_delta!(0.0, normalized_damerau_levenshtein("tree", ""));
- }
-
- #[test]
- fn normalized_damerau_levenshtein_identical_strings() {
- assert_delta!(
- 1.0,
- normalized_damerau_levenshtein("sunglasses", "sunglasses")
- );
- }
-
- #[test]
- fn sorensen_dice_all() {
- // test cases taken from
- // https://github.com/aceakash/string-similarity/blob/f83ba3cd7bae874c20c429774e911ae8cff8bced/src/spec/index.spec.js#L11
-
- assert_delta!(1.0, sorensen_dice("a", "a"));
- assert_delta!(0.0, sorensen_dice("a", "b"));
- assert_delta!(1.0, sorensen_dice("", ""));
- assert_delta!(0.0, sorensen_dice("a", ""));
- assert_delta!(0.0, sorensen_dice("", "a"));
- assert_delta!(1.0, sorensen_dice("apple event", "apple event"));
- assert_delta!(0.90909, sorensen_dice("iphone", "iphone x"));
- assert_delta!(0.0, sorensen_dice("french", "quebec"));
- assert_delta!(1.0, sorensen_dice("france", "france"));
- assert_delta!(0.2, sorensen_dice("fRaNce", "france"));
- assert_delta!(0.8, sorensen_dice("healed", "sealed"));
- assert_delta!(
- 0.78788,
- sorensen_dice("web applications", "applications of the web")
- );
- assert_delta!(
- 0.92,
- sorensen_dice(
- "this will have a typo somewhere",
- "this will huve a typo somewhere"
- )
- );
- assert_delta!(
- 0.60606,
- sorensen_dice(
- "Olive-green table for sale, in extremely good condition.",
- "For sale: table in very good condition, olive green in colour."
- )
- );
- assert_delta!(
- 0.25581,
- sorensen_dice(
- "Olive-green table for sale, in extremely good condition.",
- "For sale: green Subaru Impreza, 210,000 miles"
- )
- );
- assert_delta!(
- 0.14118,
- sorensen_dice(
- "Olive-green table for sale, in extremely good condition.",
- "Wanted: mountain bike with at least 21 gears."
- )
- );
- assert_delta!(
- 0.77419,
- sorensen_dice("this has one extra word", "this has one word")
- );
- }
-}
diff --git a/vendor/strsim/tests/lib.rs b/vendor/strsim/tests/lib.rs
deleted file mode 100644
index 991fc6f..0000000
--- a/vendor/strsim/tests/lib.rs
+++ /dev/null
@@ -1,71 +0,0 @@
-extern crate strsim;
-
-use strsim::{
- damerau_levenshtein, hamming, jaro, jaro_winkler, levenshtein, normalized_damerau_levenshtein,
- normalized_levenshtein, osa_distance,
-};
-
-macro_rules! assert_delta {
- ($x:expr, $y:expr) => {
- assert_delta!($x, $y, 1e-5);
- };
- ($x:expr, $y:expr, $d:expr) => {
- if ($x - $y).abs() > $d {
- panic!(
- "assertion failed: actual: `{}`, expected: `{}`: \
- actual not within < {} of expected",
- $x, $y, $d
- );
- }
- };
-}
-
-#[test]
-fn hamming_works() {
- match hamming("hamming", "hammers") {
- Ok(distance) => assert_eq!(3, distance),
- Err(why) => panic!("{:?}", why),
- }
-}
-
-#[test]
-fn levenshtein_works() {
- assert_eq!(3, levenshtein("kitten", "sitting"));
-}
-
-#[test]
-fn normalized_levenshtein_works() {
- assert_delta!(0.57142, normalized_levenshtein("kitten", "sitting"));
-}
-
-#[test]
-fn osa_distance_works() {
- assert_eq!(3, osa_distance("ac", "cba"));
-}
-
-#[test]
-fn damerau_levenshtein_works() {
- assert_eq!(2, damerau_levenshtein("ac", "cba"));
-}
-
-#[test]
-fn normalized_damerau_levenshtein_works() {
- assert_delta!(
- 0.27272,
- normalized_damerau_levenshtein("levenshtein", "löwenbräu")
- );
-}
-
-#[test]
-fn jaro_works() {
- assert_delta!(
- 0.392,
- jaro("Friedrich Nietzsche", "Jean-Paul Sartre"),
- 0.001
- );
-}
-
-#[test]
-fn jaro_winkler_works() {
- assert_delta!(0.866, jaro_winkler("cheeseburger", "cheese fries"), 0.001);
-}