diff options
Diffstat (limited to 'vendor/textwrap/src/word_splitters.rs')
-rw-r--r-- | vendor/textwrap/src/word_splitters.rs | 314 |
1 files changed, 0 insertions, 314 deletions
diff --git a/vendor/textwrap/src/word_splitters.rs b/vendor/textwrap/src/word_splitters.rs deleted file mode 100644 index 69e246f..0000000 --- a/vendor/textwrap/src/word_splitters.rs +++ /dev/null @@ -1,314 +0,0 @@ -//! Word splitting functionality. -//! -//! To wrap text into lines, long words sometimes need to be split -//! across lines. The [`WordSplitter`] enum defines this -//! functionality. - -use crate::core::{display_width, Word}; - -/// The `WordSplitter` enum describes where words can be split. -/// -/// If the textwrap crate has been compiled with the `hyphenation` -/// Cargo feature enabled, you will find a -/// [`WordSplitter::Hyphenation`] variant. Use this variant for -/// language-aware hyphenation: -/// -/// ``` -/// #[cfg(feature = "hyphenation")] { -/// use hyphenation::{Language, Load, Standard}; -/// use textwrap::{wrap, Options, WordSplitter}; -/// -/// let text = "Oxidation is the loss of electrons."; -/// let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); -/// let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary)); -/// assert_eq!(wrap(text, &options), vec!["Oxida-", -/// "tion is", -/// "the loss", -/// "of elec-", -/// "trons."]); -/// } -/// ``` -/// -/// Please see the documentation for the [hyphenation] crate for more -/// details. -/// -/// [hyphenation]: https://docs.rs/hyphenation/ -#[derive(Clone)] -pub enum WordSplitter { - /// Use this as a [`Options.word_splitter`] to avoid any kind of - /// hyphenation: - /// - /// ``` - /// use textwrap::{wrap, Options, WordSplitter}; - /// - /// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation); - /// assert_eq!(wrap("foo bar-baz", &options), - /// vec!["foo", "bar-baz"]); - /// ``` - /// - /// [`Options.word_splitter`]: super::Options::word_splitter - NoHyphenation, - - /// `HyphenSplitter` is the default `WordSplitter` used by - /// [`Options::new`](super::Options::new). 
It will split words on - /// existing hyphens in the word. - /// - /// It will only use hyphens that are surrounded by alphanumeric - /// characters, which prevents a word like `"--foo-bar"` from - /// being split into `"--"` and `"foo-bar"`. - /// - /// # Examples - /// - /// ``` - /// use textwrap::WordSplitter; - /// - /// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"), - /// vec![6]); - /// ``` - HyphenSplitter, - - /// Use a custom function as the word splitter. - /// - /// This variant lets you implement a custom word splitter using - /// your own function. - /// - /// # Examples - /// - /// ``` - /// use textwrap::WordSplitter; - /// - /// fn split_at_underscore(word: &str) -> Vec<usize> { - /// word.match_indices('_').map(|(idx, _)| idx + 1).collect() - /// } - /// - /// let word_splitter = WordSplitter::Custom(split_at_underscore); - /// assert_eq!(word_splitter.split_points("a_long_identifier"), - /// vec![2, 7]); - /// ``` - Custom(fn(word: &str) -> Vec<usize>), - - /// A hyphenation dictionary can be used to do language-specific - /// hyphenation using patterns from the [hyphenation] crate. - /// - /// **Note:** Only available when the `hyphenation` Cargo feature is - /// enabled. 
- /// - /// [hyphenation]: https://docs.rs/hyphenation/ - #[cfg(feature = "hyphenation")] - Hyphenation(hyphenation::Standard), -} - -impl std::fmt::Debug for WordSplitter { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - WordSplitter::NoHyphenation => f.write_str("NoHyphenation"), - WordSplitter::HyphenSplitter => f.write_str("HyphenSplitter"), - WordSplitter::Custom(_) => f.write_str("Custom(...)"), - #[cfg(feature = "hyphenation")] - WordSplitter::Hyphenation(dict) => write!(f, "Hyphenation({})", dict.language()), - } - } -} - -impl PartialEq<WordSplitter> for WordSplitter { - fn eq(&self, other: &WordSplitter) -> bool { - match (self, other) { - (WordSplitter::NoHyphenation, WordSplitter::NoHyphenation) => true, - (WordSplitter::HyphenSplitter, WordSplitter::HyphenSplitter) => true, - #[cfg(feature = "hyphenation")] - (WordSplitter::Hyphenation(this_dict), WordSplitter::Hyphenation(other_dict)) => { - this_dict.language() == other_dict.language() - } - (_, _) => false, - } - } -} - -impl WordSplitter { - /// Return all possible indices where `word` can be split. - /// - /// The indices are in the range `0..word.len()`. They point to - /// the index _after_ the split point, i.e., after `-` if - /// splitting on hyphens. This way, `word.split_at(idx)` will - /// break the word into two well-formed pieces. 
- /// - /// # Examples - /// - /// ``` - /// use textwrap::WordSplitter; - /// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]); - /// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]); - /// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]); - /// ``` - pub fn split_points(&self, word: &str) -> Vec<usize> { - match self { - WordSplitter::NoHyphenation => Vec::new(), - WordSplitter::HyphenSplitter => { - let mut splits = Vec::new(); - - for (idx, _) in word.match_indices('-') { - // We only use hyphens that are surrounded by alphanumeric - // characters. This is to avoid splitting on repeated hyphens, - // such as those found in --foo-bar. - let prev = word[..idx].chars().next_back(); - let next = word[idx + 1..].chars().next(); - - if prev.filter(|ch| ch.is_alphanumeric()).is_some() - && next.filter(|ch| ch.is_alphanumeric()).is_some() - { - splits.push(idx + 1); // +1 due to width of '-'. - } - } - - splits - } - WordSplitter::Custom(splitter_func) => splitter_func(word), - #[cfg(feature = "hyphenation")] - WordSplitter::Hyphenation(dictionary) => { - use hyphenation::Hyphenator; - dictionary.hyphenate(word).breaks - } - } - } -} - -/// Split words into smaller words according to the split points given -/// by `word_splitter`. -/// -/// Note that we split all words, regardless of their length. This is -/// to more cleanly separate the business of splitting (including -/// automatic hyphenation) from the business of word wrapping. 
-pub fn split_words<'a, I>( - words: I, - word_splitter: &'a WordSplitter, -) -> impl Iterator<Item = Word<'a>> -where - I: IntoIterator<Item = Word<'a>>, -{ - words.into_iter().flat_map(move |word| { - let mut prev = 0; - let mut split_points = word_splitter.split_points(&word).into_iter(); - std::iter::from_fn(move || { - if let Some(idx) = split_points.next() { - let need_hyphen = !word[..idx].ends_with('-'); - let w = Word { - word: &word.word[prev..idx], - width: display_width(&word[prev..idx]), - whitespace: "", - penalty: if need_hyphen { "-" } else { "" }, - }; - prev = idx; - return Some(w); - } - - if prev < word.word.len() || prev == 0 { - let w = Word { - word: &word.word[prev..], - width: display_width(&word[prev..]), - whitespace: word.whitespace, - penalty: word.penalty, - }; - prev = word.word.len() + 1; - return Some(w); - } - - None - }) - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - // Like assert_eq!, but the left expression is an iterator. - macro_rules! assert_iter_eq { - ($left:expr, $right:expr) => { - assert_eq!($left.collect::<Vec<_>>(), $right); - }; - } - - #[test] - fn split_words_no_words() { - assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]); - } - - #[test] - fn split_words_empty_word() { - assert_iter_eq!( - split_words(vec![Word::from(" ")], &WordSplitter::HyphenSplitter), - vec![Word::from(" ")] - ); - } - - #[test] - fn split_words_single_word() { - assert_iter_eq!( - split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter), - vec![Word::from("foobar")] - ); - } - - #[test] - fn split_words_hyphen_splitter() { - assert_iter_eq!( - split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter), - vec![Word::from("foo-"), Word::from("bar")] - ); - } - - #[test] - fn split_words_no_hyphenation() { - assert_iter_eq!( - split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation), - vec![Word::from("foo-bar")] - ); - } - - #[test] - fn split_words_adds_penalty() { - 
let fixed_split_point = |_: &str| vec![3]; - - assert_iter_eq!( - split_words( - vec![Word::from("foobar")].into_iter(), - &WordSplitter::Custom(fixed_split_point) - ), - vec![ - Word { - word: "foo", - width: 3, - whitespace: "", - penalty: "-" - }, - Word { - word: "bar", - width: 3, - whitespace: "", - penalty: "" - } - ] - ); - - assert_iter_eq!( - split_words( - vec![Word::from("fo-bar")].into_iter(), - &WordSplitter::Custom(fixed_split_point) - ), - vec![ - Word { - word: "fo-", - width: 3, - whitespace: "", - penalty: "" - }, - Word { - word: "bar", - width: 3, - whitespace: "", - penalty: "" - } - ] - ); - } -} |