about summary refs log tree commit diff
path: root/vendor/textwrap/src/word_separators.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/textwrap/src/word_separators.rs')
-rw-r--r-- vendor/textwrap/src/word_separators.rs 428
1 files changed, 0 insertions, 428 deletions
diff --git a/vendor/textwrap/src/word_separators.rs b/vendor/textwrap/src/word_separators.rs
deleted file mode 100644
index 25adf31..0000000
--- a/vendor/textwrap/src/word_separators.rs
+++ /dev/null
@@ -1,428 +0,0 @@
-//! Functionality for finding words.
-//!
-//! In order to wrap text, we need to know where the legal break
-//! points are, i.e., where the words of the text are. This means that
-//! we need to define what a "word" is.
-//!
-//! A simple approach is to split the text on whitespace, but
-//! this does not work for East-Asian languages such as Chinese or
-//! Japanese where there are no spaces between words. Breaking a long
-//! sequence of emojis is another example where line breaks might be
-//! wanted even if there is no whitespace to be found.
-//!
-//! The [`WordSeparator`] enum is responsible for determining where
-//! the words are in a line of text. Please refer to the enum and
-//! its variants for more information.
-
-#[cfg(feature = "unicode-linebreak")]
-use crate::core::skip_ansi_escape_sequence;
-use crate::core::Word;
-
-/// Describes where words occur in a line of text.
-///
-/// The simplest approach is to say that words are separated by one or
-/// more ASCII spaces (`' '`). This works for Western languages
-/// without emojis. A more complex approach is to use the Unicode line
-/// breaking algorithm, which finds break points in non-ASCII text.
-///
-/// The line breaks occur between words. Please see
-/// [`WordSplitter`](crate::WordSplitter) for options of how to handle
-/// hyphenation of individual words.
-///
-/// # Examples
-///
-/// ```
-/// use textwrap::core::Word;
-/// use textwrap::WordSeparator::AsciiSpace;
-///
-/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
-/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
-/// ```
-#[derive(Clone, Copy)]
-pub enum WordSeparator {
- /// Find words by splitting on runs of `' '` characters.
- ///
- /// # Examples
- ///
- /// ```
- /// use textwrap::core::Word;
- /// use textwrap::WordSeparator::AsciiSpace;
- ///
- /// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
- /// assert_eq!(words, vec![Word::from("Hello "),
- /// Word::from("World!")]);
- /// ```
- AsciiSpace,
-
- /// Split `line` into words using Unicode break properties.
- ///
- /// This word separator uses the Unicode line breaking algorithm
- /// described in [Unicode Standard Annex
- /// #14](https://www.unicode.org/reports/tr14/) to find legal places
- /// to break lines. There is a small difference in that the U+002D
- /// (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break:
- /// to allow a line break at a hyphen, use
- /// [`WordSplitter::HyphenSplitter`](crate::WordSplitter::HyphenSplitter).
- /// Soft hyphens are not currently supported.
- ///
- /// # Examples
- ///
- /// Unlike [`WordSeparator::AsciiSpace`], the Unicode line
- /// breaking algorithm will find line break opportunities between
- /// some characters with no intervening whitespace:
- ///
- /// ```
- /// #[cfg(feature = "unicode-linebreak")] {
- /// use textwrap::core::Word;
- /// use textwrap::WordSeparator::UnicodeBreakProperties;
- ///
- /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(),
- /// vec![Word::from("Emojis: "),
- /// Word::from("😂"),
- /// Word::from("😍")]);
- ///
- /// assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::<Vec<_>>(),
- /// vec![Word::from("CJK: "),
- /// Word::from("你"),
- /// Word::from("好")]);
- /// }
- /// ```
- ///
- /// A U+2060 (Word Joiner) character can be inserted if you want to
- /// manually override the defaults and keep the characters together:
- ///
- /// ```
- /// #[cfg(feature = "unicode-linebreak")] {
- /// use textwrap::core::Word;
- /// use textwrap::WordSeparator::UnicodeBreakProperties;
- ///
- /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(),
- /// vec![Word::from("Emojis: "),
- /// Word::from("😂\u{2060}😍")]);
- /// }
- /// ```
- ///
- /// The Unicode line breaking algorithm will also automatically
- /// suppress line breaks around certain punctuation characters:
- ///
- /// ```
- /// #[cfg(feature = "unicode-linebreak")] {
- /// use textwrap::core::Word;
- /// use textwrap::WordSeparator::UnicodeBreakProperties;
- ///
- /// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(),
- /// vec![Word::from("[ foo ] "),
- /// Word::from("bar !")]);
- /// }
- /// ```
- #[cfg(feature = "unicode-linebreak")]
- UnicodeBreakProperties,
-
- /// Find words using a custom word separator.
- Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>),
-}
-
-impl std::fmt::Debug for WordSeparator {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- match self {
- WordSeparator::AsciiSpace => f.write_str("AsciiSpace"),
- #[cfg(feature = "unicode-linebreak")]
- WordSeparator::UnicodeBreakProperties => f.write_str("UnicodeBreakProperties"),
- WordSeparator::Custom(_) => f.write_str("Custom(...)"),
- }
- }
-}
-
-impl WordSeparator {
- // This function should really return impl Iterator<Item = Word>, but
- // this isn't possible until Rust supports higher-kinded types:
- // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md
- /// Find all words in `line`.
- pub fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
- match self {
- WordSeparator::AsciiSpace => find_words_ascii_space(line),
- #[cfg(feature = "unicode-linebreak")]
- WordSeparator::UnicodeBreakProperties => find_words_unicode_break_properties(line),
- WordSeparator::Custom(func) => func(line),
- }
- }
-}
-
-fn find_words_ascii_space<'a>(line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
- let mut start = 0;
- let mut in_whitespace = false;
- let mut char_indices = line.char_indices();
-
- Box::new(std::iter::from_fn(move || {
- // for (idx, ch) in char_indices does not work, gives this
- // error:
- //
- // > cannot move out of `char_indices`, a captured variable in
- // > an `FnMut` closure
- #[allow(clippy::while_let_on_iterator)]
- while let Some((idx, ch)) = char_indices.next() {
- if in_whitespace && ch != ' ' {
- let word = Word::from(&line[start..idx]);
- start = idx;
- in_whitespace = ch == ' ';
- return Some(word);
- }
-
- in_whitespace = ch == ' ';
- }
-
- if start < line.len() {
- let word = Word::from(&line[start..]);
- start = line.len();
- return Some(word);
- }
-
- None
- }))
-}
-
-// Strip all ANSI escape sequences from `text`.
-#[cfg(feature = "unicode-linebreak")]
-fn strip_ansi_escape_sequences(text: &str) -> String {
- let mut result = String::with_capacity(text.len());
-
- let mut chars = text.chars();
- while let Some(ch) = chars.next() {
- if skip_ansi_escape_sequence(ch, &mut chars) {
- continue;
- }
- result.push(ch);
- }
-
- result
-}
-
-/// Soft hyphen, also known as a “shy hyphen”. Should show up as ‘-’
-/// if a line is broken at this point, and otherwise be invisible.
-/// Textwrap does not currently support breaking words at soft
-/// hyphens.
-#[cfg(feature = "unicode-linebreak")]
-const SHY: char = '\u{00ad}';
-
-/// Find words in line. ANSI escape sequences are ignored in `line`.
-#[cfg(feature = "unicode-linebreak")]
-fn find_words_unicode_break_properties<'a>(
- line: &'a str,
-) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
- // Construct an iterator over (original index, stripped index)
- // tuples. We find the Unicode linebreaks on a stripped string,
- // but we need the original indices so we can form words based on
- // the original string.
- let mut last_stripped_idx = 0;
- let mut char_indices = line.char_indices();
- let mut idx_map = std::iter::from_fn(move || match char_indices.next() {
- Some((orig_idx, ch)) => {
- let stripped_idx = last_stripped_idx;
- if !skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
- last_stripped_idx += ch.len_utf8();
- }
- Some((orig_idx, stripped_idx))
- }
- None => None,
- });
-
- let stripped = strip_ansi_escape_sequences(line);
- let mut opportunities = unicode_linebreak::linebreaks(&stripped)
- .filter(|(idx, _)| {
- #[allow(clippy::match_like_matches_macro)]
- match &stripped[..*idx].chars().next_back() {
- // We suppress breaks at ‘-’ since we want to control
- // this via the WordSplitter.
- Some('-') => false,
- // Soft hyphens are currently not supported since we
- // require all `Word` fragments to be continuous in
- // the input string.
- Some(SHY) => false,
- // Other breaks should be fine!
- _ => true,
- }
- })
- .collect::<Vec<_>>()
- .into_iter();
-
- // Remove the final break opportunity; we will add it below using
- // `&line[start..]`. This ensures that we correctly include a
- // trailing ANSI escape sequence.
- opportunities.next_back();
-
- let mut start = 0;
- Box::new(std::iter::from_fn(move || {
- #[allow(clippy::while_let_on_iterator)]
- while let Some((idx, _)) = opportunities.next() {
- if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx) {
- let word = Word::from(&line[start..orig_idx]);
- start = orig_idx;
- return Some(word);
- }
- }
-
- if start < line.len() {
- let word = Word::from(&line[start..]);
- start = line.len();
- return Some(word);
- }
-
- None
- }))
-}
-
-#[cfg(test)]
-mod tests {
- use super::WordSeparator::*;
- use super::*;
-
- // Like assert_eq!, but the left expression is an iterator.
- macro_rules! assert_iter_eq {
- ($left:expr, $right:expr) => {
- assert_eq!($left.collect::<Vec<_>>(), $right);
- };
- }
-
- fn to_words<'a>(words: Vec<&'a str>) -> Vec<Word<'a>> {
- words.into_iter().map(|w: &str| Word::from(&w)).collect()
- }
-
- macro_rules! test_find_words {
- ($ascii_name:ident,
- $unicode_name:ident,
- $([ $line:expr, $ascii_words:expr, $unicode_words:expr ]),+) => {
- #[test]
- fn $ascii_name() {
- $(
- let expected_words = to_words($ascii_words.to_vec());
- let actual_words = WordSeparator::AsciiSpace
- .find_words($line)
- .collect::<Vec<_>>();
- assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
- )+
- }
-
- #[test]
- #[cfg(feature = "unicode-linebreak")]
- fn $unicode_name() {
- $(
- let expected_words = to_words($unicode_words.to_vec());
- let actual_words = WordSeparator::UnicodeBreakProperties
- .find_words($line)
- .collect::<Vec<_>>();
- assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
- )+
- }
- };
- }
-
- test_find_words!(ascii_space_empty, unicode_empty, ["", [], []]);
-
- test_find_words!(
- ascii_single_word,
- unicode_single_word,
- ["foo", ["foo"], ["foo"]]
- );
-
- test_find_words!(
- ascii_two_words,
- unicode_two_words,
- ["foo bar", ["foo ", "bar"], ["foo ", "bar"]]
- );
-
- test_find_words!(
- ascii_multiple_words,
- unicode_multiple_words,
- ["foo bar", ["foo ", "bar"], ["foo ", "bar"]],
- ["x y z", ["x ", "y ", "z"], ["x ", "y ", "z"]]
- );
-
- test_find_words!(
- ascii_only_whitespace,
- unicode_only_whitespace,
- [" ", [" "], [" "]],
- [" ", [" "], [" "]]
- );
-
- test_find_words!(
- ascii_inter_word_whitespace,
- unicode_inter_word_whitespace,
- ["foo bar", ["foo ", "bar"], ["foo ", "bar"]]
- );
-
- test_find_words!(
- ascii_trailing_whitespace,
- unicode_trailing_whitespace,
- ["foo ", ["foo "], ["foo "]]
- );
-
- test_find_words!(
- ascii_leading_whitespace,
- unicode_leading_whitespace,
- [" foo", [" ", "foo"], [" ", "foo"]]
- );
-
- test_find_words!(
- ascii_multi_column_char,
- unicode_multi_column_char,
- ["\u{1f920}", ["\u{1f920}"], ["\u{1f920}"]] // cowboy emoji 🤠
- );
-
- test_find_words!(
- ascii_hyphens,
- unicode_hyphens,
- ["foo-bar", ["foo-bar"], ["foo-bar"]],
- ["foo- bar", ["foo- ", "bar"], ["foo- ", "bar"]],
- ["foo - bar", ["foo ", "- ", "bar"], ["foo ", "- ", "bar"]],
- ["foo -bar", ["foo ", "-bar"], ["foo ", "-bar"]]
- );
-
- test_find_words!(
- ascii_newline,
- unicode_newline,
- ["foo\nbar", ["foo\nbar"], ["foo\n", "bar"]]
- );
-
- test_find_words!(
- ascii_tab,
- unicode_tab,
- ["foo\tbar", ["foo\tbar"], ["foo\t", "bar"]]
- );
-
- test_find_words!(
- ascii_non_breaking_space,
- unicode_non_breaking_space,
- ["foo\u{00A0}bar", ["foo\u{00A0}bar"], ["foo\u{00A0}bar"]]
- );
-
- #[test]
- #[cfg(unix)]
- fn find_words_colored_text() {
- use termion::color::{Blue, Fg, Green, Reset};
-
- let green_hello = format!("{}Hello{} ", Fg(Green), Fg(Reset));
- let blue_world = format!("{}World!{}", Fg(Blue), Fg(Reset));
- assert_iter_eq!(
- AsciiSpace.find_words(&format!("{}{}", green_hello, blue_world)),
- vec![Word::from(&green_hello), Word::from(&blue_world)]
- );
-
- #[cfg(feature = "unicode-linebreak")]
- assert_iter_eq!(
- UnicodeBreakProperties.find_words(&format!("{}{}", green_hello, blue_world)),
- vec![Word::from(&green_hello), Word::from(&blue_world)]
- );
- }
-
- #[test]
- fn find_words_color_inside_word() {
- let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz";
- assert_iter_eq!(AsciiSpace.find_words(&text), vec![Word::from(text)]);
-
- #[cfg(feature = "unicode-linebreak")]
- assert_iter_eq!(
- UnicodeBreakProperties.find_words(&text),
- vec![Word::from(text)]
- );
- }
-}