diff options
Diffstat (limited to 'vendor/textwrap/src')
-rw-r--r-- | vendor/textwrap/src/core.rs | 433 | ||||
-rw-r--r-- | vendor/textwrap/src/indentation.rs | 347 | ||||
-rw-r--r-- | vendor/textwrap/src/lib.rs | 1847 | ||||
-rw-r--r-- | vendor/textwrap/src/word_separators.rs | 428 | ||||
-rw-r--r-- | vendor/textwrap/src/word_splitters.rs | 314 | ||||
-rw-r--r-- | vendor/textwrap/src/wrap_algorithms.rs | 381 | ||||
-rw-r--r-- | vendor/textwrap/src/wrap_algorithms/optimal_fit.rs | 433 |
7 files changed, 0 insertions, 4183 deletions
diff --git a/vendor/textwrap/src/core.rs b/vendor/textwrap/src/core.rs deleted file mode 100644 index 0ab4ef8..0000000 --- a/vendor/textwrap/src/core.rs +++ /dev/null @@ -1,433 +0,0 @@ -//! Building blocks for advanced wrapping functionality. -//! -//! The functions and structs in this module can be used to implement -//! advanced wrapping functionality when the [`wrap`](super::wrap) and -//! [`fill`](super::fill) function don't do what you want. -//! -//! In general, you want to follow these steps when wrapping -//! something: -//! -//! 1. Split your input into [`Fragment`]s. These are abstract blocks -//! of text or content which can be wrapped into lines. See -//! [`WordSeparator`](crate::word_separators::WordSeparator) for -//! how to do this for text. -//! -//! 2. Potentially split your fragments into smaller pieces. This -//! allows you to implement things like hyphenation. If you use the -//! `Word` type, you can use [`WordSplitter`](crate::WordSplitter) -//! enum for this. -//! -//! 3. Potentially break apart fragments that are still too large to -//! fit on a single line. This is implemented in [`break_words`]. -//! -//! 4. Finally take your fragments and put them into lines. There are -//! two algorithms for this in the -//! [`wrap_algorithms`](crate::wrap_algorithms) module: -//! [`wrap_optimal_fit`](crate::wrap_algorithms::wrap_optimal_fit) -//! and [`wrap_first_fit`](crate::wrap_algorithms::wrap_first_fit). -//! The former produces better line breaks, the latter is faster. -//! -//! 5. Iterate through the slices returned by the wrapping functions -//! and construct your lines of output. -//! -//! Please [open an issue](https://github.com/mgeisler/textwrap/) if -//! the functionality here is not sufficient or if you have ideas for -//! improving it. We would love to hear from you! - -/// The CSI or “Control Sequence Introducer” introduces an ANSI escape -/// sequence. 
This is typically used for colored text and will be -/// ignored when computing the text width. -const CSI: (char, char) = ('\x1b', '['); -/// The final bytes of an ANSI escape sequence must be in this range. -const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e'; - -/// Skip ANSI escape sequences. The `ch` is the current `char`, the -/// `chars` provide the following characters. The `chars` will be -/// modified if `ch` is the start of an ANSI escape sequence. -#[inline] -pub(crate) fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool { - if ch == CSI.0 && chars.next() == Some(CSI.1) { - // We have found the start of an ANSI escape code, typically - // used for colored terminal text. We skip until we find a - // "final byte" in the range 0x40–0x7E. - for ch in chars { - if ANSI_FINAL_BYTE.contains(&ch) { - return true; - } - } - } - false -} - -#[cfg(feature = "unicode-width")] -#[inline] -fn ch_width(ch: char) -> usize { - unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0) -} - -/// First character which [`ch_width`] will classify as double-width. -/// Please see [`display_width`]. -#[cfg(not(feature = "unicode-width"))] -const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}'; - -#[cfg(not(feature = "unicode-width"))] -#[inline] -fn ch_width(ch: char) -> usize { - if ch < DOUBLE_WIDTH_CUTOFF { - 1 - } else { - 2 - } -} - -/// Compute the display width of `text` while skipping over ANSI -/// escape sequences. -/// -/// # Examples -/// -/// ``` -/// use textwrap::core::display_width; -/// -/// assert_eq!(display_width("Café Plain"), 10); -/// assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10); -/// ``` -/// -/// **Note:** When the `unicode-width` Cargo feature is disabled, the -/// width of a `char` is determined by a crude approximation which -/// simply counts chars below U+1100 as 1 column wide, and all other -/// characters as 2 columns wide. 
With the feature enabled, function -/// will correctly deal with [combining characters] in their -/// decomposed form (see [Unicode equivalence]). -/// -/// An example of a decomposed character is “é”, which can be -/// decomposed into: “e” followed by a combining acute accent: “◌́”. -/// Without the `unicode-width` Cargo feature, every `char` below -/// U+1100 has a width of 1. This includes the combining accent: -/// -/// ``` -/// use textwrap::core::display_width; -/// -/// assert_eq!(display_width("Cafe Plain"), 10); -/// #[cfg(feature = "unicode-width")] -/// assert_eq!(display_width("Cafe\u{301} Plain"), 10); -/// #[cfg(not(feature = "unicode-width"))] -/// assert_eq!(display_width("Cafe\u{301} Plain"), 11); -/// ``` -/// -/// ## Emojis and CJK Characters -/// -/// Characters such as emojis and [CJK characters] used in the -/// Chinese, Japanese, and Korean langauges are seen as double-width, -/// even if the `unicode-width` feature is disabled: -/// -/// ``` -/// use textwrap::core::display_width; -/// -/// assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20); -/// assert_eq!(display_width("你好"), 4); // “Nǐ hǎo” or “Hello” in Chinese -/// ``` -/// -/// # Limitations -/// -/// The displayed width of a string cannot always be computed from the -/// string alone. This is because the width depends on the rendering -/// engine used. This is particularly visible with [emoji modifier -/// sequences] where a base emoji is modified with, e.g., skin tone or -/// hair color modifiers. It is up to the rendering engine to detect -/// this and to produce a suitable emoji. -/// -/// A simple example is “❤️”, which consists of “❤” (U+2764: Black -/// Heart Symbol) followed by U+FE0F (Variation Selector-16). By -/// itself, “❤” is a black heart, but if you follow it with the -/// variant selector, you may get a wider red heart. -/// -/// A more complex example would be “👨🦰” which should depict a man -/// with red hair. 
Here the computed width is too large — and the -/// width differs depending on the use of the `unicode-width` feature: -/// -/// ``` -/// use textwrap::core::display_width; -/// -/// assert_eq!("👨🦰".chars().collect::<Vec<char>>(), ['\u{1f468}', '\u{200d}', '\u{1f9b0}']); -/// #[cfg(feature = "unicode-width")] -/// assert_eq!(display_width("👨🦰"), 4); -/// #[cfg(not(feature = "unicode-width"))] -/// assert_eq!(display_width("👨🦰"), 6); -/// ``` -/// -/// This happens because the grapheme consists of three code points: -/// “👨” (U+1F468: Man), Zero Width Joiner (U+200D), and “🦰” -/// (U+1F9B0: Red Hair). You can see them above in the test. With -/// `unicode-width` enabled, the ZWJ is correctly seen as having zero -/// width, without it is counted as a double-width character. -/// -/// ## Terminal Support -/// -/// Modern browsers typically do a great job at combining characters -/// as shown above, but terminals often struggle more. As an example, -/// Gnome Terminal version 3.38.1, shows “❤️” as a big red heart, but -/// shows "👨🦰" as “👨🦰”. -/// -/// [combining characters]: https://en.wikipedia.org/wiki/Combining_character -/// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence -/// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters -/// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html -pub fn display_width(text: &str) -> usize { - let mut chars = text.chars(); - let mut width = 0; - while let Some(ch) = chars.next() { - if skip_ansi_escape_sequence(ch, &mut chars) { - continue; - } - width += ch_width(ch); - } - width -} - -/// A (text) fragment denotes the unit which we wrap into lines. -/// -/// Fragments represent an abstract _word_ plus the _whitespace_ -/// following the word. In case the word falls at the end of the line, -/// the whitespace is dropped and a so-called _penalty_ is inserted -/// instead (typically `"-"` if the word was hyphenated). 
-/// -/// For wrapping purposes, the precise content of the word, the -/// whitespace, and the penalty is irrelevant. All we need to know is -/// the displayed width of each part, which this trait provides. -pub trait Fragment: std::fmt::Debug { - /// Displayed width of word represented by this fragment. - fn width(&self) -> f64; - - /// Displayed width of the whitespace that must follow the word - /// when the word is not at the end of a line. - fn whitespace_width(&self) -> f64; - - /// Displayed width of the penalty that must be inserted if the - /// word falls at the end of a line. - fn penalty_width(&self) -> f64; -} - -/// A piece of wrappable text, including any trailing whitespace. -/// -/// A `Word` is an example of a [`Fragment`], so it has a width, -/// trailing whitespace, and potentially a penalty item. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct Word<'a> { - /// Word content. - pub word: &'a str, - /// Whitespace to insert if the word does not fall at the end of a line. - pub whitespace: &'a str, - /// Penalty string to insert if the word falls at the end of a line. - pub penalty: &'a str, - // Cached width in columns. - pub(crate) width: usize, -} - -impl std::ops::Deref for Word<'_> { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.word - } -} - -impl<'a> Word<'a> { - /// Construct a `Word` from a string. - /// - /// A trailing stretch of `' '` is automatically taken to be the - /// whitespace part of the word. - pub fn from(word: &str) -> Word<'_> { - let trimmed = word.trim_end_matches(' '); - Word { - word: trimmed, - width: display_width(trimmed), - whitespace: &word[trimmed.len()..], - penalty: "", - } - } - - /// Break this word into smaller words with a width of at most - /// `line_width`. The whitespace and penalty from this `Word` is - /// added to the last piece. - /// - /// # Examples - /// - /// ``` - /// use textwrap::core::Word; - /// assert_eq!( - /// Word::from("Hello! 
").break_apart(3).collect::<Vec<_>>(), - /// vec![Word::from("Hel"), Word::from("lo! ")] - /// ); - /// ``` - pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b { - let mut char_indices = self.word.char_indices(); - let mut offset = 0; - let mut width = 0; - - std::iter::from_fn(move || { - while let Some((idx, ch)) = char_indices.next() { - if skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) { - continue; - } - - if width > 0 && width + ch_width(ch) > line_width { - let word = Word { - word: &self.word[offset..idx], - width: width, - whitespace: "", - penalty: "", - }; - offset = idx; - width = ch_width(ch); - return Some(word); - } - - width += ch_width(ch); - } - - if offset < self.word.len() { - let word = Word { - word: &self.word[offset..], - width: width, - whitespace: self.whitespace, - penalty: self.penalty, - }; - offset = self.word.len(); - return Some(word); - } - - None - }) - } -} - -impl Fragment for Word<'_> { - #[inline] - fn width(&self) -> f64 { - self.width as f64 - } - - // We assume the whitespace consist of ' ' only. This allows us to - // compute the display width in constant time. - #[inline] - fn whitespace_width(&self) -> f64 { - self.whitespace.len() as f64 - } - - // We assume the penalty is `""` or `"-"`. This allows us to - // compute the display width in constant time. - #[inline] - fn penalty_width(&self) -> f64 { - self.penalty.len() as f64 - } -} - -/// Forcibly break words wider than `line_width` into smaller words. -/// -/// This simply calls [`Word::break_apart`] on words that are too -/// wide. This means that no extra `'-'` is inserted, the word is -/// simply broken into smaller pieces. 
-pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>> -where - I: IntoIterator<Item = Word<'a>>, -{ - let mut shortened_words = Vec::new(); - for word in words { - if word.width() > line_width as f64 { - shortened_words.extend(word.break_apart(line_width)); - } else { - shortened_words.push(word); - } - } - shortened_words -} - -#[cfg(test)] -mod tests { - use super::*; - - #[cfg(feature = "unicode-width")] - use unicode_width::UnicodeWidthChar; - - #[test] - fn skip_ansi_escape_sequence_works() { - let blue_text = "\u{1b}[34mHello\u{1b}[0m"; - let mut chars = blue_text.chars(); - let ch = chars.next().unwrap(); - assert!(skip_ansi_escape_sequence(ch, &mut chars)); - assert_eq!(chars.next(), Some('H')); - } - - #[test] - fn emojis_have_correct_width() { - use unic_emoji_char::is_emoji; - - // Emojis in the Basic Latin (ASCII) and Latin-1 Supplement - // blocks all have a width of 1 column. This includes - // characters such as '#' and '©'. - for ch in '\u{1}'..'\u{FF}' { - if is_emoji(ch) { - let desc = format!("{:?} U+{:04X}", ch, ch as u32); - - #[cfg(feature = "unicode-width")] - assert_eq!(ch.width().unwrap(), 1, "char: {}", desc); - - #[cfg(not(feature = "unicode-width"))] - assert_eq!(ch_width(ch), 1, "char: {}", desc); - } - } - - // Emojis in the remaining blocks of the Basic Multilingual - // Plane (BMP), in the Supplementary Multilingual Plane (SMP), - // and in the Supplementary Ideographic Plane (SIP), are all 1 - // or 2 columns wide when unicode-width is used, and always 2 - // columns wide otherwise. This includes all of our favorite - // emojis such as 😊. 
- for ch in '\u{FF}'..'\u{2FFFF}' { - if is_emoji(ch) { - let desc = format!("{:?} U+{:04X}", ch, ch as u32); - - #[cfg(feature = "unicode-width")] - assert!(ch.width().unwrap() <= 2, "char: {}", desc); - - #[cfg(not(feature = "unicode-width"))] - assert_eq!(ch_width(ch), 2, "char: {}", desc); - } - } - - // The remaining planes contain almost no assigned code points - // and thus also no emojis. - } - - #[test] - fn display_width_works() { - assert_eq!("Café Plain".len(), 11); // “é” is two bytes - assert_eq!(display_width("Café Plain"), 10); - assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10); - } - - #[test] - fn display_width_narrow_emojis() { - #[cfg(feature = "unicode-width")] - assert_eq!(display_width("⁉"), 1); - - // The ⁉ character is above DOUBLE_WIDTH_CUTOFF. - #[cfg(not(feature = "unicode-width"))] - assert_eq!(display_width("⁉"), 2); - } - - #[test] - fn display_width_narrow_emojis_variant_selector() { - #[cfg(feature = "unicode-width")] - assert_eq!(display_width("⁉\u{fe0f}"), 1); - - // The variant selector-16 is also counted. - #[cfg(not(feature = "unicode-width"))] - assert_eq!(display_width("⁉\u{fe0f}"), 4); - } - - #[test] - fn display_width_emojis() { - assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20); - } -} diff --git a/vendor/textwrap/src/indentation.rs b/vendor/textwrap/src/indentation.rs deleted file mode 100644 index 5d90c06..0000000 --- a/vendor/textwrap/src/indentation.rs +++ /dev/null @@ -1,347 +0,0 @@ -//! Functions related to adding and removing indentation from lines of -//! text. -//! -//! The functions here can be used to uniformly indent or dedent -//! (unindent) word wrapped lines of text. - -/// Indent each line by the given prefix. -/// -/// # Examples -/// -/// ``` -/// use textwrap::indent; -/// -/// assert_eq!(indent("First line.\nSecond line.\n", " "), -/// " First line.\n Second line.\n"); -/// ``` -/// -/// When indenting, trailing whitespace is stripped from the prefix. 
-/// This means that empty lines remain empty afterwards: -/// -/// ``` -/// use textwrap::indent; -/// -/// assert_eq!(indent("First line.\n\n\nSecond line.\n", " "), -/// " First line.\n\n\n Second line.\n"); -/// ``` -/// -/// Notice how `"\n\n\n"` remained as `"\n\n\n"`. -/// -/// This feature is useful when you want to indent text and have a -/// space between your prefix and the text. In this case, you _don't_ -/// want a trailing space on empty lines: -/// -/// ``` -/// use textwrap::indent; -/// -/// assert_eq!(indent("foo = 123\n\nprint(foo)\n", "# "), -/// "# foo = 123\n#\n# print(foo)\n"); -/// ``` -/// -/// Notice how `"\n\n"` became `"\n#\n"` instead of `"\n# \n"` which -/// would have trailing whitespace. -/// -/// Leading and trailing whitespace coming from the text itself is -/// kept unchanged: -/// -/// ``` -/// use textwrap::indent; -/// -/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo "); -/// ``` -pub fn indent(s: &str, prefix: &str) -> String { - // We know we'll need more than s.len() bytes for the output, but - // without counting '\n' characters (which is somewhat slow), we - // don't know exactly how much. However, we can preemptively do - // the first doubling of the output size. - let mut result = String::with_capacity(2 * s.len()); - let trimmed_prefix = prefix.trim_end(); - for (idx, line) in s.split_terminator('\n').enumerate() { - if idx > 0 { - result.push('\n'); - } - if line.trim().is_empty() { - result.push_str(trimmed_prefix); - } else { - result.push_str(prefix); - } - result.push_str(line); - } - if s.ends_with('\n') { - // split_terminator will have eaten the final '\n'. - result.push('\n'); - } - result -} - -/// Removes common leading whitespace from each line. 
-/// -/// This function will look at each non-empty line and determine the -/// maximum amount of whitespace that can be removed from all lines: -/// -/// ``` -/// use textwrap::dedent; -/// -/// assert_eq!(dedent(" -/// 1st line -/// 2nd line -/// 3rd line -/// "), " -/// 1st line -/// 2nd line -/// 3rd line -/// "); -/// ``` -pub fn dedent(s: &str) -> String { - let mut prefix = ""; - let mut lines = s.lines(); - - // We first search for a non-empty line to find a prefix. - for line in &mut lines { - let mut whitespace_idx = line.len(); - for (idx, ch) in line.char_indices() { - if !ch.is_whitespace() { - whitespace_idx = idx; - break; - } - } - - // Check if the line had anything but whitespace - if whitespace_idx < line.len() { - prefix = &line[..whitespace_idx]; - break; - } - } - - // We then continue looking through the remaining lines to - // possibly shorten the prefix. - for line in &mut lines { - let mut whitespace_idx = line.len(); - for ((idx, a), b) in line.char_indices().zip(prefix.chars()) { - if a != b { - whitespace_idx = idx; - break; - } - } - - // Check if the line had anything but whitespace and if we - // have found a shorter prefix - if whitespace_idx < line.len() && whitespace_idx < prefix.len() { - prefix = &line[..whitespace_idx]; - } - } - - // We now go over the lines a second time to build the result. 
- let mut result = String::new(); - for line in s.lines() { - if line.starts_with(&prefix) && line.chars().any(|c| !c.is_whitespace()) { - let (_, tail) = line.split_at(prefix.len()); - result.push_str(tail); - } - result.push('\n'); - } - - if result.ends_with('\n') && !s.ends_with('\n') { - let new_len = result.len() - 1; - result.truncate(new_len); - } - - result -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn indent_empty() { - assert_eq!(indent("\n", " "), "\n"); - } - - #[test] - #[rustfmt::skip] - fn indent_nonempty() { - let text = [ - " foo\n", - "bar\n", - " baz\n", - ].join(""); - let expected = [ - "// foo\n", - "// bar\n", - "// baz\n", - ].join(""); - assert_eq!(indent(&text, "// "), expected); - } - - #[test] - #[rustfmt::skip] - fn indent_empty_line() { - let text = [ - " foo", - "bar", - "", - " baz", - ].join("\n"); - let expected = [ - "// foo", - "// bar", - "//", - "// baz", - ].join("\n"); - assert_eq!(indent(&text, "// "), expected); - } - - #[test] - fn dedent_empty() { - assert_eq!(dedent(""), ""); - } - - #[test] - #[rustfmt::skip] - fn dedent_multi_line() { - let x = [ - " foo", - " bar", - " baz", - ].join("\n"); - let y = [ - " foo", - "bar", - " baz" - ].join("\n"); - assert_eq!(dedent(&x), y); - } - - #[test] - #[rustfmt::skip] - fn dedent_empty_line() { - let x = [ - " foo", - " bar", - " ", - " baz" - ].join("\n"); - let y = [ - " foo", - "bar", - "", - " baz" - ].join("\n"); - assert_eq!(dedent(&x), y); - } - - #[test] - #[rustfmt::skip] - fn dedent_blank_line() { - let x = [ - " foo", - "", - " bar", - " foo", - " bar", - " baz", - ].join("\n"); - let y = [ - "foo", - "", - " bar", - " foo", - " bar", - " baz", - ].join("\n"); - assert_eq!(dedent(&x), y); - } - - #[test] - #[rustfmt::skip] - fn dedent_whitespace_line() { - let x = [ - " foo", - " ", - " bar", - " foo", - " bar", - " baz", - ].join("\n"); - let y = [ - "foo", - "", - " bar", - " foo", - " bar", - " baz", - ].join("\n"); - assert_eq!(dedent(&x), y); 
- } - - #[test] - #[rustfmt::skip] - fn dedent_mixed_whitespace() { - let x = [ - "\tfoo", - " bar", - ].join("\n"); - let y = [ - "\tfoo", - " bar", - ].join("\n"); - assert_eq!(dedent(&x), y); - } - - #[test] - #[rustfmt::skip] - fn dedent_tabbed_whitespace() { - let x = [ - "\t\tfoo", - "\t\t\tbar", - ].join("\n"); - let y = [ - "foo", - "\tbar", - ].join("\n"); - assert_eq!(dedent(&x), y); - } - - #[test] - #[rustfmt::skip] - fn dedent_mixed_tabbed_whitespace() { - let x = [ - "\t \tfoo", - "\t \t\tbar", - ].join("\n"); - let y = [ - "foo", - "\tbar", - ].join("\n"); - assert_eq!(dedent(&x), y); - } - - #[test] - #[rustfmt::skip] - fn dedent_mixed_tabbed_whitespace2() { - let x = [ - "\t \tfoo", - "\t \tbar", - ].join("\n"); - let y = [ - "\tfoo", - " \tbar", - ].join("\n"); - assert_eq!(dedent(&x), y); - } - - #[test] - #[rustfmt::skip] - fn dedent_preserve_no_terminating_newline() { - let x = [ - " foo", - " bar", - ].join("\n"); - let y = [ - "foo", - " bar", - ].join("\n"); - assert_eq!(dedent(&x), y); - } -} diff --git a/vendor/textwrap/src/lib.rs b/vendor/textwrap/src/lib.rs deleted file mode 100644 index e570eac..0000000 --- a/vendor/textwrap/src/lib.rs +++ /dev/null @@ -1,1847 +0,0 @@ -//! The textwrap library provides functions for word wrapping and -//! indenting text. -//! -//! # Wrapping Text -//! -//! Wrapping text can be very useful in command-line programs where -//! you want to format dynamic output nicely so it looks good in a -//! terminal. A quick example: -//! -//! ``` -//! # #[cfg(feature = "smawk")] { -//! let text = "textwrap: a small library for wrapping text."; -//! assert_eq!(textwrap::wrap(text, 18), -//! vec!["textwrap: a", -//! "small library for", -//! "wrapping text."]); -//! # } -//! ``` -//! -//! The [`wrap`] function returns the individual lines, use [`fill`] -//! if you want the lines joined with `'\n'` to form a `String`. -//! -//! If you enable the `hyphenation` Cargo feature, you can get -//! 
automatic hyphenation for a number of languages: -//! -//! ``` -//! #[cfg(feature = "hyphenation")] { -//! use hyphenation::{Language, Load, Standard}; -//! use textwrap::{wrap, Options, WordSplitter}; -//! -//! let text = "textwrap: a small library for wrapping text."; -//! let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); -//! let options = Options::new(18).word_splitter(WordSplitter::Hyphenation(dictionary)); -//! assert_eq!(wrap(text, &options), -//! vec!["textwrap: a small", -//! "library for wrap-", -//! "ping text."]); -//! } -//! ``` -//! -//! See also the [`unfill`] and [`refill`] functions which allow you to -//! manipulate already wrapped text. -//! -//! ## Wrapping Strings at Compile Time -//! -//! If your strings are known at compile time, please take a look at -//! the procedural macros from the [textwrap-macros] crate. -//! -//! ## Displayed Width vs Byte Size -//! -//! To word wrap text, one must know the width of each word so one can -//! know when to break lines. This library will by default measure the -//! width of text using the _displayed width_, not the size in bytes. -//! The `unicode-width` Cargo feature controls this. -//! -//! This is important for non-ASCII text. ASCII characters such as `a` -//! and `!` are simple and take up one column each. This means that -//! the displayed width is equal to the string length in bytes. -//! However, non-ASCII characters and symbols take up more than one -//! byte when UTF-8 encoded: `é` is `0xc3 0xa9` (two bytes) and `⚙` is -//! `0xe2 0x9a 0x99` (three bytes) in UTF-8, respectively. -//! -//! This is why we take care to use the displayed width instead of the -//! byte count when computing line lengths. All functions in this -//! library handle Unicode characters like this when the -//! `unicode-width` Cargo feature is enabled (it is enabled by -//! default). -//! -//! # Indentation and Dedentation -//! -//! The textwrap library also offers functions for adding a prefix to -//! 
every line of a string and to remove leading whitespace. As an -//! example, the [`indent`] function allows you to turn lines of text -//! into a bullet list: -//! -//! ``` -//! let before = "\ -//! foo -//! bar -//! baz -//! "; -//! let after = "\ -//! * foo -//! * bar -//! * baz -//! "; -//! assert_eq!(textwrap::indent(before, "* "), after); -//! ``` -//! -//! Removing leading whitespace is done with [`dedent`]: -//! -//! ``` -//! let before = " -//! Some -//! indented -//! text -//! "; -//! let after = " -//! Some -//! indented -//! text -//! "; -//! assert_eq!(textwrap::dedent(before), after); -//! ``` -//! -//! # Cargo Features -//! -//! The textwrap library can be slimmed down as needed via a number of -//! Cargo features. This means you only pay for the features you -//! actually use. -//! -//! The full dependency graph, where dashed lines indicate optional -//! dependencies, is shown below: -//! -//! <img src="https://raw.githubusercontent.com/mgeisler/textwrap/master/images/textwrap-0.15.2.svg"> -//! -//! ## Default Features -//! -//! These features are enabled by default: -//! -//! * `unicode-linebreak`: enables finding words using the -//! [unicode-linebreak] crate, which implements the line breaking -//! algorithm described in [Unicode Standard Annex -//! #14](https://www.unicode.org/reports/tr14/). -//! -//! This feature can be disabled if you are happy to find words -//! separated by ASCII space characters only. People wrapping text -//! with emojis or East-Asian characters will most likely want -//! to enable this feature. See [`WordSeparator`] for details. -//! -//! * `unicode-width`: enables correct width computation of non-ASCII -//! characters via the [unicode-width] crate. Without this feature, -//! every [`char`] is 1 column wide, except for emojis which are 2 -//! columns wide. See the [`core::display_width`] function for -//! details. -//! -//! This feature can be disabled if you only need to wrap ASCII -//! 
text, or if the functions in [`core`] are used directly with -//! [`core::Fragment`]s for which the widths have been computed in -//! other ways. -//! -//! * `smawk`: enables linear-time wrapping of the whole paragraph via -//! the [smawk] crate. See the [`wrap_algorithms::wrap_optimal_fit`] -//! function for details on the optimal-fit algorithm. -//! -//! This feature can be disabled if you only ever intend to use -//! [`wrap_algorithms::wrap_first_fit`]. -//! -//! With Rust 1.59.0, the size impact of the above features on your -//! binary is as follows: -//! -//! | Configuration | Binary Size | Delta | -//! | :--- | ---: | ---: | -//! | quick-and-dirty implementation | 289 KB | — KB | -//! | textwrap without default features | 301 KB | 12 KB | -//! | textwrap with smawk | 317 KB | 28 KB | -//! | textwrap with unicode-width | 313 KB | 24 KB | -//! | textwrap with unicode-linebreak | 395 KB | 106 KB | -//! -//! The above sizes are the stripped sizes and the binary is compiled -//! in release mode with this profile: -//! -//! ```toml -//! [profile.release] -//! lto = true -//! codegen-units = 1 -//! ``` -//! -//! See the [binary-sizes demo] if you want to reproduce these -//! results. -//! -//! ## Optional Features -//! -//! These Cargo features enable new functionality: -//! -//! * `terminal_size`: enables automatic detection of the terminal -//! width via the [terminal_size] crate. See the -//! [`Options::with_termwidth`] constructor for details. -//! -//! * `hyphenation`: enables language-sensitive hyphenation via the -//! [hyphenation] crate. See the [`word_splitters::WordSplitter`] -//! trait for details. -//! -//! [unicode-linebreak]: https://docs.rs/unicode-linebreak/ -//! [unicode-width]: https://docs.rs/unicode-width/ -//! [smawk]: https://docs.rs/smawk/ -//! [binary-sizes demo]: https://github.com/mgeisler/textwrap/tree/master/examples/binary-sizes -//! [textwrap-macros]: https://docs.rs/textwrap-macros/ -//! 
[terminal_size]: https://docs.rs/terminal_size/ -//! [hyphenation]: https://docs.rs/hyphenation/ - -#![doc(html_root_url = "https://docs.rs/textwrap/0.15.2")] -#![forbid(unsafe_code)] // See https://github.com/mgeisler/textwrap/issues/210 -#![deny(missing_docs)] -#![deny(missing_debug_implementations)] -#![allow(clippy::redundant_field_names)] - -// Make `cargo test` execute the README doctests. -#[cfg(doctest)] -#[doc = include_str!("../README.md")] -mod readme_doctest {} - -use std::borrow::Cow; - -mod indentation; -pub use crate::indentation::{dedent, indent}; - -mod word_separators; -pub use word_separators::WordSeparator; - -pub mod word_splitters; -pub use word_splitters::WordSplitter; - -pub mod wrap_algorithms; -pub use wrap_algorithms::WrapAlgorithm; - -pub mod core; - -#[cfg(feature = "unicode-linebreak")] -macro_rules! DefaultWordSeparator { - () => { - WordSeparator::UnicodeBreakProperties - }; -} - -#[cfg(not(feature = "unicode-linebreak"))] -macro_rules! DefaultWordSeparator { - () => { - WordSeparator::AsciiSpace - }; -} - -/// Holds configuration options for wrapping and filling text. -#[derive(Debug, Clone)] -pub struct Options<'a> { - /// The width in columns at which the text will be wrapped. - pub width: usize, - /// Indentation used for the first line of output. See the - /// [`Options::initial_indent`] method. - pub initial_indent: &'a str, - /// Indentation used for subsequent lines of output. See the - /// [`Options::subsequent_indent`] method. - pub subsequent_indent: &'a str, - /// Allow long words to be broken if they cannot fit on a line. - /// When set to `false`, some lines may be longer than - /// `self.width`. See the [`Options::break_words`] method. - pub break_words: bool, - /// Wrapping algorithm to use, see the implementations of the - /// [`wrap_algorithms::WrapAlgorithm`] trait for details. 
- pub wrap_algorithm: WrapAlgorithm, - /// The line breaking algorithm to use, see - /// [`word_separators::WordSeparator`] trait for an overview and - /// possible implementations. - pub word_separator: WordSeparator, - /// The method for splitting words. This can be used to prohibit - /// splitting words on hyphens, or it can be used to implement - /// language-aware machine hyphenation. - pub word_splitter: WordSplitter, -} - -impl<'a> From<&'a Options<'a>> for Options<'a> { - fn from(options: &'a Options<'a>) -> Self { - Self { - width: options.width, - initial_indent: options.initial_indent, - subsequent_indent: options.subsequent_indent, - break_words: options.break_words, - word_separator: options.word_separator, - wrap_algorithm: options.wrap_algorithm, - word_splitter: options.word_splitter.clone(), - } - } -} - -impl<'a> From<usize> for Options<'a> { - fn from(width: usize) -> Self { - Options::new(width) - } -} - -impl<'a> Options<'a> { - /// Creates a new [`Options`] with the specified width. 
Equivalent to - /// - /// ``` - /// # use textwrap::{Options, WordSplitter, WordSeparator, WrapAlgorithm}; - /// # let width = 80; - /// # let actual = Options::new(width); - /// # let expected = - /// Options { - /// width: width, - /// initial_indent: "", - /// subsequent_indent: "", - /// break_words: true, - /// #[cfg(feature = "unicode-linebreak")] - /// word_separator: WordSeparator::UnicodeBreakProperties, - /// #[cfg(not(feature = "unicode-linebreak"))] - /// word_separator: WordSeparator::AsciiSpace, - /// #[cfg(feature = "smawk")] - /// wrap_algorithm: WrapAlgorithm::new_optimal_fit(), - /// #[cfg(not(feature = "smawk"))] - /// wrap_algorithm: WrapAlgorithm::FirstFit, - /// word_splitter: WordSplitter::HyphenSplitter, - /// } - /// # ; - /// # assert_eq!(actual.width, expected.width); - /// # assert_eq!(actual.initial_indent, expected.initial_indent); - /// # assert_eq!(actual.subsequent_indent, expected.subsequent_indent); - /// # assert_eq!(actual.break_words, expected.break_words); - /// # assert_eq!(actual.word_splitter, expected.word_splitter); - /// ``` - /// - /// Note that the default word separator and wrap algorithms - /// change based on the available Cargo features. The best - /// available algorithms are used by default. - pub const fn new(width: usize) -> Self { - Options { - width, - initial_indent: "", - subsequent_indent: "", - break_words: true, - word_separator: DefaultWordSeparator!(), - wrap_algorithm: WrapAlgorithm::new(), - word_splitter: WordSplitter::HyphenSplitter, - } - } - - /// Creates a new [`Options`] with `width` set to the current - /// terminal width. If the terminal width cannot be determined - /// (typically because the standard input and output is not - /// connected to a terminal), a width of 80 characters will be - /// used. Other settings use the same defaults as - /// [`Options::new`]. 
- /// - /// Equivalent to: - /// - /// ```no_run - /// use textwrap::{termwidth, Options}; - /// - /// let options = Options::new(termwidth()); - /// ``` - /// - /// **Note:** Only available when the `terminal_size` feature is - /// enabled. - #[cfg(feature = "terminal_size")] - pub fn with_termwidth() -> Self { - Self::new(termwidth()) - } -} - -impl<'a> Options<'a> { - /// Change [`self.initial_indent`]. The initial indentation is - /// used on the very first line of output. - /// - /// # Examples - /// - /// Classic paragraph indentation can be achieved by specifying an - /// initial indentation and wrapping each paragraph by itself: - /// - /// ``` - /// use textwrap::{wrap, Options}; - /// - /// let options = Options::new(16).initial_indent(" "); - /// assert_eq!(wrap("This is a little example.", options), - /// vec![" This is a", - /// "little example."]); - /// ``` - /// - /// [`self.initial_indent`]: #structfield.initial_indent - pub fn initial_indent(self, indent: &'a str) -> Self { - Options { - initial_indent: indent, - ..self - } - } - - /// Change [`self.subsequent_indent`]. The subsequent indentation - /// is used on lines following the first line of output. 
- /// - /// # Examples - /// - /// Combining initial and subsequent indentation lets you format a - /// single paragraph as a bullet list: - /// - /// ``` - /// use textwrap::{wrap, Options}; - /// - /// let options = Options::new(12) - /// .initial_indent("* ") - /// .subsequent_indent(" "); - /// #[cfg(feature = "smawk")] - /// assert_eq!(wrap("This is a little example.", options), - /// vec!["* This is", - /// " a little", - /// " example."]); - /// - /// // Without the `smawk` feature, the wrapping is a little different: - /// #[cfg(not(feature = "smawk"))] - /// assert_eq!(wrap("This is a little example.", options), - /// vec!["* This is a", - /// " little", - /// " example."]); - /// ``` - /// - /// [`self.subsequent_indent`]: #structfield.subsequent_indent - pub fn subsequent_indent(self, indent: &'a str) -> Self { - Options { - subsequent_indent: indent, - ..self - } - } - - /// Change [`self.break_words`]. This controls if words longer - /// than `self.width` can be broken, or if they will be left - /// sticking out into the right margin. - /// - /// # Examples - /// - /// ``` - /// use textwrap::{wrap, Options}; - /// - /// let options = Options::new(4).break_words(true); - /// assert_eq!(wrap("This is a little example.", options), - /// vec!["This", - /// "is a", - /// "litt", - /// "le", - /// "exam", - /// "ple."]); - /// ``` - /// - /// [`self.break_words`]: #structfield.break_words - pub fn break_words(self, setting: bool) -> Self { - Options { - break_words: setting, - ..self - } - } - - /// Change [`self.word_separator`]. - /// - /// See [`word_separators::WordSeparator`] for details on the choices. 
- /// - /// [`self.word_separator`]: #structfield.word_separator - pub fn word_separator(self, word_separator: WordSeparator) -> Options<'a> { - Options { - width: self.width, - initial_indent: self.initial_indent, - subsequent_indent: self.subsequent_indent, - break_words: self.break_words, - word_separator: word_separator, - wrap_algorithm: self.wrap_algorithm, - word_splitter: self.word_splitter, - } - } - - /// Change [`self.wrap_algorithm`]. - /// - /// See the [`wrap_algorithms::WrapAlgorithm`] trait for details on - /// the choices. - /// - /// [`self.wrap_algorithm`]: #structfield.wrap_algorithm - pub fn wrap_algorithm(self, wrap_algorithm: WrapAlgorithm) -> Options<'a> { - Options { - width: self.width, - initial_indent: self.initial_indent, - subsequent_indent: self.subsequent_indent, - break_words: self.break_words, - word_separator: self.word_separator, - wrap_algorithm: wrap_algorithm, - word_splitter: self.word_splitter, - } - } - - /// Change [`self.word_splitter`]. The - /// [`word_splitters::WordSplitter`] is used to fit part of a word - /// into the current line when wrapping text. - /// - /// # Examples - /// - /// ``` - /// use textwrap::{Options, WordSplitter}; - /// let opt = Options::new(80); - /// assert_eq!(opt.word_splitter, WordSplitter::HyphenSplitter); - /// let opt = opt.word_splitter(WordSplitter::NoHyphenation); - /// assert_eq!(opt.word_splitter, WordSplitter::NoHyphenation); - /// ``` - /// - /// [`self.word_splitter`]: #structfield.word_splitter - pub fn word_splitter(self, word_splitter: WordSplitter) -> Options<'a> { - Options { - width: self.width, - initial_indent: self.initial_indent, - subsequent_indent: self.subsequent_indent, - break_words: self.break_words, - word_separator: self.word_separator, - wrap_algorithm: self.wrap_algorithm, - word_splitter, - } - } -} - -/// Return the current terminal width. 
-/// -/// If the terminal width cannot be determined (typically because the -/// standard output is not connected to a terminal), a default width -/// of 80 characters will be used. -/// -/// # Examples -/// -/// Create an [`Options`] for wrapping at the current terminal width -/// with a two column margin to the left and the right: -/// -/// ```no_run -/// use textwrap::{termwidth, Options}; -/// -/// let width = termwidth() - 4; // Two columns on each side. -/// let options = Options::new(width) -/// .initial_indent(" ") -/// .subsequent_indent(" "); -/// ``` -/// -/// **Note:** Only available when the `terminal_size` Cargo feature is -/// enabled. -#[cfg(feature = "terminal_size")] -pub fn termwidth() -> usize { - terminal_size::terminal_size().map_or(80, |(terminal_size::Width(w), _)| w.into()) -} - -/// Fill a line of text at a given width. -/// -/// The result is a [`String`], complete with newlines between each -/// line. Use the [`wrap`] function if you need access to the -/// individual lines. -/// -/// The easiest way to use this function is to pass an integer for -/// `width_or_options`: -/// -/// ``` -/// use textwrap::fill; -/// -/// assert_eq!( -/// fill("Memory safety without garbage collection.", 15), -/// "Memory safety\nwithout garbage\ncollection." -/// ); -/// ``` -/// -/// If you need to customize the wrapping, you can pass an [`Options`] -/// instead of an `usize`: -/// -/// ``` -/// use textwrap::{fill, Options}; -/// -/// let options = Options::new(15) -/// .initial_indent("- ") -/// .subsequent_indent(" "); -/// assert_eq!( -/// fill("Memory safety without garbage collection.", &options), -/// "- Memory safety\n without\n garbage\n collection." -/// ); -/// ``` -pub fn fill<'a, Opt>(text: &str, width_or_options: Opt) -> String -where - Opt: Into<Options<'a>>, -{ - // This will avoid reallocation in simple cases (no - // indentation, no hyphenation). 
- let mut result = String::with_capacity(text.len()); - - for (i, line) in wrap(text, width_or_options).iter().enumerate() { - if i > 0 { - result.push('\n'); - } - result.push_str(line); - } - - result -} - -/// Unpack a paragraph of already-wrapped text. -/// -/// This function attempts to recover the original text from a single -/// paragraph of text produced by the [`fill`] function. This means -/// that it turns -/// -/// ```text -/// textwrap: a small -/// library for -/// wrapping text. -/// ``` -/// -/// back into -/// -/// ```text -/// textwrap: a small library for wrapping text. -/// ``` -/// -/// In addition, it will recognize a common prefix among the lines. -/// The prefix of the first line is returned in -/// [`Options::initial_indent`] and the prefix (if any) of the the -/// other lines is returned in [`Options::subsequent_indent`]. -/// -/// In addition to `' '`, the prefixes can consist of characters used -/// for unordered lists (`'-'`, `'+'`, and `'*'`) and block quotes -/// (`'>'`) in Markdown as well as characters often used for inline -/// comments (`'#'` and `'/'`). -/// -/// The text must come from a single wrapped paragraph. This means -/// that there can be no `"\n\n"` within the text. -/// -/// # Examples -/// -/// ``` -/// use textwrap::unfill; -/// -/// let (text, options) = unfill("\ -/// * This is an -/// example of -/// a list item. 
-/// "); -/// -/// assert_eq!(text, "This is an example of a list item.\n"); -/// assert_eq!(options.initial_indent, "* "); -/// assert_eq!(options.subsequent_indent, " "); -/// ``` -pub fn unfill(text: &str) -> (String, Options<'_>) { - let trimmed = text.trim_end_matches('\n'); - let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/']; - - let mut options = Options::new(0); - for (idx, line) in trimmed.split('\n').enumerate() { - options.width = std::cmp::max(options.width, core::display_width(line)); - let without_prefix = line.trim_start_matches(prefix_chars); - let prefix = &line[..line.len() - without_prefix.len()]; - - if idx == 0 { - options.initial_indent = prefix; - } else if idx == 1 { - options.subsequent_indent = prefix; - } else if idx > 1 { - for ((idx, x), y) in prefix.char_indices().zip(options.subsequent_indent.chars()) { - if x != y { - options.subsequent_indent = &prefix[..idx]; - break; - } - } - if prefix.len() < options.subsequent_indent.len() { - options.subsequent_indent = prefix; - } - } - } - - let mut unfilled = String::with_capacity(text.len()); - for (idx, line) in trimmed.split('\n').enumerate() { - if idx == 0 { - unfilled.push_str(&line[options.initial_indent.len()..]); - } else { - unfilled.push(' '); - unfilled.push_str(&line[options.subsequent_indent.len()..]); - } - } - - unfilled.push_str(&text[trimmed.len()..]); - (unfilled, options) -} - -/// Refill a paragraph of wrapped text with a new width. -/// -/// This function will first use the [`unfill`] function to remove -/// newlines from the text. Afterwards the text is filled again using -/// the [`fill`] function. -/// -/// The `new_width_or_options` argument specify the new width and can -/// specify other options as well — except for -/// [`Options::initial_indent`] and [`Options::subsequent_indent`], -/// which are deduced from `filled_text`. -/// -/// # Examples -/// -/// ``` -/// use textwrap::refill; -/// -/// // Some loosely wrapped text. 
The "> " prefix is recognized automatically. -/// let text = "\ -/// > Memory -/// > safety without garbage -/// > collection. -/// "; -/// -/// assert_eq!(refill(text, 20), "\ -/// > Memory safety -/// > without garbage -/// > collection. -/// "); -/// -/// assert_eq!(refill(text, 40), "\ -/// > Memory safety without garbage -/// > collection. -/// "); -/// -/// assert_eq!(refill(text, 60), "\ -/// > Memory safety without garbage collection. -/// "); -/// ``` -/// -/// You can also reshape bullet points: -/// -/// ``` -/// use textwrap::refill; -/// -/// let text = "\ -/// - This is my -/// list item. -/// "; -/// -/// assert_eq!(refill(text, 20), "\ -/// - This is my list -/// item. -/// "); -/// ``` -pub fn refill<'a, Opt>(filled_text: &str, new_width_or_options: Opt) -> String -where - Opt: Into<Options<'a>>, -{ - let trimmed = filled_text.trim_end_matches('\n'); - let (text, options) = unfill(trimmed); - let mut new_options = new_width_or_options.into(); - new_options.initial_indent = options.initial_indent; - new_options.subsequent_indent = options.subsequent_indent; - let mut refilled = fill(&text, new_options); - refilled.push_str(&filled_text[trimmed.len()..]); - refilled -} - -/// Wrap a line of text at a given width. -/// -/// The result is a vector of lines, each line is of type [`Cow<'_, -/// str>`](Cow), which means that the line will borrow from the input -/// `&str` if possible. The lines do not have trailing whitespace, -/// including a final `'\n'`. Please use the [`fill`] function if you -/// need a [`String`] instead. 
-/// -/// The easiest way to use this function is to pass an integer for -/// `width_or_options`: -/// -/// ``` -/// use textwrap::wrap; -/// -/// let lines = wrap("Memory safety without garbage collection.", 15); -/// assert_eq!(lines, &[ -/// "Memory safety", -/// "without garbage", -/// "collection.", -/// ]); -/// ``` -/// -/// If you need to customize the wrapping, you can pass an [`Options`] -/// instead of an `usize`: -/// -/// ``` -/// use textwrap::{wrap, Options}; -/// -/// let options = Options::new(15) -/// .initial_indent("- ") -/// .subsequent_indent(" "); -/// let lines = wrap("Memory safety without garbage collection.", &options); -/// assert_eq!(lines, &[ -/// "- Memory safety", -/// " without", -/// " garbage", -/// " collection.", -/// ]); -/// ``` -/// -/// # Optimal-Fit Wrapping -/// -/// By default, `wrap` will try to ensure an even right margin by -/// finding breaks which avoid short lines. We call this an -/// “optimal-fit algorithm” since the line breaks are computed by -/// considering all possible line breaks. The alternative is a -/// “first-fit algorithm” which simply accumulates words until they no -/// longer fit on the line. -/// -/// As an example, using the first-fit algorithm to wrap the famous -/// Hamlet quote “To be, or not to be: that is the question” in a -/// narrow column with room for only 10 characters looks like this: -/// -/// ``` -/// # use textwrap::{WrapAlgorithm::FirstFit, Options, wrap}; -/// # -/// # let lines = wrap("To be, or not to be: that is the question", -/// # Options::new(10).wrap_algorithm(FirstFit)); -/// # assert_eq!(lines.join("\n") + "\n", "\ -/// To be, or -/// not to be: -/// that is -/// the -/// question -/// # "); -/// ``` -/// -/// Notice how the second to last line is quite narrow because -/// “question” was too large to fit? The greedy first-fit algorithm -/// doesn’t look ahead, so it has no other option than to put -/// “question” onto its own line. 
-/// -/// With the optimal-fit wrapping algorithm, the previous lines are -/// shortened slightly in order to make the word “is” go into the -/// second last line: -/// -/// ``` -/// # #[cfg(feature = "smawk")] { -/// # use textwrap::{Options, WrapAlgorithm, wrap}; -/// # -/// # let lines = wrap( -/// # "To be, or not to be: that is the question", -/// # Options::new(10).wrap_algorithm(WrapAlgorithm::new_optimal_fit()) -/// # ); -/// # assert_eq!(lines.join("\n") + "\n", "\ -/// To be, -/// or not to -/// be: that -/// is the -/// question -/// # "); } -/// ``` -/// -/// Please see [`WrapAlgorithm`] for details on the choices. -/// -/// # Examples -/// -/// The returned iterator yields lines of type `Cow<'_, str>`. If -/// possible, the wrapped lines will borrow from the input string. As -/// an example, a hanging indentation, the first line can borrow from -/// the input, but the subsequent lines become owned strings: -/// -/// ``` -/// use std::borrow::Cow::{Borrowed, Owned}; -/// use textwrap::{wrap, Options}; -/// -/// let options = Options::new(15).subsequent_indent("...."); -/// let lines = wrap("Wrapping text all day long.", &options); -/// let annotated = lines -/// .iter() -/// .map(|line| match line { -/// Borrowed(text) => format!("[Borrowed] {}", text), -/// Owned(text) => format!("[Owned] {}", text), -/// }) -/// .collect::<Vec<_>>(); -/// assert_eq!( -/// annotated, -/// &[ -/// "[Borrowed] Wrapping text", -/// "[Owned] ....all day", -/// "[Owned] ....long.", -/// ] -/// ); -/// ``` -/// -/// ## Leading and Trailing Whitespace -/// -/// As a rule, leading whitespace (indentation) is preserved and -/// trailing whitespace is discarded. -/// -/// In more details, when wrapping words into lines, words are found -/// by splitting the input text on space characters. 
One or more -/// spaces (shown here as “␣”) are attached to the end of each word: -/// -/// ```text -/// "Foo␣␣␣bar␣baz" -> ["Foo␣␣␣", "bar␣", "baz"] -/// ``` -/// -/// These words are then put into lines. The interword whitespace is -/// preserved, unless the lines are wrapped so that the `"Foo␣␣␣"` -/// word falls at the end of a line: -/// -/// ``` -/// use textwrap::wrap; -/// -/// assert_eq!(wrap("Foo bar baz", 10), vec!["Foo bar", "baz"]); -/// assert_eq!(wrap("Foo bar baz", 8), vec!["Foo", "bar baz"]); -/// ``` -/// -/// Notice how the trailing whitespace is removed in both case: in the -/// first example, `"bar␣"` becomes `"bar"` and in the second case -/// `"Foo␣␣␣"` becomes `"Foo"`. -/// -/// Leading whitespace is preserved when the following word fits on -/// the first line. To understand this, consider how words are found -/// in a text with leading spaces: -/// -/// ```text -/// "␣␣foo␣bar" -> ["␣␣", "foo␣", "bar"] -/// ``` -/// -/// When put into lines, the indentation is preserved if `"foo"` fits -/// on the first line, otherwise you end up with an empty line: -/// -/// ``` -/// use textwrap::wrap; -/// -/// assert_eq!(wrap(" foo bar", 8), vec![" foo", "bar"]); -/// assert_eq!(wrap(" foo bar", 4), vec!["", "foo", "bar"]); -/// ``` -pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>> -where - Opt: Into<Options<'a>>, -{ - let options = width_or_options.into(); - - let initial_width = options - .width - .saturating_sub(core::display_width(options.initial_indent)); - let subsequent_width = options - .width - .saturating_sub(core::display_width(options.subsequent_indent)); - - let mut lines = Vec::new(); - for line in text.split('\n') { - let words = options.word_separator.find_words(line); - let split_words = word_splitters::split_words(words, &options.word_splitter); - let broken_words = if options.break_words { - let mut broken_words = core::break_words(split_words, subsequent_width); - if !options.initial_indent.is_empty() { - 
// Without this, the first word will always go into - // the first line. However, since we break words based - // on the _second_ line width, it can be wrong to - // unconditionally put the first word onto the first - // line. An empty zero-width word fixed this. - broken_words.insert(0, core::Word::from("")); - } - broken_words - } else { - split_words.collect::<Vec<_>>() - }; - - let line_widths = [initial_width, subsequent_width]; - let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths); - - let mut idx = 0; - for words in wrapped_words { - let last_word = match words.last() { - None => { - lines.push(Cow::from("")); - continue; - } - Some(word) => word, - }; - - // We assume here that all words are contiguous in `line`. - // That is, the sum of their lengths should add up to the - // length of `line`. - let len = words - .iter() - .map(|word| word.len() + word.whitespace.len()) - .sum::<usize>() - - last_word.whitespace.len(); - - // The result is owned if we have indentation, otherwise - // we can simply borrow an empty string. - let mut result = if lines.is_empty() && !options.initial_indent.is_empty() { - Cow::Owned(options.initial_indent.to_owned()) - } else if !lines.is_empty() && !options.subsequent_indent.is_empty() { - Cow::Owned(options.subsequent_indent.to_owned()) - } else { - // We can use an empty string here since string - // concatenation for `Cow` preserves a borrowed value - // when either side is empty. - Cow::from("") - }; - - result += &line[idx..idx + len]; - - if !last_word.penalty.is_empty() { - result.to_mut().push_str(last_word.penalty); - } - - lines.push(result); - - // Advance by the length of `result`, plus the length of - // `last_word.whitespace` -- even if we had a penalty, we - // need to skip over the whitespace. - idx += len + last_word.whitespace.len(); - } - } - - lines -} - -/// Wrap text into columns with a given total width. 
-/// -/// The `left_gap`, `middle_gap` and `right_gap` arguments specify the -/// strings to insert before, between, and after the columns. The -/// total width of all columns and all gaps is specified using the -/// `total_width_or_options` argument. This argument can simply be an -/// integer if you want to use default settings when wrapping, or it -/// can be a [`Options`] value if you want to customize the wrapping. -/// -/// If the columns are narrow, it is recommended to set -/// [`Options::break_words`] to `true` to prevent words from -/// protruding into the margins. -/// -/// The per-column width is computed like this: -/// -/// ``` -/// # let (left_gap, middle_gap, right_gap) = ("", "", ""); -/// # let columns = 2; -/// # let options = textwrap::Options::new(80); -/// let inner_width = options.width -/// - textwrap::core::display_width(left_gap) -/// - textwrap::core::display_width(right_gap) -/// - textwrap::core::display_width(middle_gap) * (columns - 1); -/// let column_width = inner_width / columns; -/// ``` -/// -/// The `text` is wrapped using [`wrap`] and the given `options` -/// argument, but the width is overwritten to the computed -/// `column_width`. -/// -/// # Panics -/// -/// Panics if `columns` is zero. -/// -/// # Examples -/// -/// ``` -/// use textwrap::wrap_columns; -/// -/// let text = "\ -/// This is an example text, which is wrapped into three columns. \ -/// Notice how the final column can be shorter than the others."; -/// -/// #[cfg(feature = "smawk")] -/// assert_eq!(wrap_columns(text, 3, 50, "| ", " | ", " |"), -/// vec!["| This is | into three | column can be |", -/// "| an example | columns. | shorter than |", -/// "| text, which | Notice how | the others. 
|", -/// "| is wrapped | the final | |"]); -/// -/// // Without the `smawk` feature, the middle column is a little more uneven: -/// #[cfg(not(feature = "smawk"))] -/// assert_eq!(wrap_columns(text, 3, 50, "| ", " | ", " |"), -/// vec!["| This is an | three | column can be |", -/// "| example text, | columns. | shorter than |", -/// "| which is | Notice how | the others. |", -/// "| wrapped into | the final | |"]); -pub fn wrap_columns<'a, Opt>( - text: &str, - columns: usize, - total_width_or_options: Opt, - left_gap: &str, - middle_gap: &str, - right_gap: &str, -) -> Vec<String> -where - Opt: Into<Options<'a>>, -{ - assert!(columns > 0); - - let mut options = total_width_or_options.into(); - - let inner_width = options - .width - .saturating_sub(core::display_width(left_gap)) - .saturating_sub(core::display_width(right_gap)) - .saturating_sub(core::display_width(middle_gap) * (columns - 1)); - - let column_width = std::cmp::max(inner_width / columns, 1); - options.width = column_width; - let last_column_padding = " ".repeat(inner_width % column_width); - let wrapped_lines = wrap(text, options); - let lines_per_column = - wrapped_lines.len() / columns + usize::from(wrapped_lines.len() % columns > 0); - let mut lines = Vec::new(); - for line_no in 0..lines_per_column { - let mut line = String::from(left_gap); - for column_no in 0..columns { - match wrapped_lines.get(line_no + column_no * lines_per_column) { - Some(column_line) => { - line.push_str(column_line); - line.push_str(&" ".repeat(column_width - core::display_width(column_line))); - } - None => { - line.push_str(&" ".repeat(column_width)); - } - } - if column_no == columns - 1 { - line.push_str(&last_column_padding); - } else { - line.push_str(middle_gap); - } - } - line.push_str(right_gap); - lines.push(line); - } - - lines -} - -/// Fill `text` in-place without reallocating the input string. 
-/// -/// This function works by modifying the input string: some `' '` -/// characters will be replaced by `'\n'` characters. The rest of the -/// text remains untouched. -/// -/// Since we can only replace existing whitespace in the input with -/// `'\n'`, we cannot do hyphenation nor can we split words longer -/// than the line width. We also need to use `AsciiSpace` as the word -/// separator since we need `' '` characters between words in order to -/// replace some of them with a `'\n'`. Indentation is also ruled out. -/// In other words, `fill_inplace(width)` behaves as if you had called -/// [`fill`] with these options: -/// -/// ``` -/// # use textwrap::{core, Options, WordSplitter, WordSeparator, WrapAlgorithm}; -/// # let width = 80; -/// Options { -/// width: width, -/// initial_indent: "", -/// subsequent_indent: "", -/// break_words: false, -/// word_separator: WordSeparator::AsciiSpace, -/// wrap_algorithm: WrapAlgorithm::FirstFit, -/// word_splitter: WordSplitter::NoHyphenation, -/// }; -/// ``` -/// -/// The wrap algorithm is [`WrapAlgorithm::FirstFit`] since this -/// is the fastest algorithm — and the main reason to use -/// `fill_inplace` is to get the string broken into newlines as fast -/// as possible. -/// -/// A last difference is that (unlike [`fill`]) `fill_inplace` can -/// leave trailing whitespace on lines. This is because we wrap by -/// inserting a `'\n'` at the final whitespace in the input string: -/// -/// ``` -/// let mut text = String::from("Hello World!"); -/// textwrap::fill_inplace(&mut text, 10); -/// assert_eq!(text, "Hello \nWorld!"); -/// ``` -/// -/// If we didn't do this, the word `World!` would end up being -/// indented. You can avoid this if you make sure that your input text -/// has no double spaces. -/// -/// # Performance -/// -/// In benchmarks, `fill_inplace` is about twice as fast as [`fill`]. 
-/// Please see the [`linear` -/// benchmark](https://github.com/mgeisler/textwrap/blob/master/benches/linear.rs) -/// for details. -pub fn fill_inplace(text: &mut String, width: usize) { - let mut indices = Vec::new(); - - let mut offset = 0; - for line in text.split('\n') { - let words = WordSeparator::AsciiSpace - .find_words(line) - .collect::<Vec<_>>(); - let wrapped_words = wrap_algorithms::wrap_first_fit(&words, &[width as f64]); - - let mut line_offset = offset; - for words in &wrapped_words[..wrapped_words.len() - 1] { - let line_len = words - .iter() - .map(|word| word.len() + word.whitespace.len()) - .sum::<usize>(); - - line_offset += line_len; - // We've advanced past all ' ' characters -- want to move - // one ' ' backwards and insert our '\n' there. - indices.push(line_offset - 1); - } - - // Advance past entire line, plus the '\n' which was removed - // by the split call above. - offset += line.len() + 1; - } - - let mut bytes = std::mem::take(text).into_bytes(); - for idx in indices { - bytes[idx] = b'\n'; - } - *text = String::from_utf8(bytes).unwrap(); -} - -#[cfg(test)] -mod tests { - use super::*; - - #[cfg(feature = "hyphenation")] - use hyphenation::{Language, Load, Standard}; - - #[test] - fn options_agree_with_usize() { - let opt_usize = Options::from(42_usize); - let opt_options = Options::new(42); - - assert_eq!(opt_usize.width, opt_options.width); - assert_eq!(opt_usize.initial_indent, opt_options.initial_indent); - assert_eq!(opt_usize.subsequent_indent, opt_options.subsequent_indent); - assert_eq!(opt_usize.break_words, opt_options.break_words); - assert_eq!( - opt_usize.word_splitter.split_points("hello-world"), - opt_options.word_splitter.split_points("hello-world") - ); - } - - #[test] - fn no_wrap() { - assert_eq!(wrap("foo", 10), vec!["foo"]); - } - - #[test] - fn wrap_simple() { - assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]); - } - - #[test] - fn to_be_or_not() { - assert_eq!( - wrap( - "To be, or not to be, that 
is the question.", - Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit) - ), - vec!["To be, or", "not to be,", "that is", "the", "question."] - ); - } - - #[test] - fn multiple_words_on_first_line() { - assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]); - } - - #[test] - fn long_word() { - assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]); - } - - #[test] - fn long_words() { - assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]); - } - - #[test] - fn max_width() { - assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]); - - let text = "Hello there! This is some English text. \ - It should not be wrapped given the extents below."; - assert_eq!(wrap(text, usize::MAX), vec![text]); - } - - #[test] - fn leading_whitespace() { - assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]); - } - - #[test] - fn leading_whitespace_empty_first_line() { - // If there is no space for the first word, the first line - // will be empty. This is because the string is split into - // words like [" ", "foobar ", "baz"], which puts "foobar " on - // the second line. We never output trailing whitespace - assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]); - } - - #[test] - fn trailing_whitespace() { - // Whitespace is only significant inside a line. After a line - // gets too long and is broken, the first word starts in - // column zero and is not indented. - assert_eq!(wrap("foo bar baz ", 5), vec!["foo", "bar", "baz"]); - } - - #[test] - fn issue_99() { - // We did not reset the in_whitespace flag correctly and did - // not handle single-character words after a line break. - assert_eq!( - wrap("aaabbbccc x yyyzzzwww", 9), - vec!["aaabbbccc", "x", "yyyzzzwww"] - ); - } - - #[test] - fn issue_129() { - // The dash is an em-dash which takes up four bytes. We used - // to panic since we tried to index into the character. 
- let options = Options::new(1).word_separator(WordSeparator::AsciiSpace); - assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]); - } - - #[test] - fn wide_character_handling() { - assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]); - assert_eq!( - wrap( - "Hello, World!", - Options::new(15).word_separator(WordSeparator::AsciiSpace) - ), - vec!["Hello,", "World!"] - ); - - // Wide characters are allowed to break if the - // unicode-linebreak feature is enabled. - #[cfg(feature = "unicode-linebreak")] - assert_eq!( - wrap( - "Hello, World!", - Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties) - ), - vec!["Hello, W", "orld!"] - ); - } - - #[test] - fn empty_line_is_indented() { - // Previously, indentation was not applied to empty lines. - // However, this is somewhat inconsistent and undesirable if - // the indentation is something like a border ("| ") which you - // want to apply to all lines, empty or not. - let options = Options::new(10).initial_indent("!!!"); - assert_eq!(fill("", &options), "!!!"); - } - - #[test] - fn indent_single_line() { - let options = Options::new(10).initial_indent(">>>"); // No trailing space - assert_eq!(fill("foo", &options), ">>>foo"); - } - - #[test] - fn indent_first_emoji() { - let options = Options::new(10).initial_indent("👉👉"); - assert_eq!( - wrap("x x x x x x x x x x x x x", &options), - vec!["👉👉x x x", "x x x x x", "x x x x x"] - ); - } - - #[test] - fn indent_multiple_lines() { - let options = Options::new(6).initial_indent("* ").subsequent_indent(" "); - assert_eq!( - wrap("foo bar baz", &options), - vec!["* foo", " bar", " baz"] - ); - } - - #[test] - fn indent_break_words() { - let options = Options::new(5).initial_indent("* ").subsequent_indent(" "); - assert_eq!(wrap("foobarbaz", &options), vec!["* foo", " bar", " baz"]); - } - - #[test] - fn initial_indent_break_words() { - // This is a corner-case showing how the long word is broken - // according to the width of the subsequent 
lines. The first - // fragment of the word no longer fits on the first line, - // which ends up being pure indentation. - let options = Options::new(5).initial_indent("-->"); - assert_eq!(wrap("foobarbaz", &options), vec!["-->", "fooba", "rbaz"]); - } - - #[test] - fn hyphens() { - assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]); - } - - #[test] - fn trailing_hyphen() { - let options = Options::new(5).break_words(false); - assert_eq!(wrap("foobar-", &options), vec!["foobar-"]); - } - - #[test] - fn multiple_hyphens() { - assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]); - } - - #[test] - fn hyphens_flag() { - let options = Options::new(5).break_words(false); - assert_eq!( - wrap("The --foo-bar flag.", &options), - vec!["The", "--foo-", "bar", "flag."] - ); - } - - #[test] - fn repeated_hyphens() { - let options = Options::new(4).break_words(false); - assert_eq!(wrap("foo--bar", &options), vec!["foo--bar"]); - } - - #[test] - fn hyphens_alphanumeric() { - assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]); - } - - #[test] - fn hyphens_non_alphanumeric() { - let options = Options::new(5).break_words(false); - assert_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]); - } - - #[test] - fn multiple_splits() { - assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]); - } - - #[test] - fn forced_split() { - let options = Options::new(5).break_words(false); - assert_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]); - } - - #[test] - fn multiple_unbroken_words_issue_193() { - let options = Options::new(3).break_words(false); - assert_eq!( - wrap("small large tiny", &options), - vec!["small", "large", "tiny"] - ); - assert_eq!( - wrap("small large tiny", &options), - vec!["small", "large", "tiny"] - ); - } - - #[test] - fn very_narrow_lines_issue_193() { - let options = Options::new(1).break_words(false); - assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); - assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]); - } 
- - #[test] - fn simple_hyphens() { - let options = Options::new(8).word_splitter(WordSplitter::HyphenSplitter); - assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]); - } - - #[test] - fn no_hyphenation() { - let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation); - assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]); - } - - #[test] - #[cfg(feature = "hyphenation")] - fn auto_hyphenation_double_hyphenation() { - let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); - let options = Options::new(10); - assert_eq!( - wrap("Internationalization", &options), - vec!["Internatio", "nalization"] - ); - - let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); - assert_eq!( - wrap("Internationalization", &options), - vec!["Interna-", "tionaliza-", "tion"] - ); - } - - #[test] - #[cfg(feature = "hyphenation")] - fn auto_hyphenation_issue_158() { - let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); - let options = Options::new(10); - assert_eq!( - wrap("participation is the key to success", &options), - vec!["participat", "ion is", "the key to", "success"] - ); - - let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); - assert_eq!( - wrap("participation is the key to success", &options), - vec!["partici-", "pation is", "the key to", "success"] - ); - } - - #[test] - #[cfg(feature = "hyphenation")] - fn split_len_hyphenation() { - // Test that hyphenation takes the width of the whitespace - // into account. - let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); - let options = Options::new(15).word_splitter(WordSplitter::Hyphenation(dictionary)); - assert_eq!( - wrap("garbage collection", &options), - vec!["garbage col-", "lection"] - ); - } - - #[test] - #[cfg(feature = "hyphenation")] - fn borrowed_lines() { - // Lines that end with an extra hyphen are owned, the final - // line is borrowed. 
- use std::borrow::Cow::{Borrowed, Owned}; - let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); - let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary)); - let lines = wrap("Internationalization", &options); - assert_eq!(lines, vec!["Interna-", "tionaliza-", "tion"]); - if let Borrowed(s) = lines[0] { - assert!(false, "should not have been borrowed: {:?}", s); - } - if let Borrowed(s) = lines[1] { - assert!(false, "should not have been borrowed: {:?}", s); - } - if let Owned(ref s) = lines[2] { - assert!(false, "should not have been owned: {:?}", s); - } - } - - #[test] - #[cfg(feature = "hyphenation")] - fn auto_hyphenation_with_hyphen() { - let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); - let options = Options::new(8).break_words(false); - assert_eq!( - wrap("over-caffinated", &options), - vec!["over-", "caffinated"] - ); - - let options = options.word_splitter(WordSplitter::Hyphenation(dictionary)); - assert_eq!( - wrap("over-caffinated", &options), - vec!["over-", "caffi-", "nated"] - ); - } - - #[test] - fn break_words() { - assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]); - } - - #[test] - fn break_words_wide_characters() { - // Even the poor man's version of `ch_width` counts these - // characters as wide. 
- let options = Options::new(5).word_separator(WordSeparator::AsciiSpace); - assert_eq!(wrap("Hello", options), vec!["He", "ll", "o"]); - } - - #[test] - fn break_words_zero_width() { - assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]); - } - - #[test] - fn break_long_first_word() { - assert_eq!(wrap("testx y", 4), vec!["test", "x y"]); - } - - #[test] - fn break_words_line_breaks() { - assert_eq!(fill("ab\ncdefghijkl", 5), "ab\ncdefg\nhijkl"); - assert_eq!(fill("abcdefgh\nijkl", 5), "abcde\nfgh\nijkl"); - } - - #[test] - fn break_words_empty_lines() { - assert_eq!( - fill("foo\nbar", &Options::new(2).break_words(false)), - "foo\nbar" - ); - } - - #[test] - fn preserve_line_breaks() { - assert_eq!(fill("", 80), ""); - assert_eq!(fill("\n", 80), "\n"); - assert_eq!(fill("\n\n\n", 80), "\n\n\n"); - assert_eq!(fill("test\n", 80), "test\n"); - assert_eq!(fill("test\n\na\n\n", 80), "test\n\na\n\n"); - assert_eq!( - fill( - "1 3 5 7\n1 3 5 7", - Options::new(7).wrap_algorithm(WrapAlgorithm::FirstFit) - ), - "1 3 5 7\n1 3 5 7" - ); - assert_eq!( - fill( - "1 3 5 7\n1 3 5 7", - Options::new(5).wrap_algorithm(WrapAlgorithm::FirstFit) - ), - "1 3 5\n7\n1 3 5\n7" - ); - } - - #[test] - fn preserve_line_breaks_with_whitespace() { - assert_eq!(fill(" ", 80), ""); - assert_eq!(fill(" \n ", 80), "\n"); - assert_eq!(fill(" \n \n \n ", 80), "\n\n\n"); - } - - #[test] - fn non_breaking_space() { - let options = Options::new(5).break_words(false); - assert_eq!(fill("foo bar baz", &options), "foo bar baz"); - } - - #[test] - fn non_breaking_hyphen() { - let options = Options::new(5).break_words(false); - assert_eq!(fill("foo‑bar‑baz", &options), "foo‑bar‑baz"); - } - - #[test] - fn fill_simple() { - assert_eq!(fill("foo bar baz", 10), "foo bar\nbaz"); - } - - #[test] - fn fill_colored_text() { - // The words are much longer than 6 bytes, but they remain - // intact after filling the text. 
- let green_hello = "\u{1b}[0m\u{1b}[32mHello\u{1b}[0m"; - let blue_world = "\u{1b}[0m\u{1b}[34mWorld!\u{1b}[0m"; - assert_eq!( - fill(&(String::from(green_hello) + " " + &blue_world), 6), - String::from(green_hello) + "\n" + &blue_world - ); - } - - #[test] - fn fill_unicode_boundary() { - // https://github.com/mgeisler/textwrap/issues/390 - fill("\u{1b}!Ͽ", 10); - } - - #[test] - fn fill_inplace_empty() { - let mut text = String::from(""); - fill_inplace(&mut text, 80); - assert_eq!(text, ""); - } - - #[test] - fn fill_inplace_simple() { - let mut text = String::from("foo bar baz"); - fill_inplace(&mut text, 10); - assert_eq!(text, "foo bar\nbaz"); - } - - #[test] - fn fill_inplace_multiple_lines() { - let mut text = String::from("Some text to wrap over multiple lines"); - fill_inplace(&mut text, 12); - assert_eq!(text, "Some text to\nwrap over\nmultiple\nlines"); - } - - #[test] - fn fill_inplace_long_word() { - let mut text = String::from("Internationalization is hard"); - fill_inplace(&mut text, 10); - assert_eq!(text, "Internationalization\nis hard"); - } - - #[test] - fn fill_inplace_no_hyphen_splitting() { - let mut text = String::from("A well-chosen example"); - fill_inplace(&mut text, 10); - assert_eq!(text, "A\nwell-chosen\nexample"); - } - - #[test] - fn fill_inplace_newlines() { - let mut text = String::from("foo bar\n\nbaz\n\n\n"); - fill_inplace(&mut text, 10); - assert_eq!(text, "foo bar\n\nbaz\n\n\n"); - } - - #[test] - fn fill_inplace_newlines_reset_line_width() { - let mut text = String::from("1 3 5\n1 3 5 7 9\n1 3 5 7 9 1 3"); - fill_inplace(&mut text, 10); - assert_eq!(text, "1 3 5\n1 3 5 7 9\n1 3 5 7 9\n1 3"); - } - - #[test] - fn fill_inplace_leading_whitespace() { - let mut text = String::from(" foo bar baz"); - fill_inplace(&mut text, 10); - assert_eq!(text, " foo bar\nbaz"); - } - - #[test] - fn fill_inplace_trailing_whitespace() { - let mut text = String::from("foo bar baz "); - fill_inplace(&mut text, 10); - assert_eq!(text, "foo 
bar\nbaz "); - } - - #[test] - fn fill_inplace_interior_whitespace() { - // To avoid an unwanted indentation of "baz", it is important - // to replace the final ' ' with '\n'. - let mut text = String::from("foo bar baz"); - fill_inplace(&mut text, 10); - assert_eq!(text, "foo bar \nbaz"); - } - - #[test] - fn unfill_simple() { - let (text, options) = unfill("foo\nbar"); - assert_eq!(text, "foo bar"); - assert_eq!(options.width, 3); - } - - #[test] - fn unfill_trailing_newlines() { - let (text, options) = unfill("foo\nbar\n\n\n"); - assert_eq!(text, "foo bar\n\n\n"); - assert_eq!(options.width, 3); - } - - #[test] - fn unfill_initial_indent() { - let (text, options) = unfill(" foo\nbar\nbaz"); - assert_eq!(text, "foo bar baz"); - assert_eq!(options.width, 5); - assert_eq!(options.initial_indent, " "); - } - - #[test] - fn unfill_differing_indents() { - let (text, options) = unfill(" foo\n bar\n baz"); - assert_eq!(text, "foo bar baz"); - assert_eq!(options.width, 7); - assert_eq!(options.initial_indent, " "); - assert_eq!(options.subsequent_indent, " "); - } - - #[test] - fn unfill_list_item() { - let (text, options) = unfill("* foo\n bar\n baz"); - assert_eq!(text, "foo bar baz"); - assert_eq!(options.width, 5); - assert_eq!(options.initial_indent, "* "); - assert_eq!(options.subsequent_indent, " "); - } - - #[test] - fn unfill_multiple_char_prefix() { - let (text, options) = unfill(" // foo bar\n // baz\n // quux"); - assert_eq!(text, "foo bar baz quux"); - assert_eq!(options.width, 14); - assert_eq!(options.initial_indent, " // "); - assert_eq!(options.subsequent_indent, " // "); - } - - #[test] - fn unfill_block_quote() { - let (text, options) = unfill("> foo\n> bar\n> baz"); - assert_eq!(text, "foo bar baz"); - assert_eq!(options.width, 5); - assert_eq!(options.initial_indent, "> "); - assert_eq!(options.subsequent_indent, "> "); - } - - #[test] - fn unfill_whitespace() { - assert_eq!(unfill("foo bar").0, "foo bar"); - } - - #[test] - fn 
wrap_columns_empty_text() { - assert_eq!(wrap_columns("", 1, 10, "| ", "", " |"), vec!["| |"]); - } - - #[test] - fn wrap_columns_single_column() { - assert_eq!( - wrap_columns("Foo", 3, 30, "| ", " | ", " |"), - vec!["| Foo | | |"] - ); - } - - #[test] - fn wrap_columns_uneven_columns() { - // The gaps take up a total of 5 columns, so the columns are - // (21 - 5)/4 = 4 columns wide: - assert_eq!( - wrap_columns("Foo Bar Baz Quux", 4, 21, "|", "|", "|"), - vec!["|Foo |Bar |Baz |Quux|"] - ); - // As the total width increases, the last column absorbs the - // excess width: - assert_eq!( - wrap_columns("Foo Bar Baz Quux", 4, 24, "|", "|", "|"), - vec!["|Foo |Bar |Baz |Quux |"] - ); - // Finally, when the width is 25, the columns can be resized - // to a width of (25 - 5)/4 = 5 columns: - assert_eq!( - wrap_columns("Foo Bar Baz Quux", 4, 25, "|", "|", "|"), - vec!["|Foo |Bar |Baz |Quux |"] - ); - } - - #[test] - #[cfg(feature = "unicode-width")] - fn wrap_columns_with_emojis() { - assert_eq!( - wrap_columns( - "Words and a few emojis 😍 wrapped in ⓶ columns", - 2, - 30, - "✨ ", - " ⚽ ", - " 👀" - ), - vec![ - "✨ Words ⚽ wrapped in 👀", - "✨ and a few ⚽ ⓶ columns 👀", - "✨ emojis 😍 ⚽ 👀" - ] - ); - } - - #[test] - fn wrap_columns_big_gaps() { - // The column width shrinks to 1 because the gaps take up all - // the space. - assert_eq!( - wrap_columns("xyz", 2, 10, "----> ", " !!! ", " <----"), - vec![ - "----> x !!! z <----", // - "----> y !!! <----" - ] - ); - } - - #[test] - #[should_panic] - fn wrap_columns_panic_with_zero_columns() { - wrap_columns("", 0, 10, "", "", ""); - } -} diff --git a/vendor/textwrap/src/word_separators.rs b/vendor/textwrap/src/word_separators.rs deleted file mode 100644 index 25adf31..0000000 --- a/vendor/textwrap/src/word_separators.rs +++ /dev/null @@ -1,428 +0,0 @@ -//! Functionality for finding words. -//! -//! In order to wrap text, we need to know where the legal break -//! points are, i.e., where the words of the text are. 
This means that -//! we need to define what a "word" is. -//! -//! A simple approach is to simply split the text on whitespace, but -//! this does not work for East-Asian languages such as Chinese or -//! Japanese where there are no spaces between words. Breaking a long -//! sequence of emojis is another example where line breaks might be -//! wanted even if there are no whitespace to be found. -//! -//! The [`WordSeparator`] trait is responsible for determining where -//! there words are in a line of text. Please refer to the trait and -//! the structs which implement it for more information. - -#[cfg(feature = "unicode-linebreak")] -use crate::core::skip_ansi_escape_sequence; -use crate::core::Word; - -/// Describes where words occur in a line of text. -/// -/// The simplest approach is say that words are separated by one or -/// more ASCII spaces (`' '`). This works for Western languages -/// without emojis. A more complex approach is to use the Unicode line -/// breaking algorithm, which finds break points in non-ASCII text. -/// -/// The line breaks occur between words, please see -/// [`WordSplitter`](crate::WordSplitter) for options of how to handle -/// hyphenation of individual words. -/// -/// # Examples -/// -/// ``` -/// use textwrap::core::Word; -/// use textwrap::WordSeparator::AsciiSpace; -/// -/// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>(); -/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]); -/// ``` -#[derive(Clone, Copy)] -pub enum WordSeparator { - /// Find words by splitting on runs of `' '` characters. - /// - /// # Examples - /// - /// ``` - /// use textwrap::core::Word; - /// use textwrap::WordSeparator::AsciiSpace; - /// - /// let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>(); - /// assert_eq!(words, vec![Word::from("Hello "), - /// Word::from("World!")]); - /// ``` - AsciiSpace, - - /// Split `line` into words using Unicode break properties. 
- /// - /// This word separator uses the Unicode line breaking algorithm - /// described in [Unicode Standard Annex - /// #14](https://www.unicode.org/reports/tr14/) to find legal places - /// to break lines. There is a small difference in that the U+002D - /// (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break: - /// to allow a line break at a hyphen, use - /// [`WordSplitter::HyphenSplitter`](crate::WordSplitter::HyphenSplitter). - /// Soft hyphens are not currently supported. - /// - /// # Examples - /// - /// Unlike [`WordSeparator::AsciiSpace`], the Unicode line - /// breaking algorithm will find line break opportunities between - /// some characters with no intervening whitespace: - /// - /// ``` - /// #[cfg(feature = "unicode-linebreak")] { - /// use textwrap::core::Word; - /// use textwrap::WordSeparator::UnicodeBreakProperties; - /// - /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(), - /// vec![Word::from("Emojis: "), - /// Word::from("😂"), - /// Word::from("😍")]); - /// - /// assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::<Vec<_>>(), - /// vec![Word::from("CJK: "), - /// Word::from("你"), - /// Word::from("好")]); - /// } - /// ``` - /// - /// A U+2060 (Word Joiner) character can be inserted if you want to - /// manually override the defaults and keep the characters together: - /// - /// ``` - /// #[cfg(feature = "unicode-linebreak")] { - /// use textwrap::core::Word; - /// use textwrap::WordSeparator::UnicodeBreakProperties; - /// - /// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(), - /// vec![Word::from("Emojis: "), - /// Word::from("😂\u{2060}😍")]); - /// } - /// ``` - /// - /// The Unicode line breaking algorithm will also automatically - /// suppress break breaks around certain punctuation characters:: - /// - /// ``` - /// #[cfg(feature = "unicode-linebreak")] { - /// use textwrap::core::Word; - /// use 
textwrap::WordSeparator::UnicodeBreakProperties; - /// - /// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(), - /// vec![Word::from("[ foo ] "), - /// Word::from("bar !")]); - /// } - /// ``` - #[cfg(feature = "unicode-linebreak")] - UnicodeBreakProperties, - - /// Find words using a custom word separator - Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>), -} - -impl std::fmt::Debug for WordSeparator { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - WordSeparator::AsciiSpace => f.write_str("AsciiSpace"), - #[cfg(feature = "unicode-linebreak")] - WordSeparator::UnicodeBreakProperties => f.write_str("UnicodeBreakProperties"), - WordSeparator::Custom(_) => f.write_str("Custom(...)"), - } - } -} - -impl WordSeparator { - // This function should really return impl Iterator<Item = Word>, but - // this isn't possible until Rust supports higher-kinded types: - // https://github.com/rust-lang/rfcs/blob/master/text/1522-conservative-impl-trait.md - /// Find all words in `line`. 
- pub fn find_words<'a>(&self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> { - match self { - WordSeparator::AsciiSpace => find_words_ascii_space(line), - #[cfg(feature = "unicode-linebreak")] - WordSeparator::UnicodeBreakProperties => find_words_unicode_break_properties(line), - WordSeparator::Custom(func) => func(line), - } - } -} - -fn find_words_ascii_space<'a>(line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> { - let mut start = 0; - let mut in_whitespace = false; - let mut char_indices = line.char_indices(); - - Box::new(std::iter::from_fn(move || { - // for (idx, ch) in char_indices does not work, gives this - // error: - // - // > cannot move out of `char_indices`, a captured variable in - // > an `FnMut` closure - #[allow(clippy::while_let_on_iterator)] - while let Some((idx, ch)) = char_indices.next() { - if in_whitespace && ch != ' ' { - let word = Word::from(&line[start..idx]); - start = idx; - in_whitespace = ch == ' '; - return Some(word); - } - - in_whitespace = ch == ' '; - } - - if start < line.len() { - let word = Word::from(&line[start..]); - start = line.len(); - return Some(word); - } - - None - })) -} - -// Strip all ANSI escape sequences from `text`. -#[cfg(feature = "unicode-linebreak")] -fn strip_ansi_escape_sequences(text: &str) -> String { - let mut result = String::with_capacity(text.len()); - - let mut chars = text.chars(); - while let Some(ch) = chars.next() { - if skip_ansi_escape_sequence(ch, &mut chars) { - continue; - } - result.push(ch); - } - - result -} - -/// Soft hyphen, also knows as a “shy hyphen”. Should show up as ‘-’ -/// if a line is broken at this point, and otherwise be invisible. -/// Textwrap does not currently support breaking words at soft -/// hyphens. -#[cfg(feature = "unicode-linebreak")] -const SHY: char = '\u{00ad}'; - -/// Find words in line. ANSI escape sequences are ignored in `line`. 
-#[cfg(feature = "unicode-linebreak")] -fn find_words_unicode_break_properties<'a>( - line: &'a str, -) -> Box<dyn Iterator<Item = Word<'a>> + 'a> { - // Construct an iterator over (original index, stripped index) - // tuples. We find the Unicode linebreaks on a stripped string, - // but we need the original indices so we can form words based on - // the original string. - let mut last_stripped_idx = 0; - let mut char_indices = line.char_indices(); - let mut idx_map = std::iter::from_fn(move || match char_indices.next() { - Some((orig_idx, ch)) => { - let stripped_idx = last_stripped_idx; - if !skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) { - last_stripped_idx += ch.len_utf8(); - } - Some((orig_idx, stripped_idx)) - } - None => None, - }); - - let stripped = strip_ansi_escape_sequences(line); - let mut opportunities = unicode_linebreak::linebreaks(&stripped) - .filter(|(idx, _)| { - #[allow(clippy::match_like_matches_macro)] - match &stripped[..*idx].chars().next_back() { - // We suppress breaks at ‘-’ since we want to control - // this via the WordSplitter. - Some('-') => false, - // Soft hyphens are currently not supported since we - // require all `Word` fragments to be continuous in - // the input string. - Some(SHY) => false, - // Other breaks should be fine! - _ => true, - } - }) - .collect::<Vec<_>>() - .into_iter(); - - // Remove final break opportunity, we will add it below using - // &line[start..]; This ensures that we correctly include a - // trailing ANSI escape sequence. 
- opportunities.next_back(); - - let mut start = 0; - Box::new(std::iter::from_fn(move || { - #[allow(clippy::while_let_on_iterator)] - while let Some((idx, _)) = opportunities.next() { - if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx) { - let word = Word::from(&line[start..orig_idx]); - start = orig_idx; - return Some(word); - } - } - - if start < line.len() { - let word = Word::from(&line[start..]); - start = line.len(); - return Some(word); - } - - None - })) -} - -#[cfg(test)] -mod tests { - use super::WordSeparator::*; - use super::*; - - // Like assert_eq!, but the left expression is an iterator. - macro_rules! assert_iter_eq { - ($left:expr, $right:expr) => { - assert_eq!($left.collect::<Vec<_>>(), $right); - }; - } - - fn to_words<'a>(words: Vec<&'a str>) -> Vec<Word<'a>> { - words.into_iter().map(|w: &str| Word::from(&w)).collect() - } - - macro_rules! test_find_words { - ($ascii_name:ident, - $unicode_name:ident, - $([ $line:expr, $ascii_words:expr, $unicode_words:expr ]),+) => { - #[test] - fn $ascii_name() { - $( - let expected_words = to_words($ascii_words.to_vec()); - let actual_words = WordSeparator::AsciiSpace - .find_words($line) - .collect::<Vec<_>>(); - assert_eq!(actual_words, expected_words, "Line: {:?}", $line); - )+ - } - - #[test] - #[cfg(feature = "unicode-linebreak")] - fn $unicode_name() { - $( - let expected_words = to_words($unicode_words.to_vec()); - let actual_words = WordSeparator::UnicodeBreakProperties - .find_words($line) - .collect::<Vec<_>>(); - assert_eq!(actual_words, expected_words, "Line: {:?}", $line); - )+ - } - }; - } - - test_find_words!(ascii_space_empty, unicode_empty, ["", [], []]); - - test_find_words!( - ascii_single_word, - unicode_single_word, - ["foo", ["foo"], ["foo"]] - ); - - test_find_words!( - ascii_two_words, - unicode_two_words, - ["foo bar", ["foo ", "bar"], ["foo ", "bar"]] - ); - - test_find_words!( - ascii_multiple_words, - unicode_multiple_words, - ["foo bar", ["foo 
", "bar"], ["foo ", "bar"]], - ["x y z", ["x ", "y ", "z"], ["x ", "y ", "z"]] - ); - - test_find_words!( - ascii_only_whitespace, - unicode_only_whitespace, - [" ", [" "], [" "]], - [" ", [" "], [" "]] - ); - - test_find_words!( - ascii_inter_word_whitespace, - unicode_inter_word_whitespace, - ["foo bar", ["foo ", "bar"], ["foo ", "bar"]] - ); - - test_find_words!( - ascii_trailing_whitespace, - unicode_trailing_whitespace, - ["foo ", ["foo "], ["foo "]] - ); - - test_find_words!( - ascii_leading_whitespace, - unicode_leading_whitespace, - [" foo", [" ", "foo"], [" ", "foo"]] - ); - - test_find_words!( - ascii_multi_column_char, - unicode_multi_column_char, - ["\u{1f920}", ["\u{1f920}"], ["\u{1f920}"]] // cowboy emoji 🤠 - ); - - test_find_words!( - ascii_hyphens, - unicode_hyphens, - ["foo-bar", ["foo-bar"], ["foo-bar"]], - ["foo- bar", ["foo- ", "bar"], ["foo- ", "bar"]], - ["foo - bar", ["foo ", "- ", "bar"], ["foo ", "- ", "bar"]], - ["foo -bar", ["foo ", "-bar"], ["foo ", "-bar"]] - ); - - test_find_words!( - ascii_newline, - unicode_newline, - ["foo\nbar", ["foo\nbar"], ["foo\n", "bar"]] - ); - - test_find_words!( - ascii_tab, - unicode_tab, - ["foo\tbar", ["foo\tbar"], ["foo\t", "bar"]] - ); - - test_find_words!( - ascii_non_breaking_space, - unicode_non_breaking_space, - ["foo\u{00A0}bar", ["foo\u{00A0}bar"], ["foo\u{00A0}bar"]] - ); - - #[test] - #[cfg(unix)] - fn find_words_colored_text() { - use termion::color::{Blue, Fg, Green, Reset}; - - let green_hello = format!("{}Hello{} ", Fg(Green), Fg(Reset)); - let blue_world = format!("{}World!{}", Fg(Blue), Fg(Reset)); - assert_iter_eq!( - AsciiSpace.find_words(&format!("{}{}", green_hello, blue_world)), - vec![Word::from(&green_hello), Word::from(&blue_world)] - ); - - #[cfg(feature = "unicode-linebreak")] - assert_iter_eq!( - UnicodeBreakProperties.find_words(&format!("{}{}", green_hello, blue_world)), - vec![Word::from(&green_hello), Word::from(&blue_world)] - ); - } - - #[test] - fn 
find_words_color_inside_word() { - let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz"; - assert_iter_eq!(AsciiSpace.find_words(&text), vec![Word::from(text)]); - - #[cfg(feature = "unicode-linebreak")] - assert_iter_eq!( - UnicodeBreakProperties.find_words(&text), - vec![Word::from(text)] - ); - } -} diff --git a/vendor/textwrap/src/word_splitters.rs b/vendor/textwrap/src/word_splitters.rs deleted file mode 100644 index 69e246f..0000000 --- a/vendor/textwrap/src/word_splitters.rs +++ /dev/null @@ -1,314 +0,0 @@ -//! Word splitting functionality. -//! -//! To wrap text into lines, long words sometimes need to be split -//! across lines. The [`WordSplitter`] enum defines this -//! functionality. - -use crate::core::{display_width, Word}; - -/// The `WordSplitter` enum describes where words can be split. -/// -/// If the textwrap crate has been compiled with the `hyphenation` -/// Cargo feature enabled, you will find a -/// [`WordSplitter::Hyphenation`] variant. Use this struct for -/// language-aware hyphenation: -/// -/// ``` -/// #[cfg(feature = "hyphenation")] { -/// use hyphenation::{Language, Load, Standard}; -/// use textwrap::{wrap, Options, WordSplitter}; -/// -/// let text = "Oxidation is the loss of electrons."; -/// let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); -/// let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary)); -/// assert_eq!(wrap(text, &options), vec!["Oxida-", -/// "tion is", -/// "the loss", -/// "of elec-", -/// "trons."]); -/// } -/// ``` -/// -/// Please see the documentation for the [hyphenation] crate for more -/// details. 
-/// -/// [hyphenation]: https://docs.rs/hyphenation/ -#[derive(Clone)] -pub enum WordSplitter { - /// Use this as a [`Options.word_splitter`] to avoid any kind of - /// hyphenation: - /// - /// ``` - /// use textwrap::{wrap, Options, WordSplitter}; - /// - /// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation); - /// assert_eq!(wrap("foo bar-baz", &options), - /// vec!["foo", "bar-baz"]); - /// ``` - /// - /// [`Options.word_splitter`]: super::Options::word_splitter - NoHyphenation, - - /// `HyphenSplitter` is the default `WordSplitter` used by - /// [`Options::new`](super::Options::new). It will split words on - /// existing hyphens in the word. - /// - /// It will only use hyphens that are surrounded by alphanumeric - /// characters, which prevents a word like `"--foo-bar"` from - /// being split into `"--"` and `"foo-bar"`. - /// - /// # Examples - /// - /// ``` - /// use textwrap::WordSplitter; - /// - /// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"), - /// vec![6]); - /// ``` - HyphenSplitter, - - /// Use a custom function as the word splitter. - /// - /// This varian lets you implement a custom word splitter using - /// your own function. - /// - /// # Examples - /// - /// ``` - /// use textwrap::WordSplitter; - /// - /// fn split_at_underscore(word: &str) -> Vec<usize> { - /// word.match_indices('_').map(|(idx, _)| idx + 1).collect() - /// } - /// - /// let word_splitter = WordSplitter::Custom(split_at_underscore); - /// assert_eq!(word_splitter.split_points("a_long_identifier"), - /// vec![2, 7]); - /// ``` - Custom(fn(word: &str) -> Vec<usize>), - - /// A hyphenation dictionary can be used to do language-specific - /// hyphenation using patterns from the [hyphenation] crate. - /// - /// **Note:** Only available when the `hyphenation` Cargo feature is - /// enabled. 
- /// - /// [hyphenation]: https://docs.rs/hyphenation/ - #[cfg(feature = "hyphenation")] - Hyphenation(hyphenation::Standard), -} - -impl std::fmt::Debug for WordSplitter { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - WordSplitter::NoHyphenation => f.write_str("NoHyphenation"), - WordSplitter::HyphenSplitter => f.write_str("HyphenSplitter"), - WordSplitter::Custom(_) => f.write_str("Custom(...)"), - #[cfg(feature = "hyphenation")] - WordSplitter::Hyphenation(dict) => write!(f, "Hyphenation({})", dict.language()), - } - } -} - -impl PartialEq<WordSplitter> for WordSplitter { - fn eq(&self, other: &WordSplitter) -> bool { - match (self, other) { - (WordSplitter::NoHyphenation, WordSplitter::NoHyphenation) => true, - (WordSplitter::HyphenSplitter, WordSplitter::HyphenSplitter) => true, - #[cfg(feature = "hyphenation")] - (WordSplitter::Hyphenation(this_dict), WordSplitter::Hyphenation(other_dict)) => { - this_dict.language() == other_dict.language() - } - (_, _) => false, - } - } -} - -impl WordSplitter { - /// Return all possible indices where `word` can be split. - /// - /// The indices are in the range `0..word.len()`. They point to - /// the index _after_ the split point, i.e., after `-` if - /// splitting on hyphens. This way, `word.split_at(idx)` will - /// break the word into two well-formed pieces. 
- /// - /// # Examples - /// - /// ``` - /// use textwrap::WordSplitter; - /// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]); - /// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]); - /// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]); - /// ``` - pub fn split_points(&self, word: &str) -> Vec<usize> { - match self { - WordSplitter::NoHyphenation => Vec::new(), - WordSplitter::HyphenSplitter => { - let mut splits = Vec::new(); - - for (idx, _) in word.match_indices('-') { - // We only use hyphens that are surrounded by alphanumeric - // characters. This is to avoid splitting on repeated hyphens, - // such as those found in --foo-bar. - let prev = word[..idx].chars().next_back(); - let next = word[idx + 1..].chars().next(); - - if prev.filter(|ch| ch.is_alphanumeric()).is_some() - && next.filter(|ch| ch.is_alphanumeric()).is_some() - { - splits.push(idx + 1); // +1 due to width of '-'. - } - } - - splits - } - WordSplitter::Custom(splitter_func) => splitter_func(word), - #[cfg(feature = "hyphenation")] - WordSplitter::Hyphenation(dictionary) => { - use hyphenation::Hyphenator; - dictionary.hyphenate(word).breaks - } - } - } -} - -/// Split words into smaller words according to the split points given -/// by `word_splitter`. -/// -/// Note that we split all words, regardless of their length. This is -/// to more cleanly separate the business of splitting (including -/// automatic hyphenation) from the business of word wrapping. 
-pub fn split_words<'a, I>( - words: I, - word_splitter: &'a WordSplitter, -) -> impl Iterator<Item = Word<'a>> -where - I: IntoIterator<Item = Word<'a>>, -{ - words.into_iter().flat_map(move |word| { - let mut prev = 0; - let mut split_points = word_splitter.split_points(&word).into_iter(); - std::iter::from_fn(move || { - if let Some(idx) = split_points.next() { - let need_hyphen = !word[..idx].ends_with('-'); - let w = Word { - word: &word.word[prev..idx], - width: display_width(&word[prev..idx]), - whitespace: "", - penalty: if need_hyphen { "-" } else { "" }, - }; - prev = idx; - return Some(w); - } - - if prev < word.word.len() || prev == 0 { - let w = Word { - word: &word.word[prev..], - width: display_width(&word[prev..]), - whitespace: word.whitespace, - penalty: word.penalty, - }; - prev = word.word.len() + 1; - return Some(w); - } - - None - }) - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - // Like assert_eq!, but the left expression is an iterator. - macro_rules! assert_iter_eq { - ($left:expr, $right:expr) => { - assert_eq!($left.collect::<Vec<_>>(), $right); - }; - } - - #[test] - fn split_words_no_words() { - assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]); - } - - #[test] - fn split_words_empty_word() { - assert_iter_eq!( - split_words(vec![Word::from(" ")], &WordSplitter::HyphenSplitter), - vec![Word::from(" ")] - ); - } - - #[test] - fn split_words_single_word() { - assert_iter_eq!( - split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter), - vec![Word::from("foobar")] - ); - } - - #[test] - fn split_words_hyphen_splitter() { - assert_iter_eq!( - split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter), - vec![Word::from("foo-"), Word::from("bar")] - ); - } - - #[test] - fn split_words_no_hyphenation() { - assert_iter_eq!( - split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation), - vec![Word::from("foo-bar")] - ); - } - - #[test] - fn split_words_adds_penalty() { - 
let fixed_split_point = |_: &str| vec![3]; - - assert_iter_eq!( - split_words( - vec![Word::from("foobar")].into_iter(), - &WordSplitter::Custom(fixed_split_point) - ), - vec![ - Word { - word: "foo", - width: 3, - whitespace: "", - penalty: "-" - }, - Word { - word: "bar", - width: 3, - whitespace: "", - penalty: "" - } - ] - ); - - assert_iter_eq!( - split_words( - vec![Word::from("fo-bar")].into_iter(), - &WordSplitter::Custom(fixed_split_point) - ), - vec![ - Word { - word: "fo-", - width: 3, - whitespace: "", - penalty: "" - }, - Word { - word: "bar", - width: 3, - whitespace: "", - penalty: "" - } - ] - ); - } -} diff --git a/vendor/textwrap/src/wrap_algorithms.rs b/vendor/textwrap/src/wrap_algorithms.rs deleted file mode 100644 index 5ca49c3..0000000 --- a/vendor/textwrap/src/wrap_algorithms.rs +++ /dev/null @@ -1,381 +0,0 @@ -//! Word wrapping algorithms. -//! -//! After a text has been broken into words (or [`Fragment`]s), one -//! now has to decide how to break the fragments into lines. The -//! simplest algorithm for this is implemented by [`wrap_first_fit`]: -//! it uses no look-ahead and simply adds fragments to the line as -//! long as they fit. However, this can lead to poor line breaks if a -//! large fragment almost-but-not-quite fits on a line. When that -//! happens, the fragment is moved to the next line and it will leave -//! behind a large gap. A more advanced algorithm, implemented by -//! [`wrap_optimal_fit`], will take this into account. The optimal-fit -//! algorithm considers all possible line breaks and will attempt to -//! minimize the gaps left behind by overly short lines. -//! -//! While both algorithms run in linear time, the first-fit algorithm -//! is about 4 times faster than the optimal-fit algorithm. - -#[cfg(feature = "smawk")] -mod optimal_fit; -#[cfg(feature = "smawk")] -pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties}; - -use crate::core::{Fragment, Word}; - -/// Describes how to wrap words into lines. 
-/// -/// The simplest approach is to wrap words one word at a time and -/// accept the first way of wrapping which fit -/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is -/// enabled, a more complex algorithm is available which will look at -/// an entire paragraph at a time in order to find optimal line breaks -/// ([`WrapAlgorithm::OptimalFit`]). -#[derive(Clone, Copy)] -pub enum WrapAlgorithm { - /// Wrap words using a fast and simple algorithm. - /// - /// This algorithm uses no look-ahead when finding line breaks. - /// Implemented by [`wrap_first_fit`], please see that function for - /// details and examples. - FirstFit, - - /// Wrap words using an advanced algorithm with look-ahead. - /// - /// This wrapping algorithm considers the entire paragraph to find - /// optimal line breaks. When wrapping text, "penalties" are - /// assigned to line breaks based on the gaps left at the end of - /// lines. See [`Penalties`] for details. - /// - /// The underlying wrapping algorithm is implemented by - /// [`wrap_optimal_fit`], please see that function for examples. - /// - /// **Note:** Only available when the `smawk` Cargo feature is - /// enabled. - #[cfg(feature = "smawk")] - OptimalFit(Penalties), - - /// Custom wrapping function. - /// - /// Use this if you want to implement your own wrapping algorithm. - /// The function can freely decide how to turn a slice of - /// [`Word`]s into lines. - /// - /// # Example - /// - /// ``` - /// use textwrap::core::Word; - /// use textwrap::{wrap, Options, WrapAlgorithm}; - /// - /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> { - /// let mut lines = Vec::new(); - /// let mut step = 1; - /// let mut start_idx = 0; - /// while start_idx + step <= words.len() { - /// lines.push(&words[start_idx .. 
start_idx+step]); - /// start_idx += step; - /// step += 1; - /// } - /// lines - /// } - /// - /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair)); - /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options), - /// vec!["First,", - /// "second, third,", - /// "fourth, fifth, sixth"]); - /// ``` - Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>), -} - -impl std::fmt::Debug for WrapAlgorithm { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - WrapAlgorithm::FirstFit => f.write_str("FirstFit"), - #[cfg(feature = "smawk")] - WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties), - WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"), - } - } -} - -impl WrapAlgorithm { - /// Create new wrap algorithm. - /// - /// The best wrapping algorithm is used by default, i.e., - /// [`WrapAlgorithm::OptimalFit`] if available, otherwise - /// [`WrapAlgorithm::FirstFit`]. - pub const fn new() -> Self { - #[cfg(not(feature = "smawk"))] - { - WrapAlgorithm::FirstFit - } - - #[cfg(feature = "smawk")] - { - WrapAlgorithm::new_optimal_fit() - } - } - - /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This - /// works well for monospace text. - /// - /// **Note:** Only available when the `smawk` Cargo feature is - /// enabled. - #[cfg(feature = "smawk")] - pub const fn new_optimal_fit() -> Self { - WrapAlgorithm::OptimalFit(Penalties::new()) - } - - /// Wrap words according to line widths. - /// - /// The `line_widths` slice gives the target line width for each - /// line (the last slice element is repeated as necessary). This - /// can be used to implement hanging indentation. 
- #[inline] - pub fn wrap<'a, 'b>( - &self, - words: &'b [Word<'a>], - line_widths: &'b [usize], - ) -> Vec<&'b [Word<'a>]> { - // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53 - // = 9_007_199_254_740_992 can be represented without loss by - // a f64. Larger line widths will be rounded to the nearest - // representable number. - let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>(); - - match self { - WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths), - - #[cfg(feature = "smawk")] - WrapAlgorithm::OptimalFit(penalties) => { - // The computation cannnot overflow when the line - // widths are restricted to usize. - wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap() - } - - WrapAlgorithm::Custom(func) => func(words, line_widths), - } - } -} - -impl Default for WrapAlgorithm { - fn default() -> Self { - WrapAlgorithm::new() - } -} - -/// Wrap abstract fragments into lines with a first-fit algorithm. -/// -/// The `line_widths` slice gives the target line width for each line -/// (the last slice element is repeated as necessary). This can be -/// used to implement hanging indentation. -/// -/// The fragments must already have been split into the desired -/// widths, this function will not (and cannot) attempt to split them -/// further when arranging them into lines. -/// -/// # First-Fit Algorithm -/// -/// This implements a simple “greedy” algorithm: accumulate fragments -/// one by one and when a fragment no longer fits, start a new line. -/// There is no look-ahead, we simply take first fit of the fragments -/// we find. -/// -/// While fast and predictable, this algorithm can produce poor line -/// breaks when a long fragment is moved to a new line, leaving behind -/// a large gap: -/// -/// ``` -/// use textwrap::core::Word; -/// use textwrap::wrap_algorithms::wrap_first_fit; -/// use textwrap::WordSeparator; -/// -/// // Helper to convert wrapped lines to a Vec<String>. 
-/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> { -/// lines.iter().map(|line| { -/// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ") -/// }).collect::<Vec<_>>() -/// } -/// -/// let text = "These few words will unfortunately not wrap nicely."; -/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>(); -/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])), -/// vec!["These few words", -/// "will", // <-- short line -/// "unfortunately", -/// "not wrap", -/// "nicely."]); -/// -/// // We can avoid the short line if we look ahead: -/// #[cfg(feature = "smawk")] -/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; -/// #[cfg(feature = "smawk")] -/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()), -/// vec!["These few", -/// "words will", -/// "unfortunately", -/// "not wrap", -/// "nicely."]); -/// ``` -/// -/// The [`wrap_optimal_fit`] function was used above to get better -/// line breaks. It uses an advanced algorithm which tries to avoid -/// short lines. This function is about 4 times faster than -/// [`wrap_optimal_fit`]. -/// -/// # Examples -/// -/// Imagine you're building a house site and you have a number of -/// tasks you need to execute. Things like pour foundation, complete -/// framing, install plumbing, electric cabling, install insulation. -/// -/// The construction workers can only work during daytime, so they -/// need to pack up everything at night. Because they need to secure -/// their tools and move machines back to the garage, this process -/// takes much more time than the time it would take them to simply -/// switch to another task. -/// -/// You would like to make a list of tasks to execute every day based -/// on your estimates. 
You can model this with a program like this: -/// -/// ``` -/// use textwrap::core::{Fragment, Word}; -/// use textwrap::wrap_algorithms::wrap_first_fit; -/// -/// #[derive(Debug)] -/// struct Task<'a> { -/// name: &'a str, -/// hours: f64, // Time needed to complete task. -/// sweep: f64, // Time needed for a quick sweep after task during the day. -/// cleanup: f64, // Time needed for full cleanup if day ends with this task. -/// } -/// -/// impl Fragment for Task<'_> { -/// fn width(&self) -> f64 { self.hours } -/// fn whitespace_width(&self) -> f64 { self.sweep } -/// fn penalty_width(&self) -> f64 { self.cleanup } -/// } -/// -/// // The morning tasks -/// let tasks = vec![ -/// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 }, -/// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, -/// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 }, -/// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, -/// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, -/// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, -/// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 }, -/// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 }, -/// Task { name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 }, -/// ]; -/// -/// // Fill tasks into days, taking `day_length` into account. The -/// // output shows the hours worked per day along with the names of -/// // the tasks for that day. -/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> { -/// let mut days = Vec::new(); -/// // Assign tasks to days. The assignment is a vector of slices, -/// // with a slice per day. 
-/// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]); -/// for day in assigned_days.iter() { -/// let last = day.last().unwrap(); -/// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum(); -/// let names = day.iter().map(|t| t.name).collect::<Vec<_>>(); -/// days.push((work_hours - last.sweep + last.cleanup, names)); -/// } -/// days -/// } -/// -/// // With a single crew working 8 hours a day: -/// assert_eq!( -/// assign_days(&tasks, 8.0), -/// [ -/// (7.0, vec!["Foundation"]), -/// (8.0, vec!["Framing", "Plumbing"]), -/// (7.0, vec!["Electrical", "Insulation"]), -/// (5.0, vec!["Drywall"]), -/// (7.0, vec!["Floors", "Countertops"]), -/// (4.0, vec!["Bathrooms"]), -/// ] -/// ); -/// -/// // With two crews working in shifts, 16 hours a day: -/// assert_eq!( -/// assign_days(&tasks, 16.0), -/// [ -/// (14.0, vec!["Foundation", "Framing", "Plumbing"]), -/// (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]), -/// (6.0, vec!["Countertops", "Bathrooms"]), -/// ] -/// ); -/// ``` -/// -/// Apologies to anyone who actually knows how to build a house and -/// knows how long each step takes :-) -pub fn wrap_first_fit<'a, 'b, T: Fragment>( - fragments: &'a [T], - line_widths: &'b [f64], -) -> Vec<&'a [T]> { - // The final line width is used for all remaining lines. 
- let default_line_width = line_widths.last().copied().unwrap_or(0.0); - let mut lines = Vec::new(); - let mut start = 0; - let mut width = 0.0; - - for (idx, fragment) in fragments.iter().enumerate() { - let line_width = line_widths - .get(lines.len()) - .copied() - .unwrap_or(default_line_width); - if width + fragment.width() + fragment.penalty_width() > line_width && idx > start { - lines.push(&fragments[start..idx]); - start = idx; - width = 0.0; - } - width += fragment.width() + fragment.whitespace_width(); - } - lines.push(&fragments[start..]); - lines -} - -#[cfg(test)] -mod tests { - use super::*; - - #[derive(Debug, PartialEq)] - struct Word(f64); - - #[rustfmt::skip] - impl Fragment for Word { - fn width(&self) -> f64 { self.0 } - fn whitespace_width(&self) -> f64 { 1.0 } - fn penalty_width(&self) -> f64 { 0.0 } - } - - #[test] - fn wrap_string_longer_than_f64() { - let words = vec![ - Word(1e307), - Word(2e307), - Word(3e307), - Word(4e307), - Word(5e307), - Word(6e307), - ]; - // Wrap at just under f64::MAX (~19e307). The tiny - // whitespace_widths disappear because of loss of precision. - assert_eq!( - wrap_first_fit(&words, &[15e307]), - &[ - vec![ - Word(1e307), - Word(2e307), - Word(3e307), - Word(4e307), - Word(5e307) - ], - vec![Word(6e307)] - ] - ); - } -} diff --git a/vendor/textwrap/src/wrap_algorithms/optimal_fit.rs b/vendor/textwrap/src/wrap_algorithms/optimal_fit.rs deleted file mode 100644 index 0625e28..0000000 --- a/vendor/textwrap/src/wrap_algorithms/optimal_fit.rs +++ /dev/null @@ -1,433 +0,0 @@ -use std::cell::RefCell; - -use crate::core::Fragment; - -/// Penalties for -/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit) -/// and [`wrap_optimal_fit`]. -/// -/// This wrapping algorithm in [`wrap_optimal_fit`] considers the -/// entire paragraph to find optimal line breaks. When wrapping text, -/// "penalties" are assigned to line breaks based on the gaps left at -/// the end of lines. 
The penalties are given by this struct, with -/// [`Penalties::default`] assigning penalties that work well for -/// monospace text. -/// -/// If you are wrapping proportional text, you are advised to assign -/// your own penalties according to your font size. See the individual -/// penalties below for details. -/// -/// **Note:** Only available when the `smawk` Cargo feature is -/// enabled. -#[derive(Clone, Copy, Debug)] -pub struct Penalties { - /// Per-line penalty. This is added for every line, which makes it - /// expensive to output more lines than the minimum required. - pub nline_penalty: usize, - - /// Per-character cost for lines that overflow the target line width. - /// - /// With a default value of 50², every single character costs as - /// much as leaving a gap of 50 characters behind. This is because - /// we assign as cost of `gap * gap` to a short line. When - /// wrapping monospace text, we can overflow the line by 1 - /// character in extreme cases: - /// - /// ``` - /// use textwrap::core::Word; - /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; - /// - /// let short = "foo "; - /// let long = "x".repeat(50); - /// let length = (short.len() + long.len()) as f64; - /// let fragments = vec![Word::from(short), Word::from(&long)]; - /// let penalties = Penalties::new(); - /// - /// // Perfect fit, both words are on a single line with no overflow. - /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap(); - /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); - /// - /// // The words no longer fit, yet we get a single line back. While - /// // the cost of overflow (`1 * 2500`) is the same as the cost of the - /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty` - /// // which makes it cheaper to overflow than to use two lines. 
- /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap(); - /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); - /// - /// // The cost of overflow would be 2 * 2500, whereas the cost of - /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 = - /// // 3401`. We therefore get two lines. - /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap(); - /// assert_eq!(wrapped, vec![&[Word::from(short)], - /// &[Word::from(&long)]]); - /// ``` - /// - /// This only happens if the overflowing word is 50 characters - /// long _and_ if the word overflows the line by exactly one - /// character. If it overflows by more than one character, the - /// overflow penalty will quickly outgrow the cost of the gap, as - /// seen above. - pub overflow_penalty: usize, - - /// When should the a single word on the last line be considered - /// "too short"? - /// - /// If the last line of the text consist of a single word and if - /// this word is shorter than `1 / short_last_line_fraction` of - /// the line width, then the final line will be considered "short" - /// and `short_last_line_penalty` is added as an extra penalty. - /// - /// The effect of this is to avoid a final line consisting of a - /// single small word. For example, with a - /// `short_last_line_penalty` of 25 (the default), a gap of up to - /// 5 columns will be seen as more desirable than having a final - /// short line. 
- /// - /// ## Examples - /// - /// ``` - /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm}; - /// - /// let text = "This is a demo of the short last line penalty."; - /// - /// // The first-fit algorithm leaves a single short word on the last line: - /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)), - /// vec!["This is a demo of the short last line", - /// "penalty."]); - /// - /// #[cfg(feature = "smawk")] { - /// let mut penalties = wrap_algorithms::Penalties::new(); - /// - /// // Since "penalty." is shorter than 25% of the line width, the - /// // optimal-fit algorithm adds a penalty of 25. This is enough - /// // to move "line " down: - /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), - /// vec!["This is a demo of the short last", - /// "line penalty."]); - /// - /// // We can change the meaning of "short" lines. Here, only words - /// // shorter than 1/10th of the line width will be considered short: - /// penalties.short_last_line_fraction = 10; - /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), - /// vec!["This is a demo of the short last line", - /// "penalty."]); - /// - /// // If desired, the penalty can also be disabled: - /// penalties.short_last_line_fraction = 4; - /// penalties.short_last_line_penalty = 0; - /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), - /// vec!["This is a demo of the short last line", - /// "penalty."]); - /// } - /// ``` - pub short_last_line_fraction: usize, - - /// Penalty for a last line with a single short word. - /// - /// Set this to zero if you do not want to penalize short last lines. - pub short_last_line_penalty: usize, - - /// Penalty for lines ending with a hyphen. - pub hyphen_penalty: usize, -} - -impl Penalties { - /// Default penalties for monospace text. - /// - /// The penalties here work well for monospace text. 
This is - /// because they expect the gaps at the end of lines to be roughly - /// in the range `0..100`. If the gaps are larger, the - /// `overflow_penalty` and `hyphen_penalty` become insignificant. - pub const fn new() -> Self { - Penalties { - nline_penalty: 1000, - overflow_penalty: 50 * 50, - short_last_line_fraction: 4, - short_last_line_penalty: 25, - hyphen_penalty: 25, - } - } -} - -impl Default for Penalties { - fn default() -> Self { - Self::new() - } -} - -/// Cache for line numbers. This is necessary to avoid a O(n**2) -/// behavior when computing line numbers in [`wrap_optimal_fit`]. -struct LineNumbers { - line_numbers: RefCell<Vec<usize>>, -} - -impl LineNumbers { - fn new(size: usize) -> Self { - let mut line_numbers = Vec::with_capacity(size); - line_numbers.push(0); - LineNumbers { - line_numbers: RefCell::new(line_numbers), - } - } - - fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize { - while self.line_numbers.borrow_mut().len() < i + 1 { - let pos = self.line_numbers.borrow().len(); - let line_number = 1 + self.get(minima[pos].0, minima); - self.line_numbers.borrow_mut().push(line_number); - } - - self.line_numbers.borrow()[i] - } -} - -/// Overflow error during the [`wrap_optimal_fit`] computation. -#[derive(Debug, PartialEq, Eq)] -pub struct OverflowError; - -impl std::fmt::Display for OverflowError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "wrap_optimal_fit cost computation overflowed") - } -} - -impl std::error::Error for OverflowError {} - -/// Wrap abstract fragments into lines with an optimal-fit algorithm. -/// -/// The `line_widths` slice gives the target line width for each line -/// (the last slice element is repeated as necessary). This can be -/// used to implement hanging indentation. -/// -/// The fragments must already have been split into the desired -/// widths, this function will not (and cannot) attempt to split them -/// further when arranging them into lines. 
-/// -/// # Optimal-Fit Algorithm -/// -/// The algorithm considers all possible break points and picks the -/// breaks which minimizes the gaps at the end of each line. More -/// precisely, the algorithm assigns a cost or penalty to each break -/// point, determined by `cost = gap * gap` where `gap = target_width - -/// line_width`. Shorter lines are thus penalized more heavily since -/// they leave behind a larger gap. -/// -/// We can illustrate this with the text “To be, or not to be: that is -/// the question”. We will be wrapping it in a narrow column with room -/// for only 10 characters. The [greedy -/// algorithm](super::wrap_first_fit) will produce these lines, each -/// annotated with the corresponding penalty: -/// -/// ```text -/// "To be, or" 1² = 1 -/// "not to be:" 0² = 0 -/// "that is" 3² = 9 -/// "the" 7² = 49 -/// "question" 2² = 4 -/// ``` -/// -/// We see that line four with “the” leaves a gap of 7 columns, which -/// gives it a penalty of 49. The sum of the penalties is 63. -/// -/// There are 10 words, which means that there are `2_u32.pow(9)` or -/// 512 different ways to typeset it. We can compute -/// the sum of the penalties for each possible line break and search -/// for the one with the lowest sum: -/// -/// ```text -/// "To be," 4² = 16 -/// "or not to" 1² = 1 -/// "be: that" 2² = 4 -/// "is the" 4² = 16 -/// "question" 2² = 4 -/// ``` -/// -/// The sum of the penalties is 41, which is better than what the -/// greedy algorithm produced. -/// -/// Searching through all possible combinations would normally be -/// prohibitively slow. However, it turns out that the problem can be -/// formulated as the task of finding column minima in a cost matrix. -/// This matrix has a special form (totally monotone) which lets us -/// use a [linear-time algorithm called -/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break -/// points. -/// -/// This means that the time complexity remains O(_n_) where _n_ is -/// the number of words. 
Compared to -/// [`wrap_first_fit`](super::wrap_first_fit), this function is about -/// 4 times slower. -/// -/// The optimization of per-line costs over the entire paragraph is -/// inspired by the line breaking algorithm used in TeX, as described -/// in the 1981 article [_Breaking Paragraphs into -/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf) -/// by Knuth and Plass. The implementation here is based on [Python -/// code by David -/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py). -/// -/// # Errors -/// -/// In case of an overflow during the cost computation, an `Err` is -/// returned. Overflows happens when fragments or lines have infinite -/// widths (`f64::INFINITY`) or if the widths are so large that the -/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()` -/// (approximately 1e154): -/// -/// ``` -/// use textwrap::core::Fragment; -/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties}; -/// -/// #[derive(Debug, PartialEq)] -/// struct Word(f64); -/// -/// impl Fragment for Word { -/// fn width(&self) -> f64 { self.0 } -/// fn whitespace_width(&self) -> f64 { 1.0 } -/// fn penalty_width(&self) -> f64 { 0.0 } -/// } -/// -/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is -/// // larger than f64::MAX: -/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()), -/// Err(OverflowError)); -/// ``` -/// -/// When using fragment widths and line widths which fit inside an -/// `u64`, overflows cannot happen. This means that fragments derived -/// from a `&str` cannot cause overflows. -/// -/// **Note:** Only available when the `smawk` Cargo feature is -/// enabled. -pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( - fragments: &'a [T], - line_widths: &'b [f64], - penalties: &'b Penalties, -) -> Result<Vec<&'a [T]>, OverflowError> { - // The final line width is used for all remaining lines. 
- let default_line_width = line_widths.last().copied().unwrap_or(0.0); - let mut widths = Vec::with_capacity(fragments.len() + 1); - let mut width = 0.0; - widths.push(width); - for fragment in fragments { - width += fragment.width() + fragment.whitespace_width(); - widths.push(width); - } - - let line_numbers = LineNumbers::new(fragments.len()); - - let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| { - // Line number for fragment `i`. - let line_number = line_numbers.get(i, minima); - let line_width = line_widths - .get(line_number) - .copied() - .unwrap_or(default_line_width); - let target_width = line_width.max(1.0); - - // Compute the width of a line spanning fragments[i..j] in - // constant time. We need to adjust widths[j] by subtracting - // the whitespace of fragment[j-1] and then add the penalty. - let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width() - + fragments[j - 1].penalty_width(); - - // We compute cost of the line containing fragments[i..j]. We - // start with values[i].1, which is the optimal cost for - // breaking before fragments[i]. - // - // First, every extra line cost NLINE_PENALTY. - let mut cost = minima[i].1 + penalties.nline_penalty as f64; - - // Next, we add a penalty depending on the line length. - if line_width > target_width { - // Lines that overflow get a hefty penalty. - let overflow = line_width - target_width; - cost += overflow * penalties.overflow_penalty as f64; - } else if j < fragments.len() { - // Other lines (except for the last line) get a milder - // penalty which depend on the size of the gap. - let gap = target_width - line_width; - cost += gap * gap; - } else if i + 1 == j - && line_width < target_width / penalties.short_last_line_fraction as f64 - { - // The last line can have any size gap, but we do add a - // penalty if the line is very short (typically because it - // contains just a single word). 
- cost += penalties.short_last_line_penalty as f64; - } - - // Finally, we discourage hyphens. - if fragments[j - 1].penalty_width() > 0.0 { - // TODO: this should use a penalty value from the fragment - // instead. - cost += penalties.hyphen_penalty as f64; - } - - cost - }); - - for (_, cost) in &minima { - if cost.is_infinite() { - return Err(OverflowError); - } - } - - let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima)); - let mut pos = fragments.len(); - loop { - let prev = minima[pos].0; - lines.push(&fragments[prev..pos]); - pos = prev; - if pos == 0 { - break; - } - } - - lines.reverse(); - Ok(lines) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[derive(Debug, PartialEq)] - struct Word(f64); - - #[rustfmt::skip] - impl Fragment for Word { - fn width(&self) -> f64 { self.0 } - fn whitespace_width(&self) -> f64 { 1.0 } - fn penalty_width(&self) -> f64 { 0.0 } - } - - #[test] - fn wrap_fragments_with_infinite_widths() { - let words = vec![Word(f64::INFINITY)]; - assert_eq!( - wrap_optimal_fit(&words, &[0.0], &Penalties::default()), - Err(OverflowError) - ); - } - - #[test] - fn wrap_fragments_with_huge_widths() { - let words = vec![Word(1e200), Word(1e250), Word(1e300)]; - assert_eq!( - wrap_optimal_fit(&words, &[1e300], &Penalties::default()), - Err(OverflowError) - ); - } - - #[test] - fn wrap_fragments_with_large_widths() { - // The gaps will be of the sizes between 1e25 and 1e75. This - // makes the `gap * gap` cost fit comfortably in a f64. - let words = vec![Word(1e25), Word(1e50), Word(1e75)]; - assert_eq!( - wrap_optimal_fit(&words, &[1e100], &Penalties::default()), - Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]]) - ); - } -} |