use std::cell::RefCell; use crate::core::Fragment; /// Penalties for /// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit) /// and [`wrap_optimal_fit`]. /// /// This wrapping algorithm in [`wrap_optimal_fit`] considers the /// entire paragraph to find optimal line breaks. When wrapping text, /// "penalties" are assigned to line breaks based on the gaps left at /// the end of lines. The penalties are given by this struct, with /// [`Penalties::default`] assigning penalties that work well for /// monospace text. /// /// If you are wrapping proportional text, you are advised to assign /// your own penalties according to your font size. See the individual /// penalties below for details. /// /// **Note:** Only available when the `smawk` Cargo feature is /// enabled. #[derive(Clone, Copy, Debug)] pub struct Penalties { /// Per-line penalty. This is added for every line, which makes it /// expensive to output more lines than the minimum required. pub nline_penalty: usize, /// Per-character cost for lines that overflow the target line width. /// /// With a default value of 50², every single character costs as /// much as leaving a gap of 50 characters behind. This is because /// we assign as cost of `gap * gap` to a short line. When /// wrapping monospace text, we can overflow the line by 1 /// character in extreme cases: /// /// ``` /// use textwrap::core::Word; /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties}; /// /// let short = "foo "; /// let long = "x".repeat(50); /// let length = (short.len() + long.len()) as f64; /// let fragments = vec![Word::from(short), Word::from(&long)]; /// let penalties = Penalties::new(); /// /// // Perfect fit, both words are on a single line with no overflow. /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); /// /// // The words no longer fit, yet we get a single line back. While /// // the cost of overflow (`1 * 2500`) is the same as the cost of the /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty` /// // which makes it cheaper to overflow than to use two lines. /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); /// /// // The cost of overflow would be 2 * 2500, whereas the cost of /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 = /// // 3401`. We therefore get two lines. /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap(); /// assert_eq!(wrapped, vec![&[Word::from(short)], /// &[Word::from(&long)]]); /// ``` /// /// This only happens if the overflowing word is 50 characters /// long _and_ if the word overflows the line by exactly one /// character. If it overflows by more than one character, the /// overflow penalty will quickly outgrow the cost of the gap, as /// seen above. pub overflow_penalty: usize, /// When should the a single word on the last line be considered /// "too short"? /// /// If the last line of the text consist of a single word and if /// this word is shorter than `1 / short_last_line_fraction` of /// the line width, then the final line will be considered "short" /// and `short_last_line_penalty` is added as an extra penalty. /// /// The effect of this is to avoid a final line consisting of a /// single small word. For example, with a /// `short_last_line_penalty` of 25 (the default), a gap of up to /// 5 columns will be seen as more desirable than having a final /// short line. /// /// ## Examples /// /// ``` /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm}; /// /// let text = "This is a demo of the short last line penalty."; /// /// // The first-fit algorithm leaves a single short word on the last line: /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)), /// vec!["This is a demo of the short last line", /// "penalty."]); /// /// #[cfg(feature = "smawk")] { /// let mut penalties = wrap_algorithms::Penalties::new(); /// /// // Since "penalty." is shorter than 25% of the line width, the /// // optimal-fit algorithm adds a penalty of 25. This is enough /// // to move "line " down: /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), /// vec!["This is a demo of the short last", /// "line penalty."]); /// /// // We can change the meaning of "short" lines. Here, only words /// // shorter than 1/10th of the line width will be considered short: /// penalties.short_last_line_fraction = 10; /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), /// vec!["This is a demo of the short last line", /// "penalty."]); /// /// // If desired, the penalty can also be disabled: /// penalties.short_last_line_fraction = 4; /// penalties.short_last_line_penalty = 0; /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))), /// vec!["This is a demo of the short last line", /// "penalty."]); /// } /// ``` pub short_last_line_fraction: usize, /// Penalty for a last line with a single short word. /// /// Set this to zero if you do not want to penalize short last lines. pub short_last_line_penalty: usize, /// Penalty for lines ending with a hyphen. pub hyphen_penalty: usize, } impl Penalties { /// Default penalties for monospace text. /// /// The penalties here work well for monospace text. This is /// because they expect the gaps at the end of lines to be roughly /// in the range `0..100`. If the gaps are larger, the /// `overflow_penalty` and `hyphen_penalty` become insignificant. pub const fn new() -> Self { Penalties { nline_penalty: 1000, overflow_penalty: 50 * 50, short_last_line_fraction: 4, short_last_line_penalty: 25, hyphen_penalty: 25, } } } impl Default for Penalties { fn default() -> Self { Self::new() } } /// Cache for line numbers. This is necessary to avoid a O(n**2) /// behavior when computing line numbers in [`wrap_optimal_fit`]. struct LineNumbers { line_numbers: RefCell>, } impl LineNumbers { fn new(size: usize) -> Self { let mut line_numbers = Vec::with_capacity(size); line_numbers.push(0); LineNumbers { line_numbers: RefCell::new(line_numbers), } } fn get(&self, i: usize, minima: &[(usize, T)]) -> usize { while self.line_numbers.borrow_mut().len() < i + 1 { let pos = self.line_numbers.borrow().len(); let line_number = 1 + self.get(minima[pos].0, minima); self.line_numbers.borrow_mut().push(line_number); } self.line_numbers.borrow()[i] } } /// Overflow error during the [`wrap_optimal_fit`] computation. #[derive(Debug, PartialEq, Eq)] pub struct OverflowError; impl std::fmt::Display for OverflowError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "wrap_optimal_fit cost computation overflowed") } } impl std::error::Error for OverflowError {} /// Wrap abstract fragments into lines with an optimal-fit algorithm. /// /// The `line_widths` slice gives the target line width for each line /// (the last slice element is repeated as necessary). This can be /// used to implement hanging indentation. /// /// The fragments must already have been split into the desired /// widths, this function will not (and cannot) attempt to split them /// further when arranging them into lines. /// /// # Optimal-Fit Algorithm /// /// The algorithm considers all possible break points and picks the /// breaks which minimizes the gaps at the end of each line. More /// precisely, the algorithm assigns a cost or penalty to each break /// point, determined by `cost = gap * gap` where `gap = target_width - /// line_width`. Shorter lines are thus penalized more heavily since /// they leave behind a larger gap. /// /// We can illustrate this with the text “To be, or not to be: that is /// the question”. We will be wrapping it in a narrow column with room /// for only 10 characters. The [greedy /// algorithm](super::wrap_first_fit) will produce these lines, each /// annotated with the corresponding penalty: /// /// ```text /// "To be, or" 1² = 1 /// "not to be:" 0² = 0 /// "that is" 3² = 9 /// "the" 7² = 49 /// "question" 2² = 4 /// ``` /// /// We see that line four with “the” leaves a gap of 7 columns, which /// gives it a penalty of 49. The sum of the penalties is 63. /// /// There are 10 words, which means that there are `2_u32.pow(9)` or /// 512 different ways to typeset it. We can compute /// the sum of the penalties for each possible line break and search /// for the one with the lowest sum: /// /// ```text /// "To be," 4² = 16 /// "or not to" 1² = 1 /// "be: that" 2² = 4 /// "is the" 4² = 16 /// "question" 2² = 4 /// ``` /// /// The sum of the penalties is 41, which is better than what the /// greedy algorithm produced. /// /// Searching through all possible combinations would normally be /// prohibitively slow. However, it turns out that the problem can be /// formulated as the task of finding column minima in a cost matrix. /// This matrix has a special form (totally monotone) which lets us /// use a [linear-time algorithm called /// SMAWK](https://lib.rs/crates/smawk) to find the optimal break /// points. /// /// This means that the time complexity remains O(_n_) where _n_ is /// the number of words. Compared to /// [`wrap_first_fit`](super::wrap_first_fit), this function is about /// 4 times slower. /// /// The optimization of per-line costs over the entire paragraph is /// inspired by the line breaking algorithm used in TeX, as described /// in the 1981 article [_Breaking Paragraphs into /// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf) /// by Knuth and Plass. The implementation here is based on [Python /// code by David /// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py). /// /// # Errors /// /// In case of an overflow during the cost computation, an `Err` is /// returned. Overflows happens when fragments or lines have infinite /// widths (`f64::INFINITY`) or if the widths are so large that the /// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()` /// (approximately 1e154): /// /// ``` /// use textwrap::core::Fragment; /// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties}; /// /// #[derive(Debug, PartialEq)] /// struct Word(f64); /// /// impl Fragment for Word { /// fn width(&self) -> f64 { self.0 } /// fn whitespace_width(&self) -> f64 { 1.0 } /// fn penalty_width(&self) -> f64 { 0.0 } /// } /// /// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is /// // larger than f64::MAX: /// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()), /// Err(OverflowError)); /// ``` /// /// When using fragment widths and line widths which fit inside an /// `u64`, overflows cannot happen. This means that fragments derived /// from a `&str` cannot cause overflows. /// /// **Note:** Only available when the `smawk` Cargo feature is /// enabled. pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( fragments: &'a [T], line_widths: &'b [f64], penalties: &'b Penalties, ) -> Result, OverflowError> { // The final line width is used for all remaining lines. let default_line_width = line_widths.last().copied().unwrap_or(0.0); let mut widths = Vec::with_capacity(fragments.len() + 1); let mut width = 0.0; widths.push(width); for fragment in fragments { width += fragment.width() + fragment.whitespace_width(); widths.push(width); } let line_numbers = LineNumbers::new(fragments.len()); let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| { // Line number for fragment `i`. let line_number = line_numbers.get(i, minima); let line_width = line_widths .get(line_number) .copied() .unwrap_or(default_line_width); let target_width = line_width.max(1.0); // Compute the width of a line spanning fragments[i..j] in // constant time. We need to adjust widths[j] by subtracting // the whitespace of fragment[j-1] and then add the penalty. let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width() + fragments[j - 1].penalty_width(); // We compute cost of the line containing fragments[i..j]. We // start with values[i].1, which is the optimal cost for // breaking before fragments[i]. // // First, every extra line cost NLINE_PENALTY. let mut cost = minima[i].1 + penalties.nline_penalty as f64; // Next, we add a penalty depending on the line length. if line_width > target_width { // Lines that overflow get a hefty penalty. let overflow = line_width - target_width; cost += overflow * penalties.overflow_penalty as f64; } else if j < fragments.len() { // Other lines (except for the last line) get a milder // penalty which depend on the size of the gap. let gap = target_width - line_width; cost += gap * gap; } else if i + 1 == j && line_width < target_width / penalties.short_last_line_fraction as f64 { // The last line can have any size gap, but we do add a // penalty if the line is very short (typically because it // contains just a single word). cost += penalties.short_last_line_penalty as f64; } // Finally, we discourage hyphens. if fragments[j - 1].penalty_width() > 0.0 { // TODO: this should use a penalty value from the fragment // instead. cost += penalties.hyphen_penalty as f64; } cost }); for (_, cost) in &minima { if cost.is_infinite() { return Err(OverflowError); } } let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima)); let mut pos = fragments.len(); loop { let prev = minima[pos].0; lines.push(&fragments[prev..pos]); pos = prev; if pos == 0 { break; } } lines.reverse(); Ok(lines) } #[cfg(test)] mod tests { use super::*; #[derive(Debug, PartialEq)] struct Word(f64); #[rustfmt::skip] impl Fragment for Word { fn width(&self) -> f64 { self.0 } fn whitespace_width(&self) -> f64 { 1.0 } fn penalty_width(&self) -> f64 { 0.0 } } #[test] fn wrap_fragments_with_infinite_widths() { let words = vec![Word(f64::INFINITY)]; assert_eq!( wrap_optimal_fit(&words, &[0.0], &Penalties::default()), Err(OverflowError) ); } #[test] fn wrap_fragments_with_huge_widths() { let words = vec![Word(1e200), Word(1e250), Word(1e300)]; assert_eq!( wrap_optimal_fit(&words, &[1e300], &Penalties::default()), Err(OverflowError) ); } #[test] fn wrap_fragments_with_large_widths() { // The gaps will be of the sizes between 1e25 and 1e75. This // makes the `gap * gap` cost fit comfortably in a f64. let words = vec![Word(1e25), Word(1e50), Word(1e75)]; assert_eq!( wrap_optimal_fit(&words, &[1e100], &Penalties::default()), Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]]) ); } }