aboutsummaryrefslogtreecommitdiff
path: root/vendor/textwrap/src/wrap_algorithms/optimal_fit.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/textwrap/src/wrap_algorithms/optimal_fit.rs')
-rw-r--r--vendor/textwrap/src/wrap_algorithms/optimal_fit.rs433
1 files changed, 0 insertions, 433 deletions
diff --git a/vendor/textwrap/src/wrap_algorithms/optimal_fit.rs b/vendor/textwrap/src/wrap_algorithms/optimal_fit.rs
deleted file mode 100644
index 0625e28..0000000
--- a/vendor/textwrap/src/wrap_algorithms/optimal_fit.rs
+++ /dev/null
@@ -1,433 +0,0 @@
-use std::cell::RefCell;
-
-use crate::core::Fragment;
-
-/// Penalties for
-/// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
-/// and [`wrap_optimal_fit`].
-///
-/// This wrapping algorithm in [`wrap_optimal_fit`] considers the
-/// entire paragraph to find optimal line breaks. When wrapping text,
-/// "penalties" are assigned to line breaks based on the gaps left at
-/// the end of lines. The penalties are given by this struct, with
-/// [`Penalties::default`] assigning penalties that work well for
-/// monospace text.
-///
-/// If you are wrapping proportional text, you are advised to assign
-/// your own penalties according to your font size. See the individual
-/// penalties below for details.
-///
-/// **Note:** Only available when the `smawk` Cargo feature is
-/// enabled.
-#[derive(Clone, Copy, Debug)]
-pub struct Penalties {
- /// Per-line penalty. This is added for every line, which makes it
- /// expensive to output more lines than the minimum required.
- pub nline_penalty: usize,
-
- /// Per-character cost for lines that overflow the target line width.
- ///
- /// With a default value of 50², every single character costs as
- /// much as leaving a gap of 50 characters behind. This is because
- /// we assign as cost of `gap * gap` to a short line. When
- /// wrapping monospace text, we can overflow the line by 1
- /// character in extreme cases:
- ///
- /// ```
- /// use textwrap::core::Word;
- /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
- ///
- /// let short = "foo ";
- /// let long = "x".repeat(50);
- /// let length = (short.len() + long.len()) as f64;
- /// let fragments = vec![Word::from(short), Word::from(&long)];
- /// let penalties = Penalties::new();
- ///
- /// // Perfect fit, both words are on a single line with no overflow.
- /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
- /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
- ///
- /// // The words no longer fit, yet we get a single line back. While
- /// // the cost of overflow (`1 * 2500`) is the same as the cost of the
- /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
- /// // which makes it cheaper to overflow than to use two lines.
- /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
- /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
- ///
- /// // The cost of overflow would be 2 * 2500, whereas the cost of
- /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
- /// // 3401`. We therefore get two lines.
- /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
- /// assert_eq!(wrapped, vec![&[Word::from(short)],
- /// &[Word::from(&long)]]);
- /// ```
- ///
- /// This only happens if the overflowing word is 50 characters
- /// long _and_ if the word overflows the line by exactly one
- /// character. If it overflows by more than one character, the
- /// overflow penalty will quickly outgrow the cost of the gap, as
- /// seen above.
- pub overflow_penalty: usize,
-
- /// When should the a single word on the last line be considered
- /// "too short"?
- ///
- /// If the last line of the text consist of a single word and if
- /// this word is shorter than `1 / short_last_line_fraction` of
- /// the line width, then the final line will be considered "short"
- /// and `short_last_line_penalty` is added as an extra penalty.
- ///
- /// The effect of this is to avoid a final line consisting of a
- /// single small word. For example, with a
- /// `short_last_line_penalty` of 25 (the default), a gap of up to
- /// 5 columns will be seen as more desirable than having a final
- /// short line.
- ///
- /// ## Examples
- ///
- /// ```
- /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
- ///
- /// let text = "This is a demo of the short last line penalty.";
- ///
- /// // The first-fit algorithm leaves a single short word on the last line:
- /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
- /// vec!["This is a demo of the short last line",
- /// "penalty."]);
- ///
- /// #[cfg(feature = "smawk")] {
- /// let mut penalties = wrap_algorithms::Penalties::new();
- ///
- /// // Since "penalty." is shorter than 25% of the line width, the
- /// // optimal-fit algorithm adds a penalty of 25. This is enough
- /// // to move "line " down:
- /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
- /// vec!["This is a demo of the short last",
- /// "line penalty."]);
- ///
- /// // We can change the meaning of "short" lines. Here, only words
- /// // shorter than 1/10th of the line width will be considered short:
- /// penalties.short_last_line_fraction = 10;
- /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
- /// vec!["This is a demo of the short last line",
- /// "penalty."]);
- ///
- /// // If desired, the penalty can also be disabled:
- /// penalties.short_last_line_fraction = 4;
- /// penalties.short_last_line_penalty = 0;
- /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
- /// vec!["This is a demo of the short last line",
- /// "penalty."]);
- /// }
- /// ```
- pub short_last_line_fraction: usize,
-
- /// Penalty for a last line with a single short word.
- ///
- /// Set this to zero if you do not want to penalize short last lines.
- pub short_last_line_penalty: usize,
-
- /// Penalty for lines ending with a hyphen.
- pub hyphen_penalty: usize,
-}
-
-impl Penalties {
- /// Default penalties for monospace text.
- ///
- /// The penalties here work well for monospace text. This is
- /// because they expect the gaps at the end of lines to be roughly
- /// in the range `0..100`. If the gaps are larger, the
- /// `overflow_penalty` and `hyphen_penalty` become insignificant.
- pub const fn new() -> Self {
- Penalties {
- nline_penalty: 1000,
- overflow_penalty: 50 * 50,
- short_last_line_fraction: 4,
- short_last_line_penalty: 25,
- hyphen_penalty: 25,
- }
- }
-}
-
-impl Default for Penalties {
- fn default() -> Self {
- Self::new()
- }
-}
-
-/// Cache for line numbers. This is necessary to avoid a O(n**2)
-/// behavior when computing line numbers in [`wrap_optimal_fit`].
-struct LineNumbers {
- line_numbers: RefCell<Vec<usize>>,
-}
-
-impl LineNumbers {
- fn new(size: usize) -> Self {
- let mut line_numbers = Vec::with_capacity(size);
- line_numbers.push(0);
- LineNumbers {
- line_numbers: RefCell::new(line_numbers),
- }
- }
-
- fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
- while self.line_numbers.borrow_mut().len() < i + 1 {
- let pos = self.line_numbers.borrow().len();
- let line_number = 1 + self.get(minima[pos].0, minima);
- self.line_numbers.borrow_mut().push(line_number);
- }
-
- self.line_numbers.borrow()[i]
- }
-}
-
-/// Overflow error during the [`wrap_optimal_fit`] computation.
-#[derive(Debug, PartialEq, Eq)]
-pub struct OverflowError;
-
-impl std::fmt::Display for OverflowError {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- write!(f, "wrap_optimal_fit cost computation overflowed")
- }
-}
-
-impl std::error::Error for OverflowError {}
-
-/// Wrap abstract fragments into lines with an optimal-fit algorithm.
-///
-/// The `line_widths` slice gives the target line width for each line
-/// (the last slice element is repeated as necessary). This can be
-/// used to implement hanging indentation.
-///
-/// The fragments must already have been split into the desired
-/// widths, this function will not (and cannot) attempt to split them
-/// further when arranging them into lines.
-///
-/// # Optimal-Fit Algorithm
-///
-/// The algorithm considers all possible break points and picks the
-/// breaks which minimizes the gaps at the end of each line. More
-/// precisely, the algorithm assigns a cost or penalty to each break
-/// point, determined by `cost = gap * gap` where `gap = target_width -
-/// line_width`. Shorter lines are thus penalized more heavily since
-/// they leave behind a larger gap.
-///
-/// We can illustrate this with the text “To be, or not to be: that is
-/// the question”. We will be wrapping it in a narrow column with room
-/// for only 10 characters. The [greedy
-/// algorithm](super::wrap_first_fit) will produce these lines, each
-/// annotated with the corresponding penalty:
-///
-/// ```text
-/// "To be, or" 1² = 1
-/// "not to be:" 0² = 0
-/// "that is" 3² = 9
-/// "the" 7² = 49
-/// "question" 2² = 4
-/// ```
-///
-/// We see that line four with “the” leaves a gap of 7 columns, which
-/// gives it a penalty of 49. The sum of the penalties is 63.
-///
-/// There are 10 words, which means that there are `2_u32.pow(9)` or
-/// 512 different ways to typeset it. We can compute
-/// the sum of the penalties for each possible line break and search
-/// for the one with the lowest sum:
-///
-/// ```text
-/// "To be," 4² = 16
-/// "or not to" 1² = 1
-/// "be: that" 2² = 4
-/// "is the" 4² = 16
-/// "question" 2² = 4
-/// ```
-///
-/// The sum of the penalties is 41, which is better than what the
-/// greedy algorithm produced.
-///
-/// Searching through all possible combinations would normally be
-/// prohibitively slow. However, it turns out that the problem can be
-/// formulated as the task of finding column minima in a cost matrix.
-/// This matrix has a special form (totally monotone) which lets us
-/// use a [linear-time algorithm called
-/// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
-/// points.
-///
-/// This means that the time complexity remains O(_n_) where _n_ is
-/// the number of words. Compared to
-/// [`wrap_first_fit`](super::wrap_first_fit), this function is about
-/// 4 times slower.
-///
-/// The optimization of per-line costs over the entire paragraph is
-/// inspired by the line breaking algorithm used in TeX, as described
-/// in the 1981 article [_Breaking Paragraphs into
-/// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)
-/// by Knuth and Plass. The implementation here is based on [Python
-/// code by David
-/// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
-///
-/// # Errors
-///
-/// In case of an overflow during the cost computation, an `Err` is
-/// returned. Overflows happens when fragments or lines have infinite
-/// widths (`f64::INFINITY`) or if the widths are so large that the
-/// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
-/// (approximately 1e154):
-///
-/// ```
-/// use textwrap::core::Fragment;
-/// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
-///
-/// #[derive(Debug, PartialEq)]
-/// struct Word(f64);
-///
-/// impl Fragment for Word {
-/// fn width(&self) -> f64 { self.0 }
-/// fn whitespace_width(&self) -> f64 { 1.0 }
-/// fn penalty_width(&self) -> f64 { 0.0 }
-/// }
-///
-/// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
-/// // larger than f64::MAX:
-/// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
-/// Err(OverflowError));
-/// ```
-///
-/// When using fragment widths and line widths which fit inside an
-/// `u64`, overflows cannot happen. This means that fragments derived
-/// from a `&str` cannot cause overflows.
-///
-/// **Note:** Only available when the `smawk` Cargo feature is
-/// enabled.
-pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
- fragments: &'a [T],
- line_widths: &'b [f64],
- penalties: &'b Penalties,
-) -> Result<Vec<&'a [T]>, OverflowError> {
- // The final line width is used for all remaining lines.
- let default_line_width = line_widths.last().copied().unwrap_or(0.0);
- let mut widths = Vec::with_capacity(fragments.len() + 1);
- let mut width = 0.0;
- widths.push(width);
- for fragment in fragments {
- width += fragment.width() + fragment.whitespace_width();
- widths.push(width);
- }
-
- let line_numbers = LineNumbers::new(fragments.len());
-
- let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| {
- // Line number for fragment `i`.
- let line_number = line_numbers.get(i, minima);
- let line_width = line_widths
- .get(line_number)
- .copied()
- .unwrap_or(default_line_width);
- let target_width = line_width.max(1.0);
-
- // Compute the width of a line spanning fragments[i..j] in
- // constant time. We need to adjust widths[j] by subtracting
- // the whitespace of fragment[j-1] and then add the penalty.
- let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
- + fragments[j - 1].penalty_width();
-
- // We compute cost of the line containing fragments[i..j]. We
- // start with values[i].1, which is the optimal cost for
- // breaking before fragments[i].
- //
- // First, every extra line cost NLINE_PENALTY.
- let mut cost = minima[i].1 + penalties.nline_penalty as f64;
-
- // Next, we add a penalty depending on the line length.
- if line_width > target_width {
- // Lines that overflow get a hefty penalty.
- let overflow = line_width - target_width;
- cost += overflow * penalties.overflow_penalty as f64;
- } else if j < fragments.len() {
- // Other lines (except for the last line) get a milder
- // penalty which depend on the size of the gap.
- let gap = target_width - line_width;
- cost += gap * gap;
- } else if i + 1 == j
- && line_width < target_width / penalties.short_last_line_fraction as f64
- {
- // The last line can have any size gap, but we do add a
- // penalty if the line is very short (typically because it
- // contains just a single word).
- cost += penalties.short_last_line_penalty as f64;
- }
-
- // Finally, we discourage hyphens.
- if fragments[j - 1].penalty_width() > 0.0 {
- // TODO: this should use a penalty value from the fragment
- // instead.
- cost += penalties.hyphen_penalty as f64;
- }
-
- cost
- });
-
- for (_, cost) in &minima {
- if cost.is_infinite() {
- return Err(OverflowError);
- }
- }
-
- let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
- let mut pos = fragments.len();
- loop {
- let prev = minima[pos].0;
- lines.push(&fragments[prev..pos]);
- pos = prev;
- if pos == 0 {
- break;
- }
- }
-
- lines.reverse();
- Ok(lines)
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[derive(Debug, PartialEq)]
- struct Word(f64);
-
- #[rustfmt::skip]
- impl Fragment for Word {
- fn width(&self) -> f64 { self.0 }
- fn whitespace_width(&self) -> f64 { 1.0 }
- fn penalty_width(&self) -> f64 { 0.0 }
- }
-
- #[test]
- fn wrap_fragments_with_infinite_widths() {
- let words = vec![Word(f64::INFINITY)];
- assert_eq!(
- wrap_optimal_fit(&words, &[0.0], &Penalties::default()),
- Err(OverflowError)
- );
- }
-
- #[test]
- fn wrap_fragments_with_huge_widths() {
- let words = vec![Word(1e200), Word(1e250), Word(1e300)];
- assert_eq!(
- wrap_optimal_fit(&words, &[1e300], &Penalties::default()),
- Err(OverflowError)
- );
- }
-
- #[test]
- fn wrap_fragments_with_large_widths() {
- // The gaps will be of the sizes between 1e25 and 1e75. This
- // makes the `gap * gap` cost fit comfortably in a f64.
- let words = vec![Word(1e25), Word(1e50), Word(1e75)];
- assert_eq!(
- wrap_optimal_fit(&words, &[1e100], &Penalties::default()),
- Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]])
- );
- }
-}