Optimize optimized paragraph layout (#4483)

2025-07-13 15:42:53 +08:00 · 2024-07-01 15:04:58 +02:00 · 2024-07-01 15:04:58 +02:00 · 6d835ecb92
commit 6d835ecb92
parent 45366c0112
3 changed files with 591 additions and 153 deletions
--- a/crates/typst/src/layout/inline/line.rs
+++ b/crates/typst/src/layout/inline/line.rs
@ -3,7 +3,7 @@ use unicode_bidi::BidiInfo;
 use super::*;
 use crate::engine::Engine;
 use crate::layout::{Abs, Em, Fr, Frame, FrameItem, Point};
-use crate::text::TextElem;
+use crate::text::{Lang, TextElem};
 use crate::utils::Numeric;
 /// A layouted line, consisting of a sequence of layouted paragraph items that
@ -99,6 +99,15 @@ impl<'a> Line<'a> {
        self.items().filter_map(Item::text).map(|s| s.shrinkability()).sum()
    }
    /// Whether the line has items with negative width.
    pub fn has_negative_width_items(&self) -> bool {
        self.items().any(|item| match item {
            Item::Absolute(amount, _) => *amount < Abs::zero(),
            Item::Frame(frame, _) => frame.width() < Abs::zero(),
            _ => false,
        })
    }
    /// The sum of fractions in the line.
    pub fn fr(&self) -> Fr {
        self.items()
@ -129,7 +138,7 @@ pub fn line<'a>(
    p: &'a Preparation,
    mut range: Range,
    breakpoint: Breakpoint,
-    prepend_hyphen: bool,
+    pred: Option<&Line>,
 ) -> Line<'a> {
    let end = range.end;
    let mut justify =
@ -149,6 +158,8 @@ pub fn line<'a>(
        };
    }
    let prepend_hyphen = pred.map_or(false, should_insert_hyphen);
    // Slice out the relevant items.
    let (mut expanded, mut inner) = p.slice(range.clone());
    let mut width = Abs::zero();
@ -528,6 +539,54 @@ fn reorder<'a>(line: &'a Line<'a>) -> (Vec<&Item<'a>>, bool) {
    (reordered, starts_rtl)
 }
 /// Whether a hyphen should be inserted at the start of the next line.
 fn should_insert_hyphen(pred_line: &Line) -> bool {
    // If the predecessor line does not end with a Dash::HardHyphen, we shall
    // not place a hyphen at the start of the next line.
    if pred_line.dash != Some(Dash::HardHyphen) {
        return false;
    }
    // If there's a trimmed out space, we needn't repeat the hyphen. That's the
    // case of a text like "...kebab é a -melhor- comida que existe", where the
    // hyphens are a kind of emphasis marker.
    if pred_line.trimmed.end != pred_line.end {
        return false;
    }
    // The hyphen should repeat only in the languages that require that feature.
    // For more information see the discussion at https://github.com/typst/typst/issues/3235
    let Some(Item::Text(shape)) = pred_line.last.as_ref() else { return false };
    match shape.lang {
        // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
        // - Czech: see https://prirucka.ujc.cas.cz/?id=164
        // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
        // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
        // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
        // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
        Lang::LOWER_SORBIAN
        | Lang::CZECH
        | Lang::CROATIAN
        | Lang::POLISH
        | Lang::PORTUGUESE
        | Lang::SLOVAK => true,
        // In Spanish the hyphen is required only if the word next to hyphen is
        // not capitalized. Otherwise, the hyphen must not be repeated.
        //
        // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
        // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
        Lang::SPANISH => pred_line.bidi.text[pred_line.end..]
            .chars()
            .next()
            .map(|c| !c.is_uppercase())
            .unwrap_or(false),
        _ => false,
    }
 }
 /// How much a character should hang into the end margin.
 ///
 /// For more discussion, see:
--- a/crates/typst/src/layout/inline/linebreak.rs
+++ b/crates/typst/src/layout/inline/linebreak.rs
@ -1,3 +1,5 @@
 use std::ops::{Add, Sub};
 use icu_properties::maps::CodePointMapData;
 use icu_properties::LineBreak;
 use icu_provider::AsDeserializingBufferProvider;
@ -8,11 +10,23 @@ use once_cell::sync::Lazy;
 use super::*;
 use crate::engine::Engine;
-use crate::layout::Abs;
+use crate::layout::{Abs, Em};
 use crate::model::Linebreaks;
 use crate::syntax::link_prefix;
 use crate::text::{Lang, TextElem};
 /// The cost of a line or paragraph layout.
 type Cost = f64;
 // Cost parameters.
 const DEFAULT_HYPH_COST: Cost = 0.5;
 const DEFAULT_RUNT_COST: Cost = 0.5;
 const CONSECUTIVE_DASH_COST: Cost = 0.3;
 const MAX_COST: Cost = 1_000_000.0;
 const MIN_RATIO: f64 = -1.0;
 const MIN_APPROX_RATIO: f64 = -0.5;
 const BOUND_EPS: f64 = 1e-3;
 /// The general line break segmenter.
 static SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
    let provider =
@ -84,10 +98,8 @@ fn linebreak_simple<'a>(
    let mut last = None;
    breakpoints(p, |end, breakpoint| {
        let prepend_hyphen = lines.last().map(should_repeat_hyphen).unwrap_or(false);
        // Compute the line and its size.
-        let mut attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
+        let mut attempt = line(engine, p, start..end, breakpoint, lines.last());
        // If the line doesn't fit anymore, we push the last fitting attempt
        // into the stack and rebuild the line from the attempt's end. The
@ -96,7 +108,7 @@ fn linebreak_simple<'a>(
            if let Some((last_attempt, last_end)) = last.take() {
                lines.push(last_attempt);
                start = last_end;
-                attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
+                attempt = line(engine, p, start..end, breakpoint, lines.last());
            }
        }
@ -142,144 +154,142 @@ fn linebreak_optimized<'a>(
    p: &'a Preparation<'a>,
    width: Abs,
 ) -> Vec<Line<'a>> {
-    /// The cost of a line or paragraph layout.
+    let metrics = CostMetrics::compute(p);
    type Cost = f64;
-    /// An entry in the dynamic programming table.
+    // Determines the exact costs of a likely good layout through Knuth-Plass
    // with approximate metrics. We can use this cost as an upper bound to prune
    // the search space in our proper optimization pass below.
    let upper_bound = linebreak_optimized_approximate(engine, p, width, &metrics);
    // Using the upper bound, perform exact optimized linebreaking.
    linebreak_optimized_bounded(engine, p, width, &metrics, upper_bound)
 }
 /// Performs line breaking in optimized Knuth-Plass style, but with an upper
 /// bound on the cost. This allows us to skip many parts of the search space.
 #[typst_macros::time]
 fn linebreak_optimized_bounded<'a>(
    engine: &Engine,
    p: &'a Preparation<'a>,
    width: Abs,
    metrics: &CostMetrics,
    upper_bound: Cost,
 ) -> Vec<Line<'a>> {
    /// An entry in the dynamic programming table for paragraph optimization.
    struct Entry<'a> {
        pred: usize,
        total: Cost,
        line: Line<'a>,
    }
    // Cost parameters.
    const DEFAULT_HYPH_COST: Cost = 0.5;
    const DEFAULT_RUNT_COST: Cost = 0.5;
    const CONSECUTIVE_DASH_COST: Cost = 0.3;
    const MAX_COST: Cost = 1_000_000.0;
    const MIN_RATIO: f64 = -1.0;
    let hyph_cost = DEFAULT_HYPH_COST * p.costs.hyphenation().get();
    let runt_cost = DEFAULT_RUNT_COST * p.costs.runt().get();
    // Dynamic programming table.
    let mut active = 0;
    let mut table = vec![Entry {
        pred: 0,
        total: 0.0,
-        line: line(engine, p, 0..0, Breakpoint::Mandatory, false),
+        line: line(engine, p, 0..0, Breakpoint::Mandatory, None),
    }];
-    let em = p.size;
+    let mut active = 0;
-    let mut lines = Vec::with_capacity(16);
+    let mut prev_end = 0;
    breakpoints(p, |end, breakpoint| {
-        let k = table.len();
+        // Find the optimal predecessor.
        let is_end = end == p.bidi.text.len();
        let mut best: Option<Entry> = None;
-        // Find the optimal predecessor.
+        // A lower bound for the cost of all following line attempts.
-        for (i, pred) in table.iter().enumerate().skip(active) {
+        let mut line_lower_bound = None;
-            // Layout the line.
+
        for (pred_index, pred) in table.iter().enumerate().skip(active) {
            let start = pred.line.end;
-            let prepend_hyphen = should_repeat_hyphen(&pred.line);
+            let unbreakable = prev_end == start;
-            let attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
+            // If the minimum cost we've established for the line is already
-
+            // too much, skip this attempt.
-            // Determine how much the line's spaces would need to be stretched
+            if line_lower_bound
-            // to make it the desired width.
+                .is_some_and(|lower| pred.total + lower > upper_bound + BOUND_EPS)
-            let delta = width - attempt.width;
+            {
-            // Determine how much stretch are permitted.
+                continue;
            let adjust = if delta >= Abs::zero() {
                attempt.stretchability()
            } else {
                attempt.shrinkability()
            };
            // Ideally, the ratio should between -1.0 and 1.0, but sometimes a
            // value above 1.0 is possible, in which case the line is underfull.
            let mut ratio = delta / adjust;
            if ratio.is_nan() {
                // The line is not stretchable, but it just fits. This often
                // happens with monospace fonts and CJK texts.
                ratio = 0.0;
            }
            if ratio > 1.0 {
                // We should stretch the line above its stretchability. Now
                // calculate the extra amount. Also, don't divide by zero.
                let extra_stretch =
                    (delta - adjust) / attempt.justifiables().max(1) as f64;
                // Normalize the amount by half Em size.
                ratio = 1.0 + extra_stretch / (em / 2.0);
            }
-            // Determine the cost of the line.
+            // Build the line.
-            let min_ratio = if p.justify { MIN_RATIO } else { 0.0 };
+            let attempt = line(engine, p, start..end, breakpoint, Some(&pred.line));
            let mut cost = if ratio < min_ratio {
                // The line is overfull. This is the case if
                // - justification is on, but we'd need to shrink too much
                // - justification is off and the line just doesn't fit
                //
                // If this is the earliest breakpoint in the active set
                // (active == i), remove it from the active set. If there is an
                // earlier one (active < i), then the logically shorter line was
                // in fact longer (can happen with negative spacing) and we
                // can't trim the active set just yet.
                if active == i {
                    active += 1;
                }
                MAX_COST
            } else if breakpoint == Breakpoint::Mandatory || is_end {
                // This is a mandatory break and the line is not overfull, so
                // all breakpoints before this one become inactive since no line
                // can span above the mandatory break.
                active = k;
                // - If ratio > 0, we need to stretch the line only when justify
                //   is needed.
                // - If ratio < 0, we always need to shrink the line.
                if (ratio > 0.0 && attempt.justify) || ratio < 0.0 {
                    ratio.powi(3).abs()
                } else {
                    0.0
                }
            } else {
                // Normal line with cost of |ratio^3|.
                ratio.powi(3).abs()
            };
-            // Penalize runts.
+            // Determine the cost of the line and its stretch ratio.
-            if k == i + 1 && is_end {
+            let (line_ratio, line_cost) = ratio_and_cost(
-                cost += runt_cost;
+                p,
-            }
+                metrics,
                width,
                &pred.line,
                &attempt,
                breakpoint,
                unbreakable,
            );
-            // Penalize hyphens.
+            // If the line is overfull, we adjust the set of active candidate
-            if breakpoint == Breakpoint::Hyphen {
+            // line starts. This is the case if
-                cost += hyph_cost;
+            // - justification is on, but we'd need to shrink too much
-            }
+            // - justification is off and the line just doesn't fit
-
+            //
-            // In Knuth paper, cost = (1 + 100|r|^3 + p)^2 + a,
+            // If this is the earliest breakpoint in the active set
-            // where r is the ratio, p=50 is the penalty, and a=3000 is
+            // (active == i), remove it from the active set. If there is an
-            // consecutive the penalty. We divide the whole formula by 10,
+            // earlier one (active < i), then the logically shorter line was
-            // resulting (0.01 + |r|^3 + p)^2 + a, where p=0.5 and a=0.3
+            // in fact longer (can happen with negative spacing) and we
-            cost = (0.01 + cost).powi(2);
+            // can't trim the active set just yet.
-
+            if line_ratio < metrics.min_ratio && active == pred_index {
-            // Penalize two consecutive dashes (not necessarily hyphens) extra.
+                active += 1;
            if attempt.dash.is_some() && pred.line.dash.is_some() {
                cost += CONSECUTIVE_DASH_COST;
            }
            // The total cost of this line and its chain of predecessors.
-            let total = pred.total + cost;
+            let total = pred.total + line_cost;
            // If the line is already underfull (`line_ratio > 0`), any shorter
            // slice of the line will be even more underfull. So it'll only get
            // worse from here and further attempts would also have a cost
            // exceeding `bound`. There is one exception: When the line has
            // negative spacing, we can't know for sure, so we don't assign the
            // lower bound in that case.
            if line_ratio > 0.0
                && line_lower_bound.is_none()
                && !attempt.has_negative_width_items()
            {
                line_lower_bound = Some(line_cost);
            }
            // If the cost already exceeds the upper bound, we don't need to
            // integrate this result into the table.
            if total > upper_bound + BOUND_EPS {
                continue;
            }
            // If this attempt is better than what we had before, take it!
            if best.as_ref().map_or(true, |best| best.total >= total) {
-                best = Some(Entry { pred: i, total, line: attempt });
+                best = Some(Entry { pred: pred_index, total, line: attempt });
            }
        }
-        table.push(best.unwrap());
+        // If this is a mandatory break, all breakpoints before this one become
        // inactive since no line can span over the mandatory break.
        if breakpoint == Breakpoint::Mandatory {
            active = table.len();
        }
        table.extend(best);
        prev_end = end;
    });
    // Retrace the best path.
    let mut lines = Vec::with_capacity(16);
    let mut idx = table.len() - 1;
    // This should only happen if our bound was faulty. Which shouldn't happen!
    if table[idx].line.end != p.bidi.text.len() {
        #[cfg(debug_assertions)]
        panic!("bounded paragraph layout is incomplete");
        #[cfg(not(debug_assertions))]
        return linebreak_optimized_bounded(engine, p, width, metrics, Cost::INFINITY);
    }
    while idx != 0 {
        table.truncate(idx + 1);
        let entry = table.pop().unwrap();
@ -291,6 +301,282 @@ fn linebreak_optimized<'a>(
    lines
 }
 /// Runs the normal Knuth-Plass algorithm, but instead of building proper lines
 /// (which is costly) to determine costs, it determines approximate costs using
 /// cummulative arrays.
 ///
 /// This results in a likely good paragraph layouts, for which we then compute
 /// the exact cost. This cost is an upper bound for proper optimized
 /// linebreaking. We can use it to heavily prune the search space.
 #[typst_macros::time]
 fn linebreak_optimized_approximate(
    engine: &Engine,
    p: &Preparation,
    width: Abs,
    metrics: &CostMetrics,
 ) -> Cost {
    // Determine the cummulative estimation metrics.
    let estimates = Estimates::compute(p);
    /// An entry in the dynamic programming table for paragraph optimization.
    struct Entry {
        pred: usize,
        total: Cost,
        end: usize,
        unbreakable: bool,
        breakpoint: Breakpoint,
    }
    // Dynamic programming table.
    let mut table = vec![Entry {
        pred: 0,
        total: 0.0,
        end: 0,
        unbreakable: false,
        breakpoint: Breakpoint::Mandatory,
    }];
    let mut active = 0;
    let mut prev_end = 0;
    breakpoints(p, |end, breakpoint| {
        let at_end = end == p.bidi.text.len();
        // Find the optimal predecessor.
        let mut best: Option<Entry> = None;
        for (pred_index, pred) in table.iter().enumerate().skip(active) {
            let start = pred.end;
            let unbreakable = prev_end == start;
            // Whether the line is justified. This is not 100% accurate w.r.t
            // to line()'s behaviour, but good enough.
            let justify = p.justify && !at_end && breakpoint != Breakpoint::Mandatory;
            // We don't really know whether the line naturally ends with a dash
            // here, so we can miss that case, but it's ok, since all of this
            // just an estimate.
            let consecutive_dash =
                pred.breakpoint == Breakpoint::Hyphen && breakpoint == Breakpoint::Hyphen;
            // Estimate how much the line's spaces would need to be stretched to
            // make it the desired width. We trim at the end to not take into
            // account trailing spaces. This is, again, only an approximation of
            // the real behaviour of `line`.
            let trimmed_end = start + p.bidi.text[start..end].trim_end().len();
            let line_ratio = raw_ratio(
                p,
                width,
                estimates.widths.estimate(start..trimmed_end)
                    + if breakpoint == Breakpoint::Hyphen {
                        metrics.approx_hyphen_width
                    } else {
                        Abs::zero()
                    },
                estimates.stretchability.estimate(start..trimmed_end),
                estimates.shrinkability.estimate(start..trimmed_end),
                estimates.justifiables.estimate(start..trimmed_end),
            );
            // Determine the line's cost.
            let line_cost = raw_cost(
                metrics,
                breakpoint,
                line_ratio,
                at_end,
                justify,
                unbreakable,
                consecutive_dash,
                true,
            );
            // Adjust the set of active breakpoints.
            // See `linebreak_optimized` for details.
            if line_ratio < metrics.min_ratio && active == pred_index {
                active += 1;
            }
            // The total cost of this line and its chain of predecessors.
            let total = pred.total + line_cost;
            // If this attempt is better than what we had before, take it!
            if best.as_ref().map_or(true, |best| best.total >= total) {
                best = Some(Entry {
                    pred: pred_index,
                    total,
                    end,
                    unbreakable,
                    breakpoint,
                });
            }
        }
        // If this is a mandatory break, all breakpoints before this one become
        // inactive.
        if breakpoint == Breakpoint::Mandatory {
            active = table.len();
        }
        table.extend(best);
        prev_end = end;
    });
    // Retrace the best path.
    let mut indices = Vec::with_capacity(16);
    let mut idx = table.len() - 1;
    while idx != 0 {
        indices.push(idx);
        idx = table[idx].pred;
    }
    let mut exact = 0.0;
    let mut pred = line(engine, p, 0..0, Breakpoint::Mandatory, None);
    // The cost that we optimized was only an approximate cost, so the layout we
    // got here is only likely to be good, not guaranteed to be the best. We now
    // computes its exact cost as that gives us a sound upper bound for the
    // proper optimization pass.
    for idx in indices.into_iter().rev() {
        let Entry { end, breakpoint, unbreakable, .. } = table[idx];
        let start = pred.end;
        let attempt = line(engine, p, start..end, breakpoint, Some(&pred));
        let (_, line_cost) =
            ratio_and_cost(p, metrics, width, &pred, &attempt, breakpoint, unbreakable);
        exact += line_cost;
        pred = attempt;
    }
    exact
 }
 /// Compute the stretch ratio and cost of a line.
 fn ratio_and_cost(
    p: &Preparation,
    metrics: &CostMetrics,
    available_width: Abs,
    pred: &Line,
    attempt: &Line,
    breakpoint: Breakpoint,
    unbreakable: bool,
 ) -> (f64, Cost) {
    let ratio = raw_ratio(
        p,
        available_width,
        attempt.width,
        attempt.stretchability(),
        attempt.shrinkability(),
        attempt.justifiables(),
    );
    let cost = raw_cost(
        metrics,
        breakpoint,
        ratio,
        attempt.end == p.bidi.text.len(),
        attempt.justify,
        unbreakable,
        pred.dash.is_some() && attempt.dash.is_some(),
        false,
    );
    (ratio, cost)
 }
 /// Determine the stretch ratio for a line given raw metrics.
 fn raw_ratio(
    p: &Preparation,
    available_width: Abs,
    line_width: Abs,
    stretchability: Abs,
    shrinkability: Abs,
    justifiables: usize,
 ) -> f64 {
    // Determine how much the line's spaces would need to be stretched
    // to make it the desired width.
    let delta = available_width - line_width;
    // Determine how much stretch is permitted.
    let adjust = if delta >= Abs::zero() { stretchability } else { shrinkability };
    // Ideally, the ratio should between -1.0 and 1.0.
    //
    // A ratio above 1.0 is possible for an underfull line, but a ratio below
    // -1.0 is forbidden because the line would overflow.
    let mut ratio = delta / adjust;
    // The line is not stretchable, but it just fits. This often happens with
    // monospace fonts and CJK texts.
    if ratio.is_nan() {
        ratio = 0.0;
    }
    if ratio > 1.0 {
        // We should stretch the line above its stretchability. Now
        // calculate the extra amount. Also, don't divide by zero.
        let extra_stretch = (delta - adjust) / justifiables.max(1) as f64;
        // Normalize the amount by half the em size.
        ratio = 1.0 + extra_stretch / (p.size / 2.0);
    }
    ratio
 }
 /// Compute the cost of a line given raw metrics.
 #[allow(clippy::too_many_arguments)]
 fn raw_cost(
    metrics: &CostMetrics,
    breakpoint: Breakpoint,
    ratio: f64,
    at_end: bool,
    justify: bool,
    unbreakable: bool,
    consecutive_dash: bool,
    approx: bool,
 ) -> Cost {
    // Determine the cost of the line.
    let mut cost = if ratio < metrics.min_ratio(approx) {
        // Overfull line always has maximum cost.
        MAX_COST
    } else if breakpoint == Breakpoint::Mandatory || at_end {
        // - If ratio < 0, we always need to shrink the line (even the last one).
        // - If ratio > 0, we need to stretch the line only when it is justified
        //   (last line is not justified by default even if `p.justify` is true).
        if ratio < 0.0 || (ratio > 0.0 && justify) {
            ratio.powi(3).abs()
        } else {
            0.0
        }
    } else {
        // Normal line with cost of |ratio^3|.
        ratio.powi(3).abs()
    };
    // Penalize runts (lone words in the last line).
    if unbreakable && at_end {
        cost += metrics.runt_cost;
    }
    // Penalize hyphenation.
    if breakpoint == Breakpoint::Hyphen {
        cost += metrics.hyph_cost;
    }
    // In the Knuth paper, cost = (1 + 100|r|^3 + p)^2 + a,
    // where r is the ratio, p=50 is the penalty, and a=3000 is
    // consecutive the penalty. We divide the whole formula by 10,
    // resulting (0.01 + |r|^3 + p)^2 + a, where p=0.5 and a=0.3
    let mut cost = (0.01 + cost).powi(2);
    // Penalize two consecutive dashes (not necessarily hyphens) extra.
    if consecutive_dash {
        cost += CONSECUTIVE_DASH_COST;
    }
    cost
 }
 /// Calls `f` for all possible points in the text where lines can broken.
 ///
 /// Yields for each breakpoint the text index, whether the break is mandatory
@ -433,7 +719,7 @@ fn linebreak_link(link: &str, mut f: impl FnMut(usize)) {
        // - other -> other
        // - alphabetic -> numeric
        // - numeric -> alphabetic
-        // Never before after opening delimiters.
+        // Never before/after opening delimiters.
        if end > 0
            && prev != Class::Open
            && if class == Class::Other { prev == Class::Other } else { class != prev }
@ -478,48 +764,141 @@ fn lang_at(p: &Preparation, offset: usize) -> Option<hypher::Lang> {
    hypher::Lang::from_iso(bytes)
 }
-/// Whether the hyphen should repeat at the start of the next line.
+/// Resolved metrics relevant for cost computation.
-fn should_repeat_hyphen(pred_line: &Line) -> bool {
+struct CostMetrics {
-    // If the predecessor line does not end with a Dash::HardHyphen, we shall
+    min_ratio: f64,
-    // not place a hyphen at the start of the next line.
+    min_approx_ratio: f64,
-    if pred_line.dash != Some(Dash::HardHyphen) {
+    hyph_cost: Cost,
-        return false;
+    runt_cost: Cost,
    approx_hyphen_width: Abs,
 }
 impl CostMetrics {
    /// Compute shared metrics for paragraph optimization.
    fn compute(p: &Preparation) -> Self {
        Self {
            // When justifying, we may stretch spaces below their natural width.
            min_ratio: if p.justify { MIN_RATIO } else { 0.0 },
            min_approx_ratio: if p.justify { MIN_APPROX_RATIO } else { 0.0 },
            hyph_cost: DEFAULT_HYPH_COST * p.costs.hyphenation().get(),
            runt_cost: DEFAULT_RUNT_COST * p.costs.runt().get(),
            // Approximate hyphen width for estimates.
            approx_hyphen_width: Em::new(0.33).at(p.size),
        }
    }
-    // If there's a trimmed out space, we needn't repeat the hyphen. That's the
+    /// The minimum line ratio we allow for shrinking. For approximate layout,
-    // case of a text like "...kebab é a -melhor- comida que existe", where the
+    /// we allow less because otherwise we get an invalid layout fairly often,
-    // hyphens are a kind of emphasis marker.
+    /// which makes our bound useless.
-    if pred_line.trimmed.end != pred_line.end {
+    fn min_ratio(&self, approx: bool) -> f64 {
-        return false;
+        if approx {
-    }
+            self.min_approx_ratio
-
+        } else {
-    // The hyphen should repeat only in the languages that require that feature.
+            self.min_ratio
-    // For more information see the discussion at https://github.com/typst/typst/issues/3235
+        }
-    let Some(Item::Text(shape)) = pred_line.last.as_ref() else { return false };
+    }
-
+}
-    match shape.lang {
+
-        // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
+/// Estimated line metrics.
-        // - Czech: see https://prirucka.ujc.cas.cz/?id=164
+///
-        // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
+/// Allows to get a quick estimate of a metric for a line between two byte
-        // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
+/// positions.
-        // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
+struct Estimates {
-        // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
+    widths: CummulativeVec<Abs>,
-        Lang::LOWER_SORBIAN
+    stretchability: CummulativeVec<Abs>,
-        | Lang::CZECH
+    shrinkability: CummulativeVec<Abs>,
-        | Lang::CROATIAN
+    justifiables: CummulativeVec<usize>,
-        | Lang::POLISH
+}
-        | Lang::PORTUGUESE
+
-        | Lang::SLOVAK => true,
+impl Estimates {
-        // In Spanish the hyphen is required only if the word next to hyphen is
+    /// Compute estimations for approximate Knuth-Plass layout.
-        // not capitalized. Otherwise, the hyphen must not be repeated.
+    fn compute(p: &Preparation) -> Self {
-        //
+        let cap = p.bidi.text.len();
-        // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
+
-        // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
+        let mut widths = CummulativeVec::with_capacity(cap);
-        Lang::SPANISH => pred_line.bidi.text[pred_line.end..]
+        let mut stretchability = CummulativeVec::with_capacity(cap);
-            .chars()
+        let mut shrinkability = CummulativeVec::with_capacity(cap);
-            .next()
+        let mut justifiables = CummulativeVec::with_capacity(cap);
-            .map(|c| !c.is_uppercase())
+
-            .unwrap_or(false),
+        for item in &p.items {
-        _ => false,
+            let textual_len = item.textual_len();
            let after = widths.len() + textual_len;
            if let Item::Text(shaped) = item {
                for g in shaped.glyphs.iter() {
                    let byte_len = g.range.len();
                    let stretch = g.stretchability().0 + g.stretchability().1;
                    let shrink = g.shrinkability().0 + g.shrinkability().1;
                    widths.push(byte_len, g.x_advance.at(shaped.size));
                    stretchability.push(byte_len, stretch.at(shaped.size));
                    shrinkability.push(byte_len, shrink.at(shaped.size));
                    justifiables.push(byte_len, g.is_justifiable() as usize);
                }
            } else {
                widths.push(textual_len, item.width());
            }
            widths.adjust(after);
            stretchability.adjust(after);
            shrinkability.adjust(after);
            justifiables.adjust(after);
        }
        Self {
            widths,
            stretchability,
            shrinkability,
            justifiables,
        }
    }
 }
 /// An accumulative array of a metric.
 struct CummulativeVec<T> {
    total: T,
    summed: Vec<T>,
 }
 impl<T> CummulativeVec<T>
 where
    T: Default + Copy + Add<Output = T> + Sub<Output = T>,
 {
    /// Create a new instance with the given capacity.
    fn with_capacity(capacity: usize) -> Self {
        let total = T::default();
        let mut summed = Vec::with_capacity(capacity);
        summed.push(total);
        Self { total, summed }
    }
    /// Get the covered byte length.
    fn len(&self) -> usize {
        self.summed.len()
    }
    /// Adjust to cover the given byte length.
    fn adjust(&mut self, len: usize) {
        self.summed.resize(len, self.total);
    }
    /// Adds a new segment with the given byte length and metric.
    fn push(&mut self, byte_len: usize, metric: T) {
        self.total = self.total + metric;
        for _ in 0..byte_len {
            self.summed.push(self.total);
        }
    }
    /// Estimates the metrics for the line spanned by the range.
    fn estimate(&self, range: Range) -> T {
        self.get(range.end) - self.get(range.start)
    }
    /// Get the metric at the given byte position.
    fn get(&self, index: usize) -> T {
        match index.checked_sub(1) {
            None => T::default(),
            Some(i) => self.summed[i],
        }
    }
 }
--- a/crates/typst/src/layout/inline/mod.rs
+++ b/crates/typst/src/layout/inline/mod.rs
@ -9,7 +9,7 @@ use comemo::{Track, Tracked, TrackedMut};
 use self::collect::{collect, Item, Segment, SpanMapper};
 use self::finalize::finalize;
-use self::line::{commit, line, Dash, Line};
+use self::line::{commit, line, Line};
 use self::linebreak::{linebreak, Breakpoint};
 use self::prepare::{prepare, Preparation};
 use self::shaping::{