Refactor line building (#4497)

2025-06-28 00:03:17 +08:00 · 2024-07-04 12:57:40 +02:00 · 2024-07-04 12:57:40 +02:00 · 0ef672c347
commit 0ef672c347
parent 75246f930b
22 changed files with 651 additions and 517 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2604,8 +2604,7 @@ dependencies = [
 [[package]]
 name = "typst-assets"
 version = "0.11.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+source = "git+https://github.com/typst/typst-assets?rev=4ee794c#4ee794cf8fb98eb67194e757c9820ab8562d853b"
 checksum = "f13f85360328da54847dd7fefaf272dfa5b6d1fdeb53f32938924c39bf5b2c6c"
 [[package]]
 name = "typst-cli"
@ -2656,7 +2655,7 @@ dependencies = [
 [[package]]
 name = "typst-dev-assets"
 version = "0.11.0"
-source = "git+https://github.com/typst/typst-dev-assets?rev=48a924d9de82b631bc775124a69384c8d860db04#48a924d9de82b631bc775124a69384c8d860db04"
+source = "git+https://github.com/typst/typst-dev-assets?rev=48a924d#48a924d9de82b631bc775124a69384c8d860db04"
 [[package]]
 name = "typst-docs"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -26,8 +26,8 @@ typst-svg = { path = "crates/typst-svg", version = "0.11.0" }
 typst-syntax = { path = "crates/typst-syntax", version = "0.11.0" }
 typst-timing = { path = "crates/typst-timing", version = "0.11.0" }
 typst-utils = { path = "crates/typst-utils", version = "0.11.0" }
-typst-assets = "0.11.0"
+typst-assets = { git = "https://github.com/typst/typst-assets", rev = "4ee794c" }
-typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "48a924d9de82b631bc775124a69384c8d860db04" }
+typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "48a924d" }
 az = "1.2"
 base64 = "0.22"
 bitflags = { version = "2", features = ["serde"] }
--- a/crates/typst/src/introspection/mod.rs
+++ b/crates/typst/src/introspection/mod.rs
@ -116,6 +116,6 @@ impl Tag {
 impl Debug for Tag {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        write!(f, "Tag({:?})", self.elem)
+        write!(f, "Tag({:?})", self.elem.elem().name())
    }
 }
--- a/crates/typst/src/layout/inline/collect.rs
+++ b/crates/typst/src/layout/inline/collect.rs
@ -79,7 +79,7 @@ impl<'a> Item<'a> {
    }
    /// The natural layouted width of the item.
-    pub fn width(&self) -> Abs {
+    pub fn natural_width(&self) -> Abs {
        match self {
            Self::Text(shaped) => shaped.width,
            Self::Absolute(v, _) => *v,
--- a/crates/typst/src/layout/inline/line.rs
+++ b/crates/typst/src/layout/inline/line.rs
@ -1,11 +1,18 @@
-use unicode_bidi::BidiInfo;
+use std::fmt::{self, Debug, Formatter};
 use std::ops::{Deref, DerefMut};
 use super::*;
 use crate::engine::Engine;
-use crate::layout::{Abs, Em, Fr, Frame, FrameItem, Point};
+use crate::layout::{Abs, Dir, Em, Fr, Frame, FrameItem, Point};
 use crate::text::{Lang, TextElem};
 use crate::utils::Numeric;
 const SHY: char = '\u{ad}';
 const HYPHEN: char = '-';
 const EN_DASH: char = '–';
 const EM_DASH: char = '—';
 const LINE_SEPARATOR: char = '\u{2028}'; // We use LS to distinguish justified breaks.
 /// A layouted line, consisting of a sequence of layouted paragraph items that
 /// are mostly borrowed from the preparation phase. This type enables you to
 /// measure the size of a line in a range before committing to building the
@ -16,20 +23,9 @@ use crate::utils::Numeric;
 /// line, respectively. But even those can partially reuse previous results when
 /// the break index is safe-to-break per rustybuzz.
 pub struct Line<'a> {
-    /// Bidi information about the paragraph.
+    /// The items the line is made of.
-    pub bidi: &'a BidiInfo<'a>,
+    pub items: Items<'a>,
-    /// The trimmed range the line spans in the paragraph.
+    /// The exact natural width of the line.
    pub trimmed: Range,
    /// The untrimmed end where the line ends.
    pub end: usize,
    /// A reshaped text item if the line sliced up a text item at the start.
    pub first: Option<Item<'a>>,
    /// Inner items which don't need to be reprocessed.
    pub inner: &'a [Item<'a>],
    /// A reshaped text item if the line sliced up a text item at the end. If
    /// there is only one text item, this takes precedence over `first`.
    pub last: Option<Item<'a>>,
    /// The width of the line.
    pub width: Abs,
    /// Whether the line should be justified.
    pub justify: bool,
@ -39,45 +35,27 @@ pub struct Line<'a> {
 }
 impl<'a> Line<'a> {
-    /// Iterate over the line's items.
+    /// Create an empty line.
-    pub fn items(&self) -> impl Iterator<Item = &Item<'a>> {
+    pub fn empty() -> Self {
-        self.first.iter().chain(self.inner).chain(&self.last)
+        Self {
            items: Items::new(),
            width: Abs::zero(),
            justify: false,
            dash: None,
        }
    /// Return items that intersect the given `text_range`.
    pub fn slice(&self, text_range: Range) -> impl Iterator<Item = &Item<'a>> {
        let mut cursor = self.trimmed.start;
        let mut start = 0;
        let mut end = 0;
        for (i, item) in self.items().enumerate() {
            if cursor <= text_range.start {
                start = i;
            }
            let len = item.textual_len();
            if cursor < text_range.end || cursor + len <= text_range.end {
                end = i + 1;
            } else {
                break;
            }
            cursor += len;
        }
        self.items().skip(start).take(end - start)
    }
    /// How many glyphs are in the text where we can insert additional
    /// space when encountering underfull lines.
    pub fn justifiables(&self) -> usize {
        let mut count = 0;
-        for shaped in self.items().filter_map(Item::text) {
+        for shaped in self.items.iter().filter_map(Item::text) {
            count += shaped.justifiables();
        }
        // CJK character at line end should not be adjusted.
        if self
-            .items()
+            .items
            .last()
            .and_then(Item::text)
            .map(|s| s.cjk_justifiable_at_last())
@ -89,19 +67,27 @@ impl<'a> Line<'a> {
        count
    }
-    /// How much can the line stretch
+    /// How much the line can stretch.
    pub fn stretchability(&self) -> Abs {
-        self.items().filter_map(Item::text).map(|s| s.stretchability()).sum()
+        self.items
            .iter()
            .filter_map(Item::text)
            .map(|s| s.stretchability())
            .sum()
    }
-    /// How much can the line shrink
+    /// How much the line can shrink.
    pub fn shrinkability(&self) -> Abs {
-        self.items().filter_map(Item::text).map(|s| s.shrinkability()).sum()
+        self.items
            .iter()
            .filter_map(Item::text)
            .map(|s| s.shrinkability())
            .sum()
    }
    /// Whether the line has items with negative width.
    pub fn has_negative_width_items(&self) -> bool {
-        self.items().any(|item| match item {
+        self.items.iter().any(|item| match item {
            Item::Absolute(amount, _) => *amount < Abs::zero(),
            Item::Frame(frame, _) => frame.width() < Abs::zero(),
            _ => false,
@ -110,7 +96,8 @@ impl<'a> Line<'a> {
    /// The sum of fractions in the line.
    pub fn fr(&self) -> Fr {
-        self.items()
+        self.items
            .iter()
            .filter_map(|item| match item {
                Item::Fractional(fr, _) => Some(*fr),
                _ => None,
@ -122,234 +109,299 @@ impl<'a> Line<'a> {
 /// A dash at the end of a line.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum Dash {
-    /// A hyphen added to break a word.
+    /// A soft hyphen added to break a word.
-    SoftHyphen,
+    Soft,
-    /// Regular hyphen, present in a compound word, e.g. beija-flor.
+    /// A regular hyphen, present in a compound word, e.g. beija-flor.
-    HardHyphen,
+    Hard,
-    /// An em dash.
+    /// Another kind of dash. Only relevant for cost computation.
-    Long,
+    Other,
    /// An en dash.
    Short,
 }
 /// Create a line which spans the given range.
 pub fn line<'a>(
    engine: &Engine,
    p: &'a Preparation,
-    mut range: Range,
+    range: Range,
    breakpoint: Breakpoint,
    pred: Option<&Line>,
 ) -> Line<'a> {
-    let end = range.end;
+    // The line's full text.
-    let mut justify =
+    let full = &p.text[range.clone()];
        p.justify && end < p.bidi.text.len() && breakpoint != Breakpoint::Mandatory;
-    if range.is_empty() {
+    // Whether the line is justified.
-        return Line {
+    let justify = full.ends_with(LINE_SEPARATOR)
-            bidi: &p.bidi,
+        || (p.justify && breakpoint != Breakpoint::Mandatory);
            end,
            trimmed: range,
            first: None,
            inner: &[],
            last: None,
            width: Abs::zero(),
            justify,
            dash: None,
        };
    }
-    let prepend_hyphen = pred.map_or(false, should_insert_hyphen);
+    // Process dashes.
-
+    let dash = if breakpoint == Breakpoint::Hyphen || full.ends_with(SHY) {
-    // Slice out the relevant items.
+        Some(Dash::Soft)
-    let (mut expanded, mut inner) = p.slice(range.clone());
+    } else if full.ends_with(HYPHEN) {
-    let mut width = Abs::zero();
+        Some(Dash::Hard)
-
+    } else if full.ends_with([EN_DASH, EM_DASH]) {
-    // Weak space (`Absolute(_, true)`) is removed at the end of the line
+        Some(Dash::Other)
    while let Some((Item::Absolute(_, true), before)) = inner.split_last() {
        inner = before;
        range.end -= 1;
        expanded.end -= 1;
    }
    // Weak space (`Absolute(_, true)`) is removed at the beginning of the line
    while let Some((Item::Absolute(_, true), after)) = inner.split_first() {
        inner = after;
        range.start += 1;
        expanded.end += 1;
    }
    // Reshape the last item if it's split in half or hyphenated.
    let mut last = None;
    let mut dash = None;
    if let Some((Item::Text(shaped), before)) = inner.split_last() {
        // Compute the range we want to shape, trimming whitespace at the
        // end of the line.
        let base = expanded.end - shaped.text.len();
        let start = range.start.max(base);
        let text = &p.bidi.text[start..range.end];
        // U+200B ZERO WIDTH SPACE is used to provide a line break opportunity,
        // we want to trim it too.
        let trimmed = text.trim_end().trim_end_matches('\u{200B}');
        range.end = start + trimmed.len();
        // Deal with hyphens, dashes and justification.
        let shy = trimmed.ends_with('\u{ad}');
        let hyphen = breakpoint == Breakpoint::Hyphen;
        dash = if hyphen || shy {
            Some(Dash::SoftHyphen)
        } else if trimmed.ends_with('-') {
            Some(Dash::HardHyphen)
        } else if trimmed.ends_with('–') {
            Some(Dash::Short)
        } else if trimmed.ends_with('—') {
            Some(Dash::Long)
    } else {
        None
    };
        justify |= text.ends_with('\u{2028}');
-        // Deal with CJK punctuation at line ends.
+    // Trim the line at the end, if necessary for this breakpoint.
-        let gb_style = cjk_punct_style(shaped.lang, shaped.region);
+    let trim = range.start + breakpoint.trim(full).len();
        let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
            || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));
-        // Usually, we don't want to shape an empty string because:
+    // Collect the items for the line.
-        // - We don't want the height of trimmed whitespace in a different font
+    let mut items = collect_items(engine, p, range, trim);
-        //   to be considered for the line height.
+
-        // - Even if it's in the same font, its unnecessary.
+    // Add a hyphen at the line start, if a previous dash should be repeated.
-        //
+    if pred.map_or(false, |pred| should_repeat_hyphen(pred, full)) {
-        // There is one exception though. When the whole line is empty, we need
+        if let Some(shaped) = items.first_text_mut() {
-        // the shaped empty string to make the line the appropriate height. That
+            shaped.prepend_hyphen(engine, p.fallback);
-        // is the case exactly if the string is empty and there are no other
+        }
-        // items in the line.
+    }
-        if hyphen
+
-            || start + shaped.text.len() > range.end
+    // Add a hyphen at the line end, if we ended on a soft hyphen.
-            || maybe_adjust_last_glyph
+    if dash == Some(Dash::Soft) {
-            || prepend_hyphen
+        if let Some(shaped) = items.last_text_mut() {
            shaped.push_hyphen(engine, p.fallback);
        }
    }
    // Deal with CJ characters at line boundaries.
    adjust_cj_at_line_boundaries(p, full, &mut items);
    // Compute the line's width.
    let width = items.iter().map(Item::natural_width).sum();
    Line { items, width, justify, dash }
 }
 /// Collects / reshapes all items for the line with the given `range`.
 ///
 /// The `trim` defines an end position to which text items are trimmed. For
 /// example, the `range` may span "hello\n", but the `trim` specifies that the
 /// linebreak is trimmed.
 ///
 /// We do not factor the `trim` directly into the `range` because we still want
 /// to keep non-text items after the trim (e.g. tags).
 ///
 /// The returned items are in visual (left-to-right) order, with weak spacing
 /// removed from both ends of the line.
 fn collect_items<'a>(
    engine: &Engine,
    p: &'a Preparation,
    range: Range,
    trim: usize,
 ) -> Items<'a> {
    let mut items = Items::new();
    // Set by `collect_range` when a text item contributes no text to the line.
    let mut fallback = None;
    // Collect the items for each consecutively ordered run.
    reorder(p, range.clone(), |subrange, rtl| {
        // Remember where this run starts so that only its items are reversed.
        let from = items.len();
        collect_range(engine, p, subrange, trim, &mut items, &mut fallback);
        if rtl {
            items.reorder(from);
        }
    });
    // Trim weak spacing at the start of the line.
    let prefix = items
        .iter()
        .take_while(|item| matches!(item, Item::Absolute(_, true)))
        .count();
    if prefix > 0 {
        items.drain(..prefix);
    }
    // Trim weak spacing at the end of the line.
    while matches!(items.last(), Some(Item::Absolute(_, true))) {
        items.pop();
    }
    // Add fallback text to expand the line height, if necessary. This only
    // happens when the line contains no text item at all.
    if !items.iter().any(|item| matches!(item, Item::Text(_))) {
        if let Some(fallback) = fallback {
            items.push(fallback);
        }
    }
    items
 }
 /// Calls `f` for the BiDi-reordered ranges of a line.
 ///
 /// `f` receives each run's text range together with a flag that is `true`
 /// when the run is right-to-left. Runs are passed in visual order (left to
 /// right).
 ///
 /// Panics if bidi information exists, the range is non-empty, and no bidi
 /// paragraph contains `range.start`.
 fn reorder<F>(p: &Preparation, range: Range, mut f: F)
 where
    F: FnMut(Range, bool),
 {
    // If there is nothing bidirectional going on, skip reordering. The whole
    // range is then a single run in the paragraph's direction.
    let Some(bidi) = &p.bidi else {
        f(range, p.dir == Dir::RTL);
        return;
    };
    // The bidi crate panics for empty lines.
    if range.is_empty() {
        f(range, p.dir == Dir::RTL);
        return;
    }
    // Find the paragraph that contains the line.
    // NOTE(review): the `unwrap` assumes every line start lies inside one of
    // the bidi paragraphs; confirm against how `p.bidi` is built.
    let para = bidi
        .paragraphs
        .iter()
        .find(|para| para.range.contains(&range.start))
        .unwrap();
    // Compute the reordered ranges in visual order (left to right).
    let (levels, runs) = bidi.visual_runs(para, range.clone());
    // Call `f` for each run.
    for run in runs {
        // The level at the run's first index decides the run's direction.
        let rtl = levels[run.start].is_rtl();
        f(run, rtl)
    }
 }
 /// Collects / reshapes all items for the given `range` with continuous
 /// direction.
 ///
 /// Items are appended to `items` in the order yielded by `p.slice`. Text
 /// items are clipped to `range` and to the `trim` position. A text item that
 /// ends up empty is not pushed; an empty version of it is stored in
 /// `fallback` instead.
 fn collect_range<'a>(
    engine: &Engine,
    p: &'a Preparation,
    range: Range,
    trim: usize,
    items: &mut Items<'a>,
    fallback: &mut Option<ItemEntry<'a>>,
 ) {
    for (subrange, item) in p.slice(range.clone()) {
        // All non-text items are just kept, they can't be split.
        let Item::Text(shaped) = item else {
            items.push(item);
            continue;
        };
        // The intersection range of the item, the subrange, and the line's
        // trimming.
        let sliced =
            range.start.max(subrange.start)..range.end.min(subrange.end).min(trim);
        // Whether the item is split by the line, i.e. the slice does not cover
        // the item's full range.
        let split = subrange.start < sliced.start || sliced.end < subrange.end;
        if sliced.is_empty() {
            // When there is no text, still keep this as a fallback item, which
            // we can use to force a non-zero line-height when the line doesn't
            // contain any other text.
            *fallback = Some(ItemEntry::from(Item::Text(shaped.empty())));
        } else if split {
            // When the item is split in half, reshape it.
            let reshaped = shaped.reshape(engine, sliced);
            items.push(Item::Text(reshaped));
        } else {
            // When the item is fully contained, just keep it.
            items.push(item);
        }
    }
 }
 /// Add spacing around punctuation marks for CJ glyphs at line boundaries.
 ///
 /// See Requirements for Chinese Text Layout, Section 3.1.6.3 Compression of
 /// punctuation marks at line start or line end.
 fn adjust_cj_at_line_boundaries(p: &Preparation, text: &str, items: &mut Items) {
    if text.starts_with(BEGIN_PUNCT_PAT)
        || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script))
    {
-            if hyphen || start < range.end || before.is_empty() {
+        adjust_cj_at_line_start(p, items);
                let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end);
                if hyphen || shy {
                    reshaped.push_hyphen(engine, p.fallback);
    }
-                if let Some(last_glyph) = reshaped.glyphs.last() {
+    if text.ends_with(END_PUNCT_PAT)
-                    if last_glyph.is_cjk_left_aligned_punctuation(gb_style) {
+        || (p.cjk_latin_spacing && text.ends_with(is_of_cj_script))
                        // If the last glyph is a CJK punctuation, we want to
                        // shrink it. See Requirements for Chinese Text Layout,
                        // Section 3.1.6.3 Compression of punctuation marks at
                        // line start or line end
                        let shrink_amount = last_glyph.shrinkability().1;
                        let punct = reshaped.glyphs.to_mut().last_mut().unwrap();
                        punct.shrink_right(shrink_amount);
                        reshaped.width -= shrink_amount.at(reshaped.size);
                    } else if p.cjk_latin_spacing
                        && last_glyph.is_cj_script()
                        && (last_glyph.x_advance - last_glyph.x_offset) > Em::one()
    {
-                        // If the last glyph is a CJK character adjusted by
+        adjust_cj_at_line_end(p, items);
                        // [`add_cjk_latin_spacing`], restore the original
                        // width.
                        let shrink_amount =
                            last_glyph.x_advance - last_glyph.x_offset - Em::one();
                        let glyph = reshaped.glyphs.to_mut().last_mut().unwrap();
                        glyph.x_advance -= shrink_amount;
                        glyph.adjustability.shrinkability.1 = Em::zero();
                        reshaped.width -= shrink_amount.at(reshaped.size);
                    }
    }
 }
-                width += reshaped.width;
+/// Add spacing around punctuation marks for CJ glyphs at the line start.
-                last = Some(Item::Text(reshaped));
+fn adjust_cj_at_line_start(p: &Preparation, items: &mut Items) {
-            }
+    let Some(shaped) = items.first_text_mut() else { return };
    let Some(glyph) = shaped.glyphs.first() else { return };
-            inner = before;
+    if glyph.is_cjk_right_aligned_punctuation() {
        }
    }
    // Deal with CJ characters at line starts.
    let text = &p.bidi.text[range.start..end];
    let maybe_adjust_first_glyph = text.starts_with(BEGIN_PUNCT_PAT)
        || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script));
    // Reshape the start item if it's split in half.
    let mut first = None;
    if let Some((Item::Text(shaped), after)) = inner.split_first() {
        // Compute the range we want to shape.
        let base = expanded.start;
        let end = range.end.min(base + shaped.text.len());
        // Reshape if necessary.
        if range.start + shaped.text.len() > end
            || maybe_adjust_first_glyph
            || prepend_hyphen
        {
            // If the range is empty, we don't want to push an empty text item.
            if range.start < end {
                let reshaped = shaped.reshape(engine, &p.spans, range.start..end);
                width += reshaped.width;
                first = Some(Item::Text(reshaped));
            }
            inner = after;
        }
    }
    if prepend_hyphen {
        let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
        if let Some(reshaped) = reshaped {
            let width_before = reshaped.width;
            reshaped.prepend_hyphen(engine, p.fallback);
            width += reshaped.width - width_before;
        }
    }
    if maybe_adjust_first_glyph {
        let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
        if let Some(reshaped) = reshaped {
            if let Some(first_glyph) = reshaped.glyphs.first() {
                if first_glyph.is_cjk_right_aligned_punctuation() {
        // If the first glyph is a CJK punctuation, we want to
        // shrink it.
-                    let shrink_amount = first_glyph.shrinkability().0;
+        let glyph = shaped.glyphs.to_mut().first_mut().unwrap();
-                    let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
+        let shrink = glyph.shrinkability().0;
-                    glyph.shrink_left(shrink_amount);
+        glyph.shrink_left(shrink);
-                    let amount_abs = shrink_amount.at(reshaped.size);
+        shaped.width -= shrink.at(shaped.size);
-                    reshaped.width -= amount_abs;
+    } else if p.cjk_latin_spacing && glyph.is_cj_script() && glyph.x_offset > Em::zero() {
                    width -= amount_abs;
                } else if p.cjk_latin_spacing
                    && first_glyph.is_cj_script()
                    && first_glyph.x_offset > Em::zero()
                {
        // If the first glyph is a CJK character adjusted by
        // [`add_cjk_latin_spacing`], restore the original width.
-                    let shrink_amount = first_glyph.x_offset;
+        let glyph = shaped.glyphs.to_mut().first_mut().unwrap();
-                    let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
+        let shrink = glyph.x_offset;
-                    glyph.x_advance -= shrink_amount;
+        glyph.x_advance -= shrink;
        glyph.x_offset = Em::zero();
        glyph.adjustability.shrinkability.0 = Em::zero();
-                    let amount_abs = shrink_amount.at(reshaped.size);
+        shaped.width -= shrink.at(shaped.size);
                    reshaped.width -= amount_abs;
                    width -= amount_abs;
                }
    }
 }
 /// Compresses the last glyph of the line if it is a CJ glyph.
 ///
 /// Does nothing when the last item is not text or has no glyphs. Otherwise
 /// the last glyph and the width of its text item are reduced in one of two
 /// cases: left-aligned CJK punctuation, or a CJ character that is wider than
 /// one em while `p.cjk_latin_spacing` is enabled.
 fn adjust_cj_at_line_end(p: &Preparation, items: &mut Items) {
    let Some(shaped) = items.last_text_mut() else { return };
    let Some(glyph) = shaped.glyphs.last() else { return };
    // Deal with CJK punctuation at line ends.
    let style = cjk_punct_style(shaped.lang, shaped.region);
    if glyph.is_cjk_left_aligned_punctuation(style) {
        // If the last glyph is a CJK punctuation, we want to
        // shrink it.
        // The second component of the shrinkability is the one applied on the
        // right side via `shrink_right`.
        let shrink = glyph.shrinkability().1;
        let punct = shaped.glyphs.to_mut().last_mut().unwrap();
        punct.shrink_right(shrink);
        // Keep the item's cached width in sync with the glyph change.
        shaped.width -= shrink.at(shaped.size);
    } else if p.cjk_latin_spacing
        && glyph.is_cj_script()
        && (glyph.x_advance - glyph.x_offset) > Em::one()
    {
        // If the last glyph is a CJK character adjusted by
        // [`add_cjk_latin_spacing`], restore the original width.
        // Everything beyond one em (after the offset) is removed.
        let shrink = glyph.x_advance - glyph.x_offset - Em::one();
        let glyph = shaped.glyphs.to_mut().last_mut().unwrap();
        glyph.x_advance -= shrink;
        glyph.adjustability.shrinkability.1 = Em::zero();
        shaped.width -= shrink.at(shaped.size);
    }
 }
 /// Whether a hyphen should be inserted at the start of the next line.
 fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool {
    // If the predecessor line does not end with a `Dash::Hard`, we shall
    // not place a hyphen at the start of the next line.
    if pred_line.dash != Some(Dash::Hard) {
        return false;
    }
-    // Measure the inner items.
+    // The hyphen should repeat only in the languages that require that feature.
-    for item in inner {
+    // For more information see the discussion at https://github.com/typst/typst/issues/3235
-        width += item.width();
+    let Some(Item::Text(shaped)) = pred_line.items.last() else { return false };
    }
-    Line {
+    match shaped.lang {
-        bidi: &p.bidi,
+        // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
-        trimmed: range,
+        // - Czech: see https://prirucka.ujc.cas.cz/?id=164
-        end,
+        // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
-        first,
+        // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
-        inner,
+        // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
-        last,
+        // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
-        width,
+        Lang::LOWER_SORBIAN
-        justify,
+        | Lang::CZECH
-        dash,
+        | Lang::CROATIAN
        | Lang::POLISH
        | Lang::PORTUGUESE
        | Lang::SLOVAK => true,
        // In Spanish the hyphen is required only if the word next to hyphen is
        // not capitalized. Otherwise, the hyphen must not be repeated.
        //
        // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
        // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
        Lang::SPANISH => text.chars().next().map_or(false, |c| !c.is_uppercase()),
        _ => false,
    }
 }
@ -365,18 +417,19 @@ pub fn commit(
    let mut remaining = width - line.width - p.hang;
    let mut offset = Abs::zero();
-    // Reorder the line from logical to visual order.
+    // We always build the line from left to right. In an LTR paragraph, we must
-    let (reordered, starts_rtl) = reorder(line);
+    // thus add the hanging indent to the offset. When the paragraph is RTL, the
-    if !starts_rtl {
+    // hanging indent arises naturally due to the line width.
    if p.dir == Dir::LTR {
        offset += p.hang;
    }
    // Handle hanging punctuation to the left.
-    if let Some(Item::Text(text)) = reordered.first() {
+    if let Some(Item::Text(text)) = line.items.first() {
        if let Some(glyph) = text.glyphs.first() {
            if !text.dir.is_positive()
                && TextElem::overhang_in(text.styles)
-                && (reordered.len() > 1 || text.glyphs.len() > 1)
+                && (line.items.len() > 1 || text.glyphs.len() > 1)
            {
                let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
                offset -= amount;
@ -386,11 +439,11 @@ pub fn commit(
    }
    // Handle hanging punctuation to the right.
-    if let Some(Item::Text(text)) = reordered.last() {
+    if let Some(Item::Text(text)) = line.items.last() {
        if let Some(glyph) = text.glyphs.last() {
            if text.dir.is_positive()
                && TextElem::overhang_in(text.styles)
-                && (reordered.len() > 1 || text.glyphs.len() > 1)
+                && (line.items.len() > 1 || text.glyphs.len() > 1)
            {
                let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
                remaining += amount;
@ -408,16 +461,16 @@ pub fn commit(
    let mut extra_justification = Abs::zero();
    let shrinkability = line.shrinkability();
-    let stretch = line.stretchability();
+    let stretchability = line.stretchability();
    if remaining < Abs::zero() && shrinkability > Abs::zero() && shrink {
        // Attempt to reduce the length of the line, using shrinkability.
        justification_ratio = (remaining / shrinkability).max(-1.0);
        remaining = (remaining + shrinkability).min(Abs::zero());
    } else if line.justify && fr.is_zero() {
        // Attempt to increase the length of the line, using stretchability.
-        if stretch > Abs::zero() {
+        if stretchability > Abs::zero() {
-            justification_ratio = (remaining / stretch).min(1.0);
+            justification_ratio = (remaining / stretchability).min(1.0);
-            remaining = (remaining - stretch).max(Abs::zero());
+            remaining = (remaining - stretchability).max(Abs::zero());
        }
        let justifiables = line.justifiables();
@ -433,7 +486,7 @@ pub fn commit(
    // Build the frames and determine the height and baseline.
    let mut frames = vec![];
-    for item in reordered {
+    for item in line.items.iter() {
        let mut push = |offset: &mut Abs, frame: Frame| {
            let width = frame.width();
            top.set_max(frame.baseline());
@ -460,8 +513,12 @@ pub fn commit(
                }
            }
            Item::Text(shaped) => {
-                let mut frame =
+                let mut frame = shaped.build(
-                    shaped.build(engine, justification_ratio, extra_justification);
+                    engine,
                    &p.spans,
                    justification_ratio,
                    extra_justification,
                );
                frame.post_process(shaped.styles);
                push(&mut offset, frame);
            }
@ -499,94 +556,6 @@ pub fn commit(
    Ok(output)
 }
 /// Return a line's items in visual order.
 ///
 /// The second tuple element tells whether the first level of the line's
 /// visual runs is right-to-left. It is always `false` for an empty line.
 fn reorder<'a>(line: &'a Line<'a>) -> (Vec<&Item<'a>>, bool) {
    let mut reordered = vec![];
    // The bidi crate doesn't like empty lines.
    if line.trimmed.is_empty() {
        return (line.slice(line.trimmed.clone()).collect(), false);
    }
    // Find the paragraph that contains the line.
    // NOTE(review): the `unwrap` assumes some bidi paragraph contains the
    // start of the trimmed range; confirm against the caller.
    let para = line
        .bidi
        .paragraphs
        .iter()
        .find(|para| para.range.contains(&line.trimmed.start))
        .unwrap();
    // Compute the reordered ranges in visual order (left to right).
    let (levels, runs) = line.bidi.visual_runs(para, line.trimmed.clone());
    let starts_rtl = levels.first().is_some_and(|level| level.is_rtl());
    // Collect the reordered items.
    for run in runs {
        // Skip reset L1 runs because handling them would require reshaping
        // again in some cases.
        if line.bidi.levels[run.start] != levels[run.start] {
            continue;
        }
        // Only the items added for this run are reversed when it is RTL.
        let prev = reordered.len();
        reordered.extend(line.slice(run.clone()));
        if levels[run.start].is_rtl() {
            reordered[prev..].reverse();
        }
    }
    (reordered, starts_rtl)
 }
 /// Whether a hyphen should be inserted at the start of the next line.
 ///
 /// Returns `true` only if the predecessor line ends with a hard hyphen,
 /// nothing was trimmed from its end, and the language of its last text item
 /// requires repeating the hyphen.
 fn should_insert_hyphen(pred_line: &Line) -> bool {
    // If the predecessor line does not end with a Dash::HardHyphen, we shall
    // not place a hyphen at the start of the next line.
    if pred_line.dash != Some(Dash::HardHyphen) {
        return false;
    }
    // If there's a trimmed out space, we needn't repeat the hyphen. That's the
    // case of a text like "...kebab é a -melhor- comida que existe", where the
    // hyphens are a kind of emphasis marker.
    if pred_line.trimmed.end != pred_line.end {
        return false;
    }
    // The hyphen should repeat only in the languages that require that feature.
    // For more information see the discussion at https://github.com/typst/typst/issues/3235
    let Some(Item::Text(shape)) = pred_line.last.as_ref() else { return false };
    match shape.lang {
        // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
        // - Czech: see https://prirucka.ujc.cas.cz/?id=164
        // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
        // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
        // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
        // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
        Lang::LOWER_SORBIAN
        | Lang::CZECH
        | Lang::CROATIAN
        | Lang::POLISH
        | Lang::PORTUGUESE
        | Lang::SLOVAK => true,
        // In Spanish the hyphen is required only if the word next to hyphen is
        // not capitalized. Otherwise, the hyphen must not be repeated.
        //
        // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
        // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
        //
        // The character inspected is the first one after the end of the
        // predecessor line; no following text means no repeated hyphen.
        Lang::SPANISH => pred_line.bidi.text[pred_line.end..]
            .chars()
            .next()
            .map(|c| !c.is_uppercase())
            .unwrap_or(false),
        _ => false,
    }
 }
 /// How much a character should hang into the end margin.
 ///
 /// For more discussion, see:
@ -607,3 +576,119 @@ fn overhang(c: char) -> f64 {
        _ => 0.0,
    }
 }
 /// A collection of owned or borrowed paragraph items.
 ///
 /// This is a wrapper around a vector of [`ItemEntry`] values, each of which
 /// either references an existing item or owns a boxed one.
 pub struct Items<'a>(Vec<ItemEntry<'a>>);
 impl<'a> Items<'a> {
    /// Construct a collection that does not hold any items yet.
    pub fn new() -> Self {
        Self(Vec::new())
    }
    /// Append an entry to the end, converting it into an `ItemEntry` first.
    pub fn push(&mut self, entry: impl Into<ItemEntry<'a>>) {
        let entry: ItemEntry<'a> = entry.into();
        self.0.push(entry);
    }
    /// Iterate over the items in order, looking through the entry wrapper.
    pub fn iter(&self) -> impl Iterator<Item = &Item<'a>> {
        self.0.iter().map(|entry| entry.deref())
    }
    /// The item at the front, or `None` if there are no items.
    pub fn first(&self) -> Option<&Item<'a>> {
        let entry = self.0.first()?;
        Some(&**entry)
    }
    /// The item at the back, or `None` if there are no items.
    pub fn last(&self) -> Option<&Item<'a>> {
        let entry = self.0.last()?;
        Some(&**entry)
    }
    /// Mutable access to the text of the front item.
    ///
    /// Yields `None` if there are no items or the front item is not text.
    pub fn first_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
        self.0.first_mut().and_then(|entry| entry.text_mut())
    }
    /// Mutable access to the text of the back item.
    ///
    /// Yields `None` if there are no items or the back item is not text.
    pub fn last_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
        self.0.last_mut().and_then(|entry| entry.text_mut())
    }
    /// Reverse the items from index `from` to the end, which reorders them
    /// to RTL.
    ///
    /// Panics if `from` is greater than the number of items.
    pub fn reorder(&mut self, from: usize) {
        let tail = &mut self.0[from..];
        tail.reverse();
    }
 }
 impl<'a> FromIterator<ItemEntry<'a>> for Items<'a> {
    /// Gather all entries produced by the iterator into a new collection.
    fn from_iter<I: IntoIterator<Item = ItemEntry<'a>>>(iter: I) -> Self {
        let entries: Vec<ItemEntry<'a>> = iter.into_iter().collect();
        Self(entries)
    }
 }
 impl<'a> Deref for Items<'a> {
    type Target = Vec<ItemEntry<'a>>;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
 }
 impl<'a> DerefMut for Items<'a> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
 }
 /// A reference to or a boxed item.
 pub enum ItemEntry<'a> {
    /// An item that is borrowed from elsewhere.
    Ref(&'a Item<'a>),
    /// An item that is owned by this entry and stored on the heap.
    Box(Box<Item<'a>>),
 }
 impl<'a> ItemEntry<'a> {
    /// Get mutable access to the text of this entry, if it holds text.
    ///
    /// A borrowed entry is not mutated through its reference. Instead, its
    /// text is cloned into a new boxed item that replaces the reference.
    /// Returns `None` without changing the entry if a borrowed item is not
    /// text.
    fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
        // Turn a borrowed text item into an owned copy first.
        if let Self::Ref(borrowed) = self {
            let owned = borrowed.text()?.clone();
            *self = Self::Box(Box::new(Item::Text(owned)));
        }
        match self {
            Self::Box(item) => item.text_mut(),
            // Borrowed text was replaced by a box above and any other
            // borrowed item has already returned `None`.
            Self::Ref(_) => unreachable!(),
        }
    }
 }
 impl<'a> Deref for ItemEntry<'a> {
    type Target = Item<'a>;
    /// Borrow the item, no matter whether it is owned or referenced.
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Box(owned) => &**owned,
            Self::Ref(borrowed) => *borrowed,
        }
    }
 }
 impl Debug for ItemEntry<'_> {
    /// Format the entry exactly like the item it wraps.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        Debug::fmt(&**self, f)
    }
 }
 impl<'a> From<&'a Item<'a>> for ItemEntry<'a> {
    fn from(item: &'a Item<'a>) -> Self {
        Self::Ref(item)
    }
 }
 impl<'a> From<Item<'a>> for ItemEntry<'a> {
    fn from(item: Item<'a>) -> Self {
        Self::Box(Box::new(item))
    }
 }
--- a/crates/typst/src/layout/inline/linebreak.rs
+++ b/crates/typst/src/layout/inline/linebreak.rs
@ -1,6 +1,7 @@
 use std::ops::{Add, Sub};
 use icu_properties::maps::CodePointMapData;
 use icu_properties::sets::CodePointSetData;
 use icu_properties::LineBreak;
 use icu_provider::AsDeserializingBufferProvider;
 use icu_provider_adapters::fork::ForkByKeyProvider;
@ -27,30 +28,33 @@ const MIN_RATIO: f64 = -1.0;
 const MIN_APPROX_RATIO: f64 = -0.5;
 const BOUND_EPS: f64 = 1e-3;
 /// Create a data provider backed by the ICU blob from `typst_assets`.
 ///
 /// Panics if the provider cannot be created from the blob.
 fn blob() -> BlobDataProvider {
    let data = typst_assets::icu::ICU;
    BlobDataProvider::try_new_from_static_blob(data).unwrap()
 }
 /// The general line break segmenter.
-static SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
+static SEGMENTER: Lazy<LineSegmenter> =
-    let provider =
+    Lazy::new(|| LineSegmenter::try_new_lstm_with_buffer_provider(&blob()).unwrap());
        BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
    LineSegmenter::try_new_lstm_with_buffer_provider(&provider).unwrap()
 });
 /// The line break segmenter for Chinese/Japanese text.
 static CJ_SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
    let provider =
        BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
    let cj_blob =
        BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU_CJ_SEGMENT)
            .unwrap();
-    let cj_provider = ForkByKeyProvider::new(cj_blob, provider);
+    let cj_provider = ForkByKeyProvider::new(cj_blob, blob());
    LineSegmenter::try_new_lstm_with_buffer_provider(&cj_provider).unwrap()
 });
 /// The Unicode line break properties for each code point.
 static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {
-    let provider =
+    icu_properties::maps::load_line_break(&blob().as_deserializing()).unwrap()
-        BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
+});
-    let deser_provider = provider.as_deserializing();
+
-    icu_properties::maps::load_line_break(&deser_provider).unwrap()
+/// The set of Unicode default ignorables.
 static DEFAULT_IGNORABLE_DATA: Lazy<CodePointSetData> = Lazy::new(|| {
    // Load the default-ignorable code point set through a deserializing view
    // of the ICU blob provider. Panics if loading fails.
    icu_properties::sets::load_default_ignorable_code_point(&blob().as_deserializing())
        .unwrap()
 });
 /// A line break opportunity.
@ -64,6 +68,37 @@ pub enum Breakpoint {
    Hyphen,
 }
 impl Breakpoint {
    /// Strip the trailing part of `line` that should not remain when the
    /// line ends at this breakpoint.
    ///
    /// Trailing default-ignorable code points are removed for every kind of
    /// breakpoint. After that, a normal break also drops trailing
    /// whitespace, a mandatory break drops trailing linebreak characters,
    /// and a hyphen break leaves the rest untouched.
    pub fn trim(self, line: &str) -> &str {
        let ignorables = DEFAULT_IGNORABLE_DATA.as_borrowed();
        let stripped = line.trim_end_matches(|c| ignorables.contains(c));
        match self {
            // Nothing more to remove.
            Self::Hyphen => stripped,
            // Remove trailing whitespace.
            Self::Normal => stripped.trim_end_matches(char::is_whitespace),
            // Remove trailing characters of the mandatory break, carriage
            // return, line feed and next line classes.
            Self::Mandatory => {
                let classes = LINEBREAK_DATA.as_borrowed();
                let forces_break = |c: char| {
                    matches!(
                        classes.get(c),
                        LineBreak::MandatoryBreak
                            | LineBreak::CarriageReturn
                            | LineBreak::LineFeed
                            | LineBreak::NextLine
                    )
                };
                stripped.trim_end_matches(forces_break)
            }
        }
    }
 }
 /// Breaks the paragraph into lines.
 pub fn linebreak<'a>(
    engine: &Engine,
@ -180,14 +215,11 @@ fn linebreak_optimized_bounded<'a>(
        pred: usize,
        total: Cost,
        line: Line<'a>,
        end: usize,
    }
    // Dynamic programming table.
-    let mut table = vec![Entry {
+    let mut table = vec![Entry { pred: 0, total: 0.0, line: Line::empty(), end: 0 }];
        pred: 0,
        total: 0.0,
        line: line(engine, p, 0..0, Breakpoint::Mandatory, None),
    }];
    let mut active = 0;
    let mut prev_end = 0;
@ -200,7 +232,7 @@ fn linebreak_optimized_bounded<'a>(
        let mut line_lower_bound = None;
        for (pred_index, pred) in table.iter().enumerate().skip(active) {
-            let start = pred.line.end;
+            let start = pred.end;
            let unbreakable = prev_end == start;
            // If the minimum cost we've established for the line is already
@ -221,6 +253,7 @@ fn linebreak_optimized_bounded<'a>(
                width,
                &pred.line,
                &attempt,
                end,
                breakpoint,
                unbreakable,
            );
@ -263,7 +296,7 @@ fn linebreak_optimized_bounded<'a>(
            // If this attempt is better than what we had before, take it!
            if best.as_ref().map_or(true, |best| best.total >= total) {
-                best = Some(Entry { pred: pred_index, total, line: attempt });
+                best = Some(Entry { pred: pred_index, total, line: attempt, end });
            }
        }
@ -282,7 +315,7 @@ fn linebreak_optimized_bounded<'a>(
    let mut idx = table.len() - 1;
    // This should only happen if our bound was faulty. Which shouldn't happen!
-    if table[idx].line.end != p.bidi.text.len() {
+    if table[idx].end != p.text.len() {
        #[cfg(debug_assertions)]
        panic!("bounded paragraph layout is incomplete");
@ -340,7 +373,7 @@ fn linebreak_optimized_approximate(
    let mut prev_end = 0;
    breakpoints(p, |end, breakpoint| {
-        let at_end = end == p.bidi.text.len();
+        let at_end = end == p.text.len();
        // Find the optimal predecessor.
        let mut best: Option<Entry> = None;
@ -362,7 +395,7 @@ fn linebreak_optimized_approximate(
            // make it the desired width. We trim at the end to not take into
            // account trailing spaces. This is, again, only an approximation of
            // the real behaviour of `line`.
-            let trimmed_end = start + p.bidi.text[start..end].trim_end().len();
+            let trimmed_end = start + p.text[start..end].trim_end().len();
            let line_ratio = raw_ratio(
                p,
                width,
@ -428,8 +461,9 @@ fn linebreak_optimized_approximate(
        idx = table[idx].pred;
    }
    let mut pred = Line::empty();
    let mut start = 0;
    let mut exact = 0.0;
    let mut pred = line(engine, p, 0..0, Breakpoint::Mandatory, None);
    // The cost that we optimized was only an approximate cost, so the layout we
    // got here is only likely to be good, not guaranteed to be the best. We now
@ -438,26 +472,36 @@ fn linebreak_optimized_approximate(
    for idx in indices.into_iter().rev() {
        let Entry { end, breakpoint, unbreakable, .. } = table[idx];
        let start = pred.end;
        let attempt = line(engine, p, start..end, breakpoint, Some(&pred));
-        let (_, line_cost) =
+        let (_, line_cost) = ratio_and_cost(
-            ratio_and_cost(p, metrics, width, &pred, &attempt, breakpoint, unbreakable);
+            p,
            metrics,
            width,
            &pred,
            &attempt,
            end,
            breakpoint,
            unbreakable,
        );
        exact += line_cost;
        pred = attempt;
        start = end;
        exact += line_cost;
    }
    exact
 }
 /// Compute the stretch ratio and cost of a line.
 #[allow(clippy::too_many_arguments)]
 fn ratio_and_cost(
    p: &Preparation,
    metrics: &CostMetrics,
    available_width: Abs,
    pred: &Line,
    attempt: &Line,
    end: usize,
    breakpoint: Breakpoint,
    unbreakable: bool,
 ) -> (f64, Cost) {
@ -474,7 +518,7 @@ fn ratio_and_cost(
        metrics,
        breakpoint,
        ratio,
-        attempt.end == p.bidi.text.len(),
+        end == p.text.len(),
        attempt.justify,
        unbreakable,
        pred.dash.is_some() && attempt.dash.is_some(),
@ -587,7 +631,14 @@ fn raw_cost(
 /// code much simpler and the consumers of this function don't need the
 /// composability and flexibility of external iteration anyway.
 fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint)) {
-    let text = p.bidi.text;
+    let text = p.text;
    // Single breakpoint at the end for empty text.
    if text.is_empty() {
        f(0, Breakpoint::Mandatory);
        return;
    }
    let hyphenate = p.hyphenate != Some(false);
    let lb = LINEBREAK_DATA.as_borrowed();
    let segmenter = match p.lang {
@ -747,8 +798,9 @@ fn linebreak_link(link: &str, mut f: impl FnMut(usize)) {
 fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
    p.hyphenate
        .or_else(|| {
-            let shaped = p.find(offset)?.text()?;
+            let (_, item) = p.get(offset);
-            Some(TextElem::hyphenate_in(shaped.styles))
+            let styles = item.text()?.styles;
            Some(TextElem::hyphenate_in(styles))
        })
        .unwrap_or(false)
 }
@ -756,8 +808,9 @@ fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
 /// The text language at the given offset.
 fn lang_at(p: &Preparation, offset: usize) -> Option<hypher::Lang> {
    let lang = p.lang.or_else(|| {
-        let shaped = p.find(offset)?.text()?;
+        let (_, item) = p.get(offset);
-        Some(TextElem::lang_in(shaped.styles))
+        let styles = item.text()?.styles;
        Some(TextElem::lang_in(styles))
    })?;
    let bytes = lang.as_str().as_bytes().try_into().ok()?;
@ -813,17 +866,14 @@ struct Estimates {
 impl Estimates {
    /// Compute estimations for approximate Knuth-Plass layout.
    fn compute(p: &Preparation) -> Self {
-        let cap = p.bidi.text.len();
+        let cap = p.text.len();
        let mut widths = CummulativeVec::with_capacity(cap);
        let mut stretchability = CummulativeVec::with_capacity(cap);
        let mut shrinkability = CummulativeVec::with_capacity(cap);
        let mut justifiables = CummulativeVec::with_capacity(cap);
-        for item in &p.items {
+        for (range, item) in p.items.iter() {
            let textual_len = item.textual_len();
            let after = widths.len() + textual_len;
            if let Item::Text(shaped) = item {
                for g in shaped.glyphs.iter() {
                    let byte_len = g.range.len();
@ -835,13 +885,13 @@ impl Estimates {
                    justifiables.push(byte_len, g.is_justifiable() as usize);
                }
            } else {
-                widths.push(textual_len, item.width());
+                widths.push(range.len(), item.natural_width());
            }
-            widths.adjust(after);
+            widths.adjust(range.end);
-            stretchability.adjust(after);
+            stretchability.adjust(range.end);
-            shrinkability.adjust(after);
+            shrinkability.adjust(range.end);
-            justifiables.adjust(after);
+            justifiables.adjust(range.end);
        }
        Self {
@ -871,11 +921,6 @@ where
        Self { total, summed }
    }
    /// Get the covered byte length.
    fn len(&self) -> usize {
        self.summed.len()
    }
    /// Adjust to cover the given byte length.
    fn adjust(&mut self, len: usize) {
        self.summed.resize(len, self.total);
--- a/crates/typst/src/layout/inline/prepare.rs
+++ b/crates/typst/src/layout/inline/prepare.rs
@ -13,16 +13,24 @@ use crate::text::{Costs, Lang, TextElem};
 /// Only when a line break falls onto a text index that is not safe-to-break per
 /// rustybuzz, we have to reshape that portion.
 pub struct Preparation<'a> {
    /// The paragraph's full text.
    pub text: &'a str,
    /// Bidirectional text embedding levels for the paragraph.
-    pub bidi: BidiInfo<'a>,
+    ///
    /// This is `None` if the paragraph is BiDi-uniform (all the base direction).
    pub bidi: Option<BidiInfo<'a>>,
    /// Text runs, spacing and layouted elements.
-    pub items: Vec<Item<'a>>,
+    pub items: Vec<(Range, Item<'a>)>,
    /// Maps from byte indices to item indices.
    pub indices: Vec<usize>,
    /// The span mapper.
    pub spans: SpanMapper,
    /// Whether to hyphenate if it's the same for all children.
    pub hyphenate: Option<bool>,
    /// Costs for various layout decisions.
    pub costs: Costs,
    /// The dominant direction.
    pub dir: Dir,
    /// The text language if it's the same for all children.
    pub lang: Option<Lang>,
    /// The paragraph's resolved horizontal alignment.
@ -44,46 +52,18 @@ pub struct Preparation<'a> {
 }
 impl<'a> Preparation<'a> {
-    /// Find the item that contains the given `text_offset`.
+    /// Get the item that contains the given `text_offset`.
-    pub fn find(&self, text_offset: usize) -> Option<&Item<'a>> {
+    pub fn get(&self, offset: usize) -> &(Range, Item<'a>) {
-        let mut cursor = 0;
+        let idx = self.indices.get(offset).copied().unwrap_or(0);
-        for item in &self.items {
+        &self.items[idx]
            let end = cursor + item.textual_len();
            if (cursor..end).contains(&text_offset) {
                return Some(item);
            }
            cursor = end;
        }
        None
    }
-    /// Return the items that intersect the given `text_range`.
+    /// Iterate over the items that intersect the given `sliced` range.
-    ///
+    pub fn slice(&self, sliced: Range) -> impl Iterator<Item = &(Range, Item<'a>)> {
-    /// Returns the expanded range around the items and the items.
+        let start = self.indices.get(sliced.start).copied().unwrap_or(0);
-    pub fn slice(&self, text_range: Range) -> (Range, &[Item<'a>]) {
+        self.items[start..].iter().take_while(move |(range, _)| {
-        let mut cursor = 0;
+            range.start < sliced.end || range.end <= sliced.end
-        let mut start = 0;
+        })
        let mut end = 0;
        let mut expanded = text_range.clone();
        for (i, item) in self.items.iter().enumerate() {
            if cursor <= text_range.start {
                start = i;
                expanded.start = cursor;
            }
            let len = item.textual_len();
            if cursor < text_range.end || cursor + len <= text_range.end {
                end = i + 1;
                expanded.end = cursor + len;
            } else {
                break;
            }
            cursor += len;
        }
        (expanded, &self.items[start..end])
    }
 }
@ -99,42 +79,57 @@ pub fn prepare<'a>(
    spans: SpanMapper,
    styles: StyleChain<'a>,
 ) -> SourceResult<Preparation<'a>> {
-    let bidi = BidiInfo::new(
+    let dir = TextElem::dir_in(styles);
-        text,
+    let default_level = match dir {
-        match TextElem::dir_in(styles) {
+        Dir::RTL => BidiLevel::rtl(),
-            Dir::LTR => Some(BidiLevel::ltr()),
+        _ => BidiLevel::ltr(),
-            Dir::RTL => Some(BidiLevel::rtl()),
+    };
-            _ => None,
+
-        },
+    let bidi = BidiInfo::new(text, Some(default_level));
-    );
+    let is_bidi = bidi
        .levels
        .iter()
        .any(|level| level.is_ltr() != default_level.is_ltr());
    let mut cursor = 0;
    let mut items = Vec::with_capacity(segments.len());
    // Shape the text to finalize the items.
    for segment in segments {
-        let end = cursor + segment.textual_len();
+        let len = segment.textual_len();
        let end = cursor + len;
        let range = cursor..end;
        match segment {
            Segment::Text(_, styles) => {
-                shape_range(&mut items, engine, &bidi, cursor..end, &spans, styles);
+                shape_range(&mut items, engine, text, &bidi, range, styles);
            }
-            Segment::Item(item) => items.push(item),
+            Segment::Item(item) => items.push((range, item)),
        }
        cursor = end;
    }
    // Build the mapping from byte to item indices.
    let mut indices = Vec::with_capacity(text.len());
    for (i, (range, _)) in items.iter().enumerate() {
        indices.extend(range.clone().map(|_| i));
    }
    let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(styles).is_auto();
    if cjk_latin_spacing {
        add_cjk_latin_spacing(&mut items);
    }
    Ok(Preparation {
-        bidi,
+        text,
        bidi: is_bidi.then_some(bidi),
        items,
        indices,
        spans,
        hyphenate: children.shared_get(styles, TextElem::hyphenate_in),
        costs: TextElem::costs_in(styles),
        dir,
        lang: children.shared_get(styles, TextElem::lang_in),
        align: AlignElem::alignment_in(styles).resolve(styles).x,
        justify: ParElem::justify_in(styles),
@ -150,10 +145,14 @@ pub fn prepare<'a>(
 /// Add some spacing between Han characters and western characters. See
 /// Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition
 /// in Horizontal Written Mode
-fn add_cjk_latin_spacing(items: &mut [Item]) {
+fn add_cjk_latin_spacing(items: &mut [(Range, Item)]) {
-    let mut items = items.iter_mut().filter(|x| !matches!(x, Item::Tag(_))).peekable();
+    let mut items = items
        .iter_mut()
        .filter(|(_, x)| !matches!(x, Item::Tag(_)))
        .peekable();
    let mut prev: Option<&ShapedGlyph> = None;
-    while let Some(item) = items.next() {
+    while let Some((_, item)) = items.next() {
        let Some(text) = item.text_mut() else {
            prev = None;
            continue;
@ -168,7 +167,7 @@ fn add_cjk_latin_spacing(items: &mut [Item]) {
            let next = glyphs.peek().map(|n| n as _).or_else(|| {
                items
                    .peek()
-                    .and_then(|i| i.text())
+                    .and_then(|(_, i)| i.text())
                    .and_then(|shaped| shaped.glyphs.first())
            });
--- a/crates/typst/src/layout/inline/shaping.rs
+++ b/crates/typst/src/layout/inline/shaping.rs
@ -14,7 +14,6 @@ use super::{Item, Range, SpanMapper};
 use crate::engine::Engine;
 use crate::foundations::{Smart, StyleChain};
 use crate::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
 use crate::syntax::Span;
 use crate::text::{
    decorate, families, features, variant, Font, FontVariant, Glyph, Lang, Region,
    TextElem, TextItem,
@ -27,6 +26,7 @@ use crate::World;
 /// This type contains owned or borrowed shaped text runs, which can be
 /// measured, used to reshape substrings more quickly and converted into a
 /// frame.
 #[derive(Clone)]
 pub struct ShapedText<'a> {
    /// The start of the text in the full paragraph.
    pub base: usize,
@ -80,8 +80,6 @@ pub struct ShapedGlyph {
    pub safe_to_break: bool,
    /// The first char in this glyph's cluster.
    pub c: char,
    /// The source code location of the glyph and its byte offset within it.
    pub span: (Span, u16),
    /// Whether this glyph is justifiable for CJK scripts.
    pub is_justifiable: bool,
    /// The script of the glyph.
@ -214,6 +212,7 @@ impl<'a> ShapedText<'a> {
    pub fn build(
        &self,
        engine: &Engine,
        spans: &SpanMapper,
        justification_ratio: f64,
        extra_justification: Abs,
    ) -> Frame {
@ -268,7 +267,7 @@ impl<'a> ShapedText<'a> {
                    // We may not be able to reach the offset completely if
                    // it exceeds u16, but better to have a roughly correct
                    // span offset than nothing.
-                    let mut span = shaped.span;
+                    let mut span = spans.span_at(shaped.range.start);
                    span.1 = span.1.saturating_add(span_offset.saturating_as());
                    // |<---- a Glyph ---->|
@ -331,7 +330,7 @@ impl<'a> ShapedText<'a> {
    }
    /// Measure the top and bottom extent of this text.
-    fn measure(&self, engine: &Engine) -> (Abs, Abs) {
+    pub fn measure(&self, engine: &Engine) -> (Abs, Abs) {
        let mut top = Abs::zero();
        let mut bottom = Abs::zero();
@ -409,12 +408,7 @@ impl<'a> ShapedText<'a> {
    /// shaping process if possible.
    ///
    /// The text `range` is relative to the whole paragraph.
-    pub fn reshape(
+    pub fn reshape(&'a self, engine: &Engine, text_range: Range) -> ShapedText<'a> {
        &'a self,
        engine: &Engine,
        spans: &SpanMapper,
        text_range: Range,
    ) -> ShapedText<'a> {
        let text = &self.text[text_range.start - self.base..text_range.end - self.base];
        if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
            #[cfg(debug_assertions)]
@ -436,7 +430,6 @@ impl<'a> ShapedText<'a> {
                engine,
                text_range.start,
                text,
                spans,
                self.styles,
                self.dir,
                self.lang,
@ -445,6 +438,16 @@ impl<'a> ShapedText<'a> {
        }
    }
    /// Create a text run without any content that otherwise carries over
    /// the properties of this one.
    pub fn empty(&self) -> Self {
        Self {
            // No glyphs, no text and zero width.
            glyphs: Cow::Borrowed(&[]),
            text: "",
            width: Abs::zero(),
            // All remaining fields are taken from `self`.
            ..*self
        }
    }
    /// Push a hyphen to end of the text.
    pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) {
        self.insert_hyphen(engine, fallback, Side::Right)
@ -493,7 +496,6 @@ impl<'a> ShapedText<'a> {
                range,
                safe_to_break: true,
                c: '-',
                span: (Span::detached(), 0),
                is_justifiable: false,
                script: Script::Common,
            };
@ -592,11 +594,11 @@ impl Debug for ShapedText<'_> {
 /// Group a range of text by BiDi level and script, shape the runs and generate
 /// items for them.
 pub fn shape_range<'a>(
-    items: &mut Vec<Item<'a>>,
+    items: &mut Vec<(Range, Item<'a>)>,
    engine: &Engine,
    text: &'a str,
    bidi: &BidiInfo<'a>,
    range: Range,
    spans: &SpanMapper,
    styles: StyleChain<'a>,
 ) {
    let script = TextElem::script_in(styles);
@ -604,17 +606,9 @@ pub fn shape_range<'a>(
    let region = TextElem::region_in(styles);
    let mut process = |range: Range, level: BidiLevel| {
        let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
-        let shaped = shape(
+        let shaped =
-            engine,
+            shape(engine, range.start, &text[range.clone()], styles, dir, lang, region);
-            range.start,
+        items.push((range, Item::Text(shaped)));
            &bidi.text[range],
            spans,
            styles,
            dir,
            lang,
            region,
        );
        items.push(Item::Text(shaped));
    };
    let mut prev_level = BidiLevel::ltr();
@ -625,14 +619,14 @@ pub fn shape_range<'a>(
    // set (rather than inferred from the glyphs), we keep the script at an
    // unchanging `Script::Unknown` so that only level changes cause breaks.
    for i in range.clone() {
-        if !bidi.text.is_char_boundary(i) {
+        if !text.is_char_boundary(i) {
            continue;
        }
        let level = bidi.levels[i];
        let curr_script = match script {
            Smart::Auto => {
-                bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
+                text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
            }
            Smart::Custom(_) => Script::Unknown,
        };
@ -668,7 +662,6 @@ fn shape<'a>(
    engine: &Engine,
    base: usize,
    text: &'a str,
    spans: &SpanMapper,
    styles: StyleChain<'a>,
    dir: Dir,
    lang: Lang,
@ -677,7 +670,6 @@ fn shape<'a>(
    let size = TextElem::size_in(styles);
    let mut ctx = ShapingContext {
        engine,
        spans,
        size,
        glyphs: vec![],
        used: vec![],
@ -717,7 +709,6 @@ fn shape<'a>(
 /// Holds shaping results and metadata common to all shaped segments.
 struct ShapingContext<'a, 'v> {
    engine: &'a Engine<'v>,
    spans: &'a SpanMapper,
    glyphs: Vec<ShapedGlyph>,
    used: Vec<Font>,
    styles: StyleChain<'a>,
@ -830,7 +821,6 @@ fn shape_segment<'a>(
                range: start..end,
                safe_to_break: !info.unsafe_to_break(),
                c,
                span: ctx.spans.span_at(start),
                is_justifiable: is_justifiable(
                    c,
                    script,
@ -921,7 +911,6 @@ fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
            range: start..end,
            safe_to_break: true,
            c,
            span: ctx.spans.span_at(start),
            is_justifiable: is_justifiable(
                c,
                script,
--- a/crates/typst/src/model/par.rs
+++ b/crates/typst/src/model/par.rs
@ -18,9 +18,9 @@ use crate::realize::StyleVec;
 ///
 /// # Example
 /// ```example
 /// #show par: set block(spacing: 0.65em)
 /// #set par(
 ///   first-line-indent: 1em,
 ///   spacing: 0.65em,
 ///   justify: true,
 /// )
 ///
@ -115,8 +115,7 @@ pub struct ParElem {
    /// By typographic convention, paragraph breaks are indicated either by some
    /// space between paragraphs or by indented first lines. Consider reducing
    /// the [paragraph spacing]($block.spacing) to the [`leading`]($par.leading)
-    /// when using this property (e.g. using
+    /// when using this property (e.g. using `[#set par(spacing: 0.65em)]`).
    /// `[#show par: set block(spacing: 0.65em)]`).
    #[ghost]
    pub first_line_indent: Length,
--- a/docs/guides/guide-for-latex-users.md
+++ b/docs/guides/guide-for-latex-users.md
@ -593,10 +593,9 @@ The example below
 ```typ
 #set page(margin: 1.75in)
-#set par(leading: 0.55em, first-line-indent: 1.8em, justify: true)
+#set par(leading: 0.55em, spacing: 0.55em, first-line-indent: 1.8em, justify: true)
 #set text(font: "New Computer Modern")
 #show raw: set text(font: "New Computer Modern Mono")
 #show par: set block(spacing: 0.55em)
 #show heading: set block(above: 1.4em, below: 1em)
 ```
--- a/docs/reference/syntax.md
+++ b/docs/reference/syntax.md
@ -120,7 +120,7 @@ a table listing all syntax that is available in code mode:
 | Named function           | `{let f(x) = 2 * x}`          | [Function]($function)              |
 | Set rule                 | `{set text(14pt)}`            | [Styling]($styling/#set-rules)     |
 | Set-if rule              | `{set text(..) if .. }`       | [Styling]($styling/#set-rules)     |
-| Show-set rule            | `{show par: set block(..)}`   | [Styling]($styling/#show-rules)    |
+| Show-set rule            | `{show heading: set block(..)}` | [Styling]($styling/#show-rules)  |
 | Show rule with function  | `{show raw: it => {..}}`      | [Styling]($styling/#show-rules)    |
 | Show-everything rule     | `{show: columns.with(2)}`     | [Styling]($styling/#show-rules)    |
 | Context expression       | `{context text.lang}`         | [Context]($context)                |
--- a/tests/ref/bidi-whitespace-reset.png
+++ b/tests/ref/bidi-whitespace-reset.png
--- a/tests/ref/context-compatibility-locate.png
+++ b/tests/ref/context-compatibility-locate.png
--- a/tests/ref/eval-mode.png
+++ b/tests/ref/eval-mode.png
--- a/tests/ref/issue-3601-empty-raw.png
+++ b/tests/ref/issue-3601-empty-raw.png
--- a/tests/ref/issue-4278-par-trim-before-equation.png
+++ b/tests/ref/issue-4278-par-trim-before-equation.png
--- a/tests/ref/justify-basically-empty.png
+++ b/tests/ref/justify-basically-empty.png
--- a/tests/ref/par-metadata-after-trimmed-space.png
+++ b/tests/ref/par-metadata-after-trimmed-space.png
--- a/tests/ref/par-trailing-whitespace.png
+++ b/tests/ref/par-trailing-whitespace.png
--- a/tests/suite/foundations/version.typ
+++ b/tests/suite/foundations/version.typ
@ -4,7 +4,7 @@
 // Test version constructor.
 // Empty.
-#version()
+#test(array(version()), ())
 // Plain.
 #test(version(1, 2).major, 1)
--- a/tests/suite/layout/spacing.typ
+++ b/tests/suite/layout/spacing.typ
@ -47,14 +47,14 @@ Totally #h() ignored
 Hello #h(2cm, weak: true)
 --- issue-4087 ---
-// weak space at the end of the line would be removed.
+// Weak space at the end of the line is removed.
 This is the first line #h(2cm, weak: true) A new line
-// non-weak space would be consume a specified width and push next line.
+// Non-weak space consumes a specified width and pushes to next line.
 This is the first line #h(2cm, weak: false) A new line
-// similarly weak space at the beginning of the line would be removed.
+// Similarly, weak space at the beginning of the line is removed.
-This is the first line\ #h(2cm, weak: true) A new line
+This is the first line \ #h(2cm, weak: true) A new line
-// non-spacing, on the other hand, is not removed.
+// Non-weak-spacing, on the other hand, is not removed.
-This is the first line\ #h(2cm, weak: false) A new line
+This is the first line \ #h(2cm, weak: false) A new line
--- a/tests/suite/model/par.typ
+++ b/tests/suite/model/par.typ
@ -78,3 +78,22 @@ Welcome \ here. Does this work well?
 #set text(dir: rtl)
 لآن وقد أظلم الليل وبدأت النجوم
 تنضخ وجه الطبيعة التي أعْيَتْ من طول ما انبعثت في النهار
 --- par-trailing-whitespace ---
 // Ensure that trailing whitespace layouts as intended.
 #box(fill: aqua, " ")
 --- par-empty-metadata ---
 // Check that metadata still works in a zero length paragraph.
 #block(height: 0pt)[#""#metadata(false)<hi>]
 #context test(query(<hi>).first().value, false)
 --- par-metadata-after-trimmed-space ---
 // Ensure that metadata doesn't prevent trailing spaces from being trimmed.
 #set par(justify: true, linebreaks: "simple")
 #set text(hyphenate: false)
 Lorem ipsum dolor #metadata(none) nonumy eirmod tempor.
 --- issue-4278-par-trim-before-equation ---
 #set par(justify: true)
 #lorem(6) aa $a = c + b$