Split up paragraph layout into more files (#4443)

2025-07-19 02:22:53 +08:00 · 2024-06-24 20:33:41 +02:00 · 2024-06-24 20:33:41 +02:00 · e6b5314870
commit e6b5314870
parent e90c30903d
7 changed files with 1588 additions and 1546 deletions
--- a/crates/typst/src/layout/inline/collect.rs
+++ b/crates/typst/src/layout/inline/collect.rs
@ -0,0 +1,350 @@
+use super::*;
+use crate::diag::bail;
+use crate::foundations::{Packed, Resolve};
+use crate::introspection::{Tag, TagElem};
+use crate::layout::{
+    Abs, AlignElem, BoxElem, Dir, Fr, Frame, HElem, InlineElem, InlineItem, Sizing,
+    Spacing,
+};
+use crate::syntax::Span;
+use crate::text::{
+    LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem,
+};
+use crate::utils::Numeric;
+
+// The characters by which spacing, inline content and pins are replaced in the
+// paragraph's full text. Each replacement exists both as a string (to append
+// to the full text) and as a `char` with the same value.
+const SPACING_REPLACE: &str = " "; // Space
+const OBJ_REPLACE: &str = "\u{FFFC}"; // Object Replacement Character
+const SPACING_REPLACE_CHAR: char = ' ';
+const OBJ_REPLACE_CHAR: char = '\u{FFFC}';
+
+// Unicode BiDi control characters.
+const LTR_EMBEDDING: &str = "\u{202A}"; // U+202A LEFT-TO-RIGHT EMBEDDING
+const RTL_EMBEDDING: &str = "\u{202B}"; // U+202B RIGHT-TO-LEFT EMBEDDING
+const POP_EMBEDDING: &str = "\u{202C}"; // U+202C POP DIRECTIONAL FORMATTING
+const LTR_ISOLATE: &str = "\u{2066}"; // U+2066 LEFT-TO-RIGHT ISOLATE
+const POP_ISOLATE: &str = "\u{2069}"; // U+2069 POP DIRECTIONAL ISOLATE
+
+/// A prepared item in a paragraph layout.
+#[derive(Debug)]
+pub enum Item<'a> {
+    /// A shaped text run with consistent style and direction.
+    Text(ShapedText<'a>),
+    /// Absolute spacing between other items, and whether it is weak.
+    Absolute(Abs, bool),
+    /// Fractional spacing between other items. For a box with fractional
+    /// width, this also carries the box, its locator and its styles.
+    Fractional(Fr, Option<(&'a Packed<BoxElem>, Locator<'a>, StyleChain<'a>)>),
+    /// Layouted inline-level content.
+    Frame(Frame, StyleChain<'a>),
+    /// A tag.
+    Tag(&'a Tag),
+    /// An item that is invisible and needs to be skipped, e.g. a Unicode
+    /// isolate.
+    Skip(&'static str),
+}
+
+impl<'a> Item<'a> {
+    /// If this is a text item, return it.
+    pub fn text(&self) -> Option<&ShapedText<'a>> {
+        if let Self::Text(shaped) = self {
+            Some(shaped)
+        } else {
+            None
+        }
+    }
+
+    /// If this is a text item, return it mutably.
+    pub fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
+        if let Self::Text(shaped) = self {
+            Some(shaped)
+        } else {
+            None
+        }
+    }
+
+    /// The string that stands in for this item in the paragraph's full text:
+    /// the text itself for a text item, a replacement string for spacing and
+    /// frames, nothing for a tag, and the stored string for a skipped item.
+    pub fn textual(&self) -> &str {
+        match self {
+            Self::Text(shaped) => shaped.text,
+            Self::Absolute(..) | Self::Fractional(..) => SPACING_REPLACE,
+            Self::Frame(..) => OBJ_REPLACE,
+            Self::Tag(_) => "",
+            Self::Skip(s) => s,
+        }
+    }
+
+    /// The length in bytes of the item's textual representation.
+    pub fn textual_len(&self) -> usize {
+        let textual = self.textual();
+        textual.len()
+    }
+
+    /// The natural layouted width of the item. Fractional spacing, tags and
+    /// skipped items have no natural width.
+    pub fn width(&self) -> Abs {
+        match self {
+            Self::Text(shaped) => shaped.width,
+            Self::Absolute(amount, _) => *amount,
+            Self::Frame(frame, _) => frame.width(),
+            Self::Fractional(..) | Self::Tag(_) | Self::Skip(_) => Abs::zero(),
+        }
+    }
+}
+
+/// Either a finished item or a stretch of text that still has to be shaped.
+/// Shaping is deferred until all items are collected because BiDi can only be
+/// computed then, and shape runs must be split at level boundaries.
+#[derive(Debug)]
+pub enum Segment<'a> {
+    /// One or multiple collapsed text children. Stores how long the segment is
+    /// (in bytes of the full text string).
+    Text(usize, StyleChain<'a>),
+    /// An already prepared item.
+    Item(Item<'a>),
+}
+
+impl Segment<'_> {
+    /// The number of bytes this segment occupies in the full text.
+    pub fn textual_len(&self) -> usize {
+        match *self {
+            Self::Text(len, _) => len,
+            Self::Item(ref item) => item.textual_len(),
+        }
+    }
+}
+
+/// Collects all text of the paragraph into one string and a collection of
+/// segments that correspond to pieces of that string. This also performs
+/// string-level preprocessing like case transformations.
+///
+/// Returns the full text, the segments, and a [`SpanMapper`] that records for
+/// each processed child how many bytes it contributed to the full text and
+/// which span it came from.
+///
+/// Fails if laying out an inline element or a box fails, or if a child is not
+/// one of the element kinds handled below.
+#[typst_macros::time]
+pub fn collect<'a>(
+    children: &'a StyleVec,
+    engine: &mut Engine<'_>,
+    locator: Locator<'a>,
+    styles: &'a StyleChain<'a>,
+    region: Size,
+    consecutive: bool,
+) -> SourceResult<(String, Vec<Segment<'a>>, SpanMapper)> {
+    // Capacity: one segment per child plus the two optional indent items
+    // pushed below.
+    let mut collector = Collector::new(2 + children.len());
+    let mut iter = children.chain(styles).peekable();
+    let mut locator = locator.split();
+
+    // The first-line indent is only inserted when `consecutive` is set and the
+    // horizontal alignment equals the start of the text direction.
+    let first_line_indent = ParElem::first_line_indent_in(*styles);
+    if !first_line_indent.is_zero()
+        && consecutive
+        && AlignElem::alignment_in(*styles).resolve(*styles).x
+            == TextElem::dir_in(*styles).start().into()
+    {
+        collector.push_item(Item::Absolute(first_line_indent.resolve(*styles), false));
+        // The spacing item adds its one-byte replacement to the full text, so
+        // map that byte to a detached span.
+        collector.spans.push(1, Span::detached());
+    }
+
+    // The hanging indent is inserted as negative absolute spacing.
+    let hang = ParElem::hanging_indent_in(*styles);
+    if !hang.is_zero() {
+        collector.push_item(Item::Absolute(-hang, false));
+        collector.spans.push(1, Span::detached());
+    }
+
+    let outer_dir = TextElem::dir_in(*styles);
+
+    while let Some((child, styles)) = iter.next() {
+        // Remember the text length so that the number of bytes this child
+        // contributes can be recorded in the span mapper afterwards.
+        let prev_len = collector.full.len();
+
+        if child.is::<SpaceElem>() {
+            collector.push_text(" ", styles);
+        } else if let Some(elem) = child.to_packed::<TextElem>() {
+            collector.build_text(styles, |full| {
+                let dir = TextElem::dir_in(styles);
+                if dir != outer_dir {
+                    // Insert "Explicit Directional Embedding".
+                    match dir {
+                        Dir::LTR => full.push_str(LTR_EMBEDDING),
+                        Dir::RTL => full.push_str(RTL_EMBEDDING),
+                        _ => {}
+                    }
+                }
+
+                if let Some(case) = TextElem::case_in(styles) {
+                    full.push_str(&case.apply(elem.text()));
+                } else {
+                    full.push_str(elem.text());
+                }
+
+                if dir != outer_dir {
+                    // Insert "Pop Directional Formatting".
+                    full.push_str(POP_EMBEDDING);
+                }
+            });
+        } else if let Some(elem) = child.to_packed::<HElem>() {
+            let amount = elem.amount();
+            if amount.is_zero() {
+                // Nothing was added to the full text, so no span entry is
+                // recorded for this child either.
+                continue;
+            }
+
+            collector.push_item(match amount {
+                Spacing::Fr(fr) => Item::Fractional(*fr, None),
+                Spacing::Rel(rel) => Item::Absolute(
+                    rel.resolve(styles).relative_to(region.x),
+                    elem.weak(styles),
+                ),
+            });
+        } else if let Some(elem) = child.to_packed::<LinebreakElem>() {
+            // A justified line break becomes U+2028, a regular one a newline.
+            collector
+                .push_text(if elem.justify(styles) { "\u{2028}" } else { "\n" }, styles);
+        } else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
+            let double = elem.double(styles);
+            if elem.enabled(styles) {
+                let quotes = SmartQuotes::new(
+                    elem.quotes(styles),
+                    TextElem::lang_in(styles),
+                    TextElem::region_in(styles),
+                    elem.alternative(styles),
+                );
+                // Determine a character that represents the next child; it is
+                // handed to the quoter together with the quote kind.
+                let peeked = iter.peek().and_then(|(child, _)| {
+                    if let Some(elem) = child.to_packed::<TextElem>() {
+                        elem.text().chars().next()
+                    } else if child.is::<SmartQuoteElem>() {
+                        Some('"')
+                    } else if child.is::<SpaceElem>()
+                        || child.is::<HElem>()
+                        || child.is::<LinebreakElem>()
+                        // This is a temporary hack. We should rather skip these
+                        // and peek at the next child.
+                        || child.is::<TagElem>()
+                    {
+                        Some(SPACING_REPLACE_CHAR)
+                    } else {
+                        Some(OBJ_REPLACE_CHAR)
+                    }
+                });
+
+                let quote = collector.quoter.quote(&quotes, double, peeked);
+                collector.push_quote(quote, styles);
+            } else {
+                // Smart quotes are disabled: emit the plain ASCII quote.
+                collector.push_text(if double { "\"" } else { "'" }, styles);
+            }
+        } else if let Some(elem) = child.to_packed::<InlineElem>() {
+            // The items of an inline element are enclosed in an LTR isolate.
+            collector.push_item(Item::Skip(LTR_ISOLATE));
+
+            for item in elem.layout(engine, locator.next(&elem.span()), styles, region)? {
+                match item {
+                    InlineItem::Space(space, weak) => {
+                        collector.push_item(Item::Absolute(space, weak));
+                    }
+                    InlineItem::Frame(frame) => {
+                        collector.push_item(Item::Frame(frame, styles));
+                    }
+                }
+            }
+
+            collector.push_item(Item::Skip(POP_ISOLATE));
+        } else if let Some(elem) = child.to_packed::<BoxElem>() {
+            let loc = locator.next(&elem.span());
+            if let Sizing::Fr(v) = elem.width(styles) {
+                // A box with fractional width is not laid out here; the box,
+                // its locator and its styles are stored in the item instead.
+                collector.push_item(Item::Fractional(v, Some((elem, loc, styles))));
+            } else {
+                let frame = elem.layout(engine, loc, styles, region)?;
+                collector.push_item(Item::Frame(frame, styles));
+            }
+        } else if let Some(elem) = child.to_packed::<TagElem>() {
+            collector.push_item(Item::Tag(&elem.tag));
+        } else {
+            bail!(child.span(), "unexpected paragraph child");
+        };
+
+        // Map the bytes this child added to the full text back to its span.
+        let len = collector.full.len() - prev_len;
+        collector.spans.push(len, child.span());
+    }
+
+    Ok((collector.full, collector.segments, collector.spans))
+}
+
+/// Accumulates the paragraph's full text together with the segments, span
+/// mapping and smart-quote state that belong to it.
+struct Collector<'a> {
+    full: String,
+    segments: Vec<Segment<'a>>,
+    spans: SpanMapper,
+    quoter: SmartQuoter,
+}
+
+impl<'a> Collector<'a> {
+    /// Create an empty collector with room for `capacity` segments.
+    fn new(capacity: usize) -> Self {
+        let segments = Vec::with_capacity(capacity);
+        Self {
+            full: String::new(),
+            segments,
+            spans: SpanMapper::new(),
+            quoter: SmartQuoter::new(),
+        }
+    }
+
+    /// Append `text` to the full text and record it as a text segment.
+    fn push_text(&mut self, text: &str, styles: StyleChain<'a>) {
+        self.full.push_str(text);
+        let segment = Segment::Text(text.len(), styles);
+        self.push_segment(segment, false);
+    }
+
+    /// Let `f` append to the full text directly and record whatever it added
+    /// as a text segment.
+    fn build_text<F: FnOnce(&mut String)>(&mut self, styles: StyleChain<'a>, f: F) {
+        let before = self.full.len();
+        f(&mut self.full);
+        let added = self.full.len() - before;
+        self.push_segment(Segment::Text(added, styles), false);
+    }
+
+    /// Append a quote to the full text and record it as a text segment that
+    /// is flagged as a quote for the smart quoter.
+    fn push_quote(&mut self, quote: &str, styles: StyleChain<'a>) {
+        self.full.push_str(quote);
+        let segment = Segment::Text(quote.len(), styles);
+        self.push_segment(segment, true);
+    }
+
+    /// Append the item's textual representation to the full text and record
+    /// the item as a segment.
+    fn push_item(&mut self, item: Item<'a>) {
+        self.full.push_str(item.textual());
+        self.push_segment(Segment::Item(item), false);
+    }
+
+    /// Record a segment whose text is already part of the full text. A text
+    /// segment is merged into the preceding one if both have equal styles.
+    fn push_segment(&mut self, segment: Segment<'a>, is_quote: bool) {
+        // Tell the quoter about the character that now ends the full text.
+        if let Some(c) = self.full.chars().next_back() {
+            self.quoter.last(c, is_quote);
+        }
+
+        if let Segment::Text(len, styles) = &segment {
+            if let Some(Segment::Text(last_len, last_styles)) = self.segments.last_mut()
+            {
+                if *last_styles == *styles {
+                    *last_len += *len;
+                    return;
+                }
+            }
+        }
+
+        self.segments.push(segment);
+    }
+}
+
+/// Maps byte offsets back to spans. Stores consecutive `(length, span)`
+/// entries in the order in which they were pushed.
+#[derive(Default)]
+pub struct SpanMapper(Vec<(usize, Span)>);
+
+impl SpanMapper {
+    /// Create a new, empty span mapper.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Append an entry covering the next `len` bytes with the given span.
+    pub fn push(&mut self, len: usize, span: Span) {
+        let entry = (len, span);
+        self.0.push(entry);
+    }
+
+    /// Determine the span at the given byte offset, together with the offset
+    /// relative to the start of the matching entry (or 0 if that relative
+    /// offset does not fit into a `u16`).
+    ///
+    /// May return a detached span.
+    pub fn span_at(&self, offset: usize) -> (Span, u16) {
+        let mut start = 0;
+        for &(len, span) in &self.0 {
+            let end = start + len;
+            if start <= offset && offset < end {
+                let within = u16::try_from(offset - start).unwrap_or(0);
+                return (span, within);
+            }
+            start = end;
+        }
+        (Span::detached(), 0)
+    }
+}
--- a/crates/typst/src/layout/inline/finalize.rs
+++ b/crates/typst/src/layout/inline/finalize.rs
@ -0,0 +1,63 @@
+use super::*;
+use crate::layout::{Abs, Frame, Point};
+use crate::utils::Numeric;
+
+/// Turns the selected lines into frames.
+///
+/// Each line is committed to a frame of a common width. Afterwards, if the
+/// respective costs are positive, the first two and the last two frames are
+/// merged to prevent orphans and widows.
+#[typst_macros::time]
+pub fn finalize(
+    engine: &mut Engine,
+    p: &Preparation,
+    lines: &[Line],
+    styles: StyleChain,
+    region: Size,
+    expand: bool,
+) -> SourceResult<Fragment> {
+    // Determine the paragraph's width: Full width of the region if we should
+    // expand or there's fractional spacing, fit-to-width otherwise.
+    let fit_to_content = !region.x.is_finite()
+        || (!expand && lines.iter().all(|line| line.fr().is_zero()));
+    let width = if fit_to_content {
+        let widest = lines.iter().map(|line| line.width).max().unwrap_or_default();
+        region.x.min(p.hang + widest)
+    } else {
+        region.x
+    };
+
+    // Build one frame per line, stopping at the first error.
+    let shrink = ParElem::shrink_in(styles);
+    let mut frames: Vec<Frame> = Vec::with_capacity(lines.len());
+    for line in lines {
+        frames.push(commit(engine, p, line, width, region.y, shrink)?);
+    }
+
+    // Positive ratios enable prevention, while zero and negative ratios disable
+    // it.
+
+    // Prevent orphans by merging the second frame into the first.
+    if p.costs.orphan().get() > 0.0 && frames.len() >= 2 && !frames[1].is_empty() {
+        let second = frames.remove(1);
+        merge(&mut frames[0], second, p.leading);
+    }
+
+    // Prevent widows by merging the last frame into the one before it.
+    if p.costs.widow().get() > 0.0 {
+        let count = frames.len();
+        if count >= 2 && !frames[count - 2].is_empty() {
+            let last = frames.pop().unwrap();
+            merge(frames.last_mut().unwrap(), last, p.leading);
+        }
+    }
+
+    Ok(Fragment::frames(frames))
+}
+
+/// Merge two line frames: `second` is placed below `first`, separated by
+/// `leading`, and `first` grows to contain both.
+fn merge(first: &mut Frame, second: Frame, leading: Abs) {
+    let y = first.height() + leading;
+    let height = y + second.height();
+    first.push_frame(Point::with_y(y), second);
+    first.size_mut().y = height;
+}
--- a/crates/typst/src/layout/inline/line.rs
+++ b/crates/typst/src/layout/inline/line.rs
@ -0,0 +1,550 @@
+use unicode_bidi::BidiInfo;
+
+use super::*;
+use crate::engine::Engine;
+use crate::layout::{Abs, Em, Fr, Frame, FrameItem, Point};
+use crate::text::TextElem;
+use crate::utils::Numeric;
+
+/// A layouted line, consisting of a sequence of layouted paragraph items that
+/// are mostly borrowed from the preparation phase. This type enables you to
+/// measure the size of a line in a range before committing to building the
+/// line's frame.
+///
+/// At most two paragraph items must be created individually for this line: The
+/// first and last one since they may be broken apart by the start or end of the
+/// line, respectively. But even those can partially reuse previous results when
+/// the break index is safe-to-break per rustybuzz.
+pub struct Line<'a> {
+    /// Bidi information about the paragraph.
+    pub bidi: &'a BidiInfo<'a>,
+    /// The trimmed range the line spans in the paragraph.
+    pub trimmed: Range,
+    /// The untrimmed end where the line ends.
+    pub end: usize,
+    /// A reshaped text item if the line sliced up a text item at the start.
+    pub first: Option<Item<'a>>,
+    /// Inner items which don't need to be reprocessed.
+    pub inner: &'a [Item<'a>],
+    /// A reshaped text item if the line sliced up a text item at the end. If
+    /// there is only one text item, this takes precedence over `first`.
+    pub last: Option<Item<'a>>,
+    /// The width of the line.
+    pub width: Abs,
+    /// Whether the line should be justified.
+    pub justify: bool,
+    /// Whether the line ends with a hyphen or dash, either naturally or through
+    /// hyphenation.
+    pub dash: Option<Dash>,
+}
+
+impl<'a> Line<'a> {
+    /// Iterate over the line's items: the optional first item, then the inner
+    /// items, then the optional last item.
+    pub fn items(&self) -> impl Iterator<Item = &Item<'a>> {
+        let head = self.first.iter();
+        let tail = self.last.iter();
+        head.chain(self.inner.iter()).chain(tail)
+    }
+
+    /// Return items that intersect the given `text_range`.
+    pub fn slice(&self, text_range: Range) -> impl Iterator<Item = &Item<'a>> {
+        // Byte position in the paragraph text at which the current item starts.
+        let mut pos = self.trimmed.start;
+        let mut first = 0;
+        let mut past_last = 0;
+
+        for (idx, item) in self.items().enumerate() {
+            if pos <= text_range.start {
+                first = idx;
+            }
+
+            let next = pos + item.textual_len();
+            let intersects = pos < text_range.end || next <= text_range.end;
+            if !intersects {
+                break;
+            }
+
+            past_last = idx + 1;
+            pos = next;
+        }
+
+        self.items().skip(first).take(past_last - first)
+    }
+
+    /// How many glyphs are in the text where we can insert additional
+    /// space when encountering underfull lines.
+    pub fn justifiables(&self) -> usize {
+        let total: usize = self
+            .items()
+            .filter_map(Item::text)
+            .map(|shaped| shaped.justifiables())
+            .sum();
+
+        // CJK character at line end should not be adjusted.
+        let ends_with_cjk = self
+            .items()
+            .last()
+            .and_then(Item::text)
+            .is_some_and(|shaped| shaped.cjk_justifiable_at_last());
+
+        if ends_with_cjk {
+            total - 1
+        } else {
+            total
+        }
+    }
+
+    /// How much the line can stretch: the summed stretchability of its text
+    /// items.
+    pub fn stretchability(&self) -> Abs {
+        let texts = self.items().filter_map(Item::text);
+        texts.map(|shaped| shaped.stretchability()).sum()
+    }
+
+    /// How much the line can shrink: the summed shrinkability of its text
+    /// items.
+    pub fn shrinkability(&self) -> Abs {
+        let texts = self.items().filter_map(Item::text);
+        texts.map(|shaped| shaped.shrinkability()).sum()
+    }
+
+    /// The sum of fractions in the line.
+    pub fn fr(&self) -> Fr {
+        self.items()
+            .filter_map(|item| {
+                if let Item::Fractional(fr, _) = item {
+                    Some(*fr)
+                } else {
+                    None
+                }
+            })
+            .sum()
+    }
+}
+
+/// A dash at the end of a line. Which variant applies is determined from the
+/// line's trimmed text and its breakpoint when the line is created.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Dash {
+    /// A hyphen added to break a word. Also used when the line ends with a
+    /// soft hyphen (U+00AD).
+    SoftHyphen,
+    /// Regular hyphen, present in a compound word, e.g. beija-flor.
+    HardHyphen,
+    /// An em dash.
+    Long,
+    /// An en dash.
+    Short,
+}
+
+/// Create a line which spans the given range.
+///
+/// - `p`: The prepared paragraph the line is taken from.
+/// - `range`: The byte range of the line in the paragraph's text. Its end is
+///   kept as the line's untrimmed end, while the range itself is trimmed.
+/// - `breakpoint`: The kind of break at the end of the range. A hyphen break
+///   appends a hyphen, a mandatory break disables justification.
+/// - `prepend_hyphen`: Whether a hyphen should be prepended to the first text
+///   of the line.
+///
+/// Text items that are cut by the start or end of the range are reshaped and
+/// stored in `first` / `last`; all other items are borrowed from `p`.
+pub fn line<'a>(
+    engine: &Engine,
+    p: &'a Preparation,
+    mut range: Range,
+    breakpoint: Breakpoint,
+    prepend_hyphen: bool,
+) -> Line<'a> {
+    let end = range.end;
+    let mut justify =
+        p.justify && end < p.bidi.text.len() && breakpoint != Breakpoint::Mandatory;
+
+    if range.is_empty() {
+        return Line {
+            bidi: &p.bidi,
+            end,
+            trimmed: range,
+            first: None,
+            inner: &[],
+            last: None,
+            width: Abs::zero(),
+            justify,
+            dash: None,
+        };
+    }
+
+    // Slice out the relevant items. `expanded` is the text range covered by
+    // the sliced items and must be kept in sync whenever items are dropped.
+    let (mut expanded, mut inner) = p.slice(range.clone());
+    let mut width = Abs::zero();
+
+    // Weak space (`Absolute(_, true)`) is removed at the end of the line
+    while let Some((Item::Absolute(_, true), before)) = inner.split_last() {
+        inner = before;
+        range.end -= 1;
+        expanded.end -= 1;
+    }
+    // Weak space (`Absolute(_, true)`) is removed at the beginning of the line
+    while let Some((Item::Absolute(_, true), after)) = inner.split_first() {
+        inner = after;
+        range.start += 1;
+        // The dropped item sat at the front, so the covered range starts one
+        // byte later; its end is unaffected.
+        expanded.start += 1;
+    }
+
+    // Reshape the last item if it's split in half or hyphenated.
+    let mut last = None;
+    let mut dash = None;
+    if let Some((Item::Text(shaped), before)) = inner.split_last() {
+        // Compute the range we want to shape, trimming whitespace at the
+        // end of the line.
+        let base = expanded.end - shaped.text.len();
+        let start = range.start.max(base);
+        let text = &p.bidi.text[start..range.end];
+        // U+200B ZERO WIDTH SPACE is used to provide a line break opportunity,
+        // we want to trim it too.
+        let trimmed = text.trim_end().trim_end_matches('\u{200B}');
+        range.end = start + trimmed.len();
+
+        // Deal with hyphens, dashes and justification.
+        let shy = trimmed.ends_with('\u{ad}');
+        let hyphen = breakpoint == Breakpoint::Hyphen;
+        dash = if hyphen || shy {
+            Some(Dash::SoftHyphen)
+        } else if trimmed.ends_with('-') {
+            Some(Dash::HardHyphen)
+        } else if trimmed.ends_with('–') {
+            Some(Dash::Short)
+        } else if trimmed.ends_with('—') {
+            Some(Dash::Long)
+        } else {
+            None
+        };
+        // A line ending in U+2028 is justified regardless of the checks above.
+        justify |= text.ends_with('\u{2028}');
+
+        // Deal with CJK punctuation at line ends.
+        let gb_style = cjk_punct_style(shaped.lang, shaped.region);
+        let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
+            || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));
+
+        // Usually, we don't want to shape an empty string because:
+        // - We don't want the height of trimmed whitespace in a different font
+        //   to be considered for the line height.
+        // - Even if it's in the same font, its unnecessary.
+        //
+        // There is one exception though. When the whole line is empty, we need
+        // the shaped empty string to make the line the appropriate height. That
+        // is the case exactly if the string is empty and there are no other
+        // items in the line.
+        if hyphen
+            || start + shaped.text.len() > range.end
+            || maybe_adjust_last_glyph
+            || prepend_hyphen
+        {
+            if hyphen || start < range.end || before.is_empty() {
+                let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end);
+                if hyphen || shy {
+                    reshaped.push_hyphen(engine, p.fallback);
+                }
+
+                if let Some(last_glyph) = reshaped.glyphs.last() {
+                    if last_glyph.is_cjk_left_aligned_punctuation(gb_style) {
+                        // If the last glyph is a CJK punctuation, we want to
+                        // shrink it. See Requirements for Chinese Text Layout,
+                        // Section 3.1.6.3 Compression of punctuation marks at
+                        // line start or line end
+                        let shrink_amount = last_glyph.shrinkability().1;
+                        let punct = reshaped.glyphs.to_mut().last_mut().unwrap();
+                        punct.shrink_right(shrink_amount);
+                        reshaped.width -= shrink_amount.at(reshaped.size);
+                    } else if p.cjk_latin_spacing
+                        && last_glyph.is_cj_script()
+                        && (last_glyph.x_advance - last_glyph.x_offset) > Em::one()
+                    {
+                        // If the last glyph is a CJK character adjusted by
+                        // [`add_cjk_latin_spacing`], restore the original
+                        // width.
+                        let shrink_amount =
+                            last_glyph.x_advance - last_glyph.x_offset - Em::one();
+                        let glyph = reshaped.glyphs.to_mut().last_mut().unwrap();
+                        glyph.x_advance -= shrink_amount;
+                        glyph.adjustability.shrinkability.1 = Em::zero();
+                        reshaped.width -= shrink_amount.at(reshaped.size);
+                    }
+                }
+
+                width += reshaped.width;
+                last = Some(Item::Text(reshaped));
+            }
+
+            // The last item is now represented by `last` (or dropped), so it
+            // must not be counted among the inner items again.
+            inner = before;
+        }
+    }
+
+    // Deal with CJ characters at line starts.
+    let text = &p.bidi.text[range.start..end];
+    let maybe_adjust_first_glyph = text.starts_with(BEGIN_PUNCT_PAT)
+        || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script));
+
+    // Reshape the start item if it's split in half.
+    let mut first = None;
+    if let Some((Item::Text(shaped), after)) = inner.split_first() {
+        // Compute the range we want to shape.
+        let base = expanded.start;
+        let end = range.end.min(base + shaped.text.len());
+
+        // Reshape if necessary.
+        if range.start + shaped.text.len() > end
+            || maybe_adjust_first_glyph
+            || prepend_hyphen
+        {
+            // If the range is empty, we don't want to push an empty text item.
+            if range.start < end {
+                let reshaped = shaped.reshape(engine, &p.spans, range.start..end);
+                width += reshaped.width;
+                first = Some(Item::Text(reshaped));
+            }
+
+            inner = after;
+        }
+    }
+
+    if prepend_hyphen {
+        // Prefer the reshaped first item; fall back to the last one, which
+        // takes its place when the line has only one text item.
+        let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
+        if let Some(reshaped) = reshaped {
+            let width_before = reshaped.width;
+            reshaped.prepend_hyphen(engine, p.fallback);
+            width += reshaped.width - width_before;
+        }
+    }
+
+    if maybe_adjust_first_glyph {
+        let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
+        if let Some(reshaped) = reshaped {
+            if let Some(first_glyph) = reshaped.glyphs.first() {
+                if first_glyph.is_cjk_right_aligned_punctuation() {
+                    // If the first glyph is a CJK punctuation, we want to
+                    // shrink it.
+                    let shrink_amount = first_glyph.shrinkability().0;
+                    let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
+                    glyph.shrink_left(shrink_amount);
+                    let amount_abs = shrink_amount.at(reshaped.size);
+                    reshaped.width -= amount_abs;
+                    width -= amount_abs;
+                } else if p.cjk_latin_spacing
+                    && first_glyph.is_cj_script()
+                    && first_glyph.x_offset > Em::zero()
+                {
+                    // If the first glyph is a CJK character adjusted by
+                    // [`add_cjk_latin_spacing`], restore the original width.
+                    let shrink_amount = first_glyph.x_offset;
+                    let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
+                    glyph.x_advance -= shrink_amount;
+                    glyph.x_offset = Em::zero();
+                    glyph.adjustability.shrinkability.0 = Em::zero();
+                    let amount_abs = shrink_amount.at(reshaped.size);
+                    reshaped.width -= amount_abs;
+                    width -= amount_abs;
+                }
+            }
+        }
+    }
+
+    // Measure the inner items.
+    for item in inner {
+        width += item.width();
+    }
+
+    Line {
+        bidi: &p.bidi,
+        trimmed: range,
+        end,
+        first,
+        inner,
+        last,
+        width,
+        justify,
+        dash,
+    }
+}
+
+/// Commit to a line and build its frame.
+///
+/// - `width`: The width of the resulting frame.
+/// - `full`: The height of the region in which boxes with fractional width
+///   are laid out.
+/// - `shrink`: Whether an overfull line may be shortened using the
+///   shrinkability of its text.
+///
+/// Fails if laying out a box with fractional width fails.
+pub fn commit(
+    engine: &mut Engine,
+    p: &Preparation,
+    line: &Line,
+    width: Abs,
+    full: Abs,
+    shrink: bool,
+) -> SourceResult<Frame> {
+    // The space left over after the line's content and the hanging indent.
+    let mut remaining = width - line.width - p.hang;
+    let mut offset = Abs::zero();
+
+    // Reorder the line from logical to visual order.
+    let (reordered, starts_rtl) = reorder(line);
+    if !starts_rtl {
+        offset += p.hang;
+    }
+
+    // Handle hanging punctuation to the left.
+    if let Some(Item::Text(text)) = reordered.first() {
+        if let Some(glyph) = text.glyphs.first() {
+            if !text.dir.is_positive()
+                && TextElem::overhang_in(text.styles)
+                && (reordered.len() > 1 || text.glyphs.len() > 1)
+            {
+                let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
+                offset -= amount;
+                remaining += amount;
+            }
+        }
+    }
+
+    // Handle hanging punctuation to the right.
+    if let Some(Item::Text(text)) = reordered.last() {
+        if let Some(glyph) = text.glyphs.last() {
+            if text.dir.is_positive()
+                && TextElem::overhang_in(text.styles)
+                && (reordered.len() > 1 || text.glyphs.len() > 1)
+            {
+                let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
+                remaining += amount;
+            }
+        }
+    }
+
+    // Determine how much additional space is needed. The justification_ratio is
+    // for the first step justification, extra_justification is for the last
+    // step. For more info on multi-step justification, see Procedures for
+    // Inter- Character Space Expansion in W3C document Chinese Layout
+    // Requirements.
+    let fr = line.fr();
+    let mut justification_ratio = 0.0;
+    let mut extra_justification = Abs::zero();
+
+    let shrinkability = line.shrinkability();
+    let stretch = line.stretchability();
+    if remaining < Abs::zero() && shrinkability > Abs::zero() && shrink {
+        // Attempt to reduce the length of the line, using shrinkability.
+        // The ratio is clamped so that it never goes below -1.
+        justification_ratio = (remaining / shrinkability).max(-1.0);
+        remaining = (remaining + shrinkability).min(Abs::zero());
+    } else if line.justify && fr.is_zero() {
+        // Attempt to increase the length of the line, using stretchability.
+        if stretch > Abs::zero() {
+            justification_ratio = (remaining / stretch).min(1.0);
+            remaining = (remaining - stretch).max(Abs::zero());
+        }
+
+        let justifiables = line.justifiables();
+        if justifiables > 0 && remaining > Abs::zero() {
+            // Underfull line, distribute the extra space.
+            extra_justification = remaining / justifiables as f64;
+            remaining = Abs::zero();
+        }
+    }
+
+    // The largest extent above and below the baseline over all frames.
+    let mut top = Abs::zero();
+    let mut bottom = Abs::zero();
+
+    // Build the frames and determine the height and baseline.
+    let mut frames = vec![];
+    for item in reordered {
+        // Records a frame at the current offset, advances the offset by the
+        // frame's width and updates the line's vertical extents.
+        let mut push = |offset: &mut Abs, frame: Frame| {
+            let width = frame.width();
+            top.set_max(frame.baseline());
+            bottom.set_max(frame.size().y - frame.baseline());
+            frames.push((*offset, frame));
+            *offset += width;
+        };
+
+        match item {
+            Item::Absolute(v, _) => {
+                offset += *v;
+            }
+            Item::Fractional(v, elem) => {
+                // This item's share of the remaining space.
+                let amount = v.share(fr, remaining);
+                if let Some((elem, loc, styles)) = elem {
+                    let region = Size::new(amount, full);
+                    let mut frame =
+                        elem.layout(engine, loc.relayout(), *styles, region)?;
+                    frame.post_process(*styles);
+                    frame.translate(Point::with_y(TextElem::baseline_in(*styles)));
+                    push(&mut offset, frame);
+                } else {
+                    offset += amount;
+                }
+            }
+            Item::Text(shaped) => {
+                let mut frame =
+                    shaped.build(engine, justification_ratio, extra_justification);
+                frame.post_process(shaped.styles);
+                push(&mut offset, frame);
+            }
+            Item::Frame(frame, styles) => {
+                let mut frame = frame.clone();
+                frame.post_process(*styles);
+                frame.translate(Point::with_y(TextElem::baseline_in(*styles)));
+                push(&mut offset, frame);
+            }
+            Item::Tag(tag) => {
+                // A tag gets a zero-sized frame that is recorded directly, so
+                // it affects neither the offset nor the vertical extents.
+                let mut frame = Frame::soft(Size::zero());
+                frame.push(Point::zero(), FrameItem::Tag((*tag).clone()));
+                frames.push((offset, frame));
+            }
+            Item::Skip(_) => {}
+        }
+    }
+
+    // Remaining space is distributed now.
+    if !fr.is_zero() {
+        remaining = Abs::zero();
+    }
+
+    let size = Size::new(width, top + bottom);
+    let mut output = Frame::soft(size);
+    output.set_baseline(top);
+
+    // Construct the line's frame.
+    for (offset, frame) in frames {
+        // Shift horizontally according to the alignment within the remaining
+        // space and vertically so that all baselines coincide.
+        let x = offset + p.align.position(remaining);
+        let y = top - frame.baseline();
+        output.push_frame(Point::new(x, y), frame);
+    }
+
+    Ok(output)
+}
+
+/// Return a line's items in visual order.
+///
+/// The second return value tells whether the first visual run has a
+/// right-to-left level. It is `false` for a line whose trimmed range is empty.
+///
+/// Panics if no BiDi paragraph contains the start of the line's trimmed range.
+fn reorder<'a>(line: &'a Line<'a>) -> (Vec<&Item<'a>>, bool) {
+    let mut reordered = vec![];
+
+    // The bidi crate doesn't like empty lines.
+    if line.trimmed.is_empty() {
+        return (line.slice(line.trimmed.clone()).collect(), false);
+    }
+
+    // Find the paragraph that contains the line.
+    let para = line
+        .bidi
+        .paragraphs
+        .iter()
+        .find(|para| para.range.contains(&line.trimmed.start))
+        .unwrap();
+
+    // Compute the reordered ranges in visual order (left to right).
+    let (levels, runs) = line.bidi.visual_runs(para, line.trimmed.clone());
+    let starts_rtl = levels.first().is_some_and(|level| level.is_rtl());
+
+    // Collect the reordered items.
+    for run in runs {
+        // Skip reset L1 runs because handling them would require reshaping
+        // again in some cases.
+        if line.bidi.levels[run.start] != levels[run.start] {
+            continue;
+        }
+
+        let prev = reordered.len();
+        reordered.extend(line.slice(run.clone()));
+
+        // Items of a right-to-left run are appended in logical order and then
+        // flipped in place.
+        if levels[run.start].is_rtl() {
+            reordered[prev..].reverse();
+        }
+    }
+
+    (reordered, starts_rtl)
+}
+
+/// How much a character should hang into the end margin, as a factor that
+/// the caller multiplies with the glyph's advance. Characters that are not
+/// listed do not hang at all.
+///
+/// For more discussion, see:
+/// <https://recoveringphysicist.com/21/>
+fn overhang(c: char) -> f64 {
+    if matches!(c, '–' | '—') {
+        // En and em dashes.
+        0.2
+    } else if c == '-' {
+        // Hyphen.
+        0.55
+    } else if matches!(c, '.' | ',') {
+        // Period and comma.
+        0.8
+    } else if matches!(c, ':' | ';') {
+        // Colon and semicolon.
+        0.3
+    } else if matches!(c, '\u{60C}' | '\u{6D4}') {
+        // Arabic
+        0.4
+    } else {
+        0.0
+    }
+}
--- a/crates/typst/src/layout/inline/linebreak.rs
+++ b/crates/typst/src/layout/inline/linebreak.rs
@ -6,7 +6,10 @@ use icu_provider_blob::BlobDataProvider;
 use icu_segmenter::LineSegmenter;
 use once_cell::sync::Lazy;

-use super::Preparation;
+use super::*;
+use crate::engine::Engine;
+use crate::layout::Abs;
+use crate::model::Linebreaks;
 use crate::syntax::link_prefix;
 use crate::text::{Lang, TextElem};

@ -38,7 +41,7 @@ static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {

 /// A line break opportunity.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub(super) enum Breakpoint {
+pub enum Breakpoint {
    /// Just a normal opportunity (e.g. after a space).
    Normal,
    /// A mandatory breakpoint (after '\n' or at the end of the text).
@ -47,6 +50,247 @@ pub(super) enum Breakpoint {
    Hyphen,
 }

+/// Splits the prepared paragraph into lines that fit the given `width`.
+///
+/// Uses the line breaking algorithm configured on the paragraph. If none is
+/// configured, justified paragraphs are broken with the optimized algorithm
+/// and all others with the simple one.
+pub fn linebreak<'a>(
+    engine: &Engine,
+    p: &'a Preparation<'a>,
+    width: Abs,
+) -> Vec<Line<'a>> {
+    // The algorithm to fall back to when nothing was set explicitly.
+    let by_justification = || match p.justify {
+        true => Linebreaks::Optimized,
+        false => Linebreaks::Simple,
+    };
+
+    match p.linebreaks.unwrap_or_else(by_justification) {
+        Linebreaks::Optimized => linebreak_optimized(engine, p, width),
+        Linebreaks::Simple => linebreak_simple(engine, p, width),
+    }
+}
+
+/// Performs line breaking in simple first-fit style. This means that we build
+/// lines greedily, always taking the longest possible line. This may lead to
+/// very unbalanced lines, but is fast and simple.
+///
+/// Returns the lines in order. A line that does not fit even on its own is
+/// emitted as is, since it cannot be broken up further.
+#[typst_macros::time]
+fn linebreak_simple<'a>(
+    engine: &Engine,
+    p: &'a Preparation<'a>,
+    width: Abs,
+) -> Vec<Line<'a>> {
+    let mut lines = Vec::with_capacity(16);
+    let mut start = 0;
+    let mut last = None;
+
+    breakpoints(p, |end, breakpoint| {
+        // The line starting at `start` directly follows the most recently
+        // finished line, which decides whether a hyphen has to be repeated.
+        let prepend_hyphen = lines.last().map(should_repeat_hyphen).unwrap_or(false);
+
+        // Compute the line and its size.
+        let mut attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
+
+        // If the line doesn't fit anymore, we push the last fitting attempt
+        // into the stack and rebuild the line from the attempt's end. The
+        // resulting line cannot be broken up further.
+        if !width.fits(attempt.width) {
+            if let Some((last_attempt, last_end)) = last.take() {
+                // The rebuilt line follows `last_attempt`, not the line that
+                // `prepend_hyphen` above was derived from, so the hyphen
+                // repetition must be decided anew from `last_attempt`.
+                let prepend_hyphen = should_repeat_hyphen(&last_attempt);
+                lines.push(last_attempt);
+                start = last_end;
+                attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
+            }
+        }
+
+        // Finish the current line if there is a mandatory line break (i.e. due
+        // to "\n") or if the line doesn't fit horizontally already since then
+        // no shorter line will be possible.
+        if breakpoint == Breakpoint::Mandatory || !width.fits(attempt.width) {
+            lines.push(attempt);
+            start = end;
+            last = None;
+        } else {
+            last = Some((attempt, end));
+        }
+    });
+
+    // Flush the attempt that was still pending when the text ended.
+    if let Some((line, _)) = last {
+        lines.push(line);
+    }
+
+    lines
+}
+
+/// Performs line breaking in optimized Knuth-Plass style. Here, we use more
+/// context to determine the line breaks than in the simple first-fit style. For
+/// example, we might choose to cut a line short even though there is still a
+/// bit of space to improve the fit of one of the following lines. The
+/// Knuth-Plass algorithm is based on the idea of "cost". A line which has a
+/// very tight or very loose fit has a higher cost than one that is just right.
+/// Ending a line with a hyphen incurs extra cost and ending two successive
+/// lines with hyphens even more.
+///
+/// To find the layout with the minimal total cost the algorithm uses dynamic
+/// programming: For each possible breakpoint it determines the optimal
+/// paragraph layout _up to that point_. It walks over all possible start points
+/// for a line ending at that point and finds the one for which the cost of the
+/// line plus the cost of the optimal paragraph up to the start point (already
+/// computed and stored in the dynamic programming table) is minimal. The final
+/// result is simply the layout determined for the last breakpoint at the end of
+/// text.
+#[typst_macros::time]
+fn linebreak_optimized<'a>(
+    engine: &Engine,
+    p: &'a Preparation<'a>,
+    width: Abs,
+) -> Vec<Line<'a>> {
+    /// The cost of a line or paragraph layout.
+    type Cost = f64;
+
+    /// An entry in the dynamic programming table.
+    struct Entry<'a> {
+        pred: usize,
+        total: Cost,
+        line: Line<'a>,
+    }
+
+    // Cost parameters.
+    const DEFAULT_HYPH_COST: Cost = 0.5;
+    const DEFAULT_RUNT_COST: Cost = 0.5;
+    const CONSECUTIVE_DASH_COST: Cost = 0.3;
+    const MAX_COST: Cost = 1_000_000.0;
+    const MIN_RATIO: f64 = -1.0;
+
+    let hyph_cost = DEFAULT_HYPH_COST * p.costs.hyphenation().get();
+    let runt_cost = DEFAULT_RUNT_COST * p.costs.runt().get();
+
+    // Dynamic programming table.
+    let mut active = 0;
+    let mut table = vec![Entry {
+        pred: 0,
+        total: 0.0,
+        line: line(engine, p, 0..0, Breakpoint::Mandatory, false),
+    }];
+
+    let em = p.size;
+    let mut lines = Vec::with_capacity(16);
+    breakpoints(p, |end, breakpoint| {
+        let k = table.len();
+        let is_end = end == p.bidi.text.len();
+        let mut best: Option<Entry> = None;
+
+        // Find the optimal predecessor.
+        for (i, pred) in table.iter().enumerate().skip(active) {
+            // Layout the line.
+            let start = pred.line.end;
+            let prepend_hyphen = should_repeat_hyphen(&pred.line);
+
+            let attempt = line(engine, p, start..end, breakpoint, prepend_hyphen);
+
+            // Determine how much the line's spaces would need to be stretched
+            // to make it the desired width.
+            let delta = width - attempt.width;
+            // Determine how much stretching (or shrinking) is permitted.
+            let adjust = if delta >= Abs::zero() {
+                attempt.stretchability()
+            } else {
+                attempt.shrinkability()
+            };
+            // Ideally, the ratio should be between -1.0 and 1.0, but sometimes
+            // a value above 1.0 is possible, in which case the line is underfull.
+            let mut ratio = delta / adjust;
+            if ratio.is_nan() {
+                // The line is not stretchable, but it just fits. This often
+                // happens with monospace fonts and CJK texts.
+                ratio = 0.0;
+            }
+            if ratio > 1.0 {
+                // We should stretch the line above its stretchability. Now
+                // calculate the extra amount. Also, don't divide by zero.
+                let extra_stretch =
+                    (delta - adjust) / attempt.justifiables().max(1) as f64;
+                // Normalize the amount by half Em size.
+                ratio = 1.0 + extra_stretch / (em / 2.0);
+            }
+
+            // Determine the cost of the line.
+            let min_ratio = if p.justify { MIN_RATIO } else { 0.0 };
+            let mut cost = if ratio < min_ratio {
+                // The line is overfull. This is the case if
+                // - justification is on, but we'd need to shrink too much
+                // - justification is off and the line just doesn't fit
+                //
+                // If this is the earliest breakpoint in the active set
+                // (active == i), remove it from the active set. If there is an
+                // earlier one (active < i), then the logically shorter line was
+                // in fact longer (can happen with negative spacing) and we
+                // can't trim the active set just yet.
+                if active == i {
+                    active += 1;
+                }
+                MAX_COST
+            } else if breakpoint == Breakpoint::Mandatory || is_end {
+                // This is a mandatory break and the line is not overfull, so
+                // all breakpoints before this one become inactive since no line
+                // can span above the mandatory break.
+                active = k;
+                // - If ratio > 0, we need to stretch the line only when justify
+                //   is needed.
+                // - If ratio < 0, we always need to shrink the line.
+                if (ratio > 0.0 && attempt.justify) || ratio < 0.0 {
+                    ratio.powi(3).abs()
+                } else {
+                    0.0
+                }
+            } else {
+                // Normal line with cost of |ratio^3|.
+                ratio.powi(3).abs()
+            };
+
+            // Penalize runts.
+            if k == i + 1 && is_end {
+                cost += runt_cost;
+            }
+
+            // Penalize hyphens.
+            if breakpoint == Breakpoint::Hyphen {
+                cost += hyph_cost;
+            }
+
+            // In Knuth's paper, cost = (1 + 100|r|^3 + p)^2 + a,
+            // where r is the ratio, p=50 is the penalty, and a=3000 is
+            // the consecutive penalty. We divide the whole formula by 10,
+            // resulting in (0.01 + |r|^3 + p)^2 + a, where p=0.5 and a=0.3
+            cost = (0.01 + cost).powi(2);
+
+            // Penalize two consecutive dashes (not necessarily hyphens) extra.
+            if attempt.dash.is_some() && pred.line.dash.is_some() {
+                cost += CONSECUTIVE_DASH_COST;
+            }
+
+            // The total cost of this line and its chain of predecessors.
+            let total = pred.total + cost;
+
+            // If this attempt is better than what we had before, take it!
+            if best.as_ref().map_or(true, |best| best.total >= total) {
+                best = Some(Entry { pred: i, total, line: attempt });
+            }
+        }
+
+        table.push(best.unwrap());
+    });
+
+    // Retrace the best path.
+    let mut idx = table.len() - 1;
+    while idx != 0 {
+        // Drop everything after entry `idx` so that `pop` moves that entry
+        // (and its line) out of the table by value.
+        table.truncate(idx + 1);
+        let entry = table.pop().unwrap();
+        lines.push(entry.line);
+        idx = entry.pred;
+    }
+
+    lines.reverse();
+    lines
+}
+
 /// Calls `f` for all possible points in the text where lines can broken.
 ///
 /// Yields for each breakpoint the text index, whether the break is mandatory
@ -56,10 +300,7 @@ pub(super) enum Breakpoint {
 /// This is an internal instead of an external iterator because it makes the
 /// code much simpler and the consumers of this function don't need the
 /// composability and flexibility of external iteration anyway.
-pub(super) fn breakpoints<'a>(
-    p: &'a Preparation<'a>,
-    mut f: impl FnMut(usize, Breakpoint),
-) {
+fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint)) {
    let text = p.bidi.text;
    let hyphenate = p.hyphenate != Some(false);
    let lb = LINEBREAK_DATA.as_borrowed();
@ -236,3 +477,49 @@ fn lang_at(p: &Preparation, offset: usize) -> Option<hypher::Lang> {
    let bytes = lang.as_str().as_bytes().try_into().ok()?;
    hypher::Lang::from_iso(bytes)
 }
+
+/// Decides whether the line following `pred_line` must start with a repeated
+/// hyphen.
+fn should_repeat_hyphen(pred_line: &Line) -> bool {
+    // A hyphen can only be repeated at the start of the next line if the
+    // predecessor line ends with a Dash::HardHyphen.
+    let hard_hyphen = pred_line.dash == Some(Dash::HardHyphen);
+
+    // A trimmed out space means the hyphen needn't be repeated. That's the
+    // case of a text like "...kebab é a -melhor- comida que existe", where the
+    // hyphens are a kind of emphasis marker.
+    let trimmed_space = pred_line.trimmed.end != pred_line.end;
+
+    if !hard_hyphen || trimmed_space {
+        return false;
+    }
+
+    // Only some languages require the hyphen to repeat, so the decision
+    // depends on the language of the text item that ends the line.
+    // For more information see the discussion at https://github.com/typst/typst/issues/3235
+    match pred_line.last.as_ref() {
+        Some(Item::Text(shape)) => match shape.lang {
+            // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
+            // - Czech: see https://prirucka.ujc.cas.cz/?id=164
+            // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
+            // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
+            // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
+            // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
+            Lang::LOWER_SORBIAN
+            | Lang::CZECH
+            | Lang::CROATIAN
+            | Lang::POLISH
+            | Lang::PORTUGUESE
+            | Lang::SLOVAK => true,
+            // In Spanish the hyphen is repeated only if the word after the
+            // hyphen is not capitalized. Otherwise, it must not be repeated.
+            //
+            // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
+            // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
+            Lang::SPANISH => pred_line.bidi.text[pred_line.end..]
+                .chars()
+                .next()
+                .is_some_and(|c| !c.is_uppercase()),
+            _ => false,
+        },
+        // The line does not end with a text item.
+        _ => false,
+    }
+}
--- a/crates/typst/src/layout/inline/mod.rs
+++ b/crates/typst/src/layout/inline/mod.rs
--- a/crates/typst/src/layout/inline/prepare.rs
+++ b/crates/typst/src/layout/inline/prepare.rs
@ -0,0 +1,194 @@
+use unicode_bidi::{BidiInfo, Level as BidiLevel};
+
+use super::*;
+use crate::foundations::{Resolve, Smart};
+use crate::layout::{Abs, AlignElem, Dir, Em, FixedAlignment};
+use crate::model::Linebreaks;
+use crate::text::{Costs, Lang, TextElem};
+
+/// A paragraph representation in which children are already layouted and text
+/// is already preshaped.
+///
+/// In many cases, we can directly reuse these results when constructing a line.
+/// Only when a line break falls onto a text index that is not safe-to-break per
+/// rustybuzz, we have to reshape that portion.
+pub struct Preparation<'a> {
+    /// Bidirectional text embedding levels for the paragraph.
+    pub bidi: BidiInfo<'a>,
+    /// Text runs, spacing and layouted elements.
+    pub items: Vec<Item<'a>>,
+    /// The span mapper.
+    pub spans: SpanMapper,
+    /// Whether to hyphenate if it's the same for all children.
+    pub hyphenate: Option<bool>,
+    /// Costs for various layout decisions.
+    pub costs: Costs,
+    /// The text language if it's the same for all children.
+    pub lang: Option<Lang>,
+    /// The paragraph's resolved horizontal alignment.
+    pub align: FixedAlignment,
+    /// Whether to justify the paragraph.
+    pub justify: bool,
+    /// The paragraph's hanging indent.
+    pub hang: Abs,
+    /// Whether to add spacing between CJK and Latin characters. When set,
+    /// the spacing has already been applied to `items`.
+    pub cjk_latin_spacing: bool,
+    /// Whether font fallback is enabled for this paragraph.
+    pub fallback: bool,
+    /// The leading of the paragraph.
+    pub leading: Abs,
+    /// How to determine line breaks. If not set explicitly, the algorithm is
+    /// chosen based on `justify`.
+    pub linebreaks: Smart<Linebreaks>,
+    /// The text size. Serves as the em size during optimized line breaking.
+    pub size: Abs,
+}
+
+impl<'a> Preparation<'a> {
+    /// Look up the item whose text span covers the given `text_offset`.
+    ///
+    /// Returns `None` if no item covers the offset.
+    pub fn find(&self, text_offset: usize) -> Option<&Item<'a>> {
+        // Start offset of the item that is currently being inspected.
+        let mut start = 0;
+        self.items.iter().find(|item| {
+            let end = start + item.textual_len();
+            let hit = (start..end).contains(&text_offset);
+            start = end;
+            hit
+        })
+    }
+
+    /// Return the items that intersect the given `text_range`.
+    ///
+    /// Returns the expanded range around the items and the items.
+    pub fn slice(&self, text_range: Range) -> (Range, &[Item<'a>]) {
+        let mut expanded = text_range.clone();
+        let mut first = 0;
+        let mut past_last = 0;
+        let mut offset = 0;
+
+        for (i, item) in self.items.iter().enumerate() {
+            // The last item that starts at or before the range's start is
+            // the first one of the slice.
+            if offset <= text_range.start {
+                first = i;
+                expanded.start = offset;
+            }
+
+            // An item belongs to the slice if it starts before the range's
+            // end or does not extend beyond it.
+            let next = offset + item.textual_len();
+            let included = offset < text_range.end || next <= text_range.end;
+            if !included {
+                break;
+            }
+
+            past_last = i + 1;
+            expanded.end = next;
+            offset = next;
+        }
+
+        (expanded, &self.items[first..past_last])
+    }
+}
+
+/// Runs the BiDi analysis on the paragraph's text and turns its segments into
+/// preshaped items, yielding a representation on which line breaking can
+/// operate without layouting each and every line from scratch.
+#[typst_macros::time]
+pub fn prepare<'a>(
+    engine: &mut Engine,
+    children: &'a StyleVec,
+    text: &'a str,
+    segments: Vec<Segment<'a>>,
+    spans: SpanMapper,
+    styles: StyleChain<'a>,
+) -> SourceResult<Preparation<'a>> {
+    // An explicit text direction fixes the base level. Otherwise, the bidi
+    // crate is left to determine it.
+    let default_level = match TextElem::dir_in(styles) {
+        Dir::LTR => Some(BidiLevel::ltr()),
+        Dir::RTL => Some(BidiLevel::rtl()),
+        _ => None,
+    };
+    let bidi = BidiInfo::new(text, default_level);
+
+    // Walk the segments while tracking their position in the full text. Text
+    // segments are shaped, prepared items are taken over as they are.
+    let mut items = Vec::with_capacity(segments.len());
+    let mut offset = 0;
+    for segment in segments {
+        let range = offset..offset + segment.textual_len();
+        offset = range.end;
+
+        match segment {
+            Segment::Item(item) => items.push(item),
+            Segment::Text(_, styles) => {
+                shape_range(&mut items, engine, &bidi, range, &spans, styles);
+            }
+        }
+    }
+
+    // Insert the CJK-Latin spacing if it is left at `auto`.
+    let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(styles).is_auto();
+    if cjk_latin_spacing {
+        add_cjk_latin_spacing(&mut items);
+    }
+
+    Ok(Preparation {
+        bidi,
+        items,
+        spans,
+        hyphenate: children.shared_get(styles, TextElem::hyphenate_in),
+        costs: TextElem::costs_in(styles),
+        lang: children.shared_get(styles, TextElem::lang_in),
+        align: AlignElem::alignment_in(styles).resolve(styles).x,
+        justify: ParElem::justify_in(styles),
+        hang: ParElem::hanging_indent_in(styles),
+        cjk_latin_spacing,
+        fallback: TextElem::fallback_in(styles),
+        leading: ParElem::leading_in(styles),
+        linebreaks: ParElem::linebreaks_in(styles),
+        size: TextElem::size_in(styles),
+    })
+}
+
+/// Add some spacing between Han characters and western characters. See
+/// Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition
+/// in Horizontal Written Mode.
+///
+/// The spacing is added to the CJ glyph itself: its advance grows by 1/4 em
+/// and the width of the containing text run is increased accordingly.
+fn add_cjk_latin_spacing(items: &mut [Item]) {
+    // Tags are ignored entirely, so they do not separate neighboring glyphs.
+    let mut items = items.iter_mut().filter(|x| !matches!(x, Item::Tag(_))).peekable();
+    let mut prev: Option<&ShapedGlyph> = None;
+    while let Some(item) = items.next() {
+        // Any other non-text item separates the glyphs before and after it.
+        let Some(text) = item.text_mut() else {
+            prev = None;
+            continue;
+        };
+
+        // Since we only call this function in [`prepare`], we can assume that
+        // the Cow is owned, and `to_mut` can be called without overhead.
+        debug_assert!(matches!(text.glyphs, std::borrow::Cow::Owned(_)));
+        let mut glyphs = text.glyphs.to_mut().iter_mut().peekable();
+
+        while let Some(glyph) = glyphs.next() {
+            // The following glyph: the next one in this run or, at the end of
+            // the run, the first glyph of the next item if that is text.
+            let next = glyphs.peek().map(|n| n as _).or_else(|| {
+                items
+                    .peek()
+                    .and_then(|i| i.text())
+                    .and_then(|shaped| shaped.glyphs.first())
+            });
+
+            // Case 1: CJ followed by a Latin character
+            if glyph.is_cj_script() && next.is_some_and(|g| g.is_letter_or_number()) {
+                // The spacing defaults to 1/4 em, and can be shrunk to 1/8 em.
+                glyph.x_advance += Em::new(0.25);
+                glyph.adjustability.shrinkability.1 += Em::new(0.125);
+                text.width += Em::new(0.25).at(text.size);
+            }
+
+            // Case 2: Latin followed by a CJ character
+            if glyph.is_cj_script() && prev.is_some_and(|g| g.is_letter_or_number()) {
+                // Here the space goes before the CJ glyph, so the glyph is
+                // also shifted by the added amount.
+                glyph.x_advance += Em::new(0.25);
+                glyph.x_offset += Em::new(0.25);
+                glyph.adjustability.shrinkability.0 += Em::new(0.125);
+                text.width += Em::new(0.25).at(text.size);
+            }
+
+            prev = Some(glyph);
+        }
+    }
+}
--- a/crates/typst/src/layout/inline/shaping.rs
+++ b/crates/typst/src/layout/inline/shaping.rs
@ -1,6 +1,5 @@
 use std::borrow::Cow;
 use std::fmt::{self, Debug, Formatter};
-use std::ops::Range;
 use std::str::FromStr;
 use std::sync::Arc;

@ -8,11 +7,12 @@ use az::SaturatingAs;
 use ecow::EcoString;
 use rustybuzz::{ShapePlan, UnicodeBuffer};
 use ttf_parser::Tag;
+use unicode_bidi::{BidiInfo, Level as BidiLevel};
 use unicode_script::{Script, UnicodeScript};

-use super::SpanMapper;
+use super::{Item, Range, SpanMapper};
 use crate::engine::Engine;
-use crate::foundations::StyleChain;
+use crate::foundations::{Smart, StyleChain};
 use crate::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
 use crate::syntax::Span;
 use crate::text::{
@ -27,7 +27,7 @@ use crate::World;
 /// This type contains owned or borrowed shaped text runs, which can be
 /// measured, used to reshape substrings more quickly and converted into a
 /// frame.
-pub(super) struct ShapedText<'a> {
+pub struct ShapedText<'a> {
    /// The start of the text in the full paragraph.
    pub base: usize,
    /// The text that was shaped.
@ -52,7 +52,7 @@ pub(super) struct ShapedText<'a> {

 /// A single glyph resulting from shaping.
 #[derive(Debug, Clone)]
-pub(super) struct ShapedGlyph {
+pub struct ShapedGlyph {
    /// The font the glyph is contained in.
    pub font: Font,
    /// The glyph's index in the font.
@ -69,12 +69,11 @@ pub(super) struct ShapedGlyph {
    /// is a sequence of one or multiple glyphs that cannot be separated and
    /// must always be treated as a union.
    ///
-    /// The range values of the glyphs in a [`ShapedText`] should not
-    /// overlap with each other, and they should be monotonically
-    /// increasing (for left-to-right or top-to-bottom text) or
-    /// monotonically decreasing (for right-to-left or bottom-to-top
-    /// text).
-    pub range: Range<usize>,
+    /// The range values of the glyphs in a [`ShapedText`] should not overlap
+    /// with each other, and they should be monotonically increasing (for
+    /// left-to-right or top-to-bottom text) or monotonically decreasing (for
+    /// right-to-left or bottom-to-top text).
+    pub range: Range,
    /// Whether splitting the shaping result before this glyph would yield the
    /// same results as shaping the parts to both sides of `text_index`
    /// separately.
@ -90,7 +89,7 @@ pub(super) struct ShapedGlyph {
 }

 #[derive(Debug, Clone, Default)]
-pub(super) struct Adjustability {
+pub struct Adjustability {
    /// The left and right strechability
    pub stretchability: (Em, Em),
    /// The left and right shrinkability
@ -414,7 +413,7 @@ impl<'a> ShapedText<'a> {
        &'a self,
        engine: &Engine,
        spans: &SpanMapper,
-        text_range: Range<usize>,
+        text_range: Range,
    ) -> ShapedText<'a> {
        let text = &self.text[text_range.start - self.base..text_range.end - self.base];
        if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
@ -508,7 +507,7 @@ impl<'a> ShapedText<'a> {

    /// Find the subslice of glyphs that represent the given text range if both
    /// sides are safe to break.
-    fn slice_safe_to_break(&self, text_range: Range<usize>) -> Option<&[ShapedGlyph]> {
+    fn slice_safe_to_break(&self, text_range: Range) -> Option<&[ShapedGlyph]> {
        let Range { mut start, mut end } = text_range;
        if !self.dir.is_positive() {
            std::mem::swap(&mut start, &mut end);
@ -552,8 +551,9 @@ impl<'a> ShapedText<'a> {
                // text_index:    ^
                // glyphs:  0     .  1
                //
-                // We will get found = Err(1), because '\n' does not have a glyph.
-                // But it's safe to break here. Thus the following condition:
+                // We will get found = Err(1), because '\n' does not have a
+                // glyph. But it's safe to break here. Thus the following
+                // condition:
                // - glyphs[0].end == text_index == 3
                // - text[3] == '\n'
                return (idx > 0
@ -589,23 +589,82 @@ impl Debug for ShapedText<'_> {
    }
 }

-/// Holds shaping results and metadata common to all shaped segments.
-struct ShapingContext<'a, 'v> {
-    engine: &'a Engine<'v>,
-    spans: &'a SpanMapper,
-    glyphs: Vec<ShapedGlyph>,
-    used: Vec<Font>,
+/// Group a range of text by BiDi level and script, shape the runs and generate
+/// items for them.
+///
+/// One `Item::Text` per run is appended to `items`.
+pub fn shape_range<'a>(
+    items: &mut Vec<Item<'a>>,
+    engine: &Engine,
+    bidi: &BidiInfo<'a>,
+    range: Range,
+    spans: &SpanMapper,
     styles: StyleChain<'a>,
-    size: Abs,
-    variant: FontVariant,
-    features: Vec<rustybuzz::Feature>,
-    fallback: bool,
-    dir: Dir,
+) {
+    let script = TextElem::script_in(styles);
+    let lang = TextElem::lang_in(styles);
+    let region = TextElem::region_in(styles);
+    // Shapes a single run and appends it to `items`. The run's direction
+    // follows from its BiDi level.
+    let mut process = |range: Range, level: BidiLevel| {
+        let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
+        let shaped = shape(
+            engine,
+            range.start,
+            &bidi.text[range],
+            spans,
+            styles,
+            dir,
+            lang,
+            region,
+        );
+        items.push(Item::Text(shaped));
+    };
+
+    let mut prev_level = BidiLevel::ltr();
+    let mut prev_script = Script::Unknown;
+    let mut cursor = range.start;
+
+    // Group by embedding level and script. If the text's script is explicitly
+    // set (rather than inferred from the glyphs), we keep the script at an
+    // unchanging `Script::Unknown` so that only level changes cause breaks.
+    for i in range.clone() {
+        // Offsets are byte indices; only the first byte of each char is
+        // inspected.
+        if !bidi.text.is_char_boundary(i) {
+            continue;
+        }
+
+        let level = bidi.levels[i];
+        let curr_script = match script {
+            Smart::Auto => {
+                bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
+            }
+            Smart::Custom(_) => Script::Unknown,
+        };
+
+        if level != prev_level || !is_compatible(curr_script, prev_script) {
+            if cursor < i {
+                process(cursor..i, prev_level);
+            }
+            cursor = i;
+            prev_level = level;
+            prev_script = curr_script;
+        } else if is_generic_script(prev_script) {
+            // The run has no specific script yet, so adopt the current one
+            // for the comparisons that follow.
+            prev_script = curr_script;
+        }
+    }
+
+    // Shape the run that is still open at the end of the range.
+    process(cursor..range.end, prev_level);
+}
+
+/// Whether the script is a generic one (unknown, common or inherited) rather
+/// than a specific script.
+fn is_generic_script(script: Script) -> bool {
+    match script {
+        Script::Unknown | Script::Common | Script::Inherited => true,
+        _ => false,
+    }
+}
+
+/// Whether these scripts can be part of the same shape run: they are equal or
+/// at least one of them is generic.
+fn is_compatible(a: Script, b: Script) -> bool {
+    a == b || is_generic_script(a) || is_generic_script(b)
 }

 /// Shape text into [`ShapedText`].
 #[allow(clippy::too_many_arguments)]
-pub(super) fn shape<'a>(
+fn shape<'a>(
    engine: &Engine,
    base: usize,
    text: &'a str,
@ -655,6 +714,20 @@ pub(super) fn shape<'a>(
    }
 }

+/// Holds shaping results and metadata common to all shaped segments.
+// NOTE(review): the field notes below are inferred from names and types only;
+// confirm them against `shape` and `shape_segment`.
+struct ShapingContext<'a, 'v> {
+    engine: &'a Engine<'v>,
+    spans: &'a SpanMapper,
+    // Presumably the glyphs shaped so far (the "shaping results").
+    glyphs: Vec<ShapedGlyph>,
+    // Presumably the fonts already used during fallback; TODO confirm.
+    used: Vec<Font>,
+    styles: StyleChain<'a>,
+    size: Abs,
+    variant: FontVariant,
+    features: Vec<rustybuzz::Feature>,
+    fallback: bool,
+    dir: Dir,
+}
+
 /// Shape text with font fallback using the `families` iterator.
 fn shape_segment<'a>(
    ctx: &mut ShapingContext,
@ -712,8 +785,7 @@ fn shape_segment<'a>(
    buffer.guess_segment_properties();

    // Prepare the shape plan. This plan depends on direction, script, language,
-    // and features, but is independent from the text and can thus be
-    // memoized.
+    // and features, but is independent from the text and can thus be memoized.
    let plan = create_shape_plan(
        &font,
        buffer.direction(),
@ -908,7 +980,7 @@ fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<
            continue;
        }

-        // Now we apply consecutive punctuation adjustment, specified in Chinese Layout
+        // Now we apply consecutive punctuation adjustment, specified in Chinese Layout
        // Requirements, section 3.1.6.1 Punctuation Adjustment Space, and Japanese Layout
        // Requirements, section 3.1 Line Composition Rules for Punctuation Marks
        let Some(next) = glyphs.peek_mut() else { continue };
@ -945,7 +1017,7 @@ fn language(styles: StyleChain) -> rustybuzz::Language {

 /// Returns true if all glyphs in `glyphs` have ranges within the range `range`.
 #[cfg(debug_assertions)]
-fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range<usize>) {
+fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range) {
    if glyphs
        .iter()
        .any(|g| g.range.start < range.start || g.range.end > range.end)
@ -954,9 +1026,11 @@ fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range<u
    }
 }

-/// Asserts that the ranges of `glyphs` is in the proper order according to `dir`.
+/// Asserts that the ranges of `glyphs` is in the proper order according to
+/// `dir`.
 ///
-/// This asserts instead of returning a bool in order to provide a more informative message when the invariant is violated.
+/// This asserts instead of returning a bool in order to provide a more
+/// informative message when the invariant is violated.
 #[cfg(debug_assertions)]
 fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) {
    if glyphs.is_empty() {
@ -981,15 +1055,15 @@ fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) {
 }

 // The CJK punctuation that can appear at the beginning or end of a line.
-pub(super) const BEGIN_PUNCT_PAT: &[char] =
+pub const BEGIN_PUNCT_PAT: &[char] =
    &['“', '‘', '《', '〈', '（', '『', '「', '【', '〖', '〔', '［', '｛'];
-pub(super) const END_PUNCT_PAT: &[char] = &[
+pub const END_PUNCT_PAT: &[char] = &[
    '”', '’', '，', '．', '。', '、', '：', '；', '》', '〉', '）', '』', '」', '】',
    '〗', '〕', '］', '｝', '？', '！',
 ];

 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub(super) enum CjkPunctStyle {
+pub enum CjkPunctStyle {
    /// Standard GB/T 15834-2011, used mostly in mainland China.
    Gb,
    /// Standard by Taiwan Ministry of Education, used in Taiwan and Hong Kong.
@ -998,7 +1072,7 @@ pub(super) enum CjkPunctStyle {
    Jis,
 }

-pub(super) fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle {
+pub fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle {
    match (lang, region.as_ref().map(Region::as_str)) {
        (Lang::CHINESE, Some("TW" | "HK")) => CjkPunctStyle::Cns,
        (Lang::JAPANESE, _) => CjkPunctStyle::Jis,
@ -1013,7 +1087,7 @@ fn is_space(c: char) -> bool {
 }

 /// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
-pub(super) fn is_of_cj_script(c: char) -> bool {
+pub fn is_of_cj_script(c: char) -> bool {
    is_cj_script(c, c.script())
 }

@ -1047,8 +1121,9 @@ fn is_cjk_left_aligned_punctuation(
    }

    if matches!(style, Gb) && matches!(c, '？' | '！') {
-        // In GB style, exclamations and question marks are also left aligned and can be adjusted.
-        // Note that they are not adjustable in other styles.
+        // In GB style, exclamations and question marks are also left aligned
+        // and can be adjusted. Note that they are not adjustable in other
+        // styles.
        return true;
    }