diff --git a/Cargo.lock b/Cargo.lock index 3e99ea814..14dd36f11 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2604,8 +2604,7 @@ dependencies = [ [[package]] name = "typst-assets" version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f13f85360328da54847dd7fefaf272dfa5b6d1fdeb53f32938924c39bf5b2c6c" +source = "git+https://github.com/typst/typst-assets?rev=4ee794c#4ee794cf8fb98eb67194e757c9820ab8562d853b" [[package]] name = "typst-cli" @@ -2656,7 +2655,7 @@ dependencies = [ [[package]] name = "typst-dev-assets" version = "0.11.0" -source = "git+https://github.com/typst/typst-dev-assets?rev=48a924d9de82b631bc775124a69384c8d860db04#48a924d9de82b631bc775124a69384c8d860db04" +source = "git+https://github.com/typst/typst-dev-assets?rev=48a924d#48a924d9de82b631bc775124a69384c8d860db04" [[package]] name = "typst-docs" diff --git a/Cargo.toml b/Cargo.toml index ee50b6667..1b5bf0f4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,8 +26,8 @@ typst-svg = { path = "crates/typst-svg", version = "0.11.0" } typst-syntax = { path = "crates/typst-syntax", version = "0.11.0" } typst-timing = { path = "crates/typst-timing", version = "0.11.0" } typst-utils = { path = "crates/typst-utils", version = "0.11.0" } -typst-assets = "0.11.0" -typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "48a924d9de82b631bc775124a69384c8d860db04" } +typst-assets = { git = "https://github.com/typst/typst-assets", rev = "4ee794c" } +typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "48a924d" } az = "1.2" base64 = "0.22" bitflags = { version = "2", features = ["serde"] } diff --git a/crates/typst/src/introspection/mod.rs b/crates/typst/src/introspection/mod.rs index c9dba244a..6c982afb4 100644 --- a/crates/typst/src/introspection/mod.rs +++ b/crates/typst/src/introspection/mod.rs @@ -116,6 +116,6 @@ impl Tag { impl Debug for Tag { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "Tag({:?})", 
self.elem) + write!(f, "Tag({:?})", self.elem.elem().name()) } } diff --git a/crates/typst/src/layout/inline/collect.rs b/crates/typst/src/layout/inline/collect.rs index 79d0d59f6..f1607460b 100644 --- a/crates/typst/src/layout/inline/collect.rs +++ b/crates/typst/src/layout/inline/collect.rs @@ -79,7 +79,7 @@ impl<'a> Item<'a> { } /// The natural layouted width of the item. - pub fn width(&self) -> Abs { + pub fn natural_width(&self) -> Abs { match self { Self::Text(shaped) => shaped.width, Self::Absolute(v, _) => *v, diff --git a/crates/typst/src/layout/inline/line.rs b/crates/typst/src/layout/inline/line.rs index 232a1c6b7..12162ab16 100644 --- a/crates/typst/src/layout/inline/line.rs +++ b/crates/typst/src/layout/inline/line.rs @@ -1,11 +1,18 @@ -use unicode_bidi::BidiInfo; +use std::fmt::{self, Debug, Formatter}; +use std::ops::{Deref, DerefMut}; use super::*; use crate::engine::Engine; -use crate::layout::{Abs, Em, Fr, Frame, FrameItem, Point}; +use crate::layout::{Abs, Dir, Em, Fr, Frame, FrameItem, Point}; use crate::text::{Lang, TextElem}; use crate::utils::Numeric; +const SHY: char = '\u{ad}'; +const HYPHEN: char = '-'; +const EN_DASH: char = '–'; +const EM_DASH: char = '—'; +const LINE_SEPARATOR: char = '\u{2028}'; // We use LS to distinguish justified breaks. + /// A layouted line, consisting of a sequence of layouted paragraph items that /// are mostly borrowed from the preparation phase. This type enables you to /// measure the size of a line in a range before committing to building the @@ -16,20 +23,9 @@ use crate::utils::Numeric; /// line, respectively. But even those can partially reuse previous results when /// the break index is safe-to-break per rustybuzz. pub struct Line<'a> { - /// Bidi information about the paragraph. - pub bidi: &'a BidiInfo<'a>, - /// The trimmed range the line spans in the paragraph. - pub trimmed: Range, - /// The untrimmed end where the line ends. 
- pub end: usize, - /// A reshaped text item if the line sliced up a text item at the start. - pub first: Option>, - /// Inner items which don't need to be reprocessed. - pub inner: &'a [Item<'a>], - /// A reshaped text item if the line sliced up a text item at the end. If - /// there is only one text item, this takes precedence over `first`. - pub last: Option>, - /// The width of the line. + /// The items the line is made of. + pub items: Items<'a>, + /// The exact natural width of the line. pub width: Abs, /// Whether the line should be justified. pub justify: bool, @@ -39,45 +35,27 @@ pub struct Line<'a> { } impl<'a> Line<'a> { - /// Iterate over the line's items. - pub fn items(&self) -> impl Iterator> { - self.first.iter().chain(self.inner).chain(&self.last) - } - - /// Return items that intersect the given `text_range`. - pub fn slice(&self, text_range: Range) -> impl Iterator> { - let mut cursor = self.trimmed.start; - let mut start = 0; - let mut end = 0; - - for (i, item) in self.items().enumerate() { - if cursor <= text_range.start { - start = i; - } - - let len = item.textual_len(); - if cursor < text_range.end || cursor + len <= text_range.end { - end = i + 1; - } else { - break; - } - - cursor += len; + /// Create an empty line. + pub fn empty() -> Self { + Self { + items: Items::new(), + width: Abs::zero(), + justify: false, + dash: None, } - - self.items().skip(start).take(end - start) } /// How many glyphs are in the text where we can insert additional /// space when encountering underfull lines. pub fn justifiables(&self) -> usize { let mut count = 0; - for shaped in self.items().filter_map(Item::text) { + for shaped in self.items.iter().filter_map(Item::text) { count += shaped.justifiables(); } + // CJK character at line end should not be adjusted. 
if self - .items() + .items .last() .and_then(Item::text) .map(|s| s.cjk_justifiable_at_last()) @@ -89,19 +67,27 @@ impl<'a> Line<'a> { count } - /// How much can the line stretch + /// How much the line can stretch. pub fn stretchability(&self) -> Abs { - self.items().filter_map(Item::text).map(|s| s.stretchability()).sum() + self.items + .iter() + .filter_map(Item::text) + .map(|s| s.stretchability()) + .sum() } - /// How much can the line shrink + /// How much the line can shrink. pub fn shrinkability(&self) -> Abs { - self.items().filter_map(Item::text).map(|s| s.shrinkability()).sum() + self.items + .iter() + .filter_map(Item::text) + .map(|s| s.shrinkability()) + .sum() } /// Whether the line has items with negative width. pub fn has_negative_width_items(&self) -> bool { - self.items().any(|item| match item { + self.items.iter().any(|item| match item { Item::Absolute(amount, _) => *amount < Abs::zero(), Item::Frame(frame, _) => frame.width() < Abs::zero(), _ => false, @@ -110,7 +96,8 @@ impl<'a> Line<'a> { /// The sum of fractions in the line. pub fn fr(&self) -> Fr { - self.items() + self.items + .iter() .filter_map(|item| match item { Item::Fractional(fr, _) => Some(*fr), _ => None, @@ -122,234 +109,299 @@ impl<'a> Line<'a> { /// A dash at the end of a line. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Dash { - /// A hyphen added to break a word. - SoftHyphen, - /// Regular hyphen, present in a compound word, e.g. beija-flor. - HardHyphen, - /// An em dash. - Long, - /// An en dash. - Short, + /// A soft hyphen added to break a word. + Soft, + /// A regular hyphen, present in a compound word, e.g. beija-flor. + Hard, + /// Another kind of dash. Only relevant for cost computation. + Other, } /// Create a line which spans the given range. 
pub fn line<'a>( engine: &Engine, p: &'a Preparation, - mut range: Range, + range: Range, breakpoint: Breakpoint, pred: Option<&Line>, ) -> Line<'a> { - let end = range.end; - let mut justify = - p.justify && end < p.bidi.text.len() && breakpoint != Breakpoint::Mandatory; + // The line's full text. + let full = &p.text[range.clone()]; + // Whether the line is justified. + let justify = full.ends_with(LINE_SEPARATOR) + || (p.justify && breakpoint != Breakpoint::Mandatory); + + // Process dashes. + let dash = if breakpoint == Breakpoint::Hyphen || full.ends_with(SHY) { + Some(Dash::Soft) + } else if full.ends_with(HYPHEN) { + Some(Dash::Hard) + } else if full.ends_with([EN_DASH, EM_DASH]) { + Some(Dash::Other) + } else { + None + }; + + // Trim the line at the end, if necessary for this breakpoint. + let trim = range.start + breakpoint.trim(full).len(); + + // Collect the items for the line. + let mut items = collect_items(engine, p, range, trim); + + // Add a hyphen at the line start, if a previous dash should be repeated. + if pred.map_or(false, |pred| should_repeat_hyphen(pred, full)) { + if let Some(shaped) = items.first_text_mut() { + shaped.prepend_hyphen(engine, p.fallback); + } + } + + // Add a hyphen at the line end, if we ended on a soft hyphen. + if dash == Some(Dash::Soft) { + if let Some(shaped) = items.last_text_mut() { + shaped.push_hyphen(engine, p.fallback); + } + } + + // Deal with CJ characters at line boundaries. + adjust_cj_at_line_boundaries(p, full, &mut items); + + // Compute the line's width. + let width = items.iter().map(Item::natural_width).sum(); + + Line { items, width, justify, dash } +} + +/// Collects / reshapes all items for the line with the given `range`. +/// +/// The `trim` defines an end position to which text items are trimmed. For +/// example, the `range` may span "hello\n", but the `trim` specifies that the +/// linebreak is trimmed. 
+/// +/// We do not factor the `trim` directly into the `range` because we still want +/// to keep non-text items after the trim (e.g. tags). +fn collect_items<'a>( + engine: &Engine, + p: &'a Preparation, + range: Range, + trim: usize, +) -> Items<'a> { + let mut items = Items::new(); + let mut fallback = None; + + // Collect the items for each consecutively ordered run. + reorder(p, range.clone(), |subrange, rtl| { + let from = items.len(); + collect_range(engine, p, subrange, trim, &mut items, &mut fallback); + if rtl { + items.reorder(from); + } + }); + + // Trim weak spacing at the start of the line. + let prefix = items + .iter() + .take_while(|item| matches!(item, Item::Absolute(_, true))) + .count(); + if prefix > 0 { + items.drain(..prefix); + } + + // Trim weak spacing at the end of the line. + while matches!(items.last(), Some(Item::Absolute(_, true))) { + items.pop(); + } + + // Add fallback text to expand the line height, if necessary. + if !items.iter().any(|item| matches!(item, Item::Text(_))) { + if let Some(fallback) = fallback { + items.push(fallback); + } + } + + items +} + +/// Calls `f` for the BiDi-reordered ranges of a line. +fn reorder(p: &Preparation, range: Range, mut f: F) +where + F: FnMut(Range, bool), +{ + // If there is nothing bidirectional going on, skip reordering. + let Some(bidi) = &p.bidi else { + f(range, p.dir == Dir::RTL); + return; + }; + + // The bidi crate panics for empty lines. if range.is_empty() { - return Line { - bidi: &p.bidi, - end, - trimmed: range, - first: None, - inner: &[], - last: None, - width: Abs::zero(), - justify, - dash: None, + f(range, p.dir == Dir::RTL); + return; + } + + // Find the paragraph that contains the line. + let para = bidi + .paragraphs + .iter() + .find(|para| para.range.contains(&range.start)) + .unwrap(); + + // Compute the reordered ranges in visual order (left to right). + let (levels, runs) = bidi.visual_runs(para, range.clone()); + + // Call `f` for each run. 
+ for run in runs { + let rtl = levels[run.start].is_rtl(); + f(run, rtl) + } +} + +/// Collects / reshapes all items for the given `subrange` with continuous +/// direction. +fn collect_range<'a>( + engine: &Engine, + p: &'a Preparation, + range: Range, + trim: usize, + items: &mut Items<'a>, + fallback: &mut Option>, +) { + for (subrange, item) in p.slice(range.clone()) { + // All non-text items are just kept, they can't be split. + let Item::Text(shaped) = item else { + items.push(item); + continue; }; - } - let prepend_hyphen = pred.map_or(false, should_insert_hyphen); + // The intersection range of the item, the subrange, and the line's + // trimming. + let sliced = + range.start.max(subrange.start)..range.end.min(subrange.end).min(trim); - // Slice out the relevant items. - let (mut expanded, mut inner) = p.slice(range.clone()); - let mut width = Abs::zero(); + // Whether the item is split by the line. + let split = subrange.start < sliced.start || sliced.end < subrange.end; - // Weak space (`Absolute(_, true)`) is removed at the end of the line - while let Some((Item::Absolute(_, true), before)) = inner.split_last() { - inner = before; - range.end -= 1; - expanded.end -= 1; - } - // Weak space (`Absolute(_, true)`) is removed at the beginning of the line - while let Some((Item::Absolute(_, true), after)) = inner.split_first() { - inner = after; - range.start += 1; - expanded.end += 1; - } - - // Reshape the last item if it's split in half or hyphenated. - let mut last = None; - let mut dash = None; - if let Some((Item::Text(shaped), before)) = inner.split_last() { - // Compute the range we want to shape, trimming whitespace at the - // end of the line. - let base = expanded.end - shaped.text.len(); - let start = range.start.max(base); - let text = &p.bidi.text[start..range.end]; - // U+200B ZERO WIDTH SPACE is used to provide a line break opportunity, - // we want to trim it too. 
- let trimmed = text.trim_end().trim_end_matches('\u{200B}'); - range.end = start + trimmed.len(); - - // Deal with hyphens, dashes and justification. - let shy = trimmed.ends_with('\u{ad}'); - let hyphen = breakpoint == Breakpoint::Hyphen; - dash = if hyphen || shy { - Some(Dash::SoftHyphen) - } else if trimmed.ends_with('-') { - Some(Dash::HardHyphen) - } else if trimmed.ends_with('–') { - Some(Dash::Short) - } else if trimmed.ends_with('—') { - Some(Dash::Long) + if sliced.is_empty() { + // When there is no text, still keep this as a fallback item, which + // we can use to force a non-zero line-height when the line doesn't + // contain any other text. + *fallback = Some(ItemEntry::from(Item::Text(shaped.empty()))); + } else if split { + // When the item is split in half, reshape it. + let reshaped = shaped.reshape(engine, sliced); + items.push(Item::Text(reshaped)); } else { - None - }; - justify |= text.ends_with('\u{2028}'); + // When the item is fully contained, just keep it. + items.push(item); + } + } +} - // Deal with CJK punctuation at line ends. - let gb_style = cjk_punct_style(shaped.lang, shaped.region); - let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT) - || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script)); +/// Add spacing around punctuation marks for CJ glyphs at line boundaries. +/// +/// See Requirements for Chinese Text Layout, Section 3.1.6.3 Compression of +/// punctuation marks at line start or line end. +fn adjust_cj_at_line_boundaries(p: &Preparation, text: &str, items: &mut Items) { + if text.starts_with(BEGIN_PUNCT_PAT) + || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script)) + { + adjust_cj_at_line_start(p, items); + } - // Usually, we don't want to shape an empty string because: - // - We don't want the height of trimmed whitespace in a different font - // to be considered for the line height. - // - Even if it's in the same font, its unnecessary. 
+ if text.ends_with(END_PUNCT_PAT) + || (p.cjk_latin_spacing && text.ends_with(is_of_cj_script)) + { + adjust_cj_at_line_end(p, items); + } +} + +/// Add spacing around punctuation marks for CJ glyphs at the line start. +fn adjust_cj_at_line_start(p: &Preparation, items: &mut Items) { + let Some(shaped) = items.first_text_mut() else { return }; + let Some(glyph) = shaped.glyphs.first() else { return }; + + if glyph.is_cjk_right_aligned_punctuation() { + // If the first glyph is a CJK punctuation, we want to + // shrink it. + let glyph = shaped.glyphs.to_mut().first_mut().unwrap(); + let shrink = glyph.shrinkability().0; + glyph.shrink_left(shrink); + shaped.width -= shrink.at(shaped.size); + } else if p.cjk_latin_spacing && glyph.is_cj_script() && glyph.x_offset > Em::zero() { + // If the first glyph is a CJK character adjusted by + // [`add_cjk_latin_spacing`], restore the original width. + let glyph = shaped.glyphs.to_mut().first_mut().unwrap(); + let shrink = glyph.x_offset; + glyph.x_advance -= shrink; + glyph.x_offset = Em::zero(); + glyph.adjustability.shrinkability.0 = Em::zero(); + shaped.width -= shrink.at(shaped.size); + } +} + +/// Add spacing around punctuation marks for CJ glyphs at the line end. +fn adjust_cj_at_line_end(p: &Preparation, items: &mut Items) { + let Some(shaped) = items.last_text_mut() else { return }; + let Some(glyph) = shaped.glyphs.last() else { return }; + + // Deal with CJK punctuation at line ends. + let style = cjk_punct_style(shaped.lang, shaped.region); + + if glyph.is_cjk_left_aligned_punctuation(style) { + // If the last glyph is a CJK punctuation, we want to + // shrink it. 
+ let shrink = glyph.shrinkability().1; + let punct = shaped.glyphs.to_mut().last_mut().unwrap(); + punct.shrink_right(shrink); + shaped.width -= shrink.at(shaped.size); + } else if p.cjk_latin_spacing + && glyph.is_cj_script() + && (glyph.x_advance - glyph.x_offset) > Em::one() + { + // If the last glyph is a CJK character adjusted by + // [`add_cjk_latin_spacing`], restore the original width. + let shrink = glyph.x_advance - glyph.x_offset - Em::one(); + let glyph = shaped.glyphs.to_mut().last_mut().unwrap(); + glyph.x_advance -= shrink; + glyph.adjustability.shrinkability.1 = Em::zero(); + shaped.width -= shrink.at(shaped.size); + } +} + +/// Whether a hyphen should be inserted at the start of the next line. +fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool { + // If the predecessor line does not end with a `Dash::Hard`, we shall + // not place a hyphen at the start of the next line. + if pred_line.dash != Some(Dash::Hard) { + return false; + } + + // The hyphen should repeat only in the languages that require that feature. + // For more information see the discussion at https://github.com/typst/typst/issues/3235 + let Some(Item::Text(shaped)) = pred_line.items.last() else { return false }; + + match shaped.lang { + // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3 + // - Czech: see https://prirucka.ujc.cas.cz/?id=164 + // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/ + // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni + // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX) + // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/ + Lang::LOWER_SORBIAN + | Lang::CZECH + | Lang::CROATIAN + | Lang::POLISH + | Lang::PORTUGUESE + | Lang::SLOVAK => true, + + // In Spanish the hyphen is required only if the word next to hyphen is + // not capitalized. Otherwise, the hyphen must not be repeated. 
// - // There is one exception though. When the whole line is empty, we need - // the shaped empty string to make the line the appropriate height. That - // is the case exactly if the string is empty and there are no other - // items in the line. - if hyphen - || start + shaped.text.len() > range.end - || maybe_adjust_last_glyph - || prepend_hyphen - { - if hyphen || start < range.end || before.is_empty() { - let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end); - if hyphen || shy { - reshaped.push_hyphen(engine, p.fallback); - } + // See § 4.1.1.1.2.e on the "Ortografía de la lengua española" + // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea + Lang::SPANISH => text.chars().next().map_or(false, |c| !c.is_uppercase()), - if let Some(last_glyph) = reshaped.glyphs.last() { - if last_glyph.is_cjk_left_aligned_punctuation(gb_style) { - // If the last glyph is a CJK punctuation, we want to - // shrink it. See Requirements for Chinese Text Layout, - // Section 3.1.6.3 Compression of punctuation marks at - // line start or line end - let shrink_amount = last_glyph.shrinkability().1; - let punct = reshaped.glyphs.to_mut().last_mut().unwrap(); - punct.shrink_right(shrink_amount); - reshaped.width -= shrink_amount.at(reshaped.size); - } else if p.cjk_latin_spacing - && last_glyph.is_cj_script() - && (last_glyph.x_advance - last_glyph.x_offset) > Em::one() - { - // If the last glyph is a CJK character adjusted by - // [`add_cjk_latin_spacing`], restore the original - // width. - let shrink_amount = - last_glyph.x_advance - last_glyph.x_offset - Em::one(); - let glyph = reshaped.glyphs.to_mut().last_mut().unwrap(); - glyph.x_advance -= shrink_amount; - glyph.adjustability.shrinkability.1 = Em::zero(); - reshaped.width -= shrink_amount.at(reshaped.size); - } - } - - width += reshaped.width; - last = Some(Item::Text(reshaped)); - } - - inner = before; - } - } - - // Deal with CJ characters at line starts. 
- let text = &p.bidi.text[range.start..end]; - let maybe_adjust_first_glyph = text.starts_with(BEGIN_PUNCT_PAT) - || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script)); - - // Reshape the start item if it's split in half. - let mut first = None; - if let Some((Item::Text(shaped), after)) = inner.split_first() { - // Compute the range we want to shape. - let base = expanded.start; - let end = range.end.min(base + shaped.text.len()); - - // Reshape if necessary. - if range.start + shaped.text.len() > end - || maybe_adjust_first_glyph - || prepend_hyphen - { - // If the range is empty, we don't want to push an empty text item. - if range.start < end { - let reshaped = shaped.reshape(engine, &p.spans, range.start..end); - width += reshaped.width; - first = Some(Item::Text(reshaped)); - } - - inner = after; - } - } - - if prepend_hyphen { - let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut); - if let Some(reshaped) = reshaped { - let width_before = reshaped.width; - reshaped.prepend_hyphen(engine, p.fallback); - width += reshaped.width - width_before; - } - } - - if maybe_adjust_first_glyph { - let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut); - if let Some(reshaped) = reshaped { - if let Some(first_glyph) = reshaped.glyphs.first() { - if first_glyph.is_cjk_right_aligned_punctuation() { - // If the first glyph is a CJK punctuation, we want to - // shrink it. - let shrink_amount = first_glyph.shrinkability().0; - let glyph = reshaped.glyphs.to_mut().first_mut().unwrap(); - glyph.shrink_left(shrink_amount); - let amount_abs = shrink_amount.at(reshaped.size); - reshaped.width -= amount_abs; - width -= amount_abs; - } else if p.cjk_latin_spacing - && first_glyph.is_cj_script() - && first_glyph.x_offset > Em::zero() - { - // If the first glyph is a CJK character adjusted by - // [`add_cjk_latin_spacing`], restore the original width. 
- let shrink_amount = first_glyph.x_offset; - let glyph = reshaped.glyphs.to_mut().first_mut().unwrap(); - glyph.x_advance -= shrink_amount; - glyph.x_offset = Em::zero(); - glyph.adjustability.shrinkability.0 = Em::zero(); - let amount_abs = shrink_amount.at(reshaped.size); - reshaped.width -= amount_abs; - width -= amount_abs; - } - } - } - } - - // Measure the inner items. - for item in inner { - width += item.width(); - } - - Line { - bidi: &p.bidi, - trimmed: range, - end, - first, - inner, - last, - width, - justify, - dash, + _ => false, } } @@ -365,18 +417,19 @@ pub fn commit( let mut remaining = width - line.width - p.hang; let mut offset = Abs::zero(); - // Reorder the line from logical to visual order. - let (reordered, starts_rtl) = reorder(line); - if !starts_rtl { + // We always build the line from left to right. In an LTR paragraph, we must + // thus add the hanging indent to the offset. When the paragraph is RTL, the + // hanging indent arises naturally due to the line width. + if p.dir == Dir::LTR { offset += p.hang; } // Handle hanging punctuation to the left. - if let Some(Item::Text(text)) = reordered.first() { + if let Some(Item::Text(text)) = line.items.first() { if let Some(glyph) = text.glyphs.first() { if !text.dir.is_positive() && TextElem::overhang_in(text.styles) - && (reordered.len() > 1 || text.glyphs.len() > 1) + && (line.items.len() > 1 || text.glyphs.len() > 1) { let amount = overhang(glyph.c) * glyph.x_advance.at(text.size); offset -= amount; @@ -386,11 +439,11 @@ pub fn commit( } // Handle hanging punctuation to the right. 
- if let Some(Item::Text(text)) = reordered.last() { + if let Some(Item::Text(text)) = line.items.last() { if let Some(glyph) = text.glyphs.last() { if text.dir.is_positive() && TextElem::overhang_in(text.styles) - && (reordered.len() > 1 || text.glyphs.len() > 1) + && (line.items.len() > 1 || text.glyphs.len() > 1) { let amount = overhang(glyph.c) * glyph.x_advance.at(text.size); remaining += amount; @@ -408,16 +461,16 @@ pub fn commit( let mut extra_justification = Abs::zero(); let shrinkability = line.shrinkability(); - let stretch = line.stretchability(); + let stretchability = line.stretchability(); if remaining < Abs::zero() && shrinkability > Abs::zero() && shrink { // Attempt to reduce the length of the line, using shrinkability. justification_ratio = (remaining / shrinkability).max(-1.0); remaining = (remaining + shrinkability).min(Abs::zero()); } else if line.justify && fr.is_zero() { // Attempt to increase the length of the line, using stretchability. - if stretch > Abs::zero() { - justification_ratio = (remaining / stretch).min(1.0); - remaining = (remaining - stretch).max(Abs::zero()); + if stretchability > Abs::zero() { + justification_ratio = (remaining / stretchability).min(1.0); + remaining = (remaining - stretchability).max(Abs::zero()); } let justifiables = line.justifiables(); @@ -433,7 +486,7 @@ pub fn commit( // Build the frames and determine the height and baseline. 
let mut frames = vec![]; - for item in reordered { + for item in line.items.iter() { let mut push = |offset: &mut Abs, frame: Frame| { let width = frame.width(); top.set_max(frame.baseline()); @@ -460,8 +513,12 @@ pub fn commit( } } Item::Text(shaped) => { - let mut frame = - shaped.build(engine, justification_ratio, extra_justification); + let mut frame = shaped.build( + engine, + &p.spans, + justification_ratio, + extra_justification, + ); frame.post_process(shaped.styles); push(&mut offset, frame); } @@ -499,94 +556,6 @@ pub fn commit( Ok(output) } -/// Return a line's items in visual order. -fn reorder<'a>(line: &'a Line<'a>) -> (Vec<&Item<'a>>, bool) { - let mut reordered = vec![]; - - // The bidi crate doesn't like empty lines. - if line.trimmed.is_empty() { - return (line.slice(line.trimmed.clone()).collect(), false); - } - - // Find the paragraph that contains the line. - let para = line - .bidi - .paragraphs - .iter() - .find(|para| para.range.contains(&line.trimmed.start)) - .unwrap(); - - // Compute the reordered ranges in visual order (left to right). - let (levels, runs) = line.bidi.visual_runs(para, line.trimmed.clone()); - let starts_rtl = levels.first().is_some_and(|level| level.is_rtl()); - - // Collect the reordered items. - for run in runs { - // Skip reset L1 runs because handling them would require reshaping - // again in some cases. - if line.bidi.levels[run.start] != levels[run.start] { - continue; - } - - let prev = reordered.len(); - reordered.extend(line.slice(run.clone())); - - if levels[run.start].is_rtl() { - reordered[prev..].reverse(); - } - } - - (reordered, starts_rtl) -} - -/// Whether a hyphen should be inserted at the start of the next line. -fn should_insert_hyphen(pred_line: &Line) -> bool { - // If the predecessor line does not end with a Dash::HardHyphen, we shall - // not place a hyphen at the start of the next line. 
- if pred_line.dash != Some(Dash::HardHyphen) { - return false; - } - - // If there's a trimmed out space, we needn't repeat the hyphen. That's the - // case of a text like "...kebab é a -melhor- comida que existe", where the - // hyphens are a kind of emphasis marker. - if pred_line.trimmed.end != pred_line.end { - return false; - } - - // The hyphen should repeat only in the languages that require that feature. - // For more information see the discussion at https://github.com/typst/typst/issues/3235 - let Some(Item::Text(shape)) = pred_line.last.as_ref() else { return false }; - - match shape.lang { - // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3 - // - Czech: see https://prirucka.ujc.cas.cz/?id=164 - // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/ - // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni - // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX) - // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/ - Lang::LOWER_SORBIAN - | Lang::CZECH - | Lang::CROATIAN - | Lang::POLISH - | Lang::PORTUGUESE - | Lang::SLOVAK => true, - - // In Spanish the hyphen is required only if the word next to hyphen is - // not capitalized. Otherwise, the hyphen must not be repeated. - // - // See § 4.1.1.1.2.e on the "Ortografía de la lengua española" - // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea - Lang::SPANISH => pred_line.bidi.text[pred_line.end..] - .chars() - .next() - .map(|c| !c.is_uppercase()) - .unwrap_or(false), - - _ => false, - } -} - /// How much a character should hang into the end margin. /// /// For more discussion, see: @@ -607,3 +576,119 @@ fn overhang(c: char) -> f64 { _ => 0.0, } } + +/// A collection of owned or borrowed paragraph items. +pub struct Items<'a>(Vec>); + +impl<'a> Items<'a> { + /// Create empty items. 
+ pub fn new() -> Self { + Self(vec![]) + } + + /// Push a new item. + pub fn push(&mut self, entry: impl Into>) { + self.0.push(entry.into()); + } + + /// Iterate over the items + pub fn iter(&self) -> impl Iterator> { + self.0.iter().map(|item| &**item) + } + + /// Access the first item. + pub fn first(&self) -> Option<&Item<'a>> { + self.0.first().map(|item| &**item) + } + + /// Access the last item. + pub fn last(&self) -> Option<&Item<'a>> { + self.0.last().map(|item| &**item) + } + + /// Access the first item mutably, if it is text. + pub fn first_text_mut(&mut self) -> Option<&mut ShapedText<'a>> { + self.0.first_mut()?.text_mut() + } + + /// Access the last item mutably, if it is text. + pub fn last_text_mut(&mut self) -> Option<&mut ShapedText<'a>> { + self.0.last_mut()?.text_mut() + } + + /// Reorder the items starting at the given index to RTL. + pub fn reorder(&mut self, from: usize) { + self.0[from..].reverse() + } +} + +impl<'a> FromIterator> for Items<'a> { + fn from_iter>>(iter: I) -> Self { + Self(iter.into_iter().collect()) + } +} + +impl<'a> Deref for Items<'a> { + type Target = Vec>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a> DerefMut for Items<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +/// A reference to or a boxed item. 
+pub enum ItemEntry<'a> { + Ref(&'a Item<'a>), + Box(Box>), +} + +impl<'a> ItemEntry<'a> { + fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> { + match self { + Self::Ref(item) => { + let text = item.text()?; + *self = Self::Box(Box::new(Item::Text(text.clone()))); + match self { + Self::Box(item) => item.text_mut(), + _ => unreachable!(), + } + } + Self::Box(item) => item.text_mut(), + } + } +} + +impl<'a> Deref for ItemEntry<'a> { + type Target = Item<'a>; + + fn deref(&self) -> &Self::Target { + match self { + Self::Ref(item) => item, + Self::Box(item) => item, + } + } +} + +impl Debug for ItemEntry<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + (**self).fmt(f) + } +} + +impl<'a> From<&'a Item<'a>> for ItemEntry<'a> { + fn from(item: &'a Item<'a>) -> Self { + Self::Ref(item) + } +} + +impl<'a> From> for ItemEntry<'a> { + fn from(item: Item<'a>) -> Self { + Self::Box(Box::new(item)) + } +} diff --git a/crates/typst/src/layout/inline/linebreak.rs b/crates/typst/src/layout/inline/linebreak.rs index 0555c1890..dbaa9c59a 100644 --- a/crates/typst/src/layout/inline/linebreak.rs +++ b/crates/typst/src/layout/inline/linebreak.rs @@ -1,6 +1,7 @@ use std::ops::{Add, Sub}; use icu_properties::maps::CodePointMapData; +use icu_properties::sets::CodePointSetData; use icu_properties::LineBreak; use icu_provider::AsDeserializingBufferProvider; use icu_provider_adapters::fork::ForkByKeyProvider; @@ -27,30 +28,33 @@ const MIN_RATIO: f64 = -1.0; const MIN_APPROX_RATIO: f64 = -0.5; const BOUND_EPS: f64 = 1e-3; +/// The ICU blob data. +fn blob() -> BlobDataProvider { + BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap() +} + /// The general line break segmenter. 
-static SEGMENTER: Lazy = Lazy::new(|| { - let provider = - BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap(); - LineSegmenter::try_new_lstm_with_buffer_provider(&provider).unwrap() -}); +static SEGMENTER: Lazy = + Lazy::new(|| LineSegmenter::try_new_lstm_with_buffer_provider(&blob()).unwrap()); /// The line break segmenter for Chinese/Japanese text. static CJ_SEGMENTER: Lazy = Lazy::new(|| { - let provider = - BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap(); let cj_blob = BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU_CJ_SEGMENT) .unwrap(); - let cj_provider = ForkByKeyProvider::new(cj_blob, provider); + let cj_provider = ForkByKeyProvider::new(cj_blob, blob()); LineSegmenter::try_new_lstm_with_buffer_provider(&cj_provider).unwrap() }); /// The Unicode line break properties for each code point. static LINEBREAK_DATA: Lazy> = Lazy::new(|| { - let provider = - BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap(); - let deser_provider = provider.as_deserializing(); - icu_properties::maps::load_line_break(&deser_provider).unwrap() + icu_properties::maps::load_line_break(&blob().as_deserializing()).unwrap() +}); + +/// The set of Unicode default ignorables. +static DEFAULT_IGNORABLE_DATA: Lazy = Lazy::new(|| { + icu_properties::sets::load_default_ignorable_code_point(&blob().as_deserializing()) + .unwrap() }); /// A line break opportunity. @@ -64,6 +68,37 @@ pub enum Breakpoint { Hyphen, } +impl Breakpoint { + /// Trim a line before this breakpoint. + pub fn trim(self, line: &str) -> &str { + // Trim default ignorables. + let ignorable = DEFAULT_IGNORABLE_DATA.as_borrowed(); + let line = line.trim_end_matches(|c| ignorable.contains(c)); + + match self { + // Trim whitespace. + Self::Normal => line.trim_end_matches(char::is_whitespace), + + // Trim linebreaks. 
+ Self::Mandatory => { + let lb = LINEBREAK_DATA.as_borrowed(); + line.trim_end_matches(|c| { + matches!( + lb.get(c), + LineBreak::MandatoryBreak + | LineBreak::CarriageReturn + | LineBreak::LineFeed + | LineBreak::NextLine + ) + }) + } + + // Trim nothing further. + Self::Hyphen => line, + } + } +} + /// Breaks the paragraph into lines. pub fn linebreak<'a>( engine: &Engine, @@ -180,14 +215,11 @@ fn linebreak_optimized_bounded<'a>( pred: usize, total: Cost, line: Line<'a>, + end: usize, } // Dynamic programming table. - let mut table = vec![Entry { - pred: 0, - total: 0.0, - line: line(engine, p, 0..0, Breakpoint::Mandatory, None), - }]; + let mut table = vec![Entry { pred: 0, total: 0.0, line: Line::empty(), end: 0 }]; let mut active = 0; let mut prev_end = 0; @@ -200,7 +232,7 @@ fn linebreak_optimized_bounded<'a>( let mut line_lower_bound = None; for (pred_index, pred) in table.iter().enumerate().skip(active) { - let start = pred.line.end; + let start = pred.end; let unbreakable = prev_end == start; // If the minimum cost we've established for the line is already @@ -221,6 +253,7 @@ fn linebreak_optimized_bounded<'a>( width, &pred.line, &attempt, + end, breakpoint, unbreakable, ); @@ -263,7 +296,7 @@ fn linebreak_optimized_bounded<'a>( // If this attempt is better than what we had before, take it! if best.as_ref().map_or(true, |best| best.total >= total) { - best = Some(Entry { pred: pred_index, total, line: attempt }); + best = Some(Entry { pred: pred_index, total, line: attempt, end }); } } @@ -282,7 +315,7 @@ fn linebreak_optimized_bounded<'a>( let mut idx = table.len() - 1; // This should only happen if our bound was faulty. Which shouldn't happen! 
- if table[idx].line.end != p.bidi.text.len() { + if table[idx].end != p.text.len() { #[cfg(debug_assertions)] panic!("bounded paragraph layout is incomplete"); @@ -340,7 +373,7 @@ fn linebreak_optimized_approximate( let mut prev_end = 0; breakpoints(p, |end, breakpoint| { - let at_end = end == p.bidi.text.len(); + let at_end = end == p.text.len(); // Find the optimal predecessor. let mut best: Option = None; @@ -362,7 +395,7 @@ fn linebreak_optimized_approximate( // make it the desired width. We trim at the end to not take into // account trailing spaces. This is, again, only an approximation of // the real behaviour of `line`. - let trimmed_end = start + p.bidi.text[start..end].trim_end().len(); + let trimmed_end = start + p.text[start..end].trim_end().len(); let line_ratio = raw_ratio( p, width, @@ -428,8 +461,9 @@ fn linebreak_optimized_approximate( idx = table[idx].pred; } + let mut pred = Line::empty(); + let mut start = 0; let mut exact = 0.0; - let mut pred = line(engine, p, 0..0, Breakpoint::Mandatory, None); // The cost that we optimized was only an approximate cost, so the layout we // got here is only likely to be good, not guaranteed to be the best. We now @@ -438,26 +472,36 @@ fn linebreak_optimized_approximate( for idx in indices.into_iter().rev() { let Entry { end, breakpoint, unbreakable, .. } = table[idx]; - let start = pred.end; let attempt = line(engine, p, start..end, breakpoint, Some(&pred)); - let (_, line_cost) = - ratio_and_cost(p, metrics, width, &pred, &attempt, breakpoint, unbreakable); + let (_, line_cost) = ratio_and_cost( + p, + metrics, + width, + &pred, + &attempt, + end, + breakpoint, + unbreakable, + ); - exact += line_cost; pred = attempt; + start = end; + exact += line_cost; } exact } /// Compute the stretch ratio and cost of a line. 
+#[allow(clippy::too_many_arguments)] fn ratio_and_cost( p: &Preparation, metrics: &CostMetrics, available_width: Abs, pred: &Line, attempt: &Line, + end: usize, breakpoint: Breakpoint, unbreakable: bool, ) -> (f64, Cost) { @@ -474,7 +518,7 @@ fn ratio_and_cost( metrics, breakpoint, ratio, - attempt.end == p.bidi.text.len(), + end == p.text.len(), attempt.justify, unbreakable, pred.dash.is_some() && attempt.dash.is_some(), @@ -587,7 +631,14 @@ fn raw_cost( /// code much simpler and the consumers of this function don't need the /// composability and flexibility of external iteration anyway. fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint)) { - let text = p.bidi.text; + let text = p.text; + + // Single breakpoint at the end for empty text. + if text.is_empty() { + f(0, Breakpoint::Mandatory); + return; + } + let hyphenate = p.hyphenate != Some(false); let lb = LINEBREAK_DATA.as_borrowed(); let segmenter = match p.lang { @@ -747,8 +798,9 @@ fn linebreak_link(link: &str, mut f: impl FnMut(usize)) { fn hyphenate_at(p: &Preparation, offset: usize) -> bool { p.hyphenate .or_else(|| { - let shaped = p.find(offset)?.text()?; - Some(TextElem::hyphenate_in(shaped.styles)) + let (_, item) = p.get(offset); + let styles = item.text()?.styles; + Some(TextElem::hyphenate_in(styles)) }) .unwrap_or(false) } @@ -756,8 +808,9 @@ fn hyphenate_at(p: &Preparation, offset: usize) -> bool { /// The text language at the given offset. fn lang_at(p: &Preparation, offset: usize) -> Option { let lang = p.lang.or_else(|| { - let shaped = p.find(offset)?.text()?; - Some(TextElem::lang_in(shaped.styles)) + let (_, item) = p.get(offset); + let styles = item.text()?.styles; + Some(TextElem::lang_in(styles)) })?; let bytes = lang.as_str().as_bytes().try_into().ok()?; @@ -813,17 +866,14 @@ struct Estimates { impl Estimates { /// Compute estimations for approximate Knuth-Plass layout. 
fn compute(p: &Preparation) -> Self { - let cap = p.bidi.text.len(); + let cap = p.text.len(); let mut widths = CummulativeVec::with_capacity(cap); let mut stretchability = CummulativeVec::with_capacity(cap); let mut shrinkability = CummulativeVec::with_capacity(cap); let mut justifiables = CummulativeVec::with_capacity(cap); - for item in &p.items { - let textual_len = item.textual_len(); - let after = widths.len() + textual_len; - + for (range, item) in p.items.iter() { if let Item::Text(shaped) = item { for g in shaped.glyphs.iter() { let byte_len = g.range.len(); @@ -835,13 +885,13 @@ impl Estimates { justifiables.push(byte_len, g.is_justifiable() as usize); } } else { - widths.push(textual_len, item.width()); + widths.push(range.len(), item.natural_width()); } - widths.adjust(after); - stretchability.adjust(after); - shrinkability.adjust(after); - justifiables.adjust(after); + widths.adjust(range.end); + stretchability.adjust(range.end); + shrinkability.adjust(range.end); + justifiables.adjust(range.end); } Self { @@ -871,11 +921,6 @@ where Self { total, summed } } - /// Get the covered byte length. - fn len(&self) -> usize { - self.summed.len() - } - /// Adjust to cover the given byte length. fn adjust(&mut self, len: usize) { self.summed.resize(len, self.total); diff --git a/crates/typst/src/layout/inline/prepare.rs b/crates/typst/src/layout/inline/prepare.rs index 90d8d5a47..59682b2c8 100644 --- a/crates/typst/src/layout/inline/prepare.rs +++ b/crates/typst/src/layout/inline/prepare.rs @@ -13,16 +13,24 @@ use crate::text::{Costs, Lang, TextElem}; /// Only when a line break falls onto a text index that is not safe-to-break per /// rustybuzz, we have to reshape that portion. pub struct Preparation<'a> { + /// The paragraph's full text. + pub text: &'a str, /// Bidirectional text embedding levels for the paragraph. - pub bidi: BidiInfo<'a>, + /// + /// This is `None` if the paragraph is BiDi-uniform (all the base direction). 
+ pub bidi: Option>, /// Text runs, spacing and layouted elements. - pub items: Vec>, + pub items: Vec<(Range, Item<'a>)>, + /// Maps from byte indices to item indices. + pub indices: Vec, /// The span mapper. pub spans: SpanMapper, /// Whether to hyphenate if it's the same for all children. pub hyphenate: Option, /// Costs for various layout decisions. pub costs: Costs, + /// The dominant direction. + pub dir: Dir, /// The text language if it's the same for all children. pub lang: Option, /// The paragraph's resolved horizontal alignment. @@ -44,46 +52,18 @@ pub struct Preparation<'a> { } impl<'a> Preparation<'a> { - /// Find the item that contains the given `text_offset`. - pub fn find(&self, text_offset: usize) -> Option<&Item<'a>> { - let mut cursor = 0; - for item in &self.items { - let end = cursor + item.textual_len(); - if (cursor..end).contains(&text_offset) { - return Some(item); - } - cursor = end; - } - None + /// Get the item that contains the given `text_offset`. + pub fn get(&self, offset: usize) -> &(Range, Item<'a>) { + let idx = self.indices.get(offset).copied().unwrap_or(0); + &self.items[idx] } - /// Return the items that intersect the given `text_range`. - /// - /// Returns the expanded range around the items and the items. - pub fn slice(&self, text_range: Range) -> (Range, &[Item<'a>]) { - let mut cursor = 0; - let mut start = 0; - let mut end = 0; - let mut expanded = text_range.clone(); - - for (i, item) in self.items.iter().enumerate() { - if cursor <= text_range.start { - start = i; - expanded.start = cursor; - } - - let len = item.textual_len(); - if cursor < text_range.end || cursor + len <= text_range.end { - end = i + 1; - expanded.end = cursor + len; - } else { - break; - } - - cursor += len; - } - - (expanded, &self.items[start..end]) + /// Iterate over the items that intersect the given `sliced` range. 
+ pub fn slice(&self, sliced: Range) -> impl Iterator)> { + let start = self.indices.get(sliced.start).copied().unwrap_or(0); + self.items[start..].iter().take_while(move |(range, _)| { + range.start < sliced.end || range.end <= sliced.end + }) } } @@ -99,42 +79,57 @@ pub fn prepare<'a>( spans: SpanMapper, styles: StyleChain<'a>, ) -> SourceResult> { - let bidi = BidiInfo::new( - text, - match TextElem::dir_in(styles) { - Dir::LTR => Some(BidiLevel::ltr()), - Dir::RTL => Some(BidiLevel::rtl()), - _ => None, - }, - ); + let dir = TextElem::dir_in(styles); + let default_level = match dir { + Dir::RTL => BidiLevel::rtl(), + _ => BidiLevel::ltr(), + }; + + let bidi = BidiInfo::new(text, Some(default_level)); + let is_bidi = bidi + .levels + .iter() + .any(|level| level.is_ltr() != default_level.is_ltr()); let mut cursor = 0; let mut items = Vec::with_capacity(segments.len()); // Shape the text to finalize the items. for segment in segments { - let end = cursor + segment.textual_len(); + let len = segment.textual_len(); + let end = cursor + len; + let range = cursor..end; + match segment { Segment::Text(_, styles) => { - shape_range(&mut items, engine, &bidi, cursor..end, &spans, styles); + shape_range(&mut items, engine, text, &bidi, range, styles); } - Segment::Item(item) => items.push(item), + Segment::Item(item) => items.push((range, item)), } cursor = end; } + // Build the mapping from byte to item indices. 
+ let mut indices = Vec::with_capacity(text.len()); + for (i, (range, _)) in items.iter().enumerate() { + indices.extend(range.clone().map(|_| i)); + } + let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(styles).is_auto(); if cjk_latin_spacing { add_cjk_latin_spacing(&mut items); } Ok(Preparation { - bidi, + text, + bidi: is_bidi.then_some(bidi), items, + indices, spans, hyphenate: children.shared_get(styles, TextElem::hyphenate_in), costs: TextElem::costs_in(styles), + dir, lang: children.shared_get(styles, TextElem::lang_in), align: AlignElem::alignment_in(styles).resolve(styles).x, justify: ParElem::justify_in(styles), @@ -150,10 +145,14 @@ pub fn prepare<'a>( /// Add some spacing between Han characters and western characters. See /// Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition /// in Horizontal Written Mode -fn add_cjk_latin_spacing(items: &mut [Item]) { - let mut items = items.iter_mut().filter(|x| !matches!(x, Item::Tag(_))).peekable(); +fn add_cjk_latin_spacing(items: &mut [(Range, Item)]) { + let mut items = items + .iter_mut() + .filter(|(_, x)| !matches!(x, Item::Tag(_))) + .peekable(); + let mut prev: Option<&ShapedGlyph> = None; - while let Some(item) = items.next() { + while let Some((_, item)) = items.next() { let Some(text) = item.text_mut() else { prev = None; continue; @@ -168,7 +167,7 @@ fn add_cjk_latin_spacing(items: &mut [Item]) { let next = glyphs.peek().map(|n| n as _).or_else(|| { items .peek() - .and_then(|i| i.text()) + .and_then(|(_, i)| i.text()) .and_then(|shaped| shaped.glyphs.first()) }); diff --git a/crates/typst/src/layout/inline/shaping.rs b/crates/typst/src/layout/inline/shaping.rs index 44b653917..43dc351a5 100644 --- a/crates/typst/src/layout/inline/shaping.rs +++ b/crates/typst/src/layout/inline/shaping.rs @@ -14,7 +14,6 @@ use super::{Item, Range, SpanMapper}; use crate::engine::Engine; use crate::foundations::{Smart, StyleChain}; use crate::layout::{Abs, Dir, Em, Frame, FrameItem, Point, 
Size}; -use crate::syntax::Span; use crate::text::{ decorate, families, features, variant, Font, FontVariant, Glyph, Lang, Region, TextElem, TextItem, @@ -27,6 +26,7 @@ use crate::World; /// This type contains owned or borrowed shaped text runs, which can be /// measured, used to reshape substrings more quickly and converted into a /// frame. +#[derive(Clone)] pub struct ShapedText<'a> { /// The start of the text in the full paragraph. pub base: usize, @@ -80,8 +80,6 @@ pub struct ShapedGlyph { pub safe_to_break: bool, /// The first char in this glyph's cluster. pub c: char, - /// The source code location of the glyph and its byte offset within it. - pub span: (Span, u16), /// Whether this glyph is justifiable for CJK scripts. pub is_justifiable: bool, /// The script of the glyph. @@ -214,6 +212,7 @@ impl<'a> ShapedText<'a> { pub fn build( &self, engine: &Engine, + spans: &SpanMapper, justification_ratio: f64, extra_justification: Abs, ) -> Frame { @@ -268,7 +267,7 @@ impl<'a> ShapedText<'a> { // We may not be able to reach the offset completely if // it exceeds u16, but better to have a roughly correct // span offset than nothing. - let mut span = shaped.span; + let mut span = spans.span_at(shaped.range.start); span.1 = span.1.saturating_add(span_offset.saturating_as()); // |<---- a Glyph ---->| @@ -331,7 +330,7 @@ impl<'a> ShapedText<'a> { } /// Measure the top and bottom extent of this text. - fn measure(&self, engine: &Engine) -> (Abs, Abs) { + pub fn measure(&self, engine: &Engine) -> (Abs, Abs) { let mut top = Abs::zero(); let mut bottom = Abs::zero(); @@ -409,12 +408,7 @@ impl<'a> ShapedText<'a> { /// shaping process if possible. /// /// The text `range` is relative to the whole paragraph. 
- pub fn reshape( - &'a self, - engine: &Engine, - spans: &SpanMapper, - text_range: Range, - ) -> ShapedText<'a> { + pub fn reshape(&'a self, engine: &Engine, text_range: Range) -> ShapedText<'a> { let text = &self.text[text_range.start - self.base..text_range.end - self.base]; if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) { #[cfg(debug_assertions)] @@ -436,7 +430,6 @@ impl<'a> ShapedText<'a> { engine, text_range.start, text, - spans, self.styles, self.dir, self.lang, @@ -445,6 +438,16 @@ impl<'a> ShapedText<'a> { } } + /// Derive an empty text run with the same properties as this one. + pub fn empty(&self) -> Self { + Self { + text: "", + width: Abs::zero(), + glyphs: Cow::Borrowed(&[]), + ..*self + } + } + /// Push a hyphen to end of the text. pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) { self.insert_hyphen(engine, fallback, Side::Right) @@ -493,7 +496,6 @@ impl<'a> ShapedText<'a> { range, safe_to_break: true, c: '-', - span: (Span::detached(), 0), is_justifiable: false, script: Script::Common, }; @@ -592,11 +594,11 @@ impl Debug for ShapedText<'_> { /// Group a range of text by BiDi level and script, shape the runs and generate /// items for them. 
pub fn shape_range<'a>( - items: &mut Vec>, + items: &mut Vec<(Range, Item<'a>)>, engine: &Engine, + text: &'a str, bidi: &BidiInfo<'a>, range: Range, - spans: &SpanMapper, styles: StyleChain<'a>, ) { let script = TextElem::script_in(styles); @@ -604,17 +606,9 @@ pub fn shape_range<'a>( let region = TextElem::region_in(styles); let mut process = |range: Range, level: BidiLevel| { let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL }; - let shaped = shape( - engine, - range.start, - &bidi.text[range], - spans, - styles, - dir, - lang, - region, - ); - items.push(Item::Text(shaped)); + let shaped = + shape(engine, range.start, &text[range.clone()], styles, dir, lang, region); + items.push((range, Item::Text(shaped))); }; let mut prev_level = BidiLevel::ltr(); @@ -625,14 +619,14 @@ pub fn shape_range<'a>( // set (rather than inferred from the glyphs), we keep the script at an // unchanging `Script::Unknown` so that only level changes cause breaks. for i in range.clone() { - if !bidi.text.is_char_boundary(i) { + if !text.is_char_boundary(i) { continue; } let level = bidi.levels[i]; let curr_script = match script { Smart::Auto => { - bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script()) + text[i..].chars().next().map_or(Script::Unknown, |c| c.script()) } Smart::Custom(_) => Script::Unknown, }; @@ -668,7 +662,6 @@ fn shape<'a>( engine: &Engine, base: usize, text: &'a str, - spans: &SpanMapper, styles: StyleChain<'a>, dir: Dir, lang: Lang, @@ -677,7 +670,6 @@ fn shape<'a>( let size = TextElem::size_in(styles); let mut ctx = ShapingContext { engine, - spans, size, glyphs: vec![], used: vec![], @@ -717,7 +709,6 @@ fn shape<'a>( /// Holds shaping results and metadata common to all shaped segments. 
struct ShapingContext<'a, 'v> { engine: &'a Engine<'v>, - spans: &'a SpanMapper, glyphs: Vec, used: Vec, styles: StyleChain<'a>, @@ -830,7 +821,6 @@ fn shape_segment<'a>( range: start..end, safe_to_break: !info.unsafe_to_break(), c, - span: ctx.spans.span_at(start), is_justifiable: is_justifiable( c, script, @@ -921,7 +911,6 @@ fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) { range: start..end, safe_to_break: true, c, - span: ctx.spans.span_at(start), is_justifiable: is_justifiable( c, script, diff --git a/crates/typst/src/model/par.rs b/crates/typst/src/model/par.rs index 7f65a00fb..2110995f3 100644 --- a/crates/typst/src/model/par.rs +++ b/crates/typst/src/model/par.rs @@ -18,9 +18,9 @@ use crate::realize::StyleVec; /// /// # Example /// ```example -/// #show par: set block(spacing: 0.65em) /// #set par( /// first-line-indent: 1em, +/// spacing: 0.65em, /// justify: true, /// ) /// @@ -115,8 +115,7 @@ pub struct ParElem { /// By typographic convention, paragraph breaks are indicated either by some /// space between paragraphs or by indented first lines. Consider reducing /// the [paragraph spacing]($block.spacing) to the [`leading`]($par.leading) - /// when using this property (e.g. using - /// `[#show par: set block(spacing: 0.65em)]`). + /// when using this property (e.g. using `[#set par(spacing: 0.65em)]`). 
#[ghost] pub first_line_indent: Length, diff --git a/docs/guides/guide-for-latex-users.md b/docs/guides/guide-for-latex-users.md index 1f3caef98..8c3b56013 100644 --- a/docs/guides/guide-for-latex-users.md +++ b/docs/guides/guide-for-latex-users.md @@ -593,10 +593,9 @@ The example below ```typ #set page(margin: 1.75in) -#set par(leading: 0.55em, first-line-indent: 1.8em, justify: true) +#set par(leading: 0.55em, spacing: 0.55em, first-line-indent: 1.8em, justify: true) #set text(font: "New Computer Modern") #show raw: set text(font: "New Computer Modern Mono") -#show par: set block(spacing: 0.55em) #show heading: set block(above: 1.4em, below: 1em) ``` diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 9a7dc3733..b63d17760 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -120,7 +120,7 @@ a table listing all syntax that is available in code mode: | Named function | `{let f(x) = 2 * x}` | [Function]($function) | | Set rule | `{set text(14pt)}` | [Styling]($styling/#set-rules) | | Set-if rule | `{set text(..) if .. 
}` | [Styling]($styling/#set-rules) | -| Show-set rule | `{show par: set block(..)}` | [Styling]($styling/#show-rules) | +| Show-set rule | `{show heading: set block(..)}` | [Styling]($styling/#show-rules) | | Show rule with function | `{show raw: it => {..}}` | [Styling]($styling/#show-rules) | | Show-everything rule | `{show: columns.with(2)}` | [Styling]($styling/#show-rules) | | Context expression | `{context text.lang}` | [Context]($context) | diff --git a/tests/ref/bidi-whitespace-reset.png b/tests/ref/bidi-whitespace-reset.png index 7d64012f9..e9973798b 100644 Binary files a/tests/ref/bidi-whitespace-reset.png and b/tests/ref/bidi-whitespace-reset.png differ diff --git a/tests/ref/context-compatibility-locate.png b/tests/ref/context-compatibility-locate.png index 4c8944ab4..32516c00f 100644 Binary files a/tests/ref/context-compatibility-locate.png and b/tests/ref/context-compatibility-locate.png differ diff --git a/tests/ref/eval-mode.png b/tests/ref/eval-mode.png index 5edfa62dc..94357ff4f 100644 Binary files a/tests/ref/eval-mode.png and b/tests/ref/eval-mode.png differ diff --git a/tests/ref/issue-3601-empty-raw.png b/tests/ref/issue-3601-empty-raw.png new file mode 100644 index 000000000..be5ea8fc2 Binary files /dev/null and b/tests/ref/issue-3601-empty-raw.png differ diff --git a/tests/ref/issue-4278-par-trim-before-equation.png b/tests/ref/issue-4278-par-trim-before-equation.png new file mode 100644 index 000000000..b05537190 Binary files /dev/null and b/tests/ref/issue-4278-par-trim-before-equation.png differ diff --git a/tests/ref/justify-basically-empty.png b/tests/ref/justify-basically-empty.png new file mode 100644 index 000000000..3d1b50c13 Binary files /dev/null and b/tests/ref/justify-basically-empty.png differ diff --git a/tests/ref/par-metadata-after-trimmed-space.png b/tests/ref/par-metadata-after-trimmed-space.png new file mode 100644 index 000000000..b0de98eab Binary files /dev/null and b/tests/ref/par-metadata-after-trimmed-space.png 
differ diff --git a/tests/ref/par-trailing-whitespace.png b/tests/ref/par-trailing-whitespace.png new file mode 100644 index 000000000..10c22da5a Binary files /dev/null and b/tests/ref/par-trailing-whitespace.png differ diff --git a/tests/suite/foundations/version.typ b/tests/suite/foundations/version.typ index bf2cadb18..a4be7f13e 100644 --- a/tests/suite/foundations/version.typ +++ b/tests/suite/foundations/version.typ @@ -4,7 +4,7 @@ // Test version constructor. // Empty. -#version() +#test(array(version()), ()) // Plain. #test(version(1, 2).major, 1) diff --git a/tests/suite/layout/spacing.typ b/tests/suite/layout/spacing.typ index dd0fced55..c32e6c8f9 100644 --- a/tests/suite/layout/spacing.typ +++ b/tests/suite/layout/spacing.typ @@ -47,14 +47,14 @@ Totally #h() ignored Hello #h(2cm, weak: true) --- issue-4087 --- -// weak space at the end of the line would be removed. +// Weak space at the end of the line is removed. This is the first line #h(2cm, weak: true) A new line -// non-weak space would be consume a specified width and push next line. +// Non-weak space consumes a specified width and pushes to next line. This is the first line #h(2cm, weak: false) A new line -// similarly weak space at the beginning of the line would be removed. -This is the first line\ #h(2cm, weak: true) A new line +// Similarly, weak space at the beginning of the line is removed. +This is the first line \ #h(2cm, weak: true) A new line -// non-spacing, on the other hand, is not removed. -This is the first line\ #h(2cm, weak: false) A new line +// Non-weak-spacing, on the other hand, is not removed. +This is the first line \ #h(2cm, weak: false) A new line diff --git a/tests/suite/model/par.typ b/tests/suite/model/par.typ index f07c4c6ce..80bc9f3e1 100644 --- a/tests/suite/model/par.typ +++ b/tests/suite/model/par.typ @@ -78,3 +78,22 @@ Welcome \ here. Does this work well? 
#set text(dir: rtl) لآن وقد أظلم الليل وبدأت النجوم تنضخ وجه الطبيعة التي أعْيَتْ من طول ما انبعثت في النهار + +--- par-trailing-whitespace --- +// Ensure that trailing whitespace layouts as intended. +#box(fill: aqua, " ") + +--- par-empty-metadata --- +// Check that metadata still works in a zero length paragraph. +#block(height: 0pt)[#""#metadata(false)] +#context test(query().first().value, false) + +--- par-metadata-after-trimmed-space --- +// Ensure that metadata doesn't prevent trailing spaces from being trimmed. +#set par(justify: true, linebreaks: "simple") +#set text(hyphenate: false) +Lorem ipsum dolor #metadata(none) nonumy eirmod tempor. + +--- issue-4278-par-trim-before-equation --- +#set par(justify: true) +#lorem(6) aa $a = c + b$