diff --git a/crates/typst-layout/src/inline/line.rs b/crates/typst-layout/src/inline/line.rs index 35505c4b0..9b5b656e5 100644 --- a/crates/typst-layout/src/inline/line.rs +++ b/crates/typst-layout/src/inline/line.rs @@ -18,6 +18,25 @@ const EN_DASH: char = '–'; const EM_DASH: char = '—'; const LINE_SEPARATOR: char = '\u{2028}'; // We use LS to distinguish justified breaks. +// We use indices to remember the logical (as opposed to visual) order of items. +// During line building, the items are stored in visual (BiDi-reordered) order. +// When committing to a line and building its frame, we sort by logical index. +// +// - Special layout-generated items have custom indices that ensure correct +// ordering w.r.t. each other and normal elements, listed below. +// - Normal items have their position in `p.items` plus the number of special +// reserved prefix indices. +// +// Logical indices must be unique within a line because we use an unstable sort. +const START_HYPHEN_IDX: usize = 0; +const fn logical_item_idx(i: usize) -> usize { + // This won't overflow because `i` comes from a vector which is + // limited to `isize::MAX` elements. + i + 1 +} +const FALLBACK_TEXT_IDX: usize = usize::MAX - 1; +const END_HYPHEN_IDX: usize = usize::MAX; + /// A layouted line, consisting of a sequence of layouted inline items that are /// mostly borrowed from the preparation phase. This type enables you to measure /// the size of a line in a range before committing to building the line's @@ -128,7 +147,7 @@ pub fn line<'a>( p: &'a Preparation, range: Range, breakpoint: Breakpoint, - pred: Option<&Line>, + pred: Option<&Line<'a>>, ) -> Line<'a> { // The line's full text. let full = &p.text[range.clone()]; @@ -152,20 +171,26 @@ pub fn line<'a>( let trim = range.start + breakpoint.trim(full).len(); // Collect the items for the line. 
- let mut items = collect_items(engine, p, range, trim); + let mut items = Items::new(); // Add a hyphen at the line start, if a previous dash should be repeated. - if pred.is_some_and(|pred| should_repeat_hyphen(pred, full)) - && let Some(shaped) = items.first_text_mut() + if let Some(pred) = pred + && pred.dash == Some(Dash::Hard) + && let Some(base) = pred.items.last_text() + && should_repeat_hyphen(base.lang, full) + && let Some(hyphen) = ShapedText::hyphen(engine, p.config.fallback, base, trim) { - shaped.prepend_hyphen(engine, p.config.fallback); + items.push(Item::Text(hyphen), START_HYPHEN_IDX); } + collect_items(&mut items, engine, p, range, trim); + // Add a hyphen at the line end, if we ended on a soft hyphen. if dash == Some(Dash::Soft) - && let Some(shaped) = items.last_text_mut() + && let Some(base) = items.last_text() + && let Some(hyphen) = ShapedText::hyphen(engine, p.config.fallback, base, trim) { - shaped.push_hyphen(engine, p.config.fallback); + items.push(Item::Text(hyphen), END_HYPHEN_IDX); } // Ensure that there is no weak spacing at the start and end of the line. @@ -189,18 +214,18 @@ pub fn line<'a>( /// We do not factor the `trim` directly into the `range` because we still want /// to keep non-text items after the trim (e.g. tags). fn collect_items<'a>( + items: &mut Items<'a>, engine: &Engine, p: &'a Preparation, range: Range, trim: usize, -) -> Items<'a> { - let mut items = Items::new(); +) { let mut fallback = None; // Collect the items for each consecutively ordered run. 
reorder(p, range.clone(), |subrange, rtl| { let from = items.len(); - collect_range(engine, p, subrange, trim, &mut items, &mut fallback); + collect_range(engine, p, subrange, trim, items, &mut fallback); if rtl { items.reorder(from); } @@ -210,10 +235,8 @@ fn collect_items<'a>( if !items.iter().any(|item| matches!(item, Item::Text(_))) && let Some(fallback) = fallback { - items.push(fallback, usize::MAX); + items.push(fallback, FALLBACK_TEXT_IDX); } - - items } /// Trims weak spacing from the start and end of the line. @@ -277,7 +300,9 @@ fn collect_range<'a>( items: &mut Items<'a>, fallback: &mut Option>, ) { - for (idx, (subrange, item)) in p.slice(range.clone()) { + for (i, (subrange, item)) in p.slice(range.clone()) { + let idx = logical_item_idx(i); + // All non-text items are just kept, they can't be split. let Item::Text(shaped) = item else { items.push(item, idx); @@ -382,19 +407,10 @@ fn adjust_cj_at_line_end(p: &Preparation, items: &mut Items) { } } -/// Whether a hyphen should be inserted at the start of the next line. -fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool { - // If the predecessor line does not end with a `Dash::Hard`, we shall - // not place a hyphen at the start of the next line. - if pred_line.dash != Some(Dash::Hard) { - return false; - } - - // The hyphen should repeat only in the languages that require that feature. - // For more information see the discussion at https://github.com/typst/typst/issues/3235 - let Some(Item::Text(shaped)) = pred_line.items.last() else { return false }; - - match shaped.lang { +/// Whether a hyphen should be repeated at the start of the line in the given +/// language, when the following text is the given one. 
+fn should_repeat_hyphen(lang: Lang, following_text: &str) -> bool { + match lang { // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3 // - Czech: see https://prirucka.ujc.cas.cz/?id=164 // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/ @@ -413,7 +429,7 @@ fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool { // // See § 4.1.1.1.2.e on the "Ortografía de la lengua española" // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea - Lang::SPANISH => text.chars().next().is_some_and(|c| !c.is_uppercase()), + Lang::SPANISH => following_text.chars().next().is_some_and(|c| !c.is_uppercase()), _ => false, } @@ -675,7 +691,11 @@ impl<'a> Items<'a> { self.0.iter().map(|(_, item)| &**item) } - /// Iterate over the items with indices + /// Iterate over the items with the indices that define their logical order. + /// See the docs above `logical_item_idx` for more details. + /// + /// Note that this is different from `.iter().enumerate()` which would + /// provide the indices in visual order! pub fn indexed_iter(&self) -> impl Iterator)> { self.0.iter() } @@ -690,6 +710,11 @@ impl<'a> Items<'a> { self.0.last().map(|(_, item)| &**item) } + /// Access the last item, if it is text. + pub fn last_text(&self) -> Option<&ShapedText<'a>> { + self.0.last()?.1.text() + } + /// Access the first item mutably, if it is text. 
pub fn first_text_mut(&mut self) -> Option<&mut ShapedText<'a>> { self.0.first_mut()?.1.text_mut() @@ -706,12 +731,6 @@ impl<'a> Items<'a> { } } -impl<'a> FromIterator> for Items<'a> { - fn from_iter>>(iter: I) -> Self { - Self(iter.into_iter().enumerate().collect()) - } -} - impl<'a> Deref for Items<'a> { type Target = Vec<(usize, ItemEntry<'a>)>; diff --git a/crates/typst-layout/src/inline/shaping.rs b/crates/typst-layout/src/inline/shaping.rs index ded5fddc0..287bca9a5 100644 --- a/crates/typst-layout/src/inline/shaping.rs +++ b/crates/typst-layout/src/inline/shaping.rs @@ -196,14 +196,6 @@ impl ShapedGlyph { } } -/// A side you can go toward. -enum Side { - /// To the left-hand side. - Left, - /// To the right-hand side. - Right, -} - impl<'a> ShapedText<'a> { /// Build the shaped text's frame. /// @@ -448,27 +440,23 @@ impl<'a> ShapedText<'a> { } } - /// Push a hyphen to end of the text. - pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) { - self.insert_hyphen(engine, fallback, Side::Right) - } - - /// Prepend a hyphen to start of the text. - pub fn prepend_hyphen(&mut self, engine: &Engine, fallback: bool) { - self.insert_hyphen(engine, fallback, Side::Left) - } - - fn insert_hyphen(&mut self, engine: &Engine, fallback: bool, side: Side) { + /// Creates shaped text containing a hyphen. 
+ pub fn hyphen( + engine: &Engine, + fallback: bool, + base: &ShapedText<'a>, + pos: usize, + ) -> Option { let world = engine.world; let book = world.book(); let fallback_func = if fallback { - Some(|| book.select_fallback(None, self.variant, "-")) + Some(|| book.select_fallback(None, base.variant, "-")) } else { None }; - let mut chain = families(self.styles) + let mut chain = families(base.styles) .filter(|family| family.covers().is_none_or(|c| c.is_match("-"))) - .map(|family| book.select(family.as_str(), self.variant)) + .map(|family| book.select(family.as_str(), base.variant)) .chain(fallback_func.iter().map(|f| f())) .flatten(); @@ -477,37 +465,33 @@ impl<'a> ShapedText<'a> { let ttf = font.ttf(); let glyph_id = ttf.glyph_index('-')?; let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?); - let range = match side { - Side::Left => self.glyphs.first().map(|g| g.range.start..g.range.start), - Side::Right => self.glyphs.last().map(|g| g.range.end..g.range.end), - } - // In the unlikely chance that we hyphenate after an empty line, - // ensure that the glyph range still falls after self.base so - // that subtracting either of the endpoints by self.base doesn't - // underflow. See . 
- .unwrap_or_else(|| self.base..self.base); - let size = self.styles.resolve(TextElem::size); - self.width += x_advance.at(size); - let glyph = ShapedGlyph { - font, - glyph_id: glyph_id.0, - x_advance, - x_offset: Em::zero(), - y_offset: Em::zero(), - size, - adjustability: Adjustability::default(), - range, - safe_to_break: true, - c: '-', - is_justifiable: false, - script: Script::Common, - }; - match side { - Side::Left => self.glyphs.to_mut().insert(0, glyph), - Side::Right => self.glyphs.to_mut().push(glyph), - } - Some(()) - }); + let size = base.styles.resolve(TextElem::size); + + Some(ShapedText { + base: pos, + text: "", + dir: base.dir, + lang: base.lang, + region: base.region, + styles: base.styles, + variant: base.variant, + width: x_advance.at(size), + glyphs: Cow::Owned(vec![ShapedGlyph { + font, + glyph_id: glyph_id.0, + x_advance, + x_offset: Em::zero(), + y_offset: Em::zero(), + size, + adjustability: Adjustability::default(), + range: pos..pos, + safe_to_break: true, + c: '-', + is_justifiable: false, + script: Script::Common, + }]), + }) + }) } /// Find the subslice of glyphs that represent the given text range if both diff --git a/tests/ref/hyphenate-repeat-style.png b/tests/ref/hyphenate-repeat-style.png new file mode 100644 index 000000000..5ce57682a Binary files /dev/null and b/tests/ref/hyphenate-repeat-style.png differ diff --git a/tests/suite/layout/inline/hyphenate.typ b/tests/suite/layout/inline/hyphenate.typ index dcacb4f0a..892a0d328 100644 --- a/tests/suite/layout/inline/hyphenate.typ +++ b/tests/suite/layout/inline/hyphenate.typ @@ -112,6 +112,12 @@ Tras el estallido de la contienda Ruiz-Giménez fue detenido junto a sus dos hermanos y puesto bajo custodia por las autoridades republicanas, con el objetivo de protegerle de las patrullas de milicianos. +--- hyphenate-repeat-style --- +// Ensure that a repeated hard hyphen keeps its styles. 
+#set page(width: 2cm) +#set text(lang: "es") +Hello-#text(red)[world] + --- costs-widow-orphan --- #set page(height: 60pt)