Separate items for hyphens, fixing style of repeated hyphen (#6798)

This commit is contained in:
Laurenz 2025-08-21 16:03:06 +02:00 committed by GitHub
parent c163c46b3a
commit 727df723df
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 97 additions and 88 deletions

View File

@ -18,6 +18,25 @@ const EN_DASH: char = '–';
const EM_DASH: char = '—'; const EM_DASH: char = '—';
const LINE_SEPARATOR: char = '\u{2028}'; // We use LS to distinguish justified breaks. const LINE_SEPARATOR: char = '\u{2028}'; // We use LS to distinguish justified breaks.
// Every item on a line carries an index that records its logical (as opposed
// to visual) position. While a line is being built, the items are stored in
// visual (BiDi-reordered) order; when we commit to the line and build its
// frame, we sort them by this logical index.
//
// - Normal items use their position in `p.items`, shifted by the number of
//   reserved prefix indices (see `logical_item_idx`).
// - Special layout-generated items use the dedicated constants below, which
//   keep them correctly ordered relative to each other and to normal items.
//
// The sort is unstable, so logical indices must be unique within a line.

/// Logical index of the hyphen that is repeated at the start of a line. It is
/// smaller than the index of any normal item.
const START_HYPHEN_IDX: usize = 0;

/// Converts the position `i` of a normal item into its logical index by
/// skipping the indices reserved in front of the normal items.
const fn logical_item_idx(i: usize) -> usize {
    // Number of special indices reserved before the first normal item.
    const RESERVED_PREFIX: usize = 1;

    // This cannot overflow: `i` comes from a vector, which is limited to
    // `isize::MAX` elements.
    i + RESERVED_PREFIX
}

/// Logical index of the fallback text item that is pushed when a line has no
/// text items of its own. It is the second-largest possible index.
const FALLBACK_TEXT_IDX: usize = END_HYPHEN_IDX - 1;

/// Logical index of the hyphen that is appended at the end of a line. It is
/// the largest possible index.
const END_HYPHEN_IDX: usize = usize::MAX;
/// A layouted line, consisting of a sequence of layouted inline items that are /// A layouted line, consisting of a sequence of layouted inline items that are
/// mostly borrowed from the preparation phase. This type enables you to measure /// mostly borrowed from the preparation phase. This type enables you to measure
/// the size of a line in a range before committing to building the line's /// the size of a line in a range before committing to building the line's
@ -128,7 +147,7 @@ pub fn line<'a>(
p: &'a Preparation, p: &'a Preparation,
range: Range, range: Range,
breakpoint: Breakpoint, breakpoint: Breakpoint,
pred: Option<&Line>, pred: Option<&Line<'a>>,
) -> Line<'a> { ) -> Line<'a> {
// The line's full text. // The line's full text.
let full = &p.text[range.clone()]; let full = &p.text[range.clone()];
@ -152,20 +171,26 @@ pub fn line<'a>(
let trim = range.start + breakpoint.trim(full).len(); let trim = range.start + breakpoint.trim(full).len();
// Collect the items for the line. // Collect the items for the line.
let mut items = collect_items(engine, p, range, trim); let mut items = Items::new();
// Add a hyphen at the line start, if a previous dash should be repeated. // Add a hyphen at the line start, if a previous dash should be repeated.
if pred.is_some_and(|pred| should_repeat_hyphen(pred, full)) if let Some(pred) = pred
&& let Some(shaped) = items.first_text_mut() && pred.dash == Some(Dash::Hard)
&& let Some(base) = pred.items.last_text()
&& should_repeat_hyphen(base.lang, full)
&& let Some(hyphen) = ShapedText::hyphen(engine, p.config.fallback, base, trim)
{ {
shaped.prepend_hyphen(engine, p.config.fallback); items.push(Item::Text(hyphen), START_HYPHEN_IDX);
} }
collect_items(&mut items, engine, p, range, trim);
// Add a hyphen at the line end, if we ended on a soft hyphen. // Add a hyphen at the line end, if we ended on a soft hyphen.
if dash == Some(Dash::Soft) if dash == Some(Dash::Soft)
&& let Some(shaped) = items.last_text_mut() && let Some(base) = items.last_text()
&& let Some(hyphen) = ShapedText::hyphen(engine, p.config.fallback, base, trim)
{ {
shaped.push_hyphen(engine, p.config.fallback); items.push(Item::Text(hyphen), END_HYPHEN_IDX);
} }
// Ensure that there is no weak spacing at the start and end of the line. // Ensure that there is no weak spacing at the start and end of the line.
@ -189,18 +214,18 @@ pub fn line<'a>(
/// We do not factor the `trim` directly into the `range` because we still want /// We do not factor the `trim` directly into the `range` because we still want
/// to keep non-text items after the trim (e.g. tags). /// to keep non-text items after the trim (e.g. tags).
fn collect_items<'a>( fn collect_items<'a>(
items: &mut Items<'a>,
engine: &Engine, engine: &Engine,
p: &'a Preparation, p: &'a Preparation,
range: Range, range: Range,
trim: usize, trim: usize,
) -> Items<'a> { ) {
let mut items = Items::new();
let mut fallback = None; let mut fallback = None;
// Collect the items for each consecutively ordered run. // Collect the items for each consecutively ordered run.
reorder(p, range.clone(), |subrange, rtl| { reorder(p, range.clone(), |subrange, rtl| {
let from = items.len(); let from = items.len();
collect_range(engine, p, subrange, trim, &mut items, &mut fallback); collect_range(engine, p, subrange, trim, items, &mut fallback);
if rtl { if rtl {
items.reorder(from); items.reorder(from);
} }
@ -210,10 +235,8 @@ fn collect_items<'a>(
if !items.iter().any(|item| matches!(item, Item::Text(_))) if !items.iter().any(|item| matches!(item, Item::Text(_)))
&& let Some(fallback) = fallback && let Some(fallback) = fallback
{ {
items.push(fallback, usize::MAX); items.push(fallback, FALLBACK_TEXT_IDX);
} }
items
} }
/// Trims weak spacing from the start and end of the line. /// Trims weak spacing from the start and end of the line.
@ -277,7 +300,9 @@ fn collect_range<'a>(
items: &mut Items<'a>, items: &mut Items<'a>,
fallback: &mut Option<ItemEntry<'a>>, fallback: &mut Option<ItemEntry<'a>>,
) { ) {
for (idx, (subrange, item)) in p.slice(range.clone()) { for (i, (subrange, item)) in p.slice(range.clone()) {
let idx = logical_item_idx(i);
// All non-text items are just kept, they can't be split. // All non-text items are just kept, they can't be split.
let Item::Text(shaped) = item else { let Item::Text(shaped) = item else {
items.push(item, idx); items.push(item, idx);
@ -382,19 +407,10 @@ fn adjust_cj_at_line_end(p: &Preparation, items: &mut Items) {
} }
} }
/// Whether a hyphen should be inserted at the start of the next line. /// Whether a hyphen should be repeated at the start of the line in the given
fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool { /// language, when the following text is the given one.
// If the predecessor line does not end with a `Dash::Hard`, we shall fn should_repeat_hyphen(lang: Lang, following_text: &str) -> bool {
// not place a hyphen at the start of the next line. match lang {
if pred_line.dash != Some(Dash::Hard) {
return false;
}
// The hyphen should repeat only in the languages that require that feature.
// For more information see the discussion at https://github.com/typst/typst/issues/3235
let Some(Item::Text(shaped)) = pred_line.items.last() else { return false };
match shaped.lang {
// - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3 // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
// - Czech: see https://prirucka.ujc.cas.cz/?id=164 // - Czech: see https://prirucka.ujc.cas.cz/?id=164
// - Croatian: see http://pravopis.hr/pravilo/spojnica/68/ // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
@ -413,7 +429,7 @@ fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool {
// //
// See § 4.1.1.1.2.e on the "Ortografía de la lengua española" // See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
// https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
Lang::SPANISH => text.chars().next().is_some_and(|c| !c.is_uppercase()), Lang::SPANISH => following_text.chars().next().is_some_and(|c| !c.is_uppercase()),
_ => false, _ => false,
} }
@ -675,7 +691,11 @@ impl<'a> Items<'a> {
self.0.iter().map(|(_, item)| &**item) self.0.iter().map(|(_, item)| &**item)
} }
/// Iterate over the items with indices /// Iterate over the items with the indices that define their logical order.
/// See the docs above `logical_item_idx` for more details.
///
/// Note that this is different from `.iter().enumerate()` which would
/// provide the indices in visual order!
pub fn indexed_iter(&self) -> impl Iterator<Item = &(usize, ItemEntry<'a>)> { pub fn indexed_iter(&self) -> impl Iterator<Item = &(usize, ItemEntry<'a>)> {
self.0.iter() self.0.iter()
} }
@ -690,6 +710,11 @@ impl<'a> Items<'a> {
self.0.last().map(|(_, item)| &**item) self.0.last().map(|(_, item)| &**item)
} }
/// Returns the shaped text of the last item, or `None` if there are no
/// items or the last item is not text.
pub fn last_text(&self) -> Option<&ShapedText<'a>> {
    // Entries are `(logical index, item)` pairs; only the item matters here.
    let (_, entry) = self.0.last()?;
    entry.text()
}
/// Access the first item mutably, if it is text. /// Access the first item mutably, if it is text.
pub fn first_text_mut(&mut self) -> Option<&mut ShapedText<'a>> { pub fn first_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
self.0.first_mut()?.1.text_mut() self.0.first_mut()?.1.text_mut()
@ -706,12 +731,6 @@ impl<'a> Items<'a> {
} }
} }
impl<'a> FromIterator<ItemEntry<'a>> for Items<'a> {
fn from_iter<I: IntoIterator<Item = ItemEntry<'a>>>(iter: I) -> Self {
Self(iter.into_iter().enumerate().collect())
}
}
impl<'a> Deref for Items<'a> { impl<'a> Deref for Items<'a> {
type Target = Vec<(usize, ItemEntry<'a>)>; type Target = Vec<(usize, ItemEntry<'a>)>;

View File

@ -196,14 +196,6 @@ impl ShapedGlyph {
} }
} }
/// A side you can go toward.
enum Side {
/// To the left-hand side.
Left,
/// To the right-hand side.
Right,
}
impl<'a> ShapedText<'a> { impl<'a> ShapedText<'a> {
/// Build the shaped text's frame. /// Build the shaped text's frame.
/// ///
@ -448,27 +440,23 @@ impl<'a> ShapedText<'a> {
} }
} }
/// Push a hyphen to end of the text. /// Creates shaped text containing a hyphen.
pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) { pub fn hyphen(
self.insert_hyphen(engine, fallback, Side::Right) engine: &Engine,
} fallback: bool,
base: &ShapedText<'a>,
/// Prepend a hyphen to start of the text. pos: usize,
pub fn prepend_hyphen(&mut self, engine: &Engine, fallback: bool) { ) -> Option<Self> {
self.insert_hyphen(engine, fallback, Side::Left)
}
fn insert_hyphen(&mut self, engine: &Engine, fallback: bool, side: Side) {
let world = engine.world; let world = engine.world;
let book = world.book(); let book = world.book();
let fallback_func = if fallback { let fallback_func = if fallback {
Some(|| book.select_fallback(None, self.variant, "-")) Some(|| book.select_fallback(None, base.variant, "-"))
} else { } else {
None None
}; };
let mut chain = families(self.styles) let mut chain = families(base.styles)
.filter(|family| family.covers().is_none_or(|c| c.is_match("-"))) .filter(|family| family.covers().is_none_or(|c| c.is_match("-")))
.map(|family| book.select(family.as_str(), self.variant)) .map(|family| book.select(family.as_str(), base.variant))
.chain(fallback_func.iter().map(|f| f())) .chain(fallback_func.iter().map(|f| f()))
.flatten(); .flatten();
@ -477,37 +465,33 @@ impl<'a> ShapedText<'a> {
let ttf = font.ttf(); let ttf = font.ttf();
let glyph_id = ttf.glyph_index('-')?; let glyph_id = ttf.glyph_index('-')?;
let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?); let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?);
let range = match side { let size = base.styles.resolve(TextElem::size);
Side::Left => self.glyphs.first().map(|g| g.range.start..g.range.start),
Side::Right => self.glyphs.last().map(|g| g.range.end..g.range.end), Some(ShapedText {
} base: pos,
// In the unlikely chance that we hyphenate after an empty line, text: "",
// ensure that the glyph range still falls after self.base so dir: base.dir,
// that subtracting either of the endpoints by self.base doesn't lang: base.lang,
// underflow. See <https://github.com/typst/typst/issues/2283>. region: base.region,
.unwrap_or_else(|| self.base..self.base); styles: base.styles,
let size = self.styles.resolve(TextElem::size); variant: base.variant,
self.width += x_advance.at(size); width: x_advance.at(size),
let glyph = ShapedGlyph { glyphs: Cow::Owned(vec![ShapedGlyph {
font, font,
glyph_id: glyph_id.0, glyph_id: glyph_id.0,
x_advance, x_advance,
x_offset: Em::zero(), x_offset: Em::zero(),
y_offset: Em::zero(), y_offset: Em::zero(),
size, size,
adjustability: Adjustability::default(), adjustability: Adjustability::default(),
range, range: pos..pos,
safe_to_break: true, safe_to_break: true,
c: '-', c: '-',
is_justifiable: false, is_justifiable: false,
script: Script::Common, script: Script::Common,
}; }]),
match side { })
Side::Left => self.glyphs.to_mut().insert(0, glyph), })
Side::Right => self.glyphs.to_mut().push(glyph),
}
Some(())
});
} }
/// Find the subslice of glyphs that represent the given text range if both /// Find the subslice of glyphs that represent the given text range if both

Binary file not shown.

After

Width:  |  Height:  |  Size: 781 B

View File

@ -112,6 +112,12 @@ Tras el estallido de la contienda Ruiz-Giménez fue detenido junto a sus
dos hermanos y puesto bajo custodia por las autoridades republicanas, con dos hermanos y puesto bajo custodia por las autoridades republicanas, con
el objetivo de protegerle de las patrullas de milicianos. el objetivo de protegerle de las patrullas de milicianos.
--- hyphenate-repeat-style ---
// Ensure that a repeated hard hyphen keeps its styles.
#set page(width: 2cm)
#set text(lang: "es")
Hello-#text(red)[world]
--- costs-widow-orphan --- --- costs-widow-orphan ---
#set page(height: 60pt) #set page(height: 60pt)