use std::borrow::Cow; use std::fmt::{self, Debug, Formatter}; use std::str::FromStr; use std::sync::Arc; use az::SaturatingAs; use ecow::EcoString; use rustybuzz::{BufferFlags, ShapePlan, UnicodeBuffer}; use ttf_parser::Tag; use typst_library::engine::Engine; use typst_library::foundations::{Smart, StyleChain}; use typst_library::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size}; use typst_library::text::{ families, features, is_default_ignorable, variant, Font, FontFamily, FontVariant, Glyph, Lang, Region, TextEdgeBounds, TextElem, TextItem, }; use typst_library::World; use typst_utils::SliceExt; use unicode_bidi::{BidiInfo, Level as BidiLevel}; use unicode_script::{Script, UnicodeScript}; use super::{decorate, Item, Range, SpanMapper}; /// The result of shaping text. /// /// This type contains owned or borrowed shaped text runs, which can be /// measured, used to reshape substrings more quickly and converted into a /// frame. #[derive(Clone)] pub struct ShapedText<'a> { /// The start of the text in the full paragraph. pub base: usize, /// The text that was shaped. pub text: &'a str, /// The text direction. pub dir: Dir, /// The text language. pub lang: Lang, /// The text region. pub region: Option, /// The text's style properties. pub styles: StyleChain<'a>, /// The font variant. pub variant: FontVariant, /// The font size. pub size: Abs, /// The width of the text's bounding box. pub width: Abs, /// The shaped glyphs. pub glyphs: Cow<'a, [ShapedGlyph]>, } /// A single glyph resulting from shaping. #[derive(Debug, Clone)] pub struct ShapedGlyph { /// The font the glyph is contained in. pub font: Font, /// The glyph's index in the font. pub glyph_id: u16, /// The advance width of the glyph. pub x_advance: Em, /// The horizontal offset of the glyph. pub x_offset: Em, /// The vertical offset of the glyph. pub y_offset: Em, /// The adjustability of the glyph. pub adjustability: Adjustability, /// The byte range of this glyph's cluster in the full paragraph. 
A cluster /// is a sequence of one or multiple glyphs that cannot be separated and /// must always be treated as a union. /// /// The range values of the glyphs in a [`ShapedText`] should not overlap /// with each other, and they should be monotonically increasing (for /// left-to-right or top-to-bottom text) or monotonically decreasing (for /// right-to-left or bottom-to-top text). pub range: Range, /// Whether splitting the shaping result before this glyph would yield the /// same results as shaping the parts to both sides of `text_index` /// separately. pub safe_to_break: bool, /// The first char in this glyph's cluster. pub c: char, /// Whether this glyph is justifiable for CJK scripts. pub is_justifiable: bool, /// The script of the glyph. pub script: Script, } #[derive(Debug, Clone, Default)] pub struct Adjustability { /// The left and right stretchability pub stretchability: (Em, Em), /// The left and right shrinkability pub shrinkability: (Em, Em), } impl ShapedGlyph { /// Whether the glyph is a space. pub fn is_space(&self) -> bool { is_space(self.c) } /// Whether the glyph is justifiable. pub fn is_justifiable(&self) -> bool { // GB style is not relevant here. self.is_justifiable } /// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK). 
pub fn is_cj_script(&self) -> bool {
        is_cj_script(self.c, self.script)
    }

    /// Whether the glyph is CJK punctuation of any alignment class.
    ///
    /// The left- and center-aligned checks are made with
    /// [`CjkPunctStyle::Gb`].
    pub fn is_cjk_punctuation(&self) -> bool {
        self.is_cjk_left_aligned_punctuation(CjkPunctStyle::Gb)
            || self.is_cjk_right_aligned_punctuation()
            || self.is_cjk_center_aligned_punctuation(CjkPunctStyle::Gb)
    }

    /// Whether the glyph counts as left-aligned CJK punctuation under `style`.
    ///
    /// Forwards the glyph's char, advance and stretchability to the free
    /// function of the same name.
    pub fn is_cjk_left_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
        is_cjk_left_aligned_punctuation(
            self.c,
            self.x_advance,
            self.stretchability(),
            style,
        )
    }

    /// Whether the glyph counts as right-aligned CJK punctuation.
    ///
    /// Forwards the glyph's char, advance and stretchability to the free
    /// function of the same name.
    pub fn is_cjk_right_aligned_punctuation(&self) -> bool {
        is_cjk_right_aligned_punctuation(self.c, self.x_advance, self.stretchability())
    }

    /// Whether the glyph counts as center-aligned CJK punctuation under
    /// `style`.
    pub fn is_cjk_center_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
        is_cjk_center_aligned_punctuation(self.c, style)
    }

    /// Whether the glyph is a western letter or number.
    pub fn is_letter_or_number(&self) -> bool {
        matches!(self.c.script(), Script::Latin | Script::Greek | Script::Cyrillic)
            || matches!(self.c, '#' | '$' | '%' | '&')
            || self.c.is_ascii_digit()
    }

    /// The adjustability this glyph gets from its own class alone, before
    /// any adjustment that depends on neighbouring glyphs.
    ///
    /// Spaces may stretch and shrink on the right. CJK punctuation may only
    /// shrink, on the side(s) determined by its alignment class. Every other
    /// glyph gets the default (zero) adjustability.
    pub fn base_adjustability(&self, style: CjkPunctStyle) -> Adjustability {
        let width = self.x_advance;
        if self.is_space() {
            Adjustability {
                // The number for spaces is from Knuth-Plass' paper
                stretchability: (Em::zero(), width / 2.0),
                shrinkability: (Em::zero(), width / 3.0),
            }
        } else if self.is_cjk_left_aligned_punctuation(style) {
            Adjustability {
                stretchability: (Em::zero(), Em::zero()),
                shrinkability: (Em::zero(), width / 2.0),
            }
        } else if self.is_cjk_right_aligned_punctuation() {
            Adjustability {
                stretchability: (Em::zero(), Em::zero()),
                shrinkability: (width / 2.0, Em::zero()),
            }
        } else if self.is_cjk_center_aligned_punctuation(style) {
            Adjustability {
                stretchability: (Em::zero(), Em::zero()),
                shrinkability: (width / 4.0, width / 4.0),
            }
        } else {
            Adjustability::default()
        }
    }

    /// The stretchability of the character.
    pub fn stretchability(&self) -> (Em, Em) {
        self.adjustability.stretchability
    }

    /// The shrinkability of the character.
pub fn shrinkability(&self) -> (Em, Em) { self.adjustability.shrinkability } /// Shrink the width of glyph on the left side. pub fn shrink_left(&mut self, amount: Em) { self.x_offset -= amount; self.x_advance -= amount; self.adjustability.shrinkability.0 -= amount; } /// Shrink the width of glyph on the right side. pub fn shrink_right(&mut self, amount: Em) { self.x_advance -= amount; self.adjustability.shrinkability.1 -= amount; } } /// A side you can go toward. enum Side { /// To the left-hand side. Left, /// To the right-hand side. Right, } impl<'a> ShapedText<'a> { /// Build the shaped text's frame. /// /// The `justification` defines how much extra advance width each /// [justifiable glyph](ShapedGlyph::is_justifiable) will get. pub fn build( &self, engine: &Engine, spans: &SpanMapper, justification_ratio: f64, extra_justification: Abs, ) -> Frame { let (top, bottom) = self.measure(engine); let size = Size::new(self.width, top + bottom); let mut offset = Abs::zero(); let mut frame = Frame::soft(size); frame.set_baseline(top); let shift = TextElem::baseline_in(self.styles); let decos = TextElem::deco_in(self.styles); let fill = TextElem::fill_in(self.styles); let stroke = TextElem::stroke_in(self.styles); let span_offset = TextElem::span_offset_in(self.styles); for ((font, y_offset), group) in self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset)) { let mut range = group[0].range.clone(); for glyph in group { range.start = range.start.min(glyph.range.start); range.end = range.end.max(glyph.range.end); } let pos = Point::new(offset, top + shift - y_offset.at(self.size)); let glyphs: Vec = group .iter() .map(|shaped: &ShapedGlyph| { let adjustability_left = if justification_ratio < 0.0 { shaped.shrinkability().0 } else { shaped.stretchability().0 }; let adjustability_right = if justification_ratio < 0.0 { shaped.shrinkability().1 } else { shaped.stretchability().1 }; let justification_left = adjustability_left * justification_ratio; let mut 
justification_right = adjustability_right * justification_ratio; if shaped.is_justifiable() { justification_right += Em::from_length(extra_justification, self.size) } frame.size_mut().x += justification_left.at(self.size) + justification_right.at(self.size); // We may not be able to reach the offset completely if // it exceeds u16, but better to have a roughly correct // span offset than nothing. let mut span = spans.span_at(shaped.range.start); span.1 = span.1.saturating_add(span_offset.saturating_as()); // |<---- a Glyph ---->| // -->|ShapedGlyph|<-- // +---+-----------+---+ // | | *********| | // | | * | | // | | * ****| | // | | * *| | // | | *********| | // +---+--+--------+---+ // A B C D // Note A, B, D could be positive, zero, or negative. // A: justification_left // B: ShapedGlyph's x_offset // (though a small part of the glyph may go inside B) // B+C: ShapedGlyph's x_advance // D: justification_right // A+B: Glyph's x_offset // A+B+C+D: Glyph's x_advance Glyph { id: shaped.glyph_id, x_advance: shaped.x_advance + justification_left + justification_right, x_offset: shaped.x_offset + justification_left, range: (shaped.range.start - range.start).saturating_as() ..(shaped.range.end - range.start).saturating_as(), span, } }) .collect(); let item = TextItem { font, size: self.size, lang: self.lang, region: self.region, fill: fill.clone(), stroke: stroke.clone().map(|s| s.unwrap_or_default()), text: self.text[range.start - self.base..range.end - self.base].into(), glyphs, }; let width = item.width(); if decos.is_empty() { frame.push(pos, FrameItem::Text(item)); } else { // Apply line decorations. frame.push(pos, FrameItem::Text(item.clone())); for deco in &decos { decorate(&mut frame, deco, &item, width, shift, pos); } } offset += width; } frame } /// Measure the top and bottom extent of this text. 
pub fn measure(&self, engine: &Engine) -> (Abs, Abs) { let mut top = Abs::zero(); let mut bottom = Abs::zero(); let top_edge = TextElem::top_edge_in(self.styles); let bottom_edge = TextElem::bottom_edge_in(self.styles); // Expand top and bottom by reading the font's vertical metrics. let mut expand = |font: &Font, bounds: TextEdgeBounds| { let (t, b) = font.edges(top_edge, bottom_edge, self.size, bounds); top.set_max(t); bottom.set_max(b); }; if self.glyphs.is_empty() { // When there are no glyphs, we just use the vertical metrics of the // first available font. let world = engine.world; for family in families(self.styles) { if let Some(font) = world .book() .select(family.as_str(), self.variant) .and_then(|id| world.font(id)) { expand(&font, TextEdgeBounds::Zero); break; } } } else { for g in self.glyphs.iter() { expand(&g.font, TextEdgeBounds::Glyph(g.glyph_id)); } } (top, bottom) } /// How many glyphs are in the text where we can insert additional /// space when encountering underfull lines. pub fn justifiables(&self) -> usize { self.glyphs.iter().filter(|g| g.is_justifiable()).count() } /// Whether the last glyph is a CJK character which should not be justified /// on line end. pub fn cjk_justifiable_at_last(&self) -> bool { self.glyphs .last() .map(|g| g.is_cj_script() || g.is_cjk_punctuation()) .unwrap_or(false) } /// The stretchability of the text. pub fn stretchability(&self) -> Abs { self.glyphs .iter() .map(|g| g.stretchability().0 + g.stretchability().1) .sum::() .at(self.size) } /// The shrinkability of the text pub fn shrinkability(&self) -> Abs { self.glyphs .iter() .map(|g| g.shrinkability().0 + g.shrinkability().1) .sum::() .at(self.size) } /// Reshape a range of the shaped text, reusing information from this /// shaping process if possible. /// /// The text `range` is relative to the whole paragraph. 
pub fn reshape(&'a self, engine: &Engine, text_range: Range) -> ShapedText<'a> { let text = &self.text[text_range.start - self.base..text_range.end - self.base]; if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) { #[cfg(debug_assertions)] assert_all_glyphs_in_range(glyphs, text, text_range.clone()); Self { base: text_range.start, text, dir: self.dir, lang: self.lang, region: self.region, styles: self.styles, size: self.size, variant: self.variant, width: glyphs.iter().map(|g| g.x_advance).sum::().at(self.size), glyphs: Cow::Borrowed(glyphs), } } else { shape( engine, text_range.start, text, self.styles, self.dir, self.lang, self.region, ) } } /// Derive an empty text run with the same properties as this one. pub fn empty(&self) -> Self { Self { text: "", width: Abs::zero(), glyphs: Cow::Borrowed(&[]), ..*self } } /// Push a hyphen to end of the text. pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) { self.insert_hyphen(engine, fallback, Side::Right) } /// Prepend a hyphen to start of the text. 
pub fn prepend_hyphen(&mut self, engine: &Engine, fallback: bool) { self.insert_hyphen(engine, fallback, Side::Left) } fn insert_hyphen(&mut self, engine: &Engine, fallback: bool, side: Side) { let world = engine.world; let book = world.book(); let fallback_func = if fallback { Some(|| book.select_fallback(None, self.variant, "-")) } else { None }; let mut chain = families(self.styles) .filter(|family| family.covers().map_or(true, |c| c.is_match("-"))) .map(|family| book.select(family.as_str(), self.variant)) .chain(fallback_func.iter().map(|f| f())) .flatten(); chain.find_map(|id| { let font = world.font(id)?; let ttf = font.ttf(); let glyph_id = ttf.glyph_index('-')?; let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?); let range = match side { Side::Left => self.glyphs.first().map(|g| g.range.start..g.range.start), Side::Right => self.glyphs.last().map(|g| g.range.end..g.range.end), } // In the unlikely chance that we hyphenate after an empty line, // ensure that the glyph range still falls after self.base so // that subtracting either of the endpoints by self.base doesn't // underflow. See . .unwrap_or_else(|| self.base..self.base); self.width += x_advance.at(self.size); let glyph = ShapedGlyph { font, glyph_id: glyph_id.0, x_advance, x_offset: Em::zero(), y_offset: Em::zero(), adjustability: Adjustability::default(), range, safe_to_break: true, c: '-', is_justifiable: false, script: Script::Common, }; match side { Side::Left => self.glyphs.to_mut().insert(0, glyph), Side::Right => self.glyphs.to_mut().push(glyph), } Some(()) }); } /// Find the subslice of glyphs that represent the given text range if both /// sides are safe to break. 
fn slice_safe_to_break(&self, text_range: Range) -> Option<&[ShapedGlyph]> {
        let Range { mut start, mut end } = text_range;
        // For non-positive directions the glyphs run in decreasing text
        // order, so the text end maps to the left glyph boundary.
        if !self.dir.is_positive() {
            std::mem::swap(&mut start, &mut end);
        }

        let left = self.find_safe_to_break(start)?;
        let right = self.find_safe_to_break(end)?;
        Some(&self.glyphs[left..right])
    }

    /// Find the glyph offset matching the text index that is most towards the
    /// start of the text and safe-to-break.
    fn find_safe_to_break(&self, text_index: usize) -> Option<usize> {
        let ltr = self.dir.is_positive();

        // Handle edge cases.
        let len = self.glyphs.len();
        if text_index == self.base {
            return Some(if ltr { 0 } else { len });
        } else if text_index == self.base + self.text.len() {
            return Some(if ltr { len } else { 0 });
        }

        // Find any glyph with the text index.
        let found = self.glyphs.binary_search_by(|g: &ShapedGlyph| {
            let ordering = g.range.start.cmp(&text_index);
            if ltr {
                ordering
            } else {
                ordering.reverse()
            }
        });

        let mut idx = match found {
            Ok(idx) => idx,
            Err(idx) => {
                // Handle the special case where we break before a '\n'
                //
                // For example: (assume `a` is a CJK character with three bytes)
                // text:        " a     \n b "
                // index:         0 1 2 3  4 5
                // text_index:          ^
                // glyphs:        0     .  1
                //
                // We will get found = Err(1), because '\n' does not have a
                // glyph. But it's safe to break here. Thus the following
                // condition:
                // - glyphs[0].end == text_index == 3
                // - text[3] == '\n'
                return (idx > 0
                    && self.glyphs[idx - 1].range.end == text_index
                    && self.text[text_index - self.base..].starts_with('\n'))
                .then_some(idx);
            }
        };

        // Search for the start-most glyph with the text index. This means
        // we take empty range glyphs at the start and leave those at the end
        // for the next line.
        let dec = if ltr { usize::checked_sub } else { usize::checked_add };
        while let Some(next) = dec(idx, 1) {
            if self.glyphs.get(next).map_or(true, |g| g.range.start != text_index) {
                break;
            }
            idx = next;
        }

        // RTL needs offset one because the left side of the range should be
        // exclusive and the right side inclusive, contrary to the normal
        // behaviour of ranges.
        self.glyphs[idx].safe_to_break.then_some(idx + usize::from(!ltr))
    }
}

impl Debug for ShapedText<'_> {
    /// Formats only the shaped text itself.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        self.text.fmt(f)
    }
}

/// Group a range of text by BiDi level and script, shape the runs and generate
/// items for them.
pub fn shape_range<'a>(
    items: &mut Vec<(Range, Item<'a>)>,
    engine: &Engine,
    text: &'a str,
    bidi: &BidiInfo<'a>,
    range: Range,
    styles: StyleChain<'a>,
) {
    let script = TextElem::script_in(styles);
    let lang = TextElem::lang_in(styles);
    let region = TextElem::region_in(styles);
    let mut process = |range: Range, level: BidiLevel| {
        let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
        let shaped =
            shape(engine, range.start, &text[range.clone()], styles, dir, lang, region);
        items.push((range, Item::Text(shaped)));
    };

    let mut prev_level = BidiLevel::ltr();
    let mut prev_script = Script::Unknown;
    let mut cursor = range.start;

    // Group by embedding level and script. If the text's script is explicitly
    // set (rather than inferred from the glyphs), we keep the script at an
    // unchanging `Script::Unknown` so that only level changes cause breaks.
    for i in range.clone() {
        if !text.is_char_boundary(i) {
            continue;
        }

        let level = bidi.levels[i];
        let curr_script = match script {
            Smart::Auto => {
                text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
            }
            Smart::Custom(_) => Script::Unknown,
        };

        if level != prev_level || !is_compatible(curr_script, prev_script) {
            if cursor < i {
                process(cursor..i, prev_level);
            }
            cursor = i;
            prev_level = level;
            prev_script = curr_script;
        } else if is_generic_script(prev_script) {
            // Let the run adopt the first specific script it encounters.
            prev_script = curr_script;
        }
    }

    process(cursor..range.end, prev_level);
}

/// Whether this is not a specific script.
fn is_generic_script(script: Script) -> bool {
    matches!(script, Script::Unknown | Script::Common | Script::Inherited)
}

/// Whether these script can be part of the same shape run.
fn is_compatible(a: Script, b: Script) -> bool {
    is_generic_script(a) || is_generic_script(b) || a == b
}

/// Shape text into [`ShapedText`].
///
/// `base` is the offset of `text` within the full paragraph; all glyph
/// ranges in the result are relative to the paragraph, not to `text`.
#[allow(clippy::too_many_arguments)]
fn shape<'a>(
    engine: &Engine,
    base: usize,
    text: &'a str,
    styles: StyleChain<'a>,
    dir: Dir,
    lang: Lang,
    region: Option<Region>,
) -> ShapedText<'a> {
    let size = TextElem::size_in(styles);
    let mut ctx = ShapingContext {
        engine,
        size,
        glyphs: vec![],
        used: vec![],
        styles,
        variant: variant(styles),
        features: features(styles),
        fallback: TextElem::fallback_in(styles),
        dir,
    };

    if !text.is_empty() {
        shape_segment(&mut ctx, base, text, families(styles));
    }

    track_and_space(&mut ctx);
    calculate_adjustability(&mut ctx, lang, region);

    #[cfg(debug_assertions)]
    assert_all_glyphs_in_range(&ctx.glyphs, text, base..(base + text.len()));
    #[cfg(debug_assertions)]
    assert_glyph_ranges_in_order(&ctx.glyphs, dir);

    ShapedText {
        base,
        text,
        dir,
        lang,
        region,
        styles,
        variant: ctx.variant,
        size,
        width: ctx.glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(size),
        glyphs: Cow::Owned(ctx.glyphs),
    }
}

/// Holds shaping results and metadata common to all shaped segments.
struct ShapingContext<'a, 'v> { engine: &'a Engine<'v>, glyphs: Vec, used: Vec, styles: StyleChain<'a>, size: Abs, variant: FontVariant, features: Vec, fallback: bool, dir: Dir, } /// Shape text with font fallback using the `families` iterator. fn shape_segment<'a>( ctx: &mut ShapingContext, base: usize, text: &str, mut families: impl Iterator + Clone, ) { // Don't try shaping newlines, tabs, or default ignorables. if text .chars() .all(|c| c == '\n' || c == '\t' || is_default_ignorable(c)) { return; } // Find the next available family. let world = ctx.engine.world; let book = world.book(); let mut selection = None; let mut covers = None; for family in families.by_ref() { selection = book .select(family.as_str(), ctx.variant) .and_then(|id| world.font(id)) .filter(|font| !ctx.used.contains(font)); if selection.is_some() { covers = family.covers(); break; } } // Do font fallback if the families are exhausted and fallback is enabled. if selection.is_none() && ctx.fallback { let first = ctx.used.first().map(Font::info); selection = book .select_fallback(first, ctx.variant, text) .and_then(|id| world.font(id)) .filter(|font| !ctx.used.contains(font)); } // Extract the font id or shape notdef glyphs if we couldn't find any font. let Some(font) = selection else { if let Some(font) = ctx.used.first().cloned() { shape_tofus(ctx, base, text, font); } return; }; ctx.used.push(font.clone()); // Fill the buffer with our text. 
let mut buffer = UnicodeBuffer::new(); buffer.push_str(text); buffer.set_language(language(ctx.styles)); if let Some(script) = TextElem::script_in(ctx.styles).custom().and_then(|script| { rustybuzz::Script::from_iso15924_tag(Tag::from_bytes(script.as_bytes())) }) { buffer.set_script(script) } buffer.set_direction(match ctx.dir { Dir::LTR => rustybuzz::Direction::LeftToRight, Dir::RTL => rustybuzz::Direction::RightToLeft, _ => unimplemented!("vertical text layout"), }); buffer.guess_segment_properties(); // By default, Harfbuzz will create zero-width space glyphs for default // ignorables. This is probably useful for GUI apps that want noticeable // effects on the cursor for those, but for us it's not useful and hurts // text extraction. buffer.set_flags(BufferFlags::REMOVE_DEFAULT_IGNORABLES); // Prepare the shape plan. This plan depends on direction, script, language, // and features, but is independent from the text and can thus be memoized. let plan = create_shape_plan( &font, buffer.direction(), buffer.script(), buffer.language().as_ref(), &ctx.features, ); // Shape! let buffer = rustybuzz::shape_with_plan(font.rusty(), &plan, buffer); let infos = buffer.glyph_infos(); let pos = buffer.glyph_positions(); let ltr = ctx.dir.is_positive(); // Whether the character at the given offset is covered by the coverage. let is_covered = |offset| { let end = text[offset..] .char_indices() .nth(1) .map(|(i, _)| offset + i) .unwrap_or(text.len()); covers.map_or(true, |cov| cov.is_match(&text[offset..end])) }; // Collect the shaped glyphs, doing fallback and shaping parts again with // the next font if necessary. let mut i = 0; while i < infos.len() { let info = &infos[i]; let cluster = info.cluster as usize; // Add the glyph to the shaped output. if info.glyph_id != 0 && is_covered(cluster) { // Determine the text range of the glyph. 
let start = base + cluster; let end = base + if ltr { i.checked_add(1) } else { i.checked_sub(1) } .and_then(|last| infos.get(last)) .map_or(text.len(), |info| info.cluster as usize); let c = text[cluster..].chars().next().unwrap(); let script = c.script(); let x_advance = font.to_em(pos[i].x_advance); ctx.glyphs.push(ShapedGlyph { font: font.clone(), glyph_id: info.glyph_id as u16, // TODO: Don't ignore y_advance. x_advance, x_offset: font.to_em(pos[i].x_offset), y_offset: font.to_em(pos[i].y_offset), adjustability: Adjustability::default(), range: start..end, safe_to_break: !info.unsafe_to_break(), c, is_justifiable: is_justifiable( c, script, x_advance, Adjustability::default().stretchability, ), script, }); } else { // First, search for the end of the tofu sequence. let k = i; while infos.get(i + 1).is_some_and(|info| { info.glyph_id == 0 || !is_covered(info.cluster as usize) }) { i += 1; } // Then, determine the start and end text index for the tofu // sequence. // // Examples: // Everything is shown in visual order. Tofus are written as "_". // We want to find out that the tofus span the text `2..6`. // Note that the clusters are longer than 1 char. // // Left-to-right: // Text: h a l i h a l l o // Glyphs: A _ _ C E // Clusters: 0 2 4 6 8 // k=1 i=2 // // Right-to-left: // Text: O L L A H I L A H // Glyphs: E C _ _ A // Clusters: 8 6 4 2 0 // k=2 i=3 let start = infos[if ltr { k } else { i }].cluster as usize; let end = if ltr { i.checked_add(1) } else { k.checked_sub(1) } .and_then(|last| infos.get(last)) .map_or(text.len(), |info| info.cluster as usize); // Trim half-baked cluster. let remove = base + start..base + end; while ctx.glyphs.last().is_some_and(|g| remove.contains(&g.range.start)) { ctx.glyphs.pop(); } // Recursively shape the tofu sequence with the next family. shape_segment(ctx, base + start, &text[start..end], families.clone()); } i += 1; } ctx.used.pop(); } /// Create a shape plan. 
#[comemo::memoize]
fn create_shape_plan(
    font: &Font,
    direction: rustybuzz::Direction,
    script: rustybuzz::Script,
    language: Option<&rustybuzz::Language>,
    features: &[rustybuzz::Feature],
) -> Arc<ShapePlan> {
    Arc::new(rustybuzz::ShapePlan::new(
        font.rusty(),
        direction,
        Some(script),
        language,
        features,
    ))
}

/// Shape the text with tofus from the given font.
///
/// Pushes one glyph with id 0 per char of `text`, in reverse char order when
/// the direction is not positive.
fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
    let x_advance = font.advance(0).unwrap_or_default();
    let add_glyph = |(cluster, c): (usize, char)| {
        let start = base + cluster;
        let end = start + c.len_utf8();
        let script = c.script();
        ctx.glyphs.push(ShapedGlyph {
            font: font.clone(),
            glyph_id: 0,
            x_advance,
            x_offset: Em::zero(),
            y_offset: Em::zero(),
            adjustability: Adjustability::default(),
            range: start..end,
            safe_to_break: true,
            c,
            is_justifiable: is_justifiable(
                c,
                script,
                x_advance,
                Adjustability::default().stretchability,
            ),
            script,
        });
    };
    if ctx.dir.is_positive() {
        text.char_indices().for_each(add_glyph);
    } else {
        text.char_indices().rev().for_each(add_glyph);
    }
}

/// Apply tracking and spacing to the shaped glyphs.
fn track_and_space(ctx: &mut ShapingContext) {
    let tracking = Em::from_length(TextElem::tracking_in(ctx.styles), ctx.size);
    let spacing =
        TextElem::spacing_in(ctx.styles).map(|abs| Em::from_length(abs, ctx.size));

    let mut glyphs = ctx.glyphs.iter_mut().peekable();
    while let Some(glyph) = glyphs.next() {
        // Make non-breaking space same width as normal space.
        if glyph.c == '\u{00A0}' {
            glyph.x_advance -= nbsp_delta(&glyph.font).unwrap_or_default();
        }

        if glyph.is_space() {
            glyph.x_advance = spacing.relative_to(glyph.x_advance);
        }

        // Add tracking only when the next glyph starts at a different text
        // index; the last glyph never gets tracking.
        if glyphs
            .peek()
            .is_some_and(|next| glyph.range.start != next.range.start)
        {
            glyph.x_advance += tracking;
        }
    }
}

/// Calculate stretchability and shrinkability of each glyph,
/// and CJK punctuation adjustments according to Chinese Layout Requirements.
fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option) { let style = cjk_punct_style(lang, region); for glyph in &mut ctx.glyphs { glyph.adjustability = glyph.base_adjustability(style); } let mut glyphs = ctx.glyphs.iter_mut().peekable(); while let Some(glyph) = glyphs.next() { // CNS style needs not further adjustment. if glyph.is_cjk_punctuation() && matches!(style, CjkPunctStyle::Cns) { continue; } // Now we apply consecutive punctuation adjustment, specified in Chinese Layout. // Requirements, section 3.1.6.1 Punctuation Adjustment Space, and Japanese Layout // Requirements, section 3.1 Line Composition Rules for Punctuation Marks let Some(next) = glyphs.peek_mut() else { continue }; let width = glyph.x_advance; let delta = width / 2.0; if glyph.is_cjk_punctuation() && next.is_cjk_punctuation() && (glyph.shrinkability().1 + next.shrinkability().0) >= delta { let left_delta = glyph.shrinkability().1.min(delta); glyph.shrink_right(left_delta); next.shrink_left(delta - left_delta); } } } /// Difference between non-breaking and normal space. fn nbsp_delta(font: &Font) -> Option { let space = font.ttf().glyph_index(' ')?.0; let nbsp = font.ttf().glyph_index('\u{00A0}')?.0; Some(font.advance(nbsp)? - font.advance(space)?) } /// Process the language and region of a style chain into a /// rustybuzz-compatible BCP 47 language. fn language(styles: StyleChain) -> rustybuzz::Language { let mut bcp: EcoString = TextElem::lang_in(styles).as_str().into(); if let Some(region) = TextElem::region_in(styles) { bcp.push('-'); bcp.push_str(region.as_str()); } rustybuzz::Language::from_str(&bcp).unwrap() } /// Returns true if all glyphs in `glyphs` have ranges within the range `range`. 
#[cfg(debug_assertions)]
fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range) {
    if glyphs
        .iter()
        .any(|g| g.range.start < range.start || g.range.end > range.end)
    {
        panic!("one or more glyphs in {text:?} fell out of range");
    }
}

/// Asserts that the ranges of `glyphs` is in the proper order according to
/// `dir`.
///
/// This asserts instead of returning a bool in order to provide a more
/// informative message when the invariant is violated.
#[cfg(debug_assertions)]
fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) {
    if glyphs.is_empty() {
        return;
    }

    // Iterator::is_sorted and friends are unstable as of Rust 1.70.0
    for i in 0..(glyphs.len() - 1) {
        let a = &glyphs[i];
        let b = &glyphs[i + 1];
        let ord = a.range.start.cmp(&b.range.start);
        let ord = if dir.is_positive() { ord } else { ord.reverse() };
        if ord == std::cmp::Ordering::Greater {
            panic!(
                "glyph ranges should be monotonically {}, \
                 but found glyphs out of order:\n\n\
                 first: {a:#?}\nsecond: {b:#?}",
                if dir.is_positive() { "increasing" } else { "decreasing" },
            );
        }
    }
}

// The CJK punctuation that can appear at the beginning or end of a line.
//
// Full-width forms are written as escapes so that they cannot be confused
// with (or silently normalized into) their ASCII look-alikes, which are
// ordinary Latin punctuation and must not be listed here.
pub const BEGIN_PUNCT_PAT: &[char] = &[
    '“', '‘', '《', '〈',
    '\u{FF08}', // fullwidth left parenthesis
    '『', '「', '【', '〖', '〔',
    '\u{FF3B}', // fullwidth left square bracket
    '\u{FF5B}', // fullwidth left curly bracket
];
pub const END_PUNCT_PAT: &[char] = &[
    '”', '’',
    '\u{FF0C}', // fullwidth comma
    '\u{FF0E}', // fullwidth full stop
    '。', '、',
    '\u{FF1A}', // fullwidth colon
    '\u{FF1B}', // fullwidth semicolon
    '》', '〉',
    '\u{FF09}', // fullwidth right parenthesis
    '』', '」', '】', '〗', '〕',
    '\u{FF3D}', // fullwidth right square bracket
    '\u{FF5D}', // fullwidth right curly bracket
    '\u{FF1F}', // fullwidth question mark
    '\u{FF01}', // fullwidth exclamation mark
];

/// The punctuation convention used when classifying CJK punctuation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CjkPunctStyle {
    /// Standard GB/T 15834-2011, used mostly in mainland China.
    Gb,
    /// Standard by Taiwan Ministry of Education, used in Taiwan and Hong Kong.
    Cns,
    /// Standard JIS X 4051, used in Japan.
    Jis,
}

/// Pick the punctuation style for a language and region.
///
/// Chinese with region `TW` or `HK` uses CNS, Japanese uses JIS, and
/// everything else falls back to GB.
pub fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle {
    match (lang, region.as_ref().map(Region::as_str)) {
        (Lang::CHINESE, Some("TW" | "HK")) => CjkPunctStyle::Cns,
        (Lang::JAPANESE, _) => CjkPunctStyle::Jis,
        // zh-CN, zh-SG, zh-MY use GB-style punctuation,
        _ => CjkPunctStyle::Gb,
    }
}

/// Whether the glyph is a space.
fn is_space(c: char) -> bool { matches!(c, ' ' | '\u{00A0}' | ' ') } /// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK). pub fn is_of_cj_script(c: char) -> bool { is_cj_script(c, c.script()) } /// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK). /// The function is dedicated to typesetting Chinese or Japanese, which do not /// have spaces between words, so K is not checked here. fn is_cj_script(c: char, script: Script) -> bool { use Script::*; // U+30FC: Katakana-Hiragana Prolonged Sound Mark matches!(script, Hiragana | Katakana | Han) || c == '\u{30FC}' } /// See fn is_cjk_left_aligned_punctuation( c: char, x_advance: Em, stretchability: (Em, Em), style: CjkPunctStyle, ) -> bool { use CjkPunctStyle::*; // CJK quotation marks shares codepoints with latin quotation marks. // But only the CJK ones have full width. if matches!(c, '”' | '’') && x_advance + stretchability.1 == Em::one() { return true; } if matches!(style, Gb | Jis) && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') { return true; } if matches!(style, Gb) && matches!(c, '?' | '!') { // In GB style, exclamations and question marks are also left aligned // and can be adjusted. Note that they are not adjustable in other // styles. return true; } // See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks matches!(c, '》' | ')' | '』' | '」' | '】' | '〗' | '〕' | '〉' | ']' | '}') } /// See fn is_cjk_right_aligned_punctuation( c: char, x_advance: Em, stretchability: (Em, Em), ) -> bool { // CJK quotation marks shares codepoints with latin quotation marks. // But only the CJK ones have full width. 
if matches!(c, '“' | '‘') && x_advance + stretchability.0 == Em::one() { return true; } // See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks matches!(c, '《' | '(' | '『' | '「' | '【' | '〖' | '〔' | '〈' | '[' | '{') } /// See fn is_cjk_center_aligned_punctuation(c: char, style: CjkPunctStyle) -> bool { if matches!(style, CjkPunctStyle::Cns) && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') { return true; } // U+30FB: Katakana Middle Dot // U+00B7: Middle Dot matches!(c, '\u{30FB}' | '\u{00B7}') } /// Whether the glyph is justifiable. /// /// Quotations in latin script and CJK are unfortunately the same codepoint /// (U+2018, U+2019, U+201C, U+201D), but quotations in Chinese must be /// fullwidth. This heuristics can therefore fail for monospace latin fonts. /// However, since monospace fonts are usually not justified this edge case /// should be rare enough. fn is_justifiable( c: char, script: Script, x_advance: Em, stretchability: (Em, Em), ) -> bool { // punctuation style is not relevant here. let style = CjkPunctStyle::Gb; is_space(c) || is_cj_script(c, script) || is_cjk_left_aligned_punctuation(c, x_advance, stretchability, style) || is_cjk_right_aligned_punctuation(c, x_advance, stretchability) || is_cjk_center_aligned_punctuation(c, style) }