mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
1196 lines
41 KiB
Rust
1196 lines
41 KiB
Rust
use std::borrow::Cow;
|
||
use std::fmt::{self, Debug, Formatter};
|
||
use std::str::FromStr;
|
||
use std::sync::Arc;
|
||
|
||
use az::SaturatingAs;
|
||
use ecow::EcoString;
|
||
use rustybuzz::{BufferFlags, ShapePlan, UnicodeBuffer};
|
||
use ttf_parser::Tag;
|
||
use typst_library::engine::Engine;
|
||
use typst_library::foundations::{Smart, StyleChain};
|
||
use typst_library::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
|
||
use typst_library::text::{
|
||
families, features, is_default_ignorable, variant, Font, FontFamily, FontVariant,
|
||
Glyph, Lang, Region, TextEdgeBounds, TextElem, TextItem,
|
||
};
|
||
use typst_library::World;
|
||
use typst_utils::SliceExt;
|
||
use unicode_bidi::{BidiInfo, Level as BidiLevel};
|
||
use unicode_script::{Script, UnicodeScript};
|
||
|
||
use super::{decorate, Item, Range, SpanMapper};
|
||
|
||
/// The result of shaping text.
|
||
///
|
||
/// This type contains owned or borrowed shaped text runs, which can be
|
||
/// measured, used to reshape substrings more quickly and converted into a
|
||
/// frame.
|
||
#[derive(Clone)]
|
||
pub struct ShapedText<'a> {
|
||
/// The start of the text in the full paragraph.
|
||
pub base: usize,
|
||
/// The text that was shaped.
|
||
pub text: &'a str,
|
||
/// The text direction.
|
||
pub dir: Dir,
|
||
/// The text language.
|
||
pub lang: Lang,
|
||
/// The text region.
|
||
pub region: Option<Region>,
|
||
/// The text's style properties.
|
||
pub styles: StyleChain<'a>,
|
||
/// The font variant.
|
||
pub variant: FontVariant,
|
||
/// The font size.
|
||
pub size: Abs,
|
||
/// The width of the text's bounding box.
|
||
pub width: Abs,
|
||
/// The shaped glyphs.
|
||
pub glyphs: Cow<'a, [ShapedGlyph]>,
|
||
}
|
||
|
||
/// A single glyph resulting from shaping.
|
||
#[derive(Debug, Clone)]
|
||
pub struct ShapedGlyph {
|
||
/// The font the glyph is contained in.
|
||
pub font: Font,
|
||
/// The glyph's index in the font.
|
||
pub glyph_id: u16,
|
||
/// The advance width of the glyph.
|
||
pub x_advance: Em,
|
||
/// The horizontal offset of the glyph.
|
||
pub x_offset: Em,
|
||
/// The vertical offset of the glyph.
|
||
pub y_offset: Em,
|
||
/// The adjustability of the glyph.
|
||
pub adjustability: Adjustability,
|
||
/// The byte range of this glyph's cluster in the full paragraph. A cluster
|
||
/// is a sequence of one or multiple glyphs that cannot be separated and
|
||
/// must always be treated as a union.
|
||
///
|
||
/// The range values of the glyphs in a [`ShapedText`] should not overlap
|
||
/// with each other, and they should be monotonically increasing (for
|
||
/// left-to-right or top-to-bottom text) or monotonically decreasing (for
|
||
/// right-to-left or bottom-to-top text).
|
||
pub range: Range,
|
||
/// Whether splitting the shaping result before this glyph would yield the
|
||
/// same results as shaping the parts to both sides of `text_index`
|
||
/// separately.
|
||
pub safe_to_break: bool,
|
||
/// The first char in this glyph's cluster.
|
||
pub c: char,
|
||
/// Whether this glyph is justifiable for CJK scripts.
|
||
pub is_justifiable: bool,
|
||
/// The script of the glyph.
|
||
pub script: Script,
|
||
}
|
||
|
||
#[derive(Debug, Clone, Default)]
|
||
pub struct Adjustability {
|
||
/// The left and right stretchability
|
||
pub stretchability: (Em, Em),
|
||
/// The left and right shrinkability
|
||
pub shrinkability: (Em, Em),
|
||
}
|
||
|
||
impl ShapedGlyph {
|
||
/// Whether the glyph is a space.
|
||
pub fn is_space(&self) -> bool {
|
||
is_space(self.c)
|
||
}
|
||
|
||
/// Whether the glyph is justifiable.
|
||
pub fn is_justifiable(&self) -> bool {
|
||
// GB style is not relevant here.
|
||
self.is_justifiable
|
||
}
|
||
|
||
/// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
|
||
pub fn is_cj_script(&self) -> bool {
|
||
is_cj_script(self.c, self.script)
|
||
}
|
||
|
||
pub fn is_cjk_punctuation(&self) -> bool {
|
||
self.is_cjk_left_aligned_punctuation(CjkPunctStyle::Gb)
|
||
|| self.is_cjk_right_aligned_punctuation()
|
||
|| self.is_cjk_center_aligned_punctuation(CjkPunctStyle::Gb)
|
||
}
|
||
|
||
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
||
pub fn is_cjk_left_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
|
||
is_cjk_left_aligned_punctuation(
|
||
self.c,
|
||
self.x_advance,
|
||
self.stretchability(),
|
||
style,
|
||
)
|
||
}
|
||
|
||
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
||
pub fn is_cjk_right_aligned_punctuation(&self) -> bool {
|
||
is_cjk_right_aligned_punctuation(self.c, self.x_advance, self.stretchability())
|
||
}
|
||
|
||
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
||
pub fn is_cjk_center_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
|
||
is_cjk_center_aligned_punctuation(self.c, style)
|
||
}
|
||
|
||
/// Whether the glyph is a western letter or number.
|
||
pub fn is_letter_or_number(&self) -> bool {
|
||
matches!(self.c.script(), Script::Latin | Script::Greek | Script::Cyrillic)
|
||
|| matches!(self.c, '#' | '$' | '%' | '&')
|
||
|| self.c.is_ascii_digit()
|
||
}
|
||
|
||
pub fn base_adjustability(&self, style: CjkPunctStyle) -> Adjustability {
|
||
let width = self.x_advance;
|
||
if self.is_space() {
|
||
Adjustability {
|
||
// The number for spaces is from Knuth-Plass' paper
|
||
stretchability: (Em::zero(), width / 2.0),
|
||
shrinkability: (Em::zero(), width / 3.0),
|
||
}
|
||
} else if self.is_cjk_left_aligned_punctuation(style) {
|
||
Adjustability {
|
||
stretchability: (Em::zero(), Em::zero()),
|
||
shrinkability: (Em::zero(), width / 2.0),
|
||
}
|
||
} else if self.is_cjk_right_aligned_punctuation() {
|
||
Adjustability {
|
||
stretchability: (Em::zero(), Em::zero()),
|
||
shrinkability: (width / 2.0, Em::zero()),
|
||
}
|
||
} else if self.is_cjk_center_aligned_punctuation(style) {
|
||
Adjustability {
|
||
stretchability: (Em::zero(), Em::zero()),
|
||
shrinkability: (width / 4.0, width / 4.0),
|
||
}
|
||
} else {
|
||
Adjustability::default()
|
||
}
|
||
}
|
||
|
||
/// The stretchability of the character.
|
||
pub fn stretchability(&self) -> (Em, Em) {
|
||
self.adjustability.stretchability
|
||
}
|
||
|
||
/// The shrinkability of the character.
|
||
pub fn shrinkability(&self) -> (Em, Em) {
|
||
self.adjustability.shrinkability
|
||
}
|
||
|
||
/// Shrink the width of glyph on the left side.
|
||
pub fn shrink_left(&mut self, amount: Em) {
|
||
self.x_offset -= amount;
|
||
self.x_advance -= amount;
|
||
self.adjustability.shrinkability.0 -= amount;
|
||
}
|
||
|
||
/// Shrink the width of glyph on the right side.
|
||
pub fn shrink_right(&mut self, amount: Em) {
|
||
self.x_advance -= amount;
|
||
self.adjustability.shrinkability.1 -= amount;
|
||
}
|
||
}
|
||
|
||
/// One of the two horizontal sides.
enum Side {
    /// The left-hand side.
    Left,
    /// The right-hand side.
    Right,
}
|
||
|
||
impl<'a> ShapedText<'a> {
|
||
/// Build the shaped text's frame.
|
||
///
|
||
/// The `justification` defines how much extra advance width each
|
||
/// [justifiable glyph](ShapedGlyph::is_justifiable) will get.
|
||
pub fn build(
|
||
&self,
|
||
engine: &Engine,
|
||
spans: &SpanMapper,
|
||
justification_ratio: f64,
|
||
extra_justification: Abs,
|
||
) -> Frame {
|
||
let (top, bottom) = self.measure(engine);
|
||
let size = Size::new(self.width, top + bottom);
|
||
|
||
let mut offset = Abs::zero();
|
||
let mut frame = Frame::soft(size);
|
||
frame.set_baseline(top);
|
||
|
||
let shift = TextElem::baseline_in(self.styles);
|
||
let decos = TextElem::deco_in(self.styles);
|
||
let fill = TextElem::fill_in(self.styles);
|
||
let stroke = TextElem::stroke_in(self.styles);
|
||
let span_offset = TextElem::span_offset_in(self.styles);
|
||
|
||
for ((font, y_offset), group) in
|
||
self.glyphs.as_ref().group_by_key(|g| (g.font.clone(), g.y_offset))
|
||
{
|
||
let mut range = group[0].range.clone();
|
||
for glyph in group {
|
||
range.start = range.start.min(glyph.range.start);
|
||
range.end = range.end.max(glyph.range.end);
|
||
}
|
||
|
||
let pos = Point::new(offset, top + shift - y_offset.at(self.size));
|
||
let glyphs: Vec<Glyph> = group
|
||
.iter()
|
||
.map(|shaped: &ShapedGlyph| {
|
||
let adjustability_left = if justification_ratio < 0.0 {
|
||
shaped.shrinkability().0
|
||
} else {
|
||
shaped.stretchability().0
|
||
};
|
||
let adjustability_right = if justification_ratio < 0.0 {
|
||
shaped.shrinkability().1
|
||
} else {
|
||
shaped.stretchability().1
|
||
};
|
||
|
||
let justification_left = adjustability_left * justification_ratio;
|
||
let mut justification_right =
|
||
adjustability_right * justification_ratio;
|
||
if shaped.is_justifiable() {
|
||
justification_right +=
|
||
Em::from_length(extra_justification, self.size)
|
||
}
|
||
|
||
frame.size_mut().x += justification_left.at(self.size)
|
||
+ justification_right.at(self.size);
|
||
|
||
// We may not be able to reach the offset completely if
|
||
// it exceeds u16, but better to have a roughly correct
|
||
// span offset than nothing.
|
||
let mut span = spans.span_at(shaped.range.start);
|
||
span.1 = span.1.saturating_add(span_offset.saturating_as());
|
||
|
||
// |<---- a Glyph ---->|
|
||
// -->|ShapedGlyph|<--
|
||
// +---+-----------+---+
|
||
// | | *********| |
|
||
// | | * | |
|
||
// | | * ****| |
|
||
// | | * *| |
|
||
// | | *********| |
|
||
// +---+--+--------+---+
|
||
// A B C D
|
||
// Note A, B, D could be positive, zero, or negative.
|
||
// A: justification_left
|
||
// B: ShapedGlyph's x_offset
|
||
// (though a small part of the glyph may go inside B)
|
||
// B+C: ShapedGlyph's x_advance
|
||
// D: justification_right
|
||
// A+B: Glyph's x_offset
|
||
// A+B+C+D: Glyph's x_advance
|
||
Glyph {
|
||
id: shaped.glyph_id,
|
||
x_advance: shaped.x_advance
|
||
+ justification_left
|
||
+ justification_right,
|
||
x_offset: shaped.x_offset + justification_left,
|
||
range: (shaped.range.start - range.start).saturating_as()
|
||
..(shaped.range.end - range.start).saturating_as(),
|
||
span,
|
||
}
|
||
})
|
||
.collect();
|
||
|
||
let item = TextItem {
|
||
font,
|
||
size: self.size,
|
||
lang: self.lang,
|
||
region: self.region,
|
||
fill: fill.clone(),
|
||
stroke: stroke.clone().map(|s| s.unwrap_or_default()),
|
||
text: self.text[range.start - self.base..range.end - self.base].into(),
|
||
glyphs,
|
||
};
|
||
|
||
let width = item.width();
|
||
if decos.is_empty() {
|
||
frame.push(pos, FrameItem::Text(item));
|
||
} else {
|
||
// Apply line decorations.
|
||
frame.push(pos, FrameItem::Text(item.clone()));
|
||
for deco in &decos {
|
||
decorate(&mut frame, deco, &item, width, shift, pos);
|
||
}
|
||
}
|
||
|
||
offset += width;
|
||
}
|
||
|
||
frame
|
||
}
|
||
|
||
/// Measure the top and bottom extent of this text.
|
||
pub fn measure(&self, engine: &Engine) -> (Abs, Abs) {
|
||
let mut top = Abs::zero();
|
||
let mut bottom = Abs::zero();
|
||
|
||
let top_edge = TextElem::top_edge_in(self.styles);
|
||
let bottom_edge = TextElem::bottom_edge_in(self.styles);
|
||
|
||
// Expand top and bottom by reading the font's vertical metrics.
|
||
let mut expand = |font: &Font, bounds: TextEdgeBounds| {
|
||
let (t, b) = font.edges(top_edge, bottom_edge, self.size, bounds);
|
||
top.set_max(t);
|
||
bottom.set_max(b);
|
||
};
|
||
|
||
if self.glyphs.is_empty() {
|
||
// When there are no glyphs, we just use the vertical metrics of the
|
||
// first available font.
|
||
let world = engine.world;
|
||
for family in families(self.styles) {
|
||
if let Some(font) = world
|
||
.book()
|
||
.select(family.as_str(), self.variant)
|
||
.and_then(|id| world.font(id))
|
||
{
|
||
expand(&font, TextEdgeBounds::Zero);
|
||
break;
|
||
}
|
||
}
|
||
} else {
|
||
for g in self.glyphs.iter() {
|
||
expand(&g.font, TextEdgeBounds::Glyph(g.glyph_id));
|
||
}
|
||
}
|
||
|
||
(top, bottom)
|
||
}
|
||
|
||
/// How many glyphs are in the text where we can insert additional
|
||
/// space when encountering underfull lines.
|
||
pub fn justifiables(&self) -> usize {
|
||
self.glyphs.iter().filter(|g| g.is_justifiable()).count()
|
||
}
|
||
|
||
/// Whether the last glyph is a CJK character which should not be justified
|
||
/// on line end.
|
||
pub fn cjk_justifiable_at_last(&self) -> bool {
|
||
self.glyphs
|
||
.last()
|
||
.map(|g| g.is_cj_script() || g.is_cjk_punctuation())
|
||
.unwrap_or(false)
|
||
}
|
||
|
||
/// The stretchability of the text.
|
||
pub fn stretchability(&self) -> Abs {
|
||
self.glyphs
|
||
.iter()
|
||
.map(|g| g.stretchability().0 + g.stretchability().1)
|
||
.sum::<Em>()
|
||
.at(self.size)
|
||
}
|
||
|
||
/// The shrinkability of the text
|
||
pub fn shrinkability(&self) -> Abs {
|
||
self.glyphs
|
||
.iter()
|
||
.map(|g| g.shrinkability().0 + g.shrinkability().1)
|
||
.sum::<Em>()
|
||
.at(self.size)
|
||
}
|
||
|
||
/// Reshape a range of the shaped text, reusing information from this
|
||
/// shaping process if possible.
|
||
///
|
||
/// The text `range` is relative to the whole paragraph.
|
||
pub fn reshape(&'a self, engine: &Engine, text_range: Range) -> ShapedText<'a> {
|
||
let text = &self.text[text_range.start - self.base..text_range.end - self.base];
|
||
if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
|
||
#[cfg(debug_assertions)]
|
||
assert_all_glyphs_in_range(glyphs, text, text_range.clone());
|
||
Self {
|
||
base: text_range.start,
|
||
text,
|
||
dir: self.dir,
|
||
lang: self.lang,
|
||
region: self.region,
|
||
styles: self.styles,
|
||
size: self.size,
|
||
variant: self.variant,
|
||
width: glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(self.size),
|
||
glyphs: Cow::Borrowed(glyphs),
|
||
}
|
||
} else {
|
||
shape(
|
||
engine,
|
||
text_range.start,
|
||
text,
|
||
self.styles,
|
||
self.dir,
|
||
self.lang,
|
||
self.region,
|
||
)
|
||
}
|
||
}
|
||
|
||
/// Derive an empty text run with the same properties as this one.
|
||
pub fn empty(&self) -> Self {
|
||
Self {
|
||
text: "",
|
||
width: Abs::zero(),
|
||
glyphs: Cow::Borrowed(&[]),
|
||
..*self
|
||
}
|
||
}
|
||
|
||
/// Push a hyphen to end of the text.
|
||
pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) {
|
||
self.insert_hyphen(engine, fallback, Side::Right)
|
||
}
|
||
|
||
/// Prepend a hyphen to start of the text.
|
||
pub fn prepend_hyphen(&mut self, engine: &Engine, fallback: bool) {
|
||
self.insert_hyphen(engine, fallback, Side::Left)
|
||
}
|
||
|
||
fn insert_hyphen(&mut self, engine: &Engine, fallback: bool, side: Side) {
|
||
let world = engine.world;
|
||
let book = world.book();
|
||
let fallback_func = if fallback {
|
||
Some(|| book.select_fallback(None, self.variant, "-"))
|
||
} else {
|
||
None
|
||
};
|
||
let mut chain = families(self.styles)
|
||
.filter(|family| family.covers().map_or(true, |c| c.is_match("-")))
|
||
.map(|family| book.select(family.as_str(), self.variant))
|
||
.chain(fallback_func.iter().map(|f| f()))
|
||
.flatten();
|
||
|
||
chain.find_map(|id| {
|
||
let font = world.font(id)?;
|
||
let ttf = font.ttf();
|
||
let glyph_id = ttf.glyph_index('-')?;
|
||
let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?);
|
||
let range = match side {
|
||
Side::Left => self.glyphs.first().map(|g| g.range.start..g.range.start),
|
||
Side::Right => self.glyphs.last().map(|g| g.range.end..g.range.end),
|
||
}
|
||
// In the unlikely chance that we hyphenate after an empty line,
|
||
// ensure that the glyph range still falls after self.base so
|
||
// that subtracting either of the endpoints by self.base doesn't
|
||
// underflow. See <https://github.com/typst/typst/issues/2283>.
|
||
.unwrap_or_else(|| self.base..self.base);
|
||
self.width += x_advance.at(self.size);
|
||
let glyph = ShapedGlyph {
|
||
font,
|
||
glyph_id: glyph_id.0,
|
||
x_advance,
|
||
x_offset: Em::zero(),
|
||
y_offset: Em::zero(),
|
||
adjustability: Adjustability::default(),
|
||
range,
|
||
safe_to_break: true,
|
||
c: '-',
|
||
is_justifiable: false,
|
||
script: Script::Common,
|
||
};
|
||
match side {
|
||
Side::Left => self.glyphs.to_mut().insert(0, glyph),
|
||
Side::Right => self.glyphs.to_mut().push(glyph),
|
||
}
|
||
Some(())
|
||
});
|
||
}
|
||
|
||
/// Find the subslice of glyphs that represent the given text range if both
|
||
/// sides are safe to break.
|
||
fn slice_safe_to_break(&self, text_range: Range) -> Option<&[ShapedGlyph]> {
|
||
let Range { mut start, mut end } = text_range;
|
||
if !self.dir.is_positive() {
|
||
std::mem::swap(&mut start, &mut end);
|
||
}
|
||
|
||
let left = self.find_safe_to_break(start)?;
|
||
let right = self.find_safe_to_break(end)?;
|
||
Some(&self.glyphs[left..right])
|
||
}
|
||
|
||
/// Find the glyph offset matching the text index that is most towards the
|
||
/// start of the text and safe-to-break.
|
||
fn find_safe_to_break(&self, text_index: usize) -> Option<usize> {
|
||
let ltr = self.dir.is_positive();
|
||
|
||
// Handle edge cases.
|
||
let len = self.glyphs.len();
|
||
if text_index == self.base {
|
||
return Some(if ltr { 0 } else { len });
|
||
} else if text_index == self.base + self.text.len() {
|
||
return Some(if ltr { len } else { 0 });
|
||
}
|
||
|
||
// Find any glyph with the text index.
|
||
let found = self.glyphs.binary_search_by(|g: &ShapedGlyph| {
|
||
let ordering = g.range.start.cmp(&text_index);
|
||
if ltr {
|
||
ordering
|
||
} else {
|
||
ordering.reverse()
|
||
}
|
||
});
|
||
|
||
let mut idx = match found {
|
||
Ok(idx) => idx,
|
||
Err(idx) => {
|
||
// Handle the special case where we break before a '\n'
|
||
//
|
||
// For example: (assume `a` is a CJK character with three bytes)
|
||
// text: " a \n b "
|
||
// index: 0 1 2 3 4 5
|
||
// text_index: ^
|
||
// glyphs: 0 . 1
|
||
//
|
||
// We will get found = Err(1), because '\n' does not have a
|
||
// glyph. But it's safe to break here. Thus the following
|
||
// condition:
|
||
// - glyphs[0].end == text_index == 3
|
||
// - text[3] == '\n'
|
||
return (idx > 0
|
||
&& self.glyphs[idx - 1].range.end == text_index
|
||
&& self.text[text_index - self.base..].starts_with('\n'))
|
||
.then_some(idx);
|
||
}
|
||
};
|
||
|
||
// Search for the start-most glyph with the text index. This means
|
||
// we take empty range glyphs at the start and leave those at the end
|
||
// for the next line.
|
||
let dec = if ltr { usize::checked_sub } else { usize::checked_add };
|
||
while let Some(next) = dec(idx, 1) {
|
||
if self.glyphs.get(next).map_or(true, |g| g.range.start != text_index) {
|
||
break;
|
||
}
|
||
idx = next;
|
||
}
|
||
|
||
// RTL needs offset one because the left side of the range should be
|
||
// exclusive and the right side inclusive, contrary to the normal
|
||
// behaviour of ranges.
|
||
self.glyphs[idx].safe_to_break.then_some(idx + usize::from(!ltr))
|
||
}
|
||
}
|
||
|
||
impl Debug for ShapedText<'_> {
|
||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||
self.text.fmt(f)
|
||
}
|
||
}
|
||
|
||
/// Group a range of text by BiDi level and script, shape the runs and generate
|
||
/// items for them.
|
||
pub fn shape_range<'a>(
|
||
items: &mut Vec<(Range, Item<'a>)>,
|
||
engine: &Engine,
|
||
text: &'a str,
|
||
bidi: &BidiInfo<'a>,
|
||
range: Range,
|
||
styles: StyleChain<'a>,
|
||
) {
|
||
let script = TextElem::script_in(styles);
|
||
let lang = TextElem::lang_in(styles);
|
||
let region = TextElem::region_in(styles);
|
||
let mut process = |range: Range, level: BidiLevel| {
|
||
let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
|
||
let shaped =
|
||
shape(engine, range.start, &text[range.clone()], styles, dir, lang, region);
|
||
items.push((range, Item::Text(shaped)));
|
||
};
|
||
|
||
let mut prev_level = BidiLevel::ltr();
|
||
let mut prev_script = Script::Unknown;
|
||
let mut cursor = range.start;
|
||
|
||
// Group by embedding level and script. If the text's script is explicitly
|
||
// set (rather than inferred from the glyphs), we keep the script at an
|
||
// unchanging `Script::Unknown` so that only level changes cause breaks.
|
||
for i in range.clone() {
|
||
if !text.is_char_boundary(i) {
|
||
continue;
|
||
}
|
||
|
||
let level = bidi.levels[i];
|
||
let curr_script = match script {
|
||
Smart::Auto => {
|
||
text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
|
||
}
|
||
Smart::Custom(_) => Script::Unknown,
|
||
};
|
||
|
||
if level != prev_level || !is_compatible(curr_script, prev_script) {
|
||
if cursor < i {
|
||
process(cursor..i, prev_level);
|
||
}
|
||
cursor = i;
|
||
prev_level = level;
|
||
prev_script = curr_script;
|
||
} else if is_generic_script(prev_script) {
|
||
prev_script = curr_script;
|
||
}
|
||
}
|
||
|
||
process(cursor..range.end, prev_level);
|
||
}
|
||
|
||
/// Whether this is not a specific script.
|
||
fn is_generic_script(script: Script) -> bool {
|
||
matches!(script, Script::Unknown | Script::Common | Script::Inherited)
|
||
}
|
||
|
||
/// Whether these script can be part of the same shape run.
|
||
fn is_compatible(a: Script, b: Script) -> bool {
|
||
is_generic_script(a) || is_generic_script(b) || a == b
|
||
}
|
||
|
||
/// Shape text into [`ShapedText`].
|
||
#[allow(clippy::too_many_arguments)]
|
||
fn shape<'a>(
|
||
engine: &Engine,
|
||
base: usize,
|
||
text: &'a str,
|
||
styles: StyleChain<'a>,
|
||
dir: Dir,
|
||
lang: Lang,
|
||
region: Option<Region>,
|
||
) -> ShapedText<'a> {
|
||
let size = TextElem::size_in(styles);
|
||
let mut ctx = ShapingContext {
|
||
engine,
|
||
size,
|
||
glyphs: vec![],
|
||
used: vec![],
|
||
styles,
|
||
variant: variant(styles),
|
||
features: features(styles),
|
||
fallback: TextElem::fallback_in(styles),
|
||
dir,
|
||
};
|
||
|
||
if !text.is_empty() {
|
||
shape_segment(&mut ctx, base, text, families(styles));
|
||
}
|
||
|
||
track_and_space(&mut ctx);
|
||
calculate_adjustability(&mut ctx, lang, region);
|
||
|
||
#[cfg(debug_assertions)]
|
||
assert_all_glyphs_in_range(&ctx.glyphs, text, base..(base + text.len()));
|
||
#[cfg(debug_assertions)]
|
||
assert_glyph_ranges_in_order(&ctx.glyphs, dir);
|
||
|
||
ShapedText {
|
||
base,
|
||
text,
|
||
dir,
|
||
lang,
|
||
region,
|
||
styles,
|
||
variant: ctx.variant,
|
||
size,
|
||
width: ctx.glyphs.iter().map(|g| g.x_advance).sum::<Em>().at(size),
|
||
glyphs: Cow::Owned(ctx.glyphs),
|
||
}
|
||
}
|
||
|
||
/// Holds shaping results and metadata common to all shaped segments.
|
||
struct ShapingContext<'a, 'v> {
|
||
engine: &'a Engine<'v>,
|
||
glyphs: Vec<ShapedGlyph>,
|
||
used: Vec<Font>,
|
||
styles: StyleChain<'a>,
|
||
size: Abs,
|
||
variant: FontVariant,
|
||
features: Vec<rustybuzz::Feature>,
|
||
fallback: bool,
|
||
dir: Dir,
|
||
}
|
||
|
||
/// Shape text with font fallback using the `families` iterator.
|
||
fn shape_segment<'a>(
|
||
ctx: &mut ShapingContext,
|
||
base: usize,
|
||
text: &str,
|
||
mut families: impl Iterator<Item = &'a FontFamily> + Clone,
|
||
) {
|
||
// Don't try shaping newlines, tabs, or default ignorables.
|
||
if text
|
||
.chars()
|
||
.all(|c| c == '\n' || c == '\t' || is_default_ignorable(c))
|
||
{
|
||
return;
|
||
}
|
||
|
||
// Find the next available family.
|
||
let world = ctx.engine.world;
|
||
let book = world.book();
|
||
let mut selection = None;
|
||
let mut covers = None;
|
||
for family in families.by_ref() {
|
||
selection = book
|
||
.select(family.as_str(), ctx.variant)
|
||
.and_then(|id| world.font(id))
|
||
.filter(|font| !ctx.used.contains(font));
|
||
if selection.is_some() {
|
||
covers = family.covers();
|
||
break;
|
||
}
|
||
}
|
||
|
||
// Do font fallback if the families are exhausted and fallback is enabled.
|
||
if selection.is_none() && ctx.fallback {
|
||
let first = ctx.used.first().map(Font::info);
|
||
selection = book
|
||
.select_fallback(first, ctx.variant, text)
|
||
.and_then(|id| world.font(id))
|
||
.filter(|font| !ctx.used.contains(font));
|
||
}
|
||
|
||
// Extract the font id or shape notdef glyphs if we couldn't find any font.
|
||
let Some(font) = selection else {
|
||
if let Some(font) = ctx.used.first().cloned() {
|
||
shape_tofus(ctx, base, text, font);
|
||
}
|
||
return;
|
||
};
|
||
|
||
ctx.used.push(font.clone());
|
||
|
||
// Fill the buffer with our text.
|
||
let mut buffer = UnicodeBuffer::new();
|
||
buffer.push_str(text);
|
||
buffer.set_language(language(ctx.styles));
|
||
if let Some(script) = TextElem::script_in(ctx.styles).custom().and_then(|script| {
|
||
rustybuzz::Script::from_iso15924_tag(Tag::from_bytes(script.as_bytes()))
|
||
}) {
|
||
buffer.set_script(script)
|
||
}
|
||
buffer.set_direction(match ctx.dir {
|
||
Dir::LTR => rustybuzz::Direction::LeftToRight,
|
||
Dir::RTL => rustybuzz::Direction::RightToLeft,
|
||
_ => unimplemented!("vertical text layout"),
|
||
});
|
||
buffer.guess_segment_properties();
|
||
|
||
// By default, Harfbuzz will create zero-width space glyphs for default
|
||
// ignorables. This is probably useful for GUI apps that want noticeable
|
||
// effects on the cursor for those, but for us it's not useful and hurts
|
||
// text extraction.
|
||
buffer.set_flags(BufferFlags::REMOVE_DEFAULT_IGNORABLES);
|
||
|
||
// Prepare the shape plan. This plan depends on direction, script, language,
|
||
// and features, but is independent from the text and can thus be memoized.
|
||
let plan = create_shape_plan(
|
||
&font,
|
||
buffer.direction(),
|
||
buffer.script(),
|
||
buffer.language().as_ref(),
|
||
&ctx.features,
|
||
);
|
||
|
||
// Shape!
|
||
let buffer = rustybuzz::shape_with_plan(font.rusty(), &plan, buffer);
|
||
let infos = buffer.glyph_infos();
|
||
let pos = buffer.glyph_positions();
|
||
let ltr = ctx.dir.is_positive();
|
||
|
||
// Whether the character at the given offset is covered by the coverage.
|
||
let is_covered = |offset| {
|
||
let end = text[offset..]
|
||
.char_indices()
|
||
.nth(1)
|
||
.map(|(i, _)| offset + i)
|
||
.unwrap_or(text.len());
|
||
covers.map_or(true, |cov| cov.is_match(&text[offset..end]))
|
||
};
|
||
|
||
// Collect the shaped glyphs, doing fallback and shaping parts again with
|
||
// the next font if necessary.
|
||
let mut i = 0;
|
||
while i < infos.len() {
|
||
let info = &infos[i];
|
||
let cluster = info.cluster as usize;
|
||
|
||
// Add the glyph to the shaped output.
|
||
if info.glyph_id != 0 && is_covered(cluster) {
|
||
// Determine the text range of the glyph.
|
||
let start = base + cluster;
|
||
let end = base
|
||
+ if ltr { i.checked_add(1) } else { i.checked_sub(1) }
|
||
.and_then(|last| infos.get(last))
|
||
.map_or(text.len(), |info| info.cluster as usize);
|
||
|
||
let c = text[cluster..].chars().next().unwrap();
|
||
let script = c.script();
|
||
let x_advance = font.to_em(pos[i].x_advance);
|
||
ctx.glyphs.push(ShapedGlyph {
|
||
font: font.clone(),
|
||
glyph_id: info.glyph_id as u16,
|
||
// TODO: Don't ignore y_advance.
|
||
x_advance,
|
||
x_offset: font.to_em(pos[i].x_offset),
|
||
y_offset: font.to_em(pos[i].y_offset),
|
||
adjustability: Adjustability::default(),
|
||
range: start..end,
|
||
safe_to_break: !info.unsafe_to_break(),
|
||
c,
|
||
is_justifiable: is_justifiable(
|
||
c,
|
||
script,
|
||
x_advance,
|
||
Adjustability::default().stretchability,
|
||
),
|
||
script,
|
||
});
|
||
} else {
|
||
// First, search for the end of the tofu sequence.
|
||
let k = i;
|
||
while infos.get(i + 1).is_some_and(|info| {
|
||
info.glyph_id == 0 || !is_covered(info.cluster as usize)
|
||
}) {
|
||
i += 1;
|
||
}
|
||
|
||
// Then, determine the start and end text index for the tofu
|
||
// sequence.
|
||
//
|
||
// Examples:
|
||
// Everything is shown in visual order. Tofus are written as "_".
|
||
// We want to find out that the tofus span the text `2..6`.
|
||
// Note that the clusters are longer than 1 char.
|
||
//
|
||
// Left-to-right:
|
||
// Text: h a l i h a l l o
|
||
// Glyphs: A _ _ C E
|
||
// Clusters: 0 2 4 6 8
|
||
// k=1 i=2
|
||
//
|
||
// Right-to-left:
|
||
// Text: O L L A H I L A H
|
||
// Glyphs: E C _ _ A
|
||
// Clusters: 8 6 4 2 0
|
||
// k=2 i=3
|
||
let start = infos[if ltr { k } else { i }].cluster as usize;
|
||
let end = if ltr { i.checked_add(1) } else { k.checked_sub(1) }
|
||
.and_then(|last| infos.get(last))
|
||
.map_or(text.len(), |info| info.cluster as usize);
|
||
|
||
// Trim half-baked cluster.
|
||
let remove = base + start..base + end;
|
||
while ctx.glyphs.last().is_some_and(|g| remove.contains(&g.range.start)) {
|
||
ctx.glyphs.pop();
|
||
}
|
||
|
||
// Recursively shape the tofu sequence with the next family.
|
||
shape_segment(ctx, base + start, &text[start..end], families.clone());
|
||
}
|
||
|
||
i += 1;
|
||
}
|
||
|
||
ctx.used.pop();
|
||
}
|
||
|
||
/// Create a shape plan.
|
||
#[comemo::memoize]
|
||
fn create_shape_plan(
|
||
font: &Font,
|
||
direction: rustybuzz::Direction,
|
||
script: rustybuzz::Script,
|
||
language: Option<&rustybuzz::Language>,
|
||
features: &[rustybuzz::Feature],
|
||
) -> Arc<ShapePlan> {
|
||
Arc::new(rustybuzz::ShapePlan::new(
|
||
font.rusty(),
|
||
direction,
|
||
Some(script),
|
||
language,
|
||
features,
|
||
))
|
||
}
|
||
|
||
/// Shape the text with tofus from the given font.
|
||
fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
|
||
let x_advance = font.advance(0).unwrap_or_default();
|
||
let add_glyph = |(cluster, c): (usize, char)| {
|
||
let start = base + cluster;
|
||
let end = start + c.len_utf8();
|
||
let script = c.script();
|
||
ctx.glyphs.push(ShapedGlyph {
|
||
font: font.clone(),
|
||
glyph_id: 0,
|
||
x_advance,
|
||
x_offset: Em::zero(),
|
||
y_offset: Em::zero(),
|
||
adjustability: Adjustability::default(),
|
||
range: start..end,
|
||
safe_to_break: true,
|
||
c,
|
||
is_justifiable: is_justifiable(
|
||
c,
|
||
script,
|
||
x_advance,
|
||
Adjustability::default().stretchability,
|
||
),
|
||
script,
|
||
});
|
||
};
|
||
if ctx.dir.is_positive() {
|
||
text.char_indices().for_each(add_glyph);
|
||
} else {
|
||
text.char_indices().rev().for_each(add_glyph);
|
||
}
|
||
}
|
||
|
||
/// Apply tracking and spacing to the shaped glyphs.
|
||
fn track_and_space(ctx: &mut ShapingContext) {
|
||
let tracking = Em::from_length(TextElem::tracking_in(ctx.styles), ctx.size);
|
||
let spacing =
|
||
TextElem::spacing_in(ctx.styles).map(|abs| Em::from_length(abs, ctx.size));
|
||
|
||
let mut glyphs = ctx.glyphs.iter_mut().peekable();
|
||
while let Some(glyph) = glyphs.next() {
|
||
// Make non-breaking space same width as normal space.
|
||
if glyph.c == '\u{00A0}' {
|
||
glyph.x_advance -= nbsp_delta(&glyph.font).unwrap_or_default();
|
||
}
|
||
|
||
if glyph.is_space() {
|
||
glyph.x_advance = spacing.relative_to(glyph.x_advance);
|
||
}
|
||
|
||
if glyphs
|
||
.peek()
|
||
.is_some_and(|next| glyph.range.start != next.range.start)
|
||
{
|
||
glyph.x_advance += tracking;
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Calculate stretchability and shrinkability of each glyph,
|
||
/// and CJK punctuation adjustments according to Chinese Layout Requirements.
|
||
fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<Region>) {
|
||
let style = cjk_punct_style(lang, region);
|
||
|
||
for glyph in &mut ctx.glyphs {
|
||
glyph.adjustability = glyph.base_adjustability(style);
|
||
}
|
||
|
||
let mut glyphs = ctx.glyphs.iter_mut().peekable();
|
||
while let Some(glyph) = glyphs.next() {
|
||
// CNS style needs not further adjustment.
|
||
if glyph.is_cjk_punctuation() && matches!(style, CjkPunctStyle::Cns) {
|
||
continue;
|
||
}
|
||
|
||
// Now we apply consecutive punctuation adjustment, specified in Chinese Layout.
|
||
// Requirements, section 3.1.6.1 Punctuation Adjustment Space, and Japanese Layout
|
||
// Requirements, section 3.1 Line Composition Rules for Punctuation Marks
|
||
let Some(next) = glyphs.peek_mut() else { continue };
|
||
let width = glyph.x_advance;
|
||
let delta = width / 2.0;
|
||
if glyph.is_cjk_punctuation()
|
||
&& next.is_cjk_punctuation()
|
||
&& (glyph.shrinkability().1 + next.shrinkability().0) >= delta
|
||
{
|
||
let left_delta = glyph.shrinkability().1.min(delta);
|
||
glyph.shrink_right(left_delta);
|
||
next.shrink_left(delta - left_delta);
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Difference between non-breaking and normal space.
|
||
fn nbsp_delta(font: &Font) -> Option<Em> {
|
||
let space = font.ttf().glyph_index(' ')?.0;
|
||
let nbsp = font.ttf().glyph_index('\u{00A0}')?.0;
|
||
Some(font.advance(nbsp)? - font.advance(space)?)
|
||
}
|
||
|
||
/// Process the language and region of a style chain into a
|
||
/// rustybuzz-compatible BCP 47 language.
|
||
fn language(styles: StyleChain) -> rustybuzz::Language {
|
||
let mut bcp: EcoString = TextElem::lang_in(styles).as_str().into();
|
||
if let Some(region) = TextElem::region_in(styles) {
|
||
bcp.push('-');
|
||
bcp.push_str(region.as_str());
|
||
}
|
||
rustybuzz::Language::from_str(&bcp).unwrap()
|
||
}
|
||
|
||
/// Returns true if all glyphs in `glyphs` have ranges within the range `range`.
|
||
#[cfg(debug_assertions)]
|
||
fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range) {
|
||
if glyphs
|
||
.iter()
|
||
.any(|g| g.range.start < range.start || g.range.end > range.end)
|
||
{
|
||
panic!("one or more glyphs in {text:?} fell out of range");
|
||
}
|
||
}
|
||
|
||
/// Asserts that the ranges of `glyphs` is in the proper order according to
|
||
/// `dir`.
|
||
///
|
||
/// This asserts instead of returning a bool in order to provide a more
|
||
/// informative message when the invariant is violated.
|
||
#[cfg(debug_assertions)]
|
||
fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) {
|
||
if glyphs.is_empty() {
|
||
return;
|
||
}
|
||
|
||
// Iterator::is_sorted and friends are unstable as of Rust 1.70.0
|
||
for i in 0..(glyphs.len() - 1) {
|
||
let a = &glyphs[i];
|
||
let b = &glyphs[i + 1];
|
||
let ord = a.range.start.cmp(&b.range.start);
|
||
let ord = if dir.is_positive() { ord } else { ord.reverse() };
|
||
if ord == std::cmp::Ordering::Greater {
|
||
panic!(
|
||
"glyph ranges should be monotonically {}, \
|
||
but found glyphs out of order:\n\n\
|
||
first: {a:#?}\nsecond: {b:#?}",
|
||
if dir.is_positive() { "increasing" } else { "decreasing" },
|
||
);
|
||
}
|
||
}
|
||
}
|
||
|
||
// The CJK punctuation that can appear at the beginning or end of a line.
//
// Full-width forms (U+FFxx) are written as escapes on purpose: they look
// almost identical to their ASCII counterparts, but only the full-width
// code points are CJK punctuation. Matching the ASCII characters here would
// treat ordinary Latin punctuation as CJK punctuation.

/// Opening CJK punctuation: quotation marks and opening brackets.
pub const BEGIN_PUNCT_PAT: &[char] = &[
    '“', '‘', '《', '〈',
    '\u{FF08}', // FULLWIDTH LEFT PARENTHESIS
    '『', '「', '【', '〖', '〔',
    '\u{FF3B}', // FULLWIDTH LEFT SQUARE BRACKET
    '\u{FF5B}', // FULLWIDTH LEFT CURLY BRACKET
];

/// Closing CJK punctuation: quotation marks, stops, and closing brackets.
pub const END_PUNCT_PAT: &[char] = &[
    '”', '’',
    '\u{FF0C}', // FULLWIDTH COMMA
    '\u{FF0E}', // FULLWIDTH FULL STOP
    '。', '、',
    '\u{FF1A}', // FULLWIDTH COLON
    '\u{FF1B}', // FULLWIDTH SEMICOLON
    '》', '〉',
    '\u{FF09}', // FULLWIDTH RIGHT PARENTHESIS
    '』', '」', '】', '〗', '〕',
    '\u{FF3D}', // FULLWIDTH RIGHT SQUARE BRACKET
    '\u{FF5D}', // FULLWIDTH RIGHT CURLY BRACKET
    '\u{FF1F}', // FULLWIDTH QUESTION MARK
    '\u{FF01}', // FULLWIDTH EXCLAMATION MARK
];
|
||
|
||
/// The punctuation convention applied when adjusting CJK punctuation marks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CjkPunctStyle {
    /// GB/T 15834-2011, the standard mostly used in mainland China.
    Gb,
    /// The Taiwan Ministry of Education standard, used in Taiwan and Hong
    /// Kong.
    Cns,
    /// JIS X 4051, the standard used in Japan.
    Jis,
}
|
||
|
||
pub fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle {
|
||
match (lang, region.as_ref().map(Region::as_str)) {
|
||
(Lang::CHINESE, Some("TW" | "HK")) => CjkPunctStyle::Cns,
|
||
(Lang::JAPANESE, _) => CjkPunctStyle::Jis,
|
||
// zh-CN, zh-SG, zh-MY use GB-style punctuation,
|
||
_ => CjkPunctStyle::Gb,
|
||
}
|
||
}
|
||
|
||
/// Whether the character is a space.
///
/// Recognized are the ASCII space, the no-break space (U+00A0), and the
/// ideographic space (U+3000). The latter two are written as escapes because
/// they are visually indistinguishable from a plain space in source code.
fn is_space(c: char) -> bool {
    matches!(c, ' ' | '\u{00A0}' | '\u{3000}')
}
|
||
|
||
/// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
|
||
pub fn is_of_cj_script(c: char) -> bool {
|
||
is_cj_script(c, c.script())
|
||
}
|
||
|
||
/// Whether the glyph is part of Chinese or Japanese script (i.e. CJ, not CJK).
|
||
/// The function is dedicated to typesetting Chinese or Japanese, which do not
|
||
/// have spaces between words, so K is not checked here.
|
||
fn is_cj_script(c: char, script: Script) -> bool {
|
||
use Script::*;
|
||
// U+30FC: Katakana-Hiragana Prolonged Sound Mark
|
||
matches!(script, Hiragana | Katakana | Han) || c == '\u{30FC}'
|
||
}
|
||
|
||
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
||
fn is_cjk_left_aligned_punctuation(
|
||
c: char,
|
||
x_advance: Em,
|
||
stretchability: (Em, Em),
|
||
style: CjkPunctStyle,
|
||
) -> bool {
|
||
use CjkPunctStyle::*;
|
||
|
||
// CJK quotation marks shares codepoints with latin quotation marks.
|
||
// But only the CJK ones have full width.
|
||
if matches!(c, '”' | '’') && x_advance + stretchability.1 == Em::one() {
|
||
return true;
|
||
}
|
||
|
||
if matches!(style, Gb | Jis) && matches!(c, ',' | '。' | '.' | '、' | ':' | ';')
|
||
{
|
||
return true;
|
||
}
|
||
|
||
if matches!(style, Gb) && matches!(c, '?' | '!') {
|
||
// In GB style, exclamations and question marks are also left aligned
|
||
// and can be adjusted. Note that they are not adjustable in other
|
||
// styles.
|
||
return true;
|
||
}
|
||
|
||
// See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks
|
||
matches!(c, '》' | ')' | '』' | '」' | '】' | '〗' | '〕' | '〉' | ']' | '}')
|
||
}
|
||
|
||
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
||
fn is_cjk_right_aligned_punctuation(
|
||
c: char,
|
||
x_advance: Em,
|
||
stretchability: (Em, Em),
|
||
) -> bool {
|
||
// CJK quotation marks shares codepoints with latin quotation marks.
|
||
// But only the CJK ones have full width.
|
||
if matches!(c, '“' | '‘') && x_advance + stretchability.0 == Em::one() {
|
||
return true;
|
||
}
|
||
// See appendix A.3 https://www.w3.org/TR/clreq/#tables_of_chinese_punctuation_marks
|
||
matches!(c, '《' | '(' | '『' | '「' | '【' | '〖' | '〔' | '〈' | '[' | '{')
|
||
}
|
||
|
||
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
||
fn is_cjk_center_aligned_punctuation(c: char, style: CjkPunctStyle) -> bool {
|
||
if matches!(style, CjkPunctStyle::Cns)
|
||
&& matches!(c, ',' | '。' | '.' | '、' | ':' | ';')
|
||
{
|
||
return true;
|
||
}
|
||
|
||
// U+30FB: Katakana Middle Dot
|
||
// U+00B7: Middle Dot
|
||
matches!(c, '\u{30FB}' | '\u{00B7}')
|
||
}
|
||
|
||
/// Whether the glyph is justifiable.
|
||
///
|
||
/// Quotations in latin script and CJK are unfortunately the same codepoint
|
||
/// (U+2018, U+2019, U+201C, U+201D), but quotations in Chinese must be
|
||
/// fullwidth. This heuristics can therefore fail for monospace latin fonts.
|
||
/// However, since monospace fonts are usually not justified this edge case
|
||
/// should be rare enough.
|
||
fn is_justifiable(
|
||
c: char,
|
||
script: Script,
|
||
x_advance: Em,
|
||
stretchability: (Em, Em),
|
||
) -> bool {
|
||
// punctuation style is not relevant here.
|
||
let style = CjkPunctStyle::Gb;
|
||
is_space(c)
|
||
|| is_cj_script(c, script)
|
||
|| is_cjk_left_aligned_punctuation(c, x_advance, stretchability, style)
|
||
|| is_cjk_right_aligned_punctuation(c, x_advance, stretchability)
|
||
|| is_cjk_center_aligned_punctuation(c, style)
|
||
}
|