shape_tofus: respect text direction when inserting tofus (#1519)

This ensures that the ranges of the shaped glyphs are monotonically
decreasing in right-to-left-text, thus avoiding nonsensical results
in find_safe_to_break that later causes a panic (see #1373).
Additionally, debug assertions have been added to catch non-monotonic
glyph ranges.
This commit is contained in:
bluebear94 2023-06-26 07:40:21 -04:00 committed by GitHub
parent 47a131dfbb
commit 9ef4643ba1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 63 additions and 1 deletions

View File

@ -1,5 +1,7 @@
use std::borrow::Cow;
use std::cmp::Ordering;
use std::ops::Range;
use std::panic;
use std::str::FromStr;
use az::SaturatingAs;
@ -58,6 +60,12 @@ pub struct ShapedGlyph {
/// The byte range of this glyph's cluster in the full paragraph. A cluster
/// is a sequence of one or multiple glyphs that cannot be separated and
/// must always be treated as a union.
///
/// The range values of the glyphs in a [`ShapedText`] should not
/// overlap with each other, and they should be monotonically
/// increasing (for left-to-right or top-to-bottom text) or
/// monotonically decreasing (for right-to-left or bottom-to-top
/// text).
pub range: Range<usize>,
/// Whether splitting the shaping result before this glyph would yield the
/// same results as shaping the parts to both sides of `text_index`
@ -388,6 +396,11 @@ impl<'a> ShapedText<'a> {
) -> ShapedText<'a> {
let text = &self.text[text_range.start - self.base..text_range.end - self.base];
if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
debug_assert!(
all_glyphs_in_range(glyphs, &text_range),
"one or more glyphs in {:?} fell out of range",
text
);
Self {
base: text_range.start,
text,
@ -559,6 +572,14 @@ pub fn shape<'a>(
track_and_space(&mut ctx);
calculate_adjustability(&mut ctx, lang, region);
debug_assert!(
all_glyphs_in_range(&ctx.glyphs, &(base..(base + text.len()))),
"one or more glyphs in {:?} fell out of range",
text
);
#[cfg(debug_assertions)]
assert_glyph_ranges_in_order(&ctx.glyphs, dir);
ShapedText {
base,
text,
@ -708,7 +729,7 @@ fn shape_segment(
/// Shape the text with tofus from the given font.
fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
let x_advance = font.advance(0).unwrap_or_default();
for (cluster, c) in text.char_indices() {
let add_glyph = |(cluster, c): (usize, char)| {
let start = base + cluster;
let end = start + c.len_utf8();
ctx.glyphs.push(ShapedGlyph {
@ -723,6 +744,11 @@ fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
c,
span: ctx.spans.span_at(start),
});
};
if ctx.dir.is_positive() {
text.char_indices().for_each(add_glyph);
} else {
text.char_indices().rev().for_each(add_glyph);
}
}
@ -916,3 +942,32 @@ fn language(styles: StyleChain) -> rustybuzz::Language {
}
rustybuzz::Language::from_str(&bcp).unwrap()
}
/// Returns true if all glyphs in `glyphs` have ranges within the range `range`.
#[cfg(debug_assertions)]
fn all_glyphs_in_range(glyphs: &[ShapedGlyph], range: &Range<usize>) -> bool {
glyphs
.iter()
.all(|g| g.range.start >= range.start && g.range.end <= range.end)
}
/// Asserts that the ranges of `glyphs` is in the proper order according to `dir`.
///
/// This asserts instead of returning a bool in order to provide a more informative message when the invariant is violated.
fn assert_glyph_ranges_in_order(glyphs: &[ShapedGlyph], dir: Dir) {
// Iterator::is_sorted and friends are unstable as of Rust 1.70.0
if !glyphs.is_empty() {
for i in 0..(glyphs.len() - 1) {
let a = &glyphs[i];
let b = &glyphs[i + 1];
let ord = a.range.start.cmp(&b.range.start);
let ord = if dir.is_positive() { ord } else { ord.reverse() };
if ord == Ordering::Greater {
panic!(
"glyph ranges should be monotonically {}, but found glyphs out of order:\n\nfirst: {a:#?}\nsecond: {b:#?}",
if dir.is_positive() { "increasing" } else { "decreasing" }
);
}
}
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 513 B

View File

@ -0,0 +1,7 @@
// Test that shaping missing characters in both left-to-right and
// right-to-left directions does not cause a crash.
---
#"\u{590}\u{591}\u{592}\u{593}"
#"\u{30000}\u{30001}\u{30002}\u{30003}"