diff --git a/Cargo.lock b/Cargo.lock index dcf7bfdf5..2341a52e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -873,6 +873,7 @@ dependencies = [ "typed-arena", "typst-macros", "unicode-bidi", + "unicode-script", "unicode-segmentation", "unicode-xid", "usvg", diff --git a/Cargo.toml b/Cargo.toml index e30971bdf..fa7449afd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ rustybuzz = "0.4" unicode-bidi = "0.3.5" unicode-segmentation = "1" unicode-xid = "0.2" +unicode-script = "0.5" xi-unicode = "0.3" # Raster and vector graphics handling diff --git a/fonts/IBMPlexSansDevanagari-Regular.ttf b/fonts/IBMPlexSansDevanagari-Regular.ttf new file mode 100644 index 000000000..5d7c8f0f1 Binary files /dev/null and b/fonts/IBMPlexSansDevanagari-Regular.ttf differ diff --git a/src/library/text/par.rs b/src/library/text/par.rs index 57ced1896..6eb3da66d 100644 --- a/src/library/text/par.rs +++ b/src/library/text/par.rs @@ -1,13 +1,14 @@ use std::sync::Arc; use unicode_bidi::{BidiInfo, Level}; +use unicode_script::{Script, UnicodeScript}; use xi_unicode::LineBreakIterator; use super::{shape, Lang, ShapedText, TextNode}; use crate::font::FontStore; use crate::library::layout::Spacing; use crate::library::prelude::*; -use crate::util::{ArcExt, EcoString, SliceExt}; +use crate::util::{ArcExt, EcoString}; /// Arrange text, spacing and inline-level nodes into a paragraph. #[derive(Hash)] @@ -437,23 +438,46 @@ fn prepare<'a>( _ => None, }); - let mut items = vec![]; let mut cursor = 0; + let mut items = vec![]; // Layout the children and collect them into items. for (segment, styles) in segments { + let end = cursor + segment.len(); match segment { - Segment::Text(len) => { - // TODO: Also split by script. - let mut start = cursor; - for (level, count) in bidi.levels[cursor .. cursor + len].group() { - let end = start + count; - let text = &bidi.text[start .. 
end]; + Segment::Text(_) => { + let mut process = |text, level: Level| { let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL }; let shaped = shape(&mut ctx.fonts, text, styles, dir); items.push(Item::Text(shaped)); - start = end; + }; + + let mut prev_level = Level::ltr(); + let mut prev_script = Script::Unknown; + + // Group by embedding level and script. + for i in cursor .. end { + if !text.is_char_boundary(i) { + continue; + } + + let level = bidi.levels[i]; + let script = + text[i ..].chars().next().map_or(Script::Unknown, |c| c.script()); + + if level != prev_level || !is_compatible(script, prev_script) { + if cursor < i { + process(&text[cursor .. i], prev_level); + } + cursor = i; + prev_level = level; + prev_script = script; + } else if is_generic_script(prev_script) { + prev_script = script; + } } + + process(&text[cursor .. end], prev_level); } Segment::Spacing(spacing) => match spacing { Spacing::Relative(v) => { @@ -482,12 +506,22 @@ fn prepare<'a>( } } - cursor += segment.len(); + cursor = end; } Ok(Preparation { bidi, items, styles, children: &par.0 }) } +/// Whether this is not a specific script. +fn is_generic_script(script: Script) -> bool { + matches!(script, Script::Unknown | Script::Common | Script::Inherited) +} + +/// Whether these scripts can be part of the same shape run. +fn is_compatible(a: Script, b: Script) -> bool { + is_generic_script(a) || is_generic_script(b) || a == b +} + /// Find suitable linebreaks. fn linebreak<'a>( p: &'a Preparation<'a>, diff --git a/src/library/text/shaping.rs b/src/library/text/shaping.rs index 0a480c83c..72f86a38d 100644 --- a/src/library/text/shaping.rs +++ b/src/library/text/shaping.rs @@ -12,7 +12,6 @@ use crate::util::SliceExt; /// This type contains owned or borrowed shaped text runs, which can be /// measured, used to reshape substrings more quickly and converted into a /// frame. -#[derive(Debug, Clone)] pub struct ShapedText<'a> { /// The text that was shaped. 
pub text: &'a str, @@ -269,11 +268,13 @@ impl<'a> ShapedText<'a> { // RTL needs offset one because the left side of the range should be // exclusive and the right side inclusive, contrary to the normal // behaviour of ranges. - if !ltr { - idx += 1; - } + self.glyphs[idx].safe_to_break.then(|| idx + (!ltr) as usize) + } +} - self.glyphs[idx].safe_to_break.then(|| idx) +impl Debug for ShapedText<'_> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + self.text.fmt(f) } } diff --git a/src/util/mod.rs b/src/util/mod.rs index e42d0664b..d898f5455 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -103,12 +103,6 @@ where /// Additional methods for slices. pub trait SliceExt { - /// Find consecutive runs of the same elements in a slice and yield for - /// each such run the element and number of times it appears. - fn group(&self) -> Group<'_, T> - where - T: PartialEq; - /// Split a slice into consecutive runs with the same key and yield for /// each such run the key and the slice of elements with that key. fn group_by_key(&self, f: F) -> GroupByKey<'_, T, F> @@ -118,35 +112,11 @@ pub trait SliceExt { } impl SliceExt for [T] { - fn group(&self) -> Group<'_, T> { - Group { slice: self } - } - fn group_by_key(&self, f: F) -> GroupByKey<'_, T, F> { GroupByKey { slice: self, f } } } -/// This struct is created by [`SliceExt::group`]. -pub struct Group<'a, T> { - slice: &'a [T], -} - -impl<'a, T> Iterator for Group<'a, T> -where - T: PartialEq, -{ - type Item = (&'a T, usize); - - fn next(&mut self) -> Option { - let mut iter = self.slice.iter(); - let first = iter.next()?; - let count = 1 + iter.take_while(|&t| t == first).count(); - self.slice = &self.slice[count ..]; - Some((first, count)) - } -} - /// This struct is created by [`SliceExt::group_by_key`]. 
pub struct GroupByKey<'a, T, F> { slice: &'a [T], diff --git a/tests/ref/text/shaping.png b/tests/ref/text/shaping.png index a95f44e00..3d3e611df 100644 Binary files a/tests/ref/text/shaping.png and b/tests/ref/text/shaping.png differ diff --git a/tests/ref/text/tracking-spacing.png b/tests/ref/text/tracking-spacing.png index ec130c993..8e6db3cc6 100644 Binary files a/tests/ref/text/tracking-spacing.png and b/tests/ref/text/tracking-spacing.png differ diff --git a/tests/typ/text/shaping.typ b/tests/typ/text/shaping.typ new file mode 100644 index 000000000..511990909 --- /dev/null +++ b/tests/typ/text/shaping.typ @@ -0,0 +1,18 @@ +// Test shaping quirks. + +--- +// Test separation by script. +ABCअपार्टमेंट + +// This is how it should look. +अपार्टमेंट + +// This (without the spaces) is how it would look +// if we didn't separate by script. +अ पा र् ट में ट + +--- +// Test that RTL safe-to-break doesn't panic even though newline +// doesn't exist in shaping output. +#set text(dir: rtl, "Noto Serif Hebrew") +\ ט