diff --git a/crates/typst-library/src/layout/par.rs b/crates/typst-library/src/layout/par.rs index fe86f62a5..6d1b653f7 100644 --- a/crates/typst-library/src/layout/par.rs +++ b/crates/typst-library/src/layout/par.rs @@ -750,6 +750,7 @@ fn shape_range<'a>( spans: &SpanMapper, styles: StyleChain<'a>, ) { + let script = TextElem::script_in(styles); let lang = TextElem::lang_in(styles); let region = TextElem::region_in(styles); let mut process = |range: Range, level: BidiLevel| { @@ -763,25 +764,31 @@ fn shape_range<'a>( let mut prev_script = Script::Unknown; let mut cursor = range.start; - // Group by embedding level and script. + // Group by embedding level and script. If the text's script is explicitly + // set (rather than inferred from the glyphs), we keep the script at an + // unchanging `Script::Unknown` so that only level changes cause breaks. for i in range.clone() { if !bidi.text.is_char_boundary(i) { continue; } let level = bidi.levels[i]; - let script = - bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script()); + let curr_script = match script { + Smart::Auto => { + bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script()) + } + Smart::Custom(_) => Script::Unknown, + }; - if level != prev_level || !is_compatible(script, prev_script) { + if level != prev_level || !is_compatible(curr_script, prev_script) { if cursor < i { process(cursor..i, prev_level); } cursor = i; prev_level = level; - prev_script = script; + prev_script = curr_script; } else if is_generic_script(prev_script) { - prev_script = script; + prev_script = curr_script; } } diff --git a/crates/typst-library/src/text/mod.rs b/crates/typst-library/src/text/mod.rs index f7c15c29d..6cfcb7c7f 100644 --- a/crates/typst-library/src/text/mod.rs +++ b/crates/typst-library/src/text/mod.rs @@ -265,6 +265,31 @@ pub struct TextElem { #[default(BottomEdge::Metric(BottomEdgeMetric::Baseline))] pub bottom_edge: BottomEdge, + /// The OpenType writing script setting. 
+ /// + /// The combination of `{script}` and `{lang}` determines how + /// font features, such as glyph substitution, are implemented. + /// Frequently the value is a modified (all-lowercase) ISO 15924 script identifier, and + /// the `math` writing script is used for features appropriate + /// for mathematical symbols. + /// + /// When set to `{auto}`, the default and recommended setting, + /// an appropriate script is chosen for each block of characters + /// sharing a common Unicode script property. + /// + /// ```example + /// #let scedilla = [Ş] + /// #set text(font: "Linux Libertine", size: 20pt) + /// #scedilla // S with a cedilla + /// + /// #set text(script: "latn", lang: "ro") + /// #scedilla // S with a subscript comma + /// + /// #set text(script: "grek", lang: "ro") + /// #scedilla // S with a cedilla + /// ``` + pub script: Smart<WritingScript>, + /// An [ISO 639-1/2/3 language code.](https://en.wikipedia.org/wiki/ISO_639) /// /// Setting the correct language affects various parts of Typst: diff --git a/crates/typst-library/src/text/shaping.rs b/crates/typst-library/src/text/shaping.rs index 3ccac635a..53289e263 100644 --- a/crates/typst-library/src/text/shaping.rs +++ b/crates/typst-library/src/text/shaping.rs @@ -634,6 +634,11 @@ fn shape_segment( let mut buffer = UnicodeBuffer::new(); buffer.push_str(text); buffer.set_language(language(ctx.styles)); + if let Some(script) = TextElem::script_in(ctx.styles).as_custom().and_then(|script| { + rustybuzz::Script::from_iso15924_tag(Tag::from_bytes(script.as_bytes())) + }) { + buffer.set_script(script) + } buffer.set_direction(match ctx.dir { Dir::LTR => rustybuzz::Direction::LeftToRight, Dir::RTL => rustybuzz::Direction::RightToLeft, diff --git a/crates/typst/src/doc.rs b/crates/typst/src/doc.rs index 8532934cc..3eaf4c599 100644 --- a/crates/typst/src/doc.rs +++ b/crates/typst/src/doc.rs @@ -514,6 +514,45 @@ impl Glyph { } } +/// An ISO 15924-type script identifier. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, 
PartialOrd, Hash)] +pub struct WritingScript([u8; 4], u8); + +impl WritingScript { + /// Return the script as an all lowercase string slice. + pub fn as_str(&self) -> &str { + std::str::from_utf8(&self.0[..usize::from(self.1)]).unwrap_or_default() + } + + /// Return the description of the script as raw bytes. + pub fn as_bytes(&self) -> &[u8; 4] { + &self.0 + } +} + +impl FromStr for WritingScript { + type Err = &'static str; + + /// Construct a writing script from its ISO 15924 code. + fn from_str(iso: &str) -> Result<Self, Self::Err> { + let len = iso.len(); + if matches!(len, 3..=4) && iso.is_ascii() { + let mut bytes = [b' '; 4]; + bytes[..len].copy_from_slice(iso.as_bytes()); + bytes.make_ascii_lowercase(); + Ok(Self(bytes, len as u8)) + } else { + Err("expected three or four letter script code (ISO 15924 or 'math')") + } + } +} + +cast! { + WritingScript, + self => self.as_str().into_value(), + string: EcoString => Self::from_str(&string)?, +} + /// An identifier for a natural language. #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct Lang([u8; 3], u8); diff --git a/tests/ref/text/lang.png b/tests/ref/text/lang.png index 9ec881529..a5ae89796 100644 Binary files a/tests/ref/text/lang.png and b/tests/ref/text/lang.png differ diff --git a/tests/ref/text/shaping.png b/tests/ref/text/shaping.png index 278fe8ee1..a77dda3ae 100644 Binary files a/tests/ref/text/shaping.png and b/tests/ref/text/shaping.png differ diff --git a/tests/typ/text/lang.typ b/tests/typ/text/lang.typ index a70b4d633..7f1ae1fc3 100644 --- a/tests/typ/text/lang.typ +++ b/tests/typ/text/lang.typ @@ -22,6 +22,26 @@ #text(lang: "uk")[Бб] #text(lang: "sr")[Бб] +--- +// Verify that writing script/language combination has an effect +#{ + set text(size:20pt) + set text(script: "latn", lang: "en") + [Ş ] + set text(script: "latn", lang: "ro") + [Ş ] + set text(script: "grek", lang: "ro") + [Ş ] +} + +--- +// Error: 19-23 expected string or auto, found none +#set text(script: none) + +--- +// Error: 
19-23 expected three or four letter script code (ISO 15924 or 'math') +#set text(script: "ab") + --- // Error: 17-21 expected string, found none #set text(lang: none) diff --git a/tests/typ/text/shaping.typ b/tests/typ/text/shaping.typ index 3a99815bb..3a8d54110 100644 --- a/tests/typ/text/shaping.typ +++ b/tests/typ/text/shaping.typ @@ -11,6 +11,16 @@ ABCअपार्टमेंट // if we didn't separate by script. अ पा र् ट में ट +--- +// A forced `latn` script inhibits Devanagari font features. +#set text(script: "latn") +ABCअपार्टमेंट + +--- +// A forced `deva` script enables Devanagari font features. +#set text(script: "deva") +ABCअपार्टमेंट + --- // Test that RTL safe-to-break doesn't panic even though newline // doesn't exist in shaping output.