Support OpenType writing script (#1697)

This commit is contained in:
damaxwell 2023-07-19 02:25:24 -08:00 committed by GitHub
parent f39bfa4762
commit 8a57395ee4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 112 additions and 6 deletions

View File

@ -750,6 +750,7 @@ fn shape_range<'a>(
spans: &SpanMapper, spans: &SpanMapper,
styles: StyleChain<'a>, styles: StyleChain<'a>,
) { ) {
let script = TextElem::script_in(styles);
let lang = TextElem::lang_in(styles); let lang = TextElem::lang_in(styles);
let region = TextElem::region_in(styles); let region = TextElem::region_in(styles);
let mut process = |range: Range, level: BidiLevel| { let mut process = |range: Range, level: BidiLevel| {
@ -763,25 +764,31 @@ fn shape_range<'a>(
let mut prev_script = Script::Unknown; let mut prev_script = Script::Unknown;
let mut cursor = range.start; let mut cursor = range.start;
// Group by embedding level and script. // Group by embedding level and script. If the text's script is explicitly
// set (rather than inferred from the glyphs), we keep the script at an
// unchanging `Script::Unknown` so that only level changes cause breaks.
for i in range.clone() { for i in range.clone() {
if !bidi.text.is_char_boundary(i) { if !bidi.text.is_char_boundary(i) {
continue; continue;
} }
let level = bidi.levels[i]; let level = bidi.levels[i];
let script = let curr_script = match script {
bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script()); Smart::Auto => {
bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
}
Smart::Custom(_) => Script::Unknown,
};
if level != prev_level || !is_compatible(script, prev_script) { if level != prev_level || !is_compatible(curr_script, prev_script) {
if cursor < i { if cursor < i {
process(cursor..i, prev_level); process(cursor..i, prev_level);
} }
cursor = i; cursor = i;
prev_level = level; prev_level = level;
prev_script = script; prev_script = curr_script;
} else if is_generic_script(prev_script) { } else if is_generic_script(prev_script) {
prev_script = script; prev_script = curr_script;
} }
} }

View File

@ -265,6 +265,31 @@ pub struct TextElem {
#[default(BottomEdge::Metric(BottomEdgeMetric::Baseline))] #[default(BottomEdge::Metric(BottomEdgeMetric::Baseline))]
pub bottom_edge: BottomEdge, pub bottom_edge: BottomEdge,
/// The OpenType writing script setting.
///
/// The combination of `{script}` and `{lang}` determines how
/// font features, such as glyph substitution, are implemented.
/// Frequently the value is a modified (all-lowercase) ISO 15924 script identifier, and
/// the `math` writing script is used for features appropriate
/// for mathematical symbols.
///
/// When set to `{auto}`, the default and recommended setting,
/// an appropriate script is chosen for each block of characters
/// sharing a common Unicode script property.
///
/// ```example
/// #let scedilla = [Ş]
/// #set text(font: "Linux Libertine", size: 20pt)
/// #scedilla // S with a cedilla
///
/// #set text(script: "latn", lang: "ro")
/// #scedilla // S with a subscript comma
///
/// #set text(script: "grek", lang: "ro")
/// #scedilla // S with a cedilla
/// ```
pub script: Smart<WritingScript>,
/// An [ISO 639-1/2/3 language code.](https://en.wikipedia.org/wiki/ISO_639) /// An [ISO 639-1/2/3 language code.](https://en.wikipedia.org/wiki/ISO_639)
/// ///
/// Setting the correct language affects various parts of Typst: /// Setting the correct language affects various parts of Typst:

View File

@ -634,6 +634,11 @@ fn shape_segment(
let mut buffer = UnicodeBuffer::new(); let mut buffer = UnicodeBuffer::new();
buffer.push_str(text); buffer.push_str(text);
buffer.set_language(language(ctx.styles)); buffer.set_language(language(ctx.styles));
if let Some(script) = TextElem::script_in(ctx.styles).as_custom().and_then(|script| {
rustybuzz::Script::from_iso15924_tag(Tag::from_bytes(script.as_bytes()))
}) {
buffer.set_script(script)
}
buffer.set_direction(match ctx.dir { buffer.set_direction(match ctx.dir {
Dir::LTR => rustybuzz::Direction::LeftToRight, Dir::LTR => rustybuzz::Direction::LeftToRight,
Dir::RTL => rustybuzz::Direction::RightToLeft, Dir::RTL => rustybuzz::Direction::RightToLeft,

View File

@ -514,6 +514,45 @@ impl Glyph {
} }
} }
/// An ISO 15924-type script identifier.
///
/// The code is kept lowercased in a fixed four-byte buffer that is padded
/// with ASCII spaces; the second field records how many of those bytes the
/// code actually occupies.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct WritingScript([u8; 4], u8);

impl WritingScript {
    /// Return the script as an all lowercase string slice.
    ///
    /// Only the occupied part of the buffer is returned, so the space
    /// padding of a three-byte code is not included. Should the stored bytes
    /// not be valid UTF-8, the empty string is returned.
    pub fn as_str(&self) -> &str {
        let used = usize::from(self.1);
        match std::str::from_utf8(&self.0[..used]) {
            Ok(code) => code,
            Err(_) => "",
        }
    }

    /// Return the description of the script as raw bytes.
    ///
    /// This is the full four-byte buffer, including any space padding.
    pub fn as_bytes(&self) -> &[u8; 4] {
        &self.0
    }
}

impl FromStr for WritingScript {
    type Err = &'static str;

    /// Construct a writing script from its ISO 15924 code.
    ///
    /// Accepts any ASCII string that is three or four bytes long, lowercases
    /// it and pads it with spaces to four bytes. Everything else is rejected
    /// with a static error message.
    fn from_str(iso: &str) -> Result<Self, Self::Err> {
        let raw = iso.as_bytes();
        if !(3..=4).contains(&raw.len()) || !iso.is_ascii() {
            return Err("expected three or four letter script code (ISO 15924 or 'math')");
        }

        // Start from an all-space buffer so that short codes end up padded.
        let mut padded = [b' '; 4];
        for (slot, byte) in padded.iter_mut().zip(raw) {
            *slot = byte.to_ascii_lowercase();
        }

        // The length was checked to be 3 or 4 above, so it fits into a `u8`.
        Ok(Self(padded, raw.len() as u8))
    }
}
// Casting rules for `WritingScript`: it is turned into a value through its
// string form (`as_str`), and it is built from a string by going through
// `FromStr`, with the parse error propagated via `?`.
cast! {
WritingScript,
self => self.as_str().into_value(),
string: EcoString => Self::from_str(&string)?,
}
/// An identifier for a natural language. /// An identifier for a natural language.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Lang([u8; 3], u8); pub struct Lang([u8; 3], u8);

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.1 KiB

After

Width:  |  Height:  |  Size: 5.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 3.8 KiB

View File

@ -22,6 +22,26 @@
#text(lang: "uk")[Бб] #text(lang: "uk")[Бб]
#text(lang: "sr")[Бб] #text(lang: "sr")[Бб]
---
// Verify that writing script/language combination has an effect
#{
set text(size:20pt)
set text(script: "latn", lang: "en")
[Ş ]
set text(script: "latn", lang: "ro")
[Ş ]
set text(script: "grek", lang: "ro")
[Ş ]
}
---
// Error: 19-23 expected string or auto, found none
#set text(script: none)
---
// Error: 19-23 expected three or four letter script code (ISO 15924 or 'math')
#set text(script: "ab")
--- ---
// Error: 17-21 expected string, found none // Error: 17-21 expected string, found none
#set text(lang: none) #set text(lang: none)

View File

@ -11,6 +11,16 @@ ABCअपार्टमेंट
// if we didn't separate by script. // if we didn't separate by script.
पा र् में पा र् में
---
// A forced `latn` script inhibits Devanagari font features.
#set text(script: "latn")
ABCअपार्टमेंट
---
// A forced `deva` script enables Devanagari font features.
#set text(script: "deva")
ABCअपार्टमेंट
--- ---
// Test that RTL safe-to-break doesn't panic even though newline // Test that RTL safe-to-break doesn't panic even though newline
// doesn't exist in shaping output. // doesn't exist in shaping output.