diff --git a/crates/typst/src/layout/inline/mod.rs b/crates/typst/src/layout/inline/mod.rs index 6add43101..667421fd5 100644 --- a/crates/typst/src/layout/inline/mod.rs +++ b/crates/typst/src/layout/inline/mod.rs @@ -7,7 +7,7 @@ use unicode_script::{Script, UnicodeScript}; use self::linebreak::{breakpoints, Breakpoint}; use self::shaping::{ - is_gb_style, is_of_cj_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT, + cjk_punct_style, is_of_cj_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT, END_PUNCT_PAT, }; use crate::diag::{bail, SourceResult}; @@ -1041,7 +1041,7 @@ fn line<'a>( justify |= text.ends_with('\u{2028}'); // Deal with CJK punctuation at line ends. - let gb_style = is_gb_style(shaped.lang, shaped.region); + let gb_style = cjk_punct_style(shaped.lang, shaped.region); let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT) || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script)); diff --git a/crates/typst/src/layout/inline/shaping.rs b/crates/typst/src/layout/inline/shaping.rs index 586d89e7d..c346233e3 100644 --- a/crates/typst/src/layout/inline/shaping.rs +++ b/crates/typst/src/layout/inline/shaping.rs @@ -114,18 +114,18 @@ impl ShapedGlyph { } pub fn is_cjk_punctuation(&self) -> bool { - self.is_cjk_left_aligned_punctuation(true) + self.is_cjk_left_aligned_punctuation(CjkPunctStyle::Gb) || self.is_cjk_right_aligned_punctuation() - || self.is_cjk_center_aligned_punctuation(true) + || self.is_cjk_center_aligned_punctuation(CjkPunctStyle::Gb) } /// See - pub fn is_cjk_left_aligned_punctuation(&self, gb_style: bool) -> bool { + pub fn is_cjk_left_aligned_punctuation(&self, style: CjkPunctStyle) -> bool { is_cjk_left_aligned_punctuation( self.c, self.x_advance, self.stretchability(), - gb_style, + style, ) } @@ -135,8 +135,8 @@ impl ShapedGlyph { } /// See - pub fn is_cjk_center_aligned_punctuation(&self, gb_style: bool) -> bool { - is_cjk_center_aligned_punctuation(self.c, gb_style) + pub fn is_cjk_center_aligned_punctuation(&self, style: CjkPunctStyle) -> bool { + is_cjk_center_aligned_punctuation(self.c, style) } /// Whether the glyph is a western letter or number. @@ -146,7 +146,7 @@ impl ShapedGlyph { || self.c.is_ascii_digit() } - pub fn base_adjustability(&self, gb_style: bool) -> Adjustability { + pub fn base_adjustability(&self, style: CjkPunctStyle) -> Adjustability { let width = self.x_advance; if self.is_space() { Adjustability { @@ -154,7 +154,7 @@ impl ShapedGlyph { stretchability: (Em::zero(), width / 2.0), shrinkability: (Em::zero(), width / 3.0), } - } else if self.is_cjk_left_aligned_punctuation(gb_style) { + } else if self.is_cjk_left_aligned_punctuation(style) { Adjustability { stretchability: (Em::zero(), Em::zero()), shrinkability: (Em::zero(), width / 2.0), @@ -164,7 +164,7 @@ impl ShapedGlyph { stretchability: (Em::zero(), Em::zero()), shrinkability: (width / 2.0, Em::zero()), } - } else if self.is_cjk_center_aligned_punctuation(gb_style) { + } else if self.is_cjk_center_aligned_punctuation(style) { Adjustability { stretchability: (Em::zero(), Em::zero()), shrinkability: (width / 4.0, width / 4.0), @@ -883,16 +883,16 @@ fn track_and_space(ctx: &mut ShapingContext) { /// Calculate stretchability and shrinkability of each glyph, /// and CJK punctuation adjustments according to Chinese Layout Requirements. fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option) { - let gb_style = is_gb_style(lang, region); + let style = cjk_punct_style(lang, region); for glyph in &mut ctx.glyphs { - glyph.adjustability = glyph.base_adjustability(gb_style); + glyph.adjustability = glyph.base_adjustability(style); } let mut glyphs = ctx.glyphs.iter_mut().peekable(); while let Some(glyph) = glyphs.next() { - // Only GB style needs further adjustment. - if glyph.is_cjk_punctuation() && !gb_style { + // CNS style needs not further adjustment. + if glyph.is_cjk_punctuation() && matches!(style, CjkPunctStyle::Cns) { continue; } @@ -976,11 +976,23 @@ pub(super) const END_PUNCT_PAT: &[char] = &[ '〗', '〕', ']', '}', '?', '!', ]; -pub(super) fn is_gb_style(lang: Lang, region: Option) -> bool { - // Most CJK variants, including zh-CN, ja-JP, zh-SG, zh-MY use GB-style punctuation, - // while zh-HK and zh-TW use alternative style. We default to use GB-style. - !(lang == Lang::CHINESE - && matches!(region.as_ref().map(Region::as_str), Some("TW" | "HK"))) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum CjkPunctStyle { + /// Standard GB/T 15834-2011, used mostly in mainland China. + Gb, + /// Standard by Taiwan Ministry of Education, used in Taiwan and Hong Kong. + Cns, + /// Standard JIS X 4051, used in Japan. + Jis, +} + +pub(super) fn cjk_punct_style(lang: Lang, region: Option) -> CjkPunctStyle { + match (lang, region.as_ref().map(Region::as_str)) { + (Lang::CHINESE, Some("TW" | "HK")) => CjkPunctStyle::Cns, + (Lang::JAPANESE, _) => CjkPunctStyle::Jis, + // zh-CN, zh-SG, zh-MY use GB-style punctuation, + _ => CjkPunctStyle::Gb, + } } /// Whether the glyph is a space. @@ -1007,16 +1019,22 @@ fn is_cjk_left_aligned_punctuation( c: char, x_advance: Em, stretchability: (Em, Em), - gb_style: bool, + style: CjkPunctStyle, ) -> bool { + use CjkPunctStyle::*; + // CJK quotation marks shares codepoints with latin quotation marks. // But only the CJK ones have full width. if matches!(c, '”' | '’') && x_advance + stretchability.1 == Em::one() { return true; } - if gb_style && matches!(c, ',' | '。' | '.' | '、' | ':' | ';' | '!' | '?') + if matches!(style, Gb | Jis) && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') { + return true; + } + + if matches!(style, Gb) && matches!(c, '?' | '!') { // In GB style, exclamations and question marks are also left aligned and can be adjusted. // Note that they are not adjustable in other styles. return true; @@ -1042,13 +1060,16 @@ fn is_cjk_right_aligned_punctuation( } /// See -fn is_cjk_center_aligned_punctuation(c: char, gb_style: bool) -> bool { - if !gb_style && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') { +fn is_cjk_center_aligned_punctuation(c: char, style: CjkPunctStyle) -> bool { + if matches!(style, CjkPunctStyle::Cns) + && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') + { return true; } // U+30FB: Katakana Middle Dot - matches!(c, '\u{30FB}') + // U+00B7: Middle Dot + matches!(c, '\u{30FB}' | '\u{00B7}') } /// Whether the glyph is justifiable. @@ -1064,10 +1085,11 @@ fn is_justifiable( x_advance: Em, stretchability: (Em, Em), ) -> bool { - // GB style is not relevant here. + // punctuation style is not relevant here. + let style = CjkPunctStyle::Gb; is_space(c) || is_cj_script(c, script) - || is_cjk_left_aligned_punctuation(c, x_advance, stretchability, true) + || is_cjk_left_aligned_punctuation(c, x_advance, stretchability, style) || is_cjk_right_aligned_punctuation(c, x_advance, stretchability) - || is_cjk_center_aligned_punctuation(c, true) + || is_cjk_center_aligned_punctuation(c, style) } diff --git a/tests/ref/layout/cjk-punctuation-adjustment.png b/tests/ref/layout/cjk-punctuation-adjustment.png index 711797514..1da08f23f 100644 Binary files a/tests/ref/layout/cjk-punctuation-adjustment.png and b/tests/ref/layout/cjk-punctuation-adjustment.png differ diff --git a/tests/typ/layout/cjk-punctuation-adjustment.typ b/tests/typ/layout/cjk-punctuation-adjustment.typ index 0f1f28946..88ee9560d 100644 --- a/tests/typ/layout/cjk-punctuation-adjustment.typ +++ b/tests/typ/layout/cjk-punctuation-adjustment.typ @@ -9,6 +9,12 @@ // because zh-TW does not follow GB style #set text(lang: "zh", region: "TW", font: "Noto Serif CJK TC") 原來,你也玩《原神》! ? + +#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC") +「真的吗?」 + +#set text(lang: "ja", font: "Noto Serif CJK JP") +「本当に?」 --- #set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")