Fix JIS style punctuation (#3543)

This commit is contained in:
Peng Guanwen 2024-03-04 17:02:25 +08:00 committed by GitHub
parent decb4fd9b9
commit 086bca9576
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 56 additions and 28 deletions

View File

@ -7,7 +7,7 @@ use unicode_script::{Script, UnicodeScript};
use self::linebreak::{breakpoints, Breakpoint}; use self::linebreak::{breakpoints, Breakpoint};
use self::shaping::{ use self::shaping::{
is_gb_style, is_of_cj_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT, cjk_punct_style, is_of_cj_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT,
END_PUNCT_PAT, END_PUNCT_PAT,
}; };
use crate::diag::{bail, SourceResult}; use crate::diag::{bail, SourceResult};
@ -1041,7 +1041,7 @@ fn line<'a>(
justify |= text.ends_with('\u{2028}'); justify |= text.ends_with('\u{2028}');
// Deal with CJK punctuation at line ends. // Deal with CJK punctuation at line ends.
let gb_style = is_gb_style(shaped.lang, shaped.region); let gb_style = cjk_punct_style(shaped.lang, shaped.region);
let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT) let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
|| (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script)); || (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));

View File

@ -114,18 +114,18 @@ impl ShapedGlyph {
} }
pub fn is_cjk_punctuation(&self) -> bool { pub fn is_cjk_punctuation(&self) -> bool {
self.is_cjk_left_aligned_punctuation(true) self.is_cjk_left_aligned_punctuation(CjkPunctStyle::Gb)
|| self.is_cjk_right_aligned_punctuation() || self.is_cjk_right_aligned_punctuation()
|| self.is_cjk_center_aligned_punctuation(true) || self.is_cjk_center_aligned_punctuation(CjkPunctStyle::Gb)
} }
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
pub fn is_cjk_left_aligned_punctuation(&self, gb_style: bool) -> bool { pub fn is_cjk_left_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
is_cjk_left_aligned_punctuation( is_cjk_left_aligned_punctuation(
self.c, self.c,
self.x_advance, self.x_advance,
self.stretchability(), self.stretchability(),
gb_style, style,
) )
} }
@ -135,8 +135,8 @@ impl ShapedGlyph {
} }
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
pub fn is_cjk_center_aligned_punctuation(&self, gb_style: bool) -> bool { pub fn is_cjk_center_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
is_cjk_center_aligned_punctuation(self.c, gb_style) is_cjk_center_aligned_punctuation(self.c, style)
} }
/// Whether the glyph is a western letter or number. /// Whether the glyph is a western letter or number.
@ -146,7 +146,7 @@ impl ShapedGlyph {
|| self.c.is_ascii_digit() || self.c.is_ascii_digit()
} }
pub fn base_adjustability(&self, gb_style: bool) -> Adjustability { pub fn base_adjustability(&self, style: CjkPunctStyle) -> Adjustability {
let width = self.x_advance; let width = self.x_advance;
if self.is_space() { if self.is_space() {
Adjustability { Adjustability {
@ -154,7 +154,7 @@ impl ShapedGlyph {
stretchability: (Em::zero(), width / 2.0), stretchability: (Em::zero(), width / 2.0),
shrinkability: (Em::zero(), width / 3.0), shrinkability: (Em::zero(), width / 3.0),
} }
} else if self.is_cjk_left_aligned_punctuation(gb_style) { } else if self.is_cjk_left_aligned_punctuation(style) {
Adjustability { Adjustability {
stretchability: (Em::zero(), Em::zero()), stretchability: (Em::zero(), Em::zero()),
shrinkability: (Em::zero(), width / 2.0), shrinkability: (Em::zero(), width / 2.0),
@ -164,7 +164,7 @@ impl ShapedGlyph {
stretchability: (Em::zero(), Em::zero()), stretchability: (Em::zero(), Em::zero()),
shrinkability: (width / 2.0, Em::zero()), shrinkability: (width / 2.0, Em::zero()),
} }
} else if self.is_cjk_center_aligned_punctuation(gb_style) { } else if self.is_cjk_center_aligned_punctuation(style) {
Adjustability { Adjustability {
stretchability: (Em::zero(), Em::zero()), stretchability: (Em::zero(), Em::zero()),
shrinkability: (width / 4.0, width / 4.0), shrinkability: (width / 4.0, width / 4.0),
@ -883,16 +883,16 @@ fn track_and_space(ctx: &mut ShapingContext) {
/// Calculate stretchability and shrinkability of each glyph, /// Calculate stretchability and shrinkability of each glyph,
/// and CJK punctuation adjustments according to Chinese Layout Requirements. /// and CJK punctuation adjustments according to Chinese Layout Requirements.
fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<Region>) { fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<Region>) {
let gb_style = is_gb_style(lang, region); let style = cjk_punct_style(lang, region);
for glyph in &mut ctx.glyphs { for glyph in &mut ctx.glyphs {
glyph.adjustability = glyph.base_adjustability(gb_style); glyph.adjustability = glyph.base_adjustability(style);
} }
let mut glyphs = ctx.glyphs.iter_mut().peekable(); let mut glyphs = ctx.glyphs.iter_mut().peekable();
while let Some(glyph) = glyphs.next() { while let Some(glyph) = glyphs.next() {
// Only GB style needs further adjustment. // CNS style needs no further adjustment.
if glyph.is_cjk_punctuation() && !gb_style { if glyph.is_cjk_punctuation() && matches!(style, CjkPunctStyle::Cns) {
continue; continue;
} }
@ -976,11 +976,23 @@ pub(super) const END_PUNCT_PAT: &[char] = &[
'〗', '', '', '', '', '', '〗', '', '', '', '', '',
]; ];
pub(super) fn is_gb_style(lang: Lang, region: Option<Region>) -> bool { #[derive(Debug, Clone, Copy, PartialEq, Eq)]
// Most CJK variants, including zh-CN, ja-JP, zh-SG, zh-MY use GB-style punctuation, pub(super) enum CjkPunctStyle {
// while zh-HK and zh-TW use alternative style. We default to use GB-style. /// Standard GB/T 15834-2011, used mostly in mainland China.
!(lang == Lang::CHINESE Gb,
&& matches!(region.as_ref().map(Region::as_str), Some("TW" | "HK"))) /// Standard by Taiwan Ministry of Education, used in Taiwan and Hong Kong.
Cns,
/// Standard JIS X 4051, used in Japan.
Jis,
}
pub(super) fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle {
match (lang, region.as_ref().map(Region::as_str)) {
(Lang::CHINESE, Some("TW" | "HK")) => CjkPunctStyle::Cns,
(Lang::JAPANESE, _) => CjkPunctStyle::Jis,
// zh-CN, zh-SG, zh-MY use GB-style punctuation, which is also the fallback for all other locales.
_ => CjkPunctStyle::Gb,
}
} }
/// Whether the glyph is a space. /// Whether the glyph is a space.
@ -1007,16 +1019,22 @@ fn is_cjk_left_aligned_punctuation(
c: char, c: char,
x_advance: Em, x_advance: Em,
stretchability: (Em, Em), stretchability: (Em, Em),
gb_style: bool, style: CjkPunctStyle,
) -> bool { ) -> bool {
use CjkPunctStyle::*;
// CJK quotation marks share codepoints with Latin quotation marks. // CJK quotation marks share codepoints with Latin quotation marks.
// But only the CJK ones have full width. // But only the CJK ones have full width.
if matches!(c, '”' | '’') && x_advance + stretchability.1 == Em::one() { if matches!(c, '”' | '’') && x_advance + stretchability.1 == Em::one() {
return true; return true;
} }
if gb_style && matches!(c, ',' | '。' | '.' | '、' | ':' | ';' | '!' | '?') if matches!(style, Gb | Jis) && matches!(c, ',' | '。' | '.' | '、' | ':' | ';')
{ {
return true;
}
if matches!(style, Gb) && matches!(c, '!' | '?') {
// In GB style, exclamations and question marks are also left aligned and can be adjusted. // In GB style, exclamations and question marks are also left aligned and can be adjusted.
// Note that they are not adjustable in other styles. // Note that they are not adjustable in other styles.
return true; return true;
@ -1042,13 +1060,16 @@ fn is_cjk_right_aligned_punctuation(
} }
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment> /// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
fn is_cjk_center_aligned_punctuation(c: char, gb_style: bool) -> bool { fn is_cjk_center_aligned_punctuation(c: char, style: CjkPunctStyle) -> bool {
if !gb_style && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') { if matches!(style, CjkPunctStyle::Cns)
&& matches!(c, ',' | '。' | '.' | '、' | ':' | ';')
{
return true; return true;
} }
// U+30FB: Katakana Middle Dot // U+30FB: Katakana Middle Dot
matches!(c, '\u{30FB}') // U+00B7: Middle Dot
matches!(c, '\u{30FB}' | '\u{00B7}')
} }
/// Whether the glyph is justifiable. /// Whether the glyph is justifiable.
@ -1064,10 +1085,11 @@ fn is_justifiable(
x_advance: Em, x_advance: Em,
stretchability: (Em, Em), stretchability: (Em, Em),
) -> bool { ) -> bool {
// GB style is not relevant here. // Punctuation style is not relevant here.
let style = CjkPunctStyle::Gb;
is_space(c) is_space(c)
|| is_cj_script(c, script) || is_cj_script(c, script)
|| is_cjk_left_aligned_punctuation(c, x_advance, stretchability, true) || is_cjk_left_aligned_punctuation(c, x_advance, stretchability, style)
|| is_cjk_right_aligned_punctuation(c, x_advance, stretchability) || is_cjk_right_aligned_punctuation(c, x_advance, stretchability)
|| is_cjk_center_aligned_punctuation(c, true) || is_cjk_center_aligned_punctuation(c, style)
} }

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -9,6 +9,12 @@
// because zh-TW does not follow GB style // because zh-TW does not follow GB style
#set text(lang: "zh", region: "TW", font: "Noto Serif CJK TC") #set text(lang: "zh", region: "TW", font: "Noto Serif CJK TC")
原來,你也玩《原神》! 原來,你也玩《原神》!
#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")
「真的吗?」
#set text(lang: "ja", font: "Noto Serif CJK JP")
「本当に?」
--- ---
#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC") #set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")