mirror of
https://github.com/typst/typst
synced 2025-05-14 17:15:28 +08:00
Fix JIS style punctuation (#3543)
This commit is contained in:
parent
decb4fd9b9
commit
086bca9576
@ -7,7 +7,7 @@ use unicode_script::{Script, UnicodeScript};
|
|||||||
|
|
||||||
use self::linebreak::{breakpoints, Breakpoint};
|
use self::linebreak::{breakpoints, Breakpoint};
|
||||||
use self::shaping::{
|
use self::shaping::{
|
||||||
is_gb_style, is_of_cj_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT,
|
cjk_punct_style, is_of_cj_script, shape, ShapedGlyph, ShapedText, BEGIN_PUNCT_PAT,
|
||||||
END_PUNCT_PAT,
|
END_PUNCT_PAT,
|
||||||
};
|
};
|
||||||
use crate::diag::{bail, SourceResult};
|
use crate::diag::{bail, SourceResult};
|
||||||
@ -1041,7 +1041,7 @@ fn line<'a>(
|
|||||||
justify |= text.ends_with('\u{2028}');
|
justify |= text.ends_with('\u{2028}');
|
||||||
|
|
||||||
// Deal with CJK punctuation at line ends.
|
// Deal with CJK punctuation at line ends.
|
||||||
let gb_style = is_gb_style(shaped.lang, shaped.region);
|
let gb_style = cjk_punct_style(shaped.lang, shaped.region);
|
||||||
let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
|
let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
|
||||||
|| (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));
|
|| (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));
|
||||||
|
|
||||||
|
@ -114,18 +114,18 @@ impl ShapedGlyph {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_cjk_punctuation(&self) -> bool {
|
pub fn is_cjk_punctuation(&self) -> bool {
|
||||||
self.is_cjk_left_aligned_punctuation(true)
|
self.is_cjk_left_aligned_punctuation(CjkPunctStyle::Gb)
|
||||||
|| self.is_cjk_right_aligned_punctuation()
|
|| self.is_cjk_right_aligned_punctuation()
|
||||||
|| self.is_cjk_center_aligned_punctuation(true)
|
|| self.is_cjk_center_aligned_punctuation(CjkPunctStyle::Gb)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
||||||
pub fn is_cjk_left_aligned_punctuation(&self, gb_style: bool) -> bool {
|
pub fn is_cjk_left_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
|
||||||
is_cjk_left_aligned_punctuation(
|
is_cjk_left_aligned_punctuation(
|
||||||
self.c,
|
self.c,
|
||||||
self.x_advance,
|
self.x_advance,
|
||||||
self.stretchability(),
|
self.stretchability(),
|
||||||
gb_style,
|
style,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,8 +135,8 @@ impl ShapedGlyph {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
||||||
pub fn is_cjk_center_aligned_punctuation(&self, gb_style: bool) -> bool {
|
pub fn is_cjk_center_aligned_punctuation(&self, style: CjkPunctStyle) -> bool {
|
||||||
is_cjk_center_aligned_punctuation(self.c, gb_style)
|
is_cjk_center_aligned_punctuation(self.c, style)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether the glyph is a western letter or number.
|
/// Whether the glyph is a western letter or number.
|
||||||
@ -146,7 +146,7 @@ impl ShapedGlyph {
|
|||||||
|| self.c.is_ascii_digit()
|
|| self.c.is_ascii_digit()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn base_adjustability(&self, gb_style: bool) -> Adjustability {
|
pub fn base_adjustability(&self, style: CjkPunctStyle) -> Adjustability {
|
||||||
let width = self.x_advance;
|
let width = self.x_advance;
|
||||||
if self.is_space() {
|
if self.is_space() {
|
||||||
Adjustability {
|
Adjustability {
|
||||||
@ -154,7 +154,7 @@ impl ShapedGlyph {
|
|||||||
stretchability: (Em::zero(), width / 2.0),
|
stretchability: (Em::zero(), width / 2.0),
|
||||||
shrinkability: (Em::zero(), width / 3.0),
|
shrinkability: (Em::zero(), width / 3.0),
|
||||||
}
|
}
|
||||||
} else if self.is_cjk_left_aligned_punctuation(gb_style) {
|
} else if self.is_cjk_left_aligned_punctuation(style) {
|
||||||
Adjustability {
|
Adjustability {
|
||||||
stretchability: (Em::zero(), Em::zero()),
|
stretchability: (Em::zero(), Em::zero()),
|
||||||
shrinkability: (Em::zero(), width / 2.0),
|
shrinkability: (Em::zero(), width / 2.0),
|
||||||
@ -164,7 +164,7 @@ impl ShapedGlyph {
|
|||||||
stretchability: (Em::zero(), Em::zero()),
|
stretchability: (Em::zero(), Em::zero()),
|
||||||
shrinkability: (width / 2.0, Em::zero()),
|
shrinkability: (width / 2.0, Em::zero()),
|
||||||
}
|
}
|
||||||
} else if self.is_cjk_center_aligned_punctuation(gb_style) {
|
} else if self.is_cjk_center_aligned_punctuation(style) {
|
||||||
Adjustability {
|
Adjustability {
|
||||||
stretchability: (Em::zero(), Em::zero()),
|
stretchability: (Em::zero(), Em::zero()),
|
||||||
shrinkability: (width / 4.0, width / 4.0),
|
shrinkability: (width / 4.0, width / 4.0),
|
||||||
@ -883,16 +883,16 @@ fn track_and_space(ctx: &mut ShapingContext) {
|
|||||||
/// Calculate stretchability and shrinkability of each glyph,
|
/// Calculate stretchability and shrinkability of each glyph,
|
||||||
/// and CJK punctuation adjustments according to Chinese Layout Requirements.
|
/// and CJK punctuation adjustments according to Chinese Layout Requirements.
|
||||||
fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<Region>) {
|
fn calculate_adjustability(ctx: &mut ShapingContext, lang: Lang, region: Option<Region>) {
|
||||||
let gb_style = is_gb_style(lang, region);
|
let style = cjk_punct_style(lang, region);
|
||||||
|
|
||||||
for glyph in &mut ctx.glyphs {
|
for glyph in &mut ctx.glyphs {
|
||||||
glyph.adjustability = glyph.base_adjustability(gb_style);
|
glyph.adjustability = glyph.base_adjustability(style);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut glyphs = ctx.glyphs.iter_mut().peekable();
|
let mut glyphs = ctx.glyphs.iter_mut().peekable();
|
||||||
while let Some(glyph) = glyphs.next() {
|
while let Some(glyph) = glyphs.next() {
|
||||||
// Only GB style needs further adjustment.
|
// CNS style needs no further adjustment.
|
||||||
if glyph.is_cjk_punctuation() && !gb_style {
|
if glyph.is_cjk_punctuation() && matches!(style, CjkPunctStyle::Cns) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -976,11 +976,23 @@ pub(super) const END_PUNCT_PAT: &[char] = &[
|
|||||||
'〗', '〕', ']', '}', '?', '!',
|
'〗', '〕', ']', '}', '?', '!',
|
||||||
];
|
];
|
||||||
|
|
||||||
pub(super) fn is_gb_style(lang: Lang, region: Option<Region>) -> bool {
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
// Most CJK variants, including zh-CN, ja-JP, zh-SG, zh-MY use GB-style punctuation,
|
pub(super) enum CjkPunctStyle {
|
||||||
// while zh-HK and zh-TW use alternative style. We default to use GB-style.
|
/// Standard GB/T 15834-2011, used mostly in mainland China.
|
||||||
!(lang == Lang::CHINESE
|
Gb,
|
||||||
&& matches!(region.as_ref().map(Region::as_str), Some("TW" | "HK")))
|
/// Standard by Taiwan Ministry of Education, used in Taiwan and Hong Kong.
|
||||||
|
Cns,
|
||||||
|
/// Standard JIS X 4051, used in Japan.
|
||||||
|
Jis,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn cjk_punct_style(lang: Lang, region: Option<Region>) -> CjkPunctStyle {
|
||||||
|
match (lang, region.as_ref().map(Region::as_str)) {
|
||||||
|
(Lang::CHINESE, Some("TW" | "HK")) => CjkPunctStyle::Cns,
|
||||||
|
(Lang::JAPANESE, _) => CjkPunctStyle::Jis,
|
||||||
|
// zh-CN, zh-SG, and zh-MY use GB-style punctuation; it is also the default.
|
||||||
|
_ => CjkPunctStyle::Gb,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether the glyph is a space.
|
/// Whether the glyph is a space.
|
||||||
@ -1007,16 +1019,22 @@ fn is_cjk_left_aligned_punctuation(
|
|||||||
c: char,
|
c: char,
|
||||||
x_advance: Em,
|
x_advance: Em,
|
||||||
stretchability: (Em, Em),
|
stretchability: (Em, Em),
|
||||||
gb_style: bool,
|
style: CjkPunctStyle,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
|
use CjkPunctStyle::*;
|
||||||
|
|
||||||
// CJK quotation marks share codepoints with Latin quotation marks.
|
// CJK quotation marks share codepoints with Latin quotation marks.
|
||||||
// But only the CJK ones have full width.
|
// But only the CJK ones have full width.
|
||||||
if matches!(c, '”' | '’') && x_advance + stretchability.1 == Em::one() {
|
if matches!(c, '”' | '’') && x_advance + stretchability.1 == Em::one() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if gb_style && matches!(c, ',' | '。' | '.' | '、' | ':' | ';' | '!' | '?')
|
if matches!(style, Gb | Jis) && matches!(c, ',' | '。' | '.' | '、' | ':' | ';')
|
||||||
{
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if matches!(style, Gb) && matches!(c, '?' | '!') {
|
||||||
// In GB style, exclamations and question marks are also left aligned and can be adjusted.
|
// In GB style, exclamations and question marks are also left aligned and can be adjusted.
|
||||||
// Note that they are not adjustable in other styles.
|
// Note that they are not adjustable in other styles.
|
||||||
return true;
|
return true;
|
||||||
@ -1042,13 +1060,16 @@ fn is_cjk_right_aligned_punctuation(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
/// See <https://www.w3.org/TR/clreq/#punctuation_width_adjustment>
|
||||||
fn is_cjk_center_aligned_punctuation(c: char, gb_style: bool) -> bool {
|
fn is_cjk_center_aligned_punctuation(c: char, style: CjkPunctStyle) -> bool {
|
||||||
if !gb_style && matches!(c, ',' | '。' | '.' | '、' | ':' | ';') {
|
if matches!(style, CjkPunctStyle::Cns)
|
||||||
|
&& matches!(c, ',' | '。' | '.' | '、' | ':' | ';')
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// U+30FB: Katakana Middle Dot
|
// U+30FB: Katakana Middle Dot
|
||||||
matches!(c, '\u{30FB}')
|
// U+00B7: Middle Dot
|
||||||
|
matches!(c, '\u{30FB}' | '\u{00B7}')
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether the glyph is justifiable.
|
/// Whether the glyph is justifiable.
|
||||||
@ -1064,10 +1085,11 @@ fn is_justifiable(
|
|||||||
x_advance: Em,
|
x_advance: Em,
|
||||||
stretchability: (Em, Em),
|
stretchability: (Em, Em),
|
||||||
) -> bool {
|
) -> bool {
|
||||||
// GB style is not relevant here.
|
// Punctuation style is not relevant here.
|
||||||
|
let style = CjkPunctStyle::Gb;
|
||||||
is_space(c)
|
is_space(c)
|
||||||
|| is_cj_script(c, script)
|
|| is_cj_script(c, script)
|
||||||
|| is_cjk_left_aligned_punctuation(c, x_advance, stretchability, true)
|
|| is_cjk_left_aligned_punctuation(c, x_advance, stretchability, style)
|
||||||
|| is_cjk_right_aligned_punctuation(c, x_advance, stretchability)
|
|| is_cjk_right_aligned_punctuation(c, x_advance, stretchability)
|
||||||
|| is_cjk_center_aligned_punctuation(c, true)
|
|| is_cjk_center_aligned_punctuation(c, style)
|
||||||
}
|
}
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 28 KiB |
@ -9,6 +9,12 @@
|
|||||||
// because zh-TW does not follow GB style
|
// because zh-TW does not follow GB style
|
||||||
#set text(lang: "zh", region: "TW", font: "Noto Serif CJK TC")
|
#set text(lang: "zh", region: "TW", font: "Noto Serif CJK TC")
|
||||||
原來,你也玩《原神》! ?
|
原來,你也玩《原神》! ?
|
||||||
|
|
||||||
|
#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")
|
||||||
|
「真的吗?」
|
||||||
|
|
||||||
|
#set text(lang: "ja", font: "Noto Serif CJK JP")
|
||||||
|
「本当に?」
|
||||||
---
|
---
|
||||||
|
|
||||||
#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")
|
#set text(lang: "zh", region: "CN", font: "Noto Serif CJK SC")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user