mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Automatically add spacing between CJK and Latin characters (#2334)
This commit is contained in:
parent
a59666369b
commit
e4d9db83ea
@ -16,8 +16,8 @@ use crate::layout::AlignElem;
|
||||
use crate::math::EquationElem;
|
||||
use crate::prelude::*;
|
||||
use crate::text::{
|
||||
is_gb_style, shape, LinebreakElem, Quoter, Quotes, ShapedText, SmartquoteElem,
|
||||
SpaceElem, TextElem,
|
||||
char_is_cjk_script, is_gb_style, shape, LinebreakElem, Quoter, Quotes, ShapedGlyph,
|
||||
ShapedText, SmartquoteElem, SpaceElem, TextElem,
|
||||
};
|
||||
|
||||
/// Arranges text, spacing and inline-level elements into a paragraph.
|
||||
@ -723,6 +723,10 @@ fn prepare<'a>(
|
||||
cursor = end;
|
||||
}
|
||||
|
||||
if TextElem::cjk_latin_spacing_in(styles).is_auto() {
|
||||
add_cjk_latin_spacing(&mut items);
|
||||
}
|
||||
|
||||
Ok(Preparation {
|
||||
bidi,
|
||||
items,
|
||||
@ -736,6 +740,52 @@ fn prepare<'a>(
|
||||
})
|
||||
}
|
||||
|
||||
/// Add some spacing between Han characters and western characters.
|
||||
/// See Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition in Horizontal
|
||||
/// Written Mode
|
||||
fn add_cjk_latin_spacing(items: &mut [Item]) {
|
||||
let mut items = items.iter_mut().peekable();
|
||||
let mut prev: Option<&ShapedGlyph> = None;
|
||||
while let Some(item) = items.next() {
|
||||
let Some(text) = item.text_mut() else {
|
||||
prev = None;
|
||||
continue;
|
||||
};
|
||||
|
||||
// Since we only call this function in [`prepare`], we can assume
|
||||
// that the Cow is owned, and `to_mut` can be called without overhead.
|
||||
debug_assert!(matches!(text.glyphs, std::borrow::Cow::Owned(_)));
|
||||
let mut glyphs = text.glyphs.to_mut().iter_mut().peekable();
|
||||
|
||||
while let Some(glyph) = glyphs.next() {
|
||||
let next = glyphs.peek().map(|n| n as _).or_else(|| {
|
||||
items
|
||||
.peek()
|
||||
.and_then(|i| i.text())
|
||||
.and_then(|shaped| shaped.glyphs.first())
|
||||
});
|
||||
|
||||
// Case 1: CJK followed by a Latin character
|
||||
if glyph.is_cjk_script() && next.map_or(false, |g| g.is_letter_or_number()) {
|
||||
// The spacing is default to 1/4 em, and can be shrunk to 1/8 em.
|
||||
glyph.x_advance += Em::new(0.25);
|
||||
glyph.adjustability.shrinkability.1 += Em::new(0.125);
|
||||
text.width += Em::new(0.25).at(text.size);
|
||||
}
|
||||
|
||||
// Case 2: Latin followed by a CJK character
|
||||
if glyph.is_cjk_script() && prev.map_or(false, |g| g.is_letter_or_number()) {
|
||||
glyph.x_advance += Em::new(0.25);
|
||||
glyph.x_offset += Em::new(0.25);
|
||||
glyph.adjustability.shrinkability.0 += Em::new(0.125);
|
||||
text.width += Em::new(0.25).at(text.size);
|
||||
}
|
||||
|
||||
prev = Some(glyph);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Group a range of text by BiDi level and script, shape the runs and generate
|
||||
/// items for them.
|
||||
fn shape_range<'a>(
|
||||
@ -839,10 +889,11 @@ fn linebreak_simple<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<Line
|
||||
let mut lines = vec![];
|
||||
let mut start = 0;
|
||||
let mut last = None;
|
||||
let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(p.styles).is_auto();
|
||||
|
||||
for (end, mandatory, hyphen) in breakpoints(p) {
|
||||
// Compute the line and its size.
|
||||
let mut attempt = line(vt, p, start..end, mandatory, hyphen);
|
||||
let mut attempt = line(vt, p, start..end, mandatory, hyphen, cjk_latin_spacing);
|
||||
|
||||
// If the line doesn't fit anymore, we push the last fitting attempt
|
||||
// into the stack and rebuild the line from the attempt's end. The
|
||||
@ -851,7 +902,7 @@ fn linebreak_simple<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<Line
|
||||
if let Some((last_attempt, last_end)) = last.take() {
|
||||
lines.push(last_attempt);
|
||||
start = last_end;
|
||||
attempt = line(vt, p, start..end, mandatory, hyphen);
|
||||
attempt = line(vt, p, start..end, mandatory, hyphen, cjk_latin_spacing);
|
||||
}
|
||||
}
|
||||
|
||||
@ -914,10 +965,11 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<L
|
||||
let mut table = vec![Entry {
|
||||
pred: 0,
|
||||
total: 0.0,
|
||||
line: line(vt, p, 0..0, false, false),
|
||||
line: line(vt, p, 0..0, false, false, false),
|
||||
}];
|
||||
|
||||
let em = TextElem::size_in(p.styles);
|
||||
let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(p.styles).is_auto();
|
||||
|
||||
for (end, mandatory, hyphen) in breakpoints(p) {
|
||||
let k = table.len();
|
||||
@ -929,7 +981,7 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<L
|
||||
// Layout the line.
|
||||
let start = pred.line.end;
|
||||
|
||||
let attempt = line(vt, p, start..end, mandatory, hyphen);
|
||||
let attempt = line(vt, p, start..end, mandatory, hyphen, cjk_latin_spacing);
|
||||
|
||||
// Determine how much the line's spaces would need to be stretched
|
||||
// to make it the desired width.
|
||||
@ -1213,10 +1265,16 @@ fn line<'a>(
|
||||
mut range: Range,
|
||||
mandatory: bool,
|
||||
hyphen: bool,
|
||||
cjk_latin_spacing: bool,
|
||||
) -> Line<'a> {
|
||||
let end = range.end;
|
||||
let mut justify = p.justify && end < p.bidi.text.len() && !mandatory;
|
||||
|
||||
// The CJK punctuation that can appear at the beginning or end of a line.
|
||||
const BEGIN_PUNCT_PAT: &[char] = &['“', '‘', '《', '(', '『', '「'];
|
||||
const END_PUNCT_PAT: &[char] =
|
||||
&['”', '’', ',', '。', '、', ':', ';', '》', ')', '』', '」'];
|
||||
|
||||
if range.is_empty() {
|
||||
return Line {
|
||||
bidi: &p.bidi,
|
||||
@ -1256,8 +1314,8 @@ fn line<'a>(
|
||||
|
||||
// Deal with CJK punctuation at line ends.
|
||||
let gb_style = is_gb_style(shaped.lang, shaped.region);
|
||||
let end_cjk_punct = trimmed
|
||||
.ends_with(['”', '’', ',', '。', '、', ':', ';', '》', ')', '』', '」']);
|
||||
let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
|
||||
|| (cjk_latin_spacing && trimmed.ends_with(char_is_cjk_script));
|
||||
|
||||
// Usually, we don't want to shape an empty string because:
|
||||
// - We don't want the height of trimmed whitespace in a different
|
||||
@ -1268,21 +1326,37 @@ fn line<'a>(
|
||||
// need the shaped empty string to make the line the appropriate
|
||||
// height. That is the case exactly if the string is empty and there
|
||||
// are no other items in the line.
|
||||
if hyphen || start + shaped.text.len() > range.end || end_cjk_punct {
|
||||
if hyphen || start + shaped.text.len() > range.end || maybe_adjust_last_glyph {
|
||||
if hyphen || start < range.end || before.is_empty() {
|
||||
let mut reshaped = shaped.reshape(vt, &p.spans, start..range.end);
|
||||
if hyphen || shy {
|
||||
reshaped.push_hyphen(vt, TextElem::fallback_in(p.styles));
|
||||
}
|
||||
let punct = reshaped.glyphs.last();
|
||||
if let Some(punct) = punct {
|
||||
if punct.is_cjk_left_aligned_punctuation(gb_style) {
|
||||
let shrink_amount = punct.shrinkability().1;
|
||||
|
||||
if let Some(last_glyph) = reshaped.glyphs.last() {
|
||||
if last_glyph.is_cjk_left_aligned_punctuation(gb_style) {
|
||||
// If the last glyph is a CJK punctuation, we want to shrink it.
|
||||
// See Requirements for Chinese Text Layout, Section 3.1.6.3
|
||||
// Compression of punctuation marks at line start or line end
|
||||
let shrink_amount = last_glyph.shrinkability().1;
|
||||
let punct = reshaped.glyphs.to_mut().last_mut().unwrap();
|
||||
punct.shrink_right(shrink_amount);
|
||||
reshaped.width -= shrink_amount.at(reshaped.size);
|
||||
} else if cjk_latin_spacing
|
||||
&& last_glyph.is_cjk_script()
|
||||
&& (last_glyph.x_advance - last_glyph.x_offset) > Em::one()
|
||||
{
|
||||
// If the last glyph is a CJK character adjusted by [`add_cjk_latin_spacing`],
|
||||
// restore the original width.
|
||||
let shrink_amount =
|
||||
last_glyph.x_advance - last_glyph.x_offset - Em::one();
|
||||
let glyph = reshaped.glyphs.to_mut().last_mut().unwrap();
|
||||
glyph.x_advance -= shrink_amount;
|
||||
glyph.adjustability.shrinkability.1 = Em::zero();
|
||||
reshaped.width -= shrink_amount.at(reshaped.size);
|
||||
}
|
||||
}
|
||||
|
||||
width += reshaped.width;
|
||||
last = Some(Item::Text(reshaped));
|
||||
}
|
||||
@ -1291,9 +1365,10 @@ fn line<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
// Deal with CJK punctuation at line starts.
|
||||
// Deal with CJK characters at line starts.
|
||||
let text = &p.bidi.text[range.start..end];
|
||||
let start_cjk_punct = text.starts_with(['“', '‘', '《', '(', '『', '「']);
|
||||
let maybe_adjust_first_glyph = text.starts_with(BEGIN_PUNCT_PAT)
|
||||
|| (cjk_latin_spacing && text.starts_with(char_is_cjk_script));
|
||||
|
||||
// Reshape the start item if it's split in half.
|
||||
let mut first = None;
|
||||
@ -1303,8 +1378,9 @@ fn line<'a>(
|
||||
let end = range.end.min(base + shaped.text.len());
|
||||
|
||||
// Reshape if necessary.
|
||||
if range.start + shaped.text.len() > end || start_cjk_punct {
|
||||
if range.start < end || start_cjk_punct {
|
||||
if range.start + shaped.text.len() > end || maybe_adjust_first_glyph {
|
||||
// If the range is empty, we don't want to push an empty text item.
|
||||
if range.start < end {
|
||||
let reshaped = shaped.reshape(vt, &p.spans, range.start..end);
|
||||
width += reshaped.width;
|
||||
first = Some(Item::Text(reshaped));
|
||||
@ -1314,14 +1390,29 @@ fn line<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
if start_cjk_punct {
|
||||
if maybe_adjust_first_glyph {
|
||||
let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
|
||||
if let Some(reshaped) = reshaped {
|
||||
if let Some(punct) = reshaped.glyphs.first() {
|
||||
if punct.is_cjk_right_aligned_punctuation() {
|
||||
let shrink_amount = punct.shrinkability().0;
|
||||
let punct = reshaped.glyphs.to_mut().first_mut().unwrap();
|
||||
punct.shrink_left(shrink_amount);
|
||||
if let Some(first_glyph) = reshaped.glyphs.first() {
|
||||
if first_glyph.is_cjk_right_aligned_punctuation() {
|
||||
// If the first glyph is a CJK punctuation, we want to shrink it.
|
||||
let shrink_amount = first_glyph.shrinkability().0;
|
||||
let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
|
||||
glyph.shrink_left(shrink_amount);
|
||||
let amount_abs = shrink_amount.at(reshaped.size);
|
||||
reshaped.width -= amount_abs;
|
||||
width -= amount_abs;
|
||||
} else if cjk_latin_spacing
|
||||
&& first_glyph.is_cjk_script()
|
||||
&& first_glyph.x_offset > Em::zero()
|
||||
{
|
||||
// If the first glyph is a CJK character adjusted by [`add_cjk_latin_spacing`],
|
||||
// restore the original width.
|
||||
let shrink_amount = first_glyph.x_offset;
|
||||
let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
|
||||
glyph.x_advance -= shrink_amount;
|
||||
glyph.x_offset = Em::zero();
|
||||
glyph.adjustability.shrinkability.0 = Em::zero();
|
||||
let amount_abs = shrink_amount.at(reshaped.size);
|
||||
reshaped.width -= amount_abs;
|
||||
width -= amount_abs;
|
||||
|
@ -19,6 +19,7 @@ pub use self::shift::*;
|
||||
use rustybuzz::Tag;
|
||||
use ttf_parser::Rect;
|
||||
use typst::diag::{bail, error, SourceResult};
|
||||
use typst::eval::Never;
|
||||
use typst::font::{Font, FontStretch, FontStyle, FontWeight, VerticalFontMetric};
|
||||
|
||||
use crate::layout::ParElem;
|
||||
@ -224,6 +225,17 @@ pub struct TextElem {
|
||||
#[default(Rel::one())]
|
||||
pub spacing: Rel<Length>,
|
||||
|
||||
/// Whether to automatically insert spacing between CJK and Latin characters.
|
||||
///
|
||||
/// ```example
|
||||
/// #set text(cjk-latin-spacing: auto)
|
||||
/// 第4章介绍了基本的API。
|
||||
///
|
||||
/// #set text(cjk-latin-spacing: none)
|
||||
/// 第4章介绍了基本的API。
|
||||
/// ```
|
||||
pub cjk_latin_spacing: Smart<Option<Never>>,
|
||||
|
||||
/// An amount to shift the text baseline by.
|
||||
///
|
||||
/// ```example
|
||||
|
@ -100,9 +100,7 @@ impl ShapedGlyph {
|
||||
}
|
||||
|
||||
pub fn is_cjk_script(&self) -> bool {
|
||||
use Script::*;
|
||||
// U+30FC: Katakana-Hiragana Prolonged Sound Mark
|
||||
matches!(self.c.script(), Hiragana | Katakana | Han) || self.c == '\u{30FC}'
|
||||
char_is_cjk_script(self.c)
|
||||
}
|
||||
|
||||
pub fn is_cjk_punctuation(&self) -> bool {
|
||||
@ -151,6 +149,13 @@ impl ShapedGlyph {
|
||||
matches!(self.c, '\u{30FB}')
|
||||
}
|
||||
|
||||
/// Whether the glyph is a western letter or number.
|
||||
pub fn is_letter_or_number(&self) -> bool {
|
||||
matches!(self.c.script(), Script::Latin | Script::Greek | Script::Cyrillic)
|
||||
|| matches!(self.c, '#' | '$' | '%' | '&')
|
||||
|| self.c.is_ascii_digit()
|
||||
}
|
||||
|
||||
pub fn base_adjustability(&self, gb_style: bool) -> Adjustability {
|
||||
let width = self.x_advance;
|
||||
if self.is_space() {
|
||||
@ -958,6 +963,12 @@ fn language(styles: StyleChain) -> rustybuzz::Language {
|
||||
rustybuzz::Language::from_str(&bcp).unwrap()
|
||||
}
|
||||
|
||||
pub fn char_is_cjk_script(c: char) -> bool {
|
||||
use Script::*;
|
||||
// U+30FC: Katakana-Hiragana Prolonged Sound Mark
|
||||
matches!(c.script(), Hiragana | Katakana | Han) || c == '\u{30FC}'
|
||||
}
|
||||
|
||||
/// Returns true if all glyphs in `glyphs` have ranges within the range `range`.
|
||||
#[cfg(debug_assertions)]
|
||||
fn assert_all_glyphs_in_range(glyphs: &[ShapedGlyph], text: &str, range: Range<usize>) {
|
||||
|
BIN
tests/ref/layout/cjk-latin-spacing.png
Normal file
BIN
tests/ref/layout/cjk-latin-spacing.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.9 KiB |
Binary file not shown.
Before Width: | Height: | Size: 69 KiB After Width: | Height: | Size: 68 KiB |
Binary file not shown.
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
16
tests/typ/layout/cjk-latin-spacing.typ
Normal file
16
tests/typ/layout/cjk-latin-spacing.typ
Normal file
@ -0,0 +1,16 @@
|
||||
// Test CJK-Latin spacing.
|
||||
|
||||
#set page(width: 50pt + 10pt, margin: (x: 5pt))
|
||||
#set text(lang: "zh", font: "Noto Serif CJK SC", cjk-latin-spacing: auto)
|
||||
#set par(justify: true)
|
||||
|
||||
中文,中12文1中,文12中文
|
||||
|
||||
中文,中ab文a中,文ab中文
|
||||
|
||||
#set text(cjk-latin-spacing: none)
|
||||
|
||||
中文,中12文1中,文12中文
|
||||
|
||||
中文,中ab文a中,文ab中文
|
||||
|
Loading…
x
Reference in New Issue
Block a user