diff --git a/library/src/layout/par.rs b/library/src/layout/par.rs index 0ad9e1718..17e07cd0f 100644 --- a/library/src/layout/par.rs +++ b/library/src/layout/par.rs @@ -457,22 +457,35 @@ impl<'a> Line<'a> { self.items().skip(start).take(end - start) } - /// How many justifiable glyphs the line contains. + /// How many glyphs are in the text where we can insert additional + /// space when encountering underfull lines. fn justifiables(&self) -> usize { let mut count = 0; for shaped in self.items().filter_map(Item::text) { count += shaped.justifiables(); } + // CJK character at line end should not be adjusted. + if self + .items() + .last() + .and_then(Item::text) + .map(|s| s.cjk_justifiable_at_last()) + .unwrap_or(false) + { + count -= 1; + } + count } - /// How much of the line is stretchable spaces. - fn stretch(&self) -> Abs { - let mut stretch = Abs::zero(); - for shaped in self.items().filter_map(Item::text) { - stretch += shaped.stretch(); - } - stretch + /// How much can the line stretch + fn stretchability(&self) -> Abs { + self.items().filter_map(Item::text).map(|s| s.stretchability()).sum() + } + + /// How much can the line shrink + fn shrinkability(&self) -> Abs { + self.items().filter_map(Item::text).map(|s| s.shrinkability()).sum() } /// The sum of fractions in the line. @@ -835,10 +848,9 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec= Abs::zero() { + attempt.stretchability() + } else { + attempt.shrinkability() + }; + // Ideally, the ratio should between -1.0 and 1.0, but sometimes a value above 1.0 + // is possible, in which case the line is underfull. + let mut ratio = delta / adjust; + if ratio.is_nan() { + // The line is not stretchable, but it just fits. + // This often happens with monospace fonts and CJK texts. + ratio = 0.0; + } + if ratio.is_infinite() { + // The line's not stretchable, we calculate the ratio in another way... + ratio = delta / (em / 2.0); + // ...and because it is underfull/overfull, make sure the ratio is at least 1.0. + if ratio > 0.0 { + ratio += 1.0; + } else { + ratio -= 1.0; + } } - - // At some point, it doesn't matter any more. - ratio = ratio.min(10.0); // Determine the cost of the line. let min_ratio = if attempt.justify { MIN_RATIO } else { 0.0 }; @@ -883,11 +912,15 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec Abs::zero() { + // Attempt to reduce the length of the line, using shrinkability. + justification_ratio = (remaining / shrink).max(-1.0); + remaining = (remaining + shrink).min(Abs::zero()); + } else if line.justify && fr.is_zero() { + // Attempt to increase the length of the line, using stretchability. + if stretch > Abs::zero() { + justification_ratio = (remaining / stretch).min(1.0); + remaining = (remaining - stretch).max(Abs::zero()); + } + let justifiables = line.justifiables(); - if justifiables > 0 { - justification = remaining / justifiables as f64; + if justifiables > 0 && remaining > Abs::zero() { + // Underfull line, distribute the extra space. + extra_justification = remaining / justifiables as f64; remaining = Abs::zero(); } } @@ -1275,7 +1333,7 @@ fn commit( } } Item::Text(shaped) => { - let frame = shaped.build(vt, justification); + let frame = shaped.build(vt, justification_ratio, extra_justification); push(&mut offset, frame); } Item::Frame(frame) => { diff --git a/library/src/text/shaping.rs b/library/src/text/shaping.rs index 2dd0cd6d6..0e5e0a73a 100644 --- a/library/src/text/shaping.rs +++ b/library/src/text/shaping.rs @@ -70,22 +70,42 @@ impl ShapedGlyph { } /// Whether the glyph is justifiable. - /// - /// Typst's basic justification strategy is to stretch all the spaces - /// in a line until the line fills the available width. However, some - /// scripts (notably Chinese and Japanese) don't use spaces. - /// - /// In Japanese typography, the convention is to insert space evenly - /// between all glyphs. I assume it's the same in Chinese. pub fn is_justifiable(&self) -> bool { - self.is_space() || is_spaceless(self.c.script()) + self.is_space() || self.is_cjk() || self.is_cjk_punctuation() } -} -/// Does this script separate its words using spaces? -fn is_spaceless(script: Script) -> bool { - use Script::*; - matches!(script, Hiragana | Katakana | Han) + pub fn is_cjk(&self) -> bool { + use Script::*; + matches!(self.c.script(), Hiragana | Katakana | Han) + } + + pub fn is_cjk_punctuation(&self) -> bool { + matches!(self.c, ',' | '。' | '、' | ':' | ';') + } + + /// The stretchability of the character. + pub fn stretchability(&self) -> Em { + let width = self.x_advance; + if self.is_space() { + // The number for spaces is from Knuth-Plass' paper + width / 2.0 + } else { + Em::zero() + } + } + + /// The shrinkability of the character. + pub fn shrinkability(&self) -> Em { + let width = self.x_advance; + if self.is_space() { + // The number for spaces is from Knuth-Plass' paper + width / 3.0 + } else if self.is_cjk_punctuation() { + width / 2.0 + } else { + Em::zero() + } + } } /// A side you can go toward. @@ -101,7 +121,12 @@ impl<'a> ShapedText<'a> { /// /// The `justification` defines how much extra advance width each /// [justifiable glyph](ShapedGlyph::is_justifiable) will get. - pub fn build(&self, vt: &Vt, justification: Abs) -> Frame { + pub fn build( + &self, + vt: &Vt, + justification_ratio: f64, + extra_justification: Abs, + ) -> Frame { let (top, bottom) = self.measure(vt); let size = Size::new(self.width, top + bottom); @@ -120,19 +145,25 @@ impl<'a> ShapedText<'a> { let pos = Point::new(offset, top + shift - y_offset.at(self.size)); let glyphs = group .iter() - .map(|glyph| Glyph { - id: glyph.glyph_id, - x_advance: glyph.x_advance - + if glyph.is_justifiable() { - frame.size_mut().x += justification; - Em::from_length(justification, self.size) - } else { - Em::zero() - }, - x_offset: glyph.x_offset, - c: glyph.c, - span: glyph.span, - offset: glyph.offset, + .map(|glyph| { + let mut justification = Em::zero(); + if justification_ratio < 0.0 { + justification += glyph.shrinkability() * justification_ratio + } else { + justification += glyph.stretchability() * justification_ratio + } + if glyph.is_justifiable() { + justification += Em::from_length(extra_justification, self.size) + } + frame.size_mut().x += justification.at(self.size); + Glyph { + id: glyph.glyph_id, + x_advance: glyph.x_advance + justification, + x_offset: glyph.x_offset, + c: glyph.c, + span: glyph.span, + offset: glyph.offset, + } }) .collect(); @@ -200,17 +231,35 @@ impl<'a> ShapedText<'a> { (top, bottom) } - /// How many justifiable glyphs the text contains. + /// How many glyphs are in the text where we can insert additional + /// space when encountering underfull lines. pub fn justifiables(&self) -> usize { self.glyphs.iter().filter(|g| g.is_justifiable()).count() } - /// The width of the spaces in the text. - pub fn stretch(&self) -> Abs { + /// Whether the last glyph is a CJK character which should not be justified + /// on line end. + pub fn cjk_justifiable_at_last(&self) -> bool { + self.glyphs + .last() + .map(|g| g.is_cjk() || g.is_cjk_punctuation()) + .unwrap_or(false) + } + + /// The stretchability of the text. + pub fn stretchability(&self) -> Abs { self.glyphs .iter() - .filter(|g| g.is_justifiable()) - .map(|g| g.x_advance) + .map(|g| g.stretchability()) + .sum::() + .at(self.size) + } + + /// The shrinkability of the text + pub fn shrinkability(&self) -> Abs { + self.glyphs + .iter() + .map(|g| g.shrinkability()) .sum::() .at(self.size) } diff --git a/tests/ref/layout/par-justify-cjk.png b/tests/ref/layout/par-justify-cjk.png new file mode 100644 index 000000000..ee58318a9 Binary files /dev/null and b/tests/ref/layout/par-justify-cjk.png differ diff --git a/tests/ref/layout/par-justify.png b/tests/ref/layout/par-justify.png index 8a1578fea..a16c3f7b1 100644 Binary files a/tests/ref/layout/par-justify.png and b/tests/ref/layout/par-justify.png differ diff --git a/tests/ref/layout/par-knuth.png b/tests/ref/layout/par-knuth.png index afe590b0c..f3da17531 100644 Binary files a/tests/ref/layout/par-knuth.png and b/tests/ref/layout/par-knuth.png differ diff --git a/tests/ref/text/linebreak.png b/tests/ref/text/linebreak.png index 64d0d403d..c127182c9 100644 Binary files a/tests/ref/text/linebreak.png and b/tests/ref/text/linebreak.png differ diff --git a/tests/typ/layout/par-justify-cjk.typ b/tests/typ/layout/par-justify-cjk.typ new file mode 100644 index 000000000..1016b282b --- /dev/null +++ b/tests/typ/layout/par-justify-cjk.typ @@ -0,0 +1,23 @@ +// Test Chinese text in narrow lines. + +// In Chinese typography, line length should be multiples of the character size +// and the line ends should be aligned with each other. +// Most Chinese publications do not use hanging punctuation at line end. +#set page(width: auto) +#set par(justify: true) +#set text(overhang: false, lang: "zh") + +#rect(inset: 0pt, width: 80pt, fill: rgb("eee"))[ + 中文维基百科使用汉字书写,汉字是汉族或华人的共同文字,是中国大陆、新加坡、马来西亚、台湾、香港、澳门的唯一官方文字或官方文字之一。25.9%,而美国和荷兰则分別占13.7%及8.2%。近年來,中国大陆地区的维基百科编辑者正在迅速增加; +] + +--- +// Japanese typography is more complex, make sure it is at least a bit sensible. +#set page(width: auto) +#set par(justify: true) +#set text(lang: "jp") +#rect(inset: 0pt, width: 80pt, fill: rgb("eee"))[ + ウィキペディア(英: Wikipedia)は、世界中のボランティアの共同作業によって執筆及び作成されるフリーの多言語インターネット百科事典である。主に寄付に依って活動している非営利団体「ウィキメディア財団」が所有・運営している。 + + 専門家によるオンライン百科事典プロジェクトNupedia(ヌーペディア)を前身として、2001年1月、ラリー・サンガーとジミー・ウェールズ(英: Jimmy Donal "Jimbo" Wales)により英語でプロジェクトが開始された。 +] \ No newline at end of file diff --git a/tests/typ/layout/par-justify.typ b/tests/typ/layout/par-justify.typ index 24d3ab38d..7034a42b3 100644 --- a/tests/typ/layout/par-justify.typ +++ b/tests/typ/layout/par-justify.typ @@ -20,12 +20,6 @@ D A B C #linebreak(justify: true) D E F #linebreak(justify: true) ---- -// Test that justificating chinese text is at least a bit sensible. -#set page(width: 200pt) -#set par(justify: true) -中文维基百科使用汉字书写,汉字是汉族或华人的共同文字,是中国大陆、新加坡、马来西亚、台湾、香港、澳门的唯一官方文字或官方文字之一。25.9%,而美国和荷兰则分別占13.7%及8.2%。近年來,中国大陆地区的维基百科编辑者正在迅速增加; - --- // Test that there are no hick-ups with justification enabled and // basically empty paragraph.