diff --git a/Cargo.lock b/Cargo.lock index 4a6d4f0e8..b4fd7b72d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -240,6 +240,12 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +[[package]] +name = "hypher" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29349e08e99b98d0e16a0ca738d181d5c73431a9a46b78918318c4bc9b10106" + [[package]] name = "iai" version = "0.1.1" @@ -806,6 +812,7 @@ dependencies = [ "either", "flate2", "fxhash", + "hypher", "iai", "image", "kurbo", diff --git a/Cargo.toml b/Cargo.toml index 3971b9714..ed265b1a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ serde = { version = "1", features = ["derive"] } typed-arena = "2" # Text and font handling +hypher = "0.1" kurbo = "0.8" ttf-parser = "0.12" rustybuzz = "0.4" diff --git a/src/library/text/mod.rs b/src/library/text/mod.rs index 0df59007b..1ce3518ce 100644 --- a/src/library/text/mod.rs +++ b/src/library/text/mod.rs @@ -1,3 +1,5 @@ +//! Text shaping and paragraph layout. + mod deco; mod link; mod par; diff --git a/src/library/text/par.rs b/src/library/text/par.rs index 812231c2e..70cac1be5 100644 --- a/src/library/text/par.rs +++ b/src/library/text/par.rs @@ -27,12 +27,17 @@ pub enum ParChild { #[class] impl ParNode { + /// An ISO 639-1 language code. + pub const LANG: Option = None; /// The direction for text and inline objects. pub const DIR: Dir = Dir::LTR; /// How to align text and inline objects in their line. pub const ALIGN: Align = Align::Left; /// Whether to justify text in its line. pub const JUSTIFY: bool = false; + /// Whether to hyphenate text to improve line breaking. When `auto`, words + /// will be hyphenated if and only if justification is enabled. + pub const HYPHENATE: Smart = Smart::Auto; /// The spacing between lines (dependent on scaled font size). 
pub const LEADING: Linear = Relative::new(0.65).into(); /// The extra spacing between paragraphs (dependent on scaled font size). @@ -49,13 +54,14 @@ impl ParNode { } fn set(args: &mut Args, styles: &mut StyleMap) -> TypResult<()> { + let lang = args.named::>("lang")?; + let mut dir = - args.named("lang")? - .map(|iso: EcoString| match iso.to_lowercase().as_str() { - "ar" | "he" | "fa" | "ur" | "ps" | "yi" => Dir::RTL, - "en" | "fr" | "de" => Dir::LTR, - _ => Dir::LTR, - }); + lang.clone().flatten().map(|iso| match iso.to_lowercase().as_str() { + "ar" | "dv" | "fa" | "he" | "ks" | "pa" | "ps" | "sd" | "ug" | "ur" + | "yi" => Dir::RTL, + _ => Dir::LTR, + }); if let Some(Spanned { v, span }) = args.named::>("dir")? { if v.axis() != SpecAxis::Horizontal { @@ -74,9 +80,11 @@ impl ParNode { dir.map(|dir| dir.start().into()) }; + styles.set_opt(Self::LANG, lang); styles.set_opt(Self::DIR, dir); styles.set_opt(Self::ALIGN, align); styles.set_opt(Self::JUSTIFY, args.named("justify")?); + styles.set_opt(Self::HYPHENATE, args.named("hyphenate")?); styles.set_opt(Self::LEADING, args.named("leading")?); styles.set_opt(Self::SPACING, args.named("spacing")?); styles.set_opt(Self::INDENT, args.named("indent")?); @@ -137,7 +145,7 @@ impl Layout for ParNode { let par = ParLayout::new(ctx, self, bidi, regions, &styles)?; // Break the paragraph into lines. - let lines = break_into_lines(&mut ctx.fonts, &par, regions.first.x); + let lines = break_into_lines(&mut ctx.fonts, &par, regions.first.x, styles); // Stack the lines into one frame per region. Ok(stack_lines(&ctx.fonts, lines, regions, styles)) @@ -278,6 +286,7 @@ impl<'a> ParLayout<'a> { fonts: &mut FontStore, mut range: Range, mandatory: bool, + hyphen: bool, ) -> LineLayout<'a> { // Find the items which bound the text range. let last_idx = self.find(range.end.saturating_sub(1)).unwrap(); @@ -308,7 +317,10 @@ impl<'a> ParLayout<'a> { // empty string. if !shifted.is_empty() || rest.is_empty() { // Reshape that part. 
- let reshaped = shaped.reshape(fonts, shifted); + let mut reshaped = shaped.reshape(fonts, shifted); + if hyphen { + reshaped.push_hyphen(fonts); + } last = Some(ParItem::Text(reshaped)); } @@ -524,6 +536,7 @@ fn break_into_lines<'a>( fonts: &mut FontStore, par: &'a ParLayout<'a>, width: Length, + styles: StyleChain, ) -> Vec> { // The already determined lines and the current line attempt. let mut lines = vec![]; @@ -531,9 +544,9 @@ fn break_into_lines<'a>( let mut last = None; // Find suitable line breaks. - for (end, mandatory) in LineBreakIterator::new(&par.bidi.text) { + for (end, mandatory, hyphen) in breakpoints(&par.bidi.text, styles) { // Compute the line and its size. - let mut line = par.line(fonts, start .. end, mandatory); + let mut line = par.line(fonts, start .. end, mandatory, hyphen); // If the line doesn't fit anymore, we push the last fitting attempt // into the stack and rebuild the line from its end. The resulting @@ -542,7 +555,7 @@ fn break_into_lines<'a>( if let Some((last_line, last_end)) = last.take() { lines.push(last_line); start = last_end; - line = par.line(fonts, start .. end, mandatory); + line = par.line(fonts, start .. end, mandatory, hyphen); } } @@ -565,6 +578,47 @@ fn break_into_lines<'a>( lines } +/// Determine all possible points in the text where lines can be broken. +fn breakpoints<'a>( + text: &'a str, + styles: StyleChain, +) -> impl Iterator + 'a { + let mut lang = None; + if styles.get(ParNode::HYPHENATE).unwrap_or(styles.get(ParNode::JUSTIFY)) { + lang = styles + .get_ref(ParNode::LANG) + .as_ref() + .and_then(|iso| iso.as_bytes().try_into().ok()) + .and_then(hypher::Lang::from_iso); + } + + let breaks = LineBreakIterator::new(text); + let mut last = 0; + + if let Some(lang) = lang { + Either::Left(breaks.flat_map(move |(end, mandatory)| { + let word = &text[last .. 
end]; + let trimmed = word.trim_end_matches(|c: char| !c.is_alphabetic()); + let suffix = last + trimmed.len(); + let mut start = std::mem::replace(&mut last, end); + if trimmed.is_empty() { + Either::Left([(end, mandatory, false)].into_iter()) + } else { + Either::Right(hypher::hyphenate(trimmed, lang).map(move |syllable| { + start += syllable.len(); + if start == suffix { + start = end; + } + let hyphen = start < end; + (start, mandatory && !hyphen, hyphen) + })) + } + })) + } else { + Either::Right(breaks.map(|(e, m)| (e, m, false))) + } +} + /// Combine the lines into one frame per region. fn stack_lines( fonts: &FontStore, diff --git a/src/library/text/shaping.rs b/src/library/text/shaping.rs index 26c8daf35..b467abf70 100644 --- a/src/library/text/shaping.rs +++ b/src/library/text/shaping.rs @@ -135,6 +135,34 @@ impl<'a> ShapedText<'a> { } } + /// Push a hyphen to the end of the text. + pub fn push_hyphen(&mut self, fonts: &mut FontStore) { + // Take the hyphen glyph from the first font family that can be + // selected and contains one; if there is none, nothing is pushed. + let size = self.styles.get(TextNode::SIZE).abs; + let variant = variant(self.styles); + families(self.styles).find_map(|family| { + // Allow hyphens to overhang a bit. + const INSET: f64 = 0.4; + let face_id = fonts.select(family, variant)?; + let face = fonts.get(face_id); + let ttf = face.ttf(); + let glyph_id = ttf.glyph_index('-')?; + let x_advance = face.to_em(ttf.glyph_hor_advance(glyph_id)?); + self.size.x += INSET * x_advance.resolve(size); + self.glyphs.to_mut().push(ShapedGlyph { + face_id, + glyph_id: glyph_id.0, + x_advance, + x_offset: Em::zero(), + text_index: self.text.len(), + safe_to_break: true, + is_space: false, + }); + Some(()) + }); + } + /// Find the subslice of glyphs that represent the given text range if both /// sides are safe to break. 
fn slice_safe_to_break(&self, text_range: Range) -> Option<&[ShapedGlyph]> { @@ -531,8 +559,9 @@ fn measure( if glyphs.is_empty() { // When there are no glyphs, we just use the vertical metrics of the // first available font. + let variant = variant(styles); for family in families(styles) { - if let Some(face_id) = fonts.select(family, variant(styles)) { + if let Some(face_id) = fonts.select(family, variant) { expand(fonts.get(face_id)); break; } diff --git a/tests/ref/text/hyphenate.png b/tests/ref/text/hyphenate.png new file mode 100644 index 000000000..050cab12b Binary files /dev/null and b/tests/ref/text/hyphenate.png differ diff --git a/tests/ref/text/justify.png b/tests/ref/text/justify.png index 26787af9f..38141bdc8 100644 Binary files a/tests/ref/text/justify.png and b/tests/ref/text/justify.png differ diff --git a/tests/typ/text/hyphenate.typ b/tests/typ/text/hyphenate.typ new file mode 100644 index 000000000..d6f44477b --- /dev/null +++ b/tests/typ/text/hyphenate.typ @@ -0,0 +1,14 @@ +// Test hyphenation. + +--- +#set page(width: 70pt) +#set par(lang: "en", hyphenate: true) +Warm welcomes to Typst. + +#h(6pt) networks, the rest. + +--- +#set page(width: 60pt) +#set par(lang: "el", hyphenate: true) +διαμερίσματα. \ +λατρευτός diff --git a/tests/typ/text/justify.typ b/tests/typ/text/justify.typ index 7b8a28299..eb8feb61b 100644 --- a/tests/typ/text/justify.typ +++ b/tests/typ/text/justify.typ @@ -1,9 +1,16 @@ --- -#set par(indent: 14pt, spacing: 0pt, leading: 5pt, justify: true) +#set page(width: 180pt) +#set par( + lang: "en", + justify: true, + indent: 14pt, + spacing: 0pt, + leading: 5pt, +) This text is justified, meaning that spaces are stretched so that the text -forms as "block" with flush edges at both sides. +forms a "block" with flush edges at both sides. First line indents and hyphenation play nicely with justified text.