Hyphenation

This commit is contained in:
Laurenz 2022-03-08 19:49:26 +01:00
parent d24c7030d8
commit b71113d37a
9 changed files with 128 additions and 14 deletions

7
Cargo.lock generated
View File

@ -240,6 +240,12 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
[[package]]
name = "hypher"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29349e08e99b98d0e16a0ca738d181d5c73431a9a46b78918318c4bc9b10106"
[[package]] [[package]]
name = "iai" name = "iai"
version = "0.1.1" version = "0.1.1"
@ -806,6 +812,7 @@ dependencies = [
"either", "either",
"flate2", "flate2",
"fxhash", "fxhash",
"hypher",
"iai", "iai",
"image", "image",
"kurbo", "kurbo",

View File

@ -22,6 +22,7 @@ serde = { version = "1", features = ["derive"] }
typed-arena = "2" typed-arena = "2"
# Text and font handling # Text and font handling
hypher = "0.1"
kurbo = "0.8" kurbo = "0.8"
ttf-parser = "0.12" ttf-parser = "0.12"
rustybuzz = "0.4" rustybuzz = "0.4"

View File

@ -1,3 +1,5 @@
//! Text shaping and paragraph layout.
mod deco; mod deco;
mod link; mod link;
mod par; mod par;

View File

@ -27,12 +27,17 @@ pub enum ParChild {
#[class] #[class]
impl ParNode { impl ParNode {
/// An ISO 639-1 language code.
pub const LANG: Option<EcoString> = None;
/// The direction for text and inline objects. /// The direction for text and inline objects.
pub const DIR: Dir = Dir::LTR; pub const DIR: Dir = Dir::LTR;
/// How to align text and inline objects in their line. /// How to align text and inline objects in their line.
pub const ALIGN: Align = Align::Left; pub const ALIGN: Align = Align::Left;
/// Whether to justify text in its line. /// Whether to justify text in its line.
pub const JUSTIFY: bool = false; pub const JUSTIFY: bool = false;
/// Whether to hyphenate text to improve line breaking. When `auto`, words
/// will be hyphenated if and only if justification is enabled.
pub const HYPHENATE: Smart<bool> = Smart::Auto;
/// The spacing between lines (dependent on scaled font size). /// The spacing between lines (dependent on scaled font size).
pub const LEADING: Linear = Relative::new(0.65).into(); pub const LEADING: Linear = Relative::new(0.65).into();
/// The extra spacing between paragraphs (dependent on scaled font size). /// The extra spacing between paragraphs (dependent on scaled font size).
@ -49,11 +54,12 @@ impl ParNode {
} }
fn set(args: &mut Args, styles: &mut StyleMap) -> TypResult<()> { fn set(args: &mut Args, styles: &mut StyleMap) -> TypResult<()> {
let lang = args.named::<Option<EcoString>>("lang")?;
let mut dir = let mut dir =
args.named("lang")? lang.clone().flatten().map(|iso| match iso.to_lowercase().as_str() {
.map(|iso: EcoString| match iso.to_lowercase().as_str() { "ar" | "dv" | "fa" | "he" | "ks" | "pa" | "ps" | "sd" | "ug" | "ur"
"ar" | "he" | "fa" | "ur" | "ps" | "yi" => Dir::RTL, | "yi" => Dir::RTL,
"en" | "fr" | "de" => Dir::LTR,
_ => Dir::LTR, _ => Dir::LTR,
}); });
@ -74,9 +80,11 @@ impl ParNode {
dir.map(|dir| dir.start().into()) dir.map(|dir| dir.start().into())
}; };
styles.set_opt(Self::LANG, lang);
styles.set_opt(Self::DIR, dir); styles.set_opt(Self::DIR, dir);
styles.set_opt(Self::ALIGN, align); styles.set_opt(Self::ALIGN, align);
styles.set_opt(Self::JUSTIFY, args.named("justify")?); styles.set_opt(Self::JUSTIFY, args.named("justify")?);
styles.set_opt(Self::HYPHENATE, args.named("hyphenate")?);
styles.set_opt(Self::LEADING, args.named("leading")?); styles.set_opt(Self::LEADING, args.named("leading")?);
styles.set_opt(Self::SPACING, args.named("spacing")?); styles.set_opt(Self::SPACING, args.named("spacing")?);
styles.set_opt(Self::INDENT, args.named("indent")?); styles.set_opt(Self::INDENT, args.named("indent")?);
@ -137,7 +145,7 @@ impl Layout for ParNode {
let par = ParLayout::new(ctx, self, bidi, regions, &styles)?; let par = ParLayout::new(ctx, self, bidi, regions, &styles)?;
// Break the paragraph into lines. // Break the paragraph into lines.
let lines = break_into_lines(&mut ctx.fonts, &par, regions.first.x); let lines = break_into_lines(&mut ctx.fonts, &par, regions.first.x, styles);
// Stack the lines into one frame per region. // Stack the lines into one frame per region.
Ok(stack_lines(&ctx.fonts, lines, regions, styles)) Ok(stack_lines(&ctx.fonts, lines, regions, styles))
@ -278,6 +286,7 @@ impl<'a> ParLayout<'a> {
fonts: &mut FontStore, fonts: &mut FontStore,
mut range: Range, mut range: Range,
mandatory: bool, mandatory: bool,
hyphen: bool,
) -> LineLayout<'a> { ) -> LineLayout<'a> {
// Find the items which bound the text range. // Find the items which bound the text range.
let last_idx = self.find(range.end.saturating_sub(1)).unwrap(); let last_idx = self.find(range.end.saturating_sub(1)).unwrap();
@ -308,7 +317,10 @@ impl<'a> ParLayout<'a> {
// empty string. // empty string.
if !shifted.is_empty() || rest.is_empty() { if !shifted.is_empty() || rest.is_empty() {
// Reshape that part. // Reshape that part.
let reshaped = shaped.reshape(fonts, shifted); let mut reshaped = shaped.reshape(fonts, shifted);
if hyphen {
reshaped.push_hyphen(fonts);
}
last = Some(ParItem::Text(reshaped)); last = Some(ParItem::Text(reshaped));
} }
@ -524,6 +536,7 @@ fn break_into_lines<'a>(
fonts: &mut FontStore, fonts: &mut FontStore,
par: &'a ParLayout<'a>, par: &'a ParLayout<'a>,
width: Length, width: Length,
styles: StyleChain,
) -> Vec<LineLayout<'a>> { ) -> Vec<LineLayout<'a>> {
// The already determined lines and the current line attempt. // The already determined lines and the current line attempt.
let mut lines = vec![]; let mut lines = vec![];
@ -531,9 +544,9 @@ fn break_into_lines<'a>(
let mut last = None; let mut last = None;
// Find suitable line breaks. // Find suitable line breaks.
for (end, mandatory) in LineBreakIterator::new(&par.bidi.text) { for (end, mandatory, hyphen) in breakpoints(&par.bidi.text, styles) {
// Compute the line and its size. // Compute the line and its size.
let mut line = par.line(fonts, start .. end, mandatory); let mut line = par.line(fonts, start .. end, mandatory, hyphen);
// If the line doesn't fit anymore, we push the last fitting attempt // If the line doesn't fit anymore, we push the last fitting attempt
// into the stack and rebuild the line from its end. The resulting // into the stack and rebuild the line from its end. The resulting
@ -542,7 +555,7 @@ fn break_into_lines<'a>(
if let Some((last_line, last_end)) = last.take() { if let Some((last_line, last_end)) = last.take() {
lines.push(last_line); lines.push(last_line);
start = last_end; start = last_end;
line = par.line(fonts, start .. end, mandatory); line = par.line(fonts, start .. end, mandatory, hyphen);
} }
} }
@ -565,6 +578,47 @@ fn break_into_lines<'a>(
lines lines
} }
/// Determine all possible points in the text where lines can be broken.
///
/// Yields `(end, mandatory, hyphen)` triples, where `end` is a byte offset
/// into `text` at which a line may end, `mandatory` tells whether the line
/// has to end there and `hyphen` tells whether a hyphen must be appended to
/// the line when it is broken at this point.
///
/// Hyphenation points are only produced if hyphenation is enabled (either
/// explicitly or, when set to `auto`, through justification) and if the
/// paragraph's language code resolves to a `hypher` language. Otherwise, only
/// the breaks reported by the `LineBreakIterator` are yielded, all with
/// `hyphen == false`.
fn breakpoints<'a>(
    text: &'a str,
    styles: StyleChain,
) -> impl Iterator<Item = (usize, bool, bool)> + 'a {
    // Only resolve the language if hyphenation is wanted. With `auto`, this
    // falls back to whether justification is enabled.
    let mut lang = None;
    if styles.get(ParNode::HYPHENATE).unwrap_or(styles.get(ParNode::JUSTIFY)) {
        lang = styles
            .get_ref(ParNode::LANG)
            .as_ref()
            .and_then(|iso| iso.as_bytes().try_into().ok())
            // The language code is matched case-insensitively when the text
            // direction is derived from it, so normalize it here as well.
            // Otherwise a code like "EN" would silently disable hyphenation.
            .map(|mut code: [u8; 2]| {
                code.make_ascii_lowercase();
                code
            })
            .and_then(hypher::Lang::from_iso);
    }

    let breaks = LineBreakIterator::new(text);

    // Byte offset at which the previous line break opportunity ended, i.e.
    // where the current segment starts.
    let mut last = 0;

    if let Some(lang) = lang {
        Either::Left(breaks.flat_map(move |(end, mandatory)| {
            // The segment between two break opportunities, with trailing
            // non-alphabetic characters (whitespace, punctuation) removed so
            // that only the word itself is hyphenated.
            let word = &text[last .. end];
            let trimmed = word.trim_end_matches(|c: char| !c.is_alphabetic());

            // Offset at which the trimmed word ends.
            let suffix = last + trimmed.len();

            // Running offset of the current syllable boundary. This also
            // advances `last` to the end of this segment.
            let mut start = std::mem::replace(&mut last, end);

            if trimmed.is_empty() {
                // Nothing to hyphenate: keep the plain break opportunity.
                Either::Left([(end, mandatory, false)].into_iter())
            } else {
                Either::Right(hypher::hyphenate(trimmed, lang).map(move |syllable| {
                    start += syllable.len();

                    // After the last syllable, skip over the trimmed suffix
                    // so that the final breakpoint is the original one.
                    if start == suffix {
                        start = end;
                    }

                    // Breakpoints inside the word need a hyphen and can never
                    // be mandatory.
                    let hyphen = start < end;
                    (start, mandatory && !hyphen, hyphen)
                }))
            }
        }))
    } else {
        Either::Right(breaks.map(|(e, m)| (e, m, false)))
    }
}
/// Combine the lines into one frame per region. /// Combine the lines into one frame per region.
fn stack_lines( fn stack_lines(
fonts: &FontStore, fonts: &FontStore,

View File

@ -135,6 +135,34 @@ impl<'a> ShapedText<'a> {
} }
} }
/// Append a hyphen glyph to the end of the shaped text.
///
/// The font families from the text's styles are tried in order and the first
/// face that provides a `-` glyph with a horizontal advance is used. If no
/// such face is found, the text is left unchanged.
pub fn push_hyphen(&mut self, fonts: &mut FontStore) {
    // Allow hyphens to overhang a bit.
    const INSET: f64 = 0.4;

    let font_size = self.styles.get(TextNode::SIZE).abs;
    let variant = variant(self.styles);

    for family in families(self.styles) {
        // Skip families for which no face can be selected.
        let face_id = match fonts.select(family, variant) {
            Some(id) => id,
            None => continue,
        };

        let face = fonts.get(face_id);
        let ttf = face.ttf();

        // Skip faces that lack a hyphen glyph or its advance width.
        let glyph_id = match ttf.glyph_index('-') {
            Some(id) => id,
            None => continue,
        };
        let raw_advance = match ttf.glyph_hor_advance(glyph_id) {
            Some(advance) => advance,
            None => continue,
        };

        // Only the `INSET` fraction of the hyphen's advance is added to the
        // text's width, while the glyph itself keeps its full advance.
        let x_advance = face.to_em(raw_advance);
        self.size.x += INSET * x_advance.resolve(font_size);
        self.glyphs.to_mut().push(ShapedGlyph {
            face_id,
            glyph_id: glyph_id.0,
            x_advance,
            x_offset: Em::zero(),
            text_index: self.text.len(),
            safe_to_break: true,
            is_space: false,
        });

        // The first usable face wins.
        return;
    }
}
/// Find the subslice of glyphs that represent the given text range if both /// Find the subslice of glyphs that represent the given text range if both
/// sides are safe to break. /// sides are safe to break.
fn slice_safe_to_break(&self, text_range: Range<usize>) -> Option<&[ShapedGlyph]> { fn slice_safe_to_break(&self, text_range: Range<usize>) -> Option<&[ShapedGlyph]> {
@ -531,8 +559,9 @@ fn measure(
if glyphs.is_empty() { if glyphs.is_empty() {
// When there are no glyphs, we just use the vertical metrics of the // When there are no glyphs, we just use the vertical metrics of the
// first available font. // first available font.
let variant = variant(styles);
for family in families(styles) { for family in families(styles) {
if let Some(face_id) = fonts.select(family, variant(styles)) { if let Some(face_id) = fonts.select(family, variant) {
expand(fonts.get(face_id)); expand(fonts.get(face_id));
break; break;
} }

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View File

@ -0,0 +1,14 @@
// Test hyphenation.
---
#set page(width: 70pt)
#set par(lang: "en", hyphenate: true)
Warm welcomes to Typst.
#h(6pt) networks, the rest.
---
#set page(width: 60pt)
#set par(lang: "el", hyphenate: true)
διαμερίσματα. \
λατρευτός

View File

@ -1,9 +1,16 @@
--- ---
#set par(indent: 14pt, spacing: 0pt, leading: 5pt, justify: true) #set page(width: 180pt)
#set par(
lang: "en",
justify: true,
indent: 14pt,
spacing: 0pt,
leading: 5pt,
)
This text is justified, meaning that spaces are stretched so that the text This text is justified, meaning that spaces are stretched so that the text
forms as "block" with flush edges at both sides. forms a "block" with flush edges at both sides.
First line indents and hyphenation play nicely with justified text. First line indents and hyphenation play nicely with justified text.