From 97de0a0595d28e29d944112ab6e06700d9c9d73d Mon Sep 17 00:00:00 2001 From: Matt Fellenz Date: Tue, 30 Apr 2024 05:18:19 -0700 Subject: [PATCH] Various text layout config improvements (#3787) --- crates/typst/src/layout/inline/mod.rs | 43 ++++++--- crates/typst/src/text/mod.rs | 123 +++++++++++++++++++++++- tests/ref/costs-hyphenation-avoid.png | Bin 0 -> 1642 bytes tests/ref/costs-runt-allow.png | Bin 0 -> 607 bytes tests/ref/costs-runt-avoid.png | Bin 0 -> 1265 bytes tests/ref/costs-widow-orphan.png | Bin 0 -> 2676 bytes tests/suite/layout/inline/hyphenate.typ | 56 +++++++++++ 7 files changed, 201 insertions(+), 21 deletions(-) create mode 100644 tests/ref/costs-hyphenation-avoid.png create mode 100644 tests/ref/costs-runt-allow.png create mode 100644 tests/ref/costs-runt-avoid.png create mode 100644 tests/ref/costs-widow-orphan.png diff --git a/crates/typst/src/layout/inline/mod.rs b/crates/typst/src/layout/inline/mod.rs index fd1e60db9..77408c7a2 100644 --- a/crates/typst/src/layout/inline/mod.rs +++ b/crates/typst/src/layout/inline/mod.rs @@ -117,6 +117,7 @@ struct Preparation<'a> { spans: SpanMapper, /// Whether to hyphenate if it's the same for all children. hyphenate: Option, + costs: crate::text::Costs, /// The text language if it's the same for all children. lang: Option, /// The paragraph's resolved horizontal alignment. @@ -630,11 +631,14 @@ fn prepare<'a>( add_cjk_latin_spacing(&mut items); } + let costs = TextElem::costs_in(styles); + Ok(Preparation { bidi, items, spans, hyphenate: shared_get(styles, children, TextElem::hyphenate_in), + costs, lang: shared_get(styles, children, TextElem::lang_in), align: AlignElem::alignment_in(styles).resolve(styles).x, justify: ParElem::justify_in(styles), @@ -876,12 +880,15 @@ fn linebreak_optimized<'a>( } // Cost parameters. 
- const HYPH_COST: Cost = 0.5; - const RUNT_COST: Cost = 0.5; + const DEFAULT_HYPH_COST: Cost = 0.5; + const DEFAULT_RUNT_COST: Cost = 0.5; const CONSECUTIVE_DASH_COST: Cost = 0.3; const MAX_COST: Cost = 1_000_000.0; const MIN_RATIO: f64 = -1.0; + let hyph_cost = DEFAULT_HYPH_COST * p.costs.hyphenation().get(); + let runt_cost = DEFAULT_RUNT_COST * p.costs.runt().get(); + // Dynamic programming table. let mut active = 0; let mut table = vec![Entry { @@ -965,12 +972,12 @@ fn linebreak_optimized<'a>( // Penalize runts. if k == i + 1 && is_end { - cost += RUNT_COST; + cost += runt_cost; } // Penalize hyphens. if breakpoint == Breakpoint::Hyphen { - cost += HYPH_COST; + cost += hyph_cost; } // In Knuth paper, cost = (1 + 100|r|^3 + p)^2 + a, @@ -1212,19 +1219,23 @@ fn finalize( .map(|line| commit(engine, p, line, width, region.y, shrink)) .collect::>()?; - // Prevent orphans. - if frames.len() >= 2 && !frames[1].is_empty() { - let second = frames.remove(1); - let first = &mut frames[0]; - merge(first, second, p.leading); + // Positive ratios enable prevention, while zero and negative ratios disable it. + if p.costs.orphan().get() > 0.0 { + // Prevent orphans. + if frames.len() >= 2 && !frames[1].is_empty() { + let second = frames.remove(1); + let first = &mut frames[0]; + merge(first, second, p.leading); + } } - - // Prevent widows. - let len = frames.len(); - if len >= 2 && !frames[len - 2].is_empty() { - let second = frames.pop().unwrap(); - let first = frames.last_mut().unwrap(); - merge(first, second, p.leading); + if p.costs.widow().get() > 0.0 { + // Prevent widows. 
+ let len = frames.len(); + if len >= 2 && !frames[len - 2].is_empty() { + let second = frames.pop().unwrap(); + let first = frames.last_mut().unwrap(); + merge(first, second, p.leading); + } } Ok(Fragment::frames(frames)) diff --git a/crates/typst/src/text/mod.rs b/crates/typst/src/text/mod.rs index ef621ee56..0b8818e80 100644 --- a/crates/typst/src/text/mod.rs +++ b/crates/typst/src/text/mod.rs @@ -37,13 +37,12 @@ use ttf_parser::Rect; use crate::diag::{bail, warning, SourceResult, StrResult}; use crate::engine::Engine; -use crate::foundations::Packed; use crate::foundations::{ - cast, category, elem, Args, Array, Cast, Category, Construct, Content, Dict, Fold, - NativeElement, Never, PlainText, Repr, Resolve, Scope, Set, Smart, StyleChain, + cast, category, dict, elem, Args, Array, Cast, Category, Construct, Content, Dict, + Fold, NativeElement, Never, Packed, PlainText, Repr, Resolve, Scope, Set, Smart, + StyleChain, }; -use crate::layout::Em; -use crate::layout::{Abs, Axis, Dir, Length, Rel}; +use crate::layout::{Abs, Axis, Dir, Em, Length, Ratio, Rel}; use crate::model::ParElem; use crate::syntax::Spanned; use crate::visualize::{Color, Paint, RelativeTo, Stroke}; @@ -482,6 +481,52 @@ pub struct TextElem { #[ghost] pub hyphenate: Hyphenate, + /// The "cost" of various choices when laying out text. A higher cost means + /// the layout engine will make the choice less often. Costs are specified + /// as a ratio of the default cost, so `50%` will make text layout twice as + /// eager to make a given choice, while `200%` will make it half as eager. 
+ /// + /// Currently, the following costs can be customized: + /// - `hyphenation`: splitting a word across multiple lines + /// - `runt`: ending a paragraph with a line with a single word + /// - `widow`: leaving a single line of paragraph on the next page + /// - `orphan`: leaving a single line of paragraph on the previous page + /// + /// Hyphenation is generally avoided by placing the whole word on the next + /// line, so a higher hyphenation cost can result in awkward justification + /// spacing. + /// + /// Runts are avoided by placing more or fewer words on previous lines, so a + /// higher runt cost can result in more awkward justification spacing. + /// + /// Text layout prevents widows and orphans by default because they are + /// generally discouraged by style guides. However, in some contexts they + /// are allowed because the prevention method, which moves a line to the + /// next page, can result in an uneven number of lines between pages. + /// The `widow` and `orphan` costs allow disabling these modifications. + /// (Currently, 0% allows widows/orphans; anything else, including the + /// default of `100%`, prevents them. More nuanced cost specification for + /// these modifications is planned for the future.) + /// + /// The default costs are an acceptable balance, but some may find that it + /// hyphenates or avoids runts too eagerly, breaking the flow of dense prose. + /// A cost of 600% (six times the normal cost) may work better for such + /// contexts. + /// + /// ```example + /// #set text(hyphenate: true, size: 11.4pt) + /// #set par(justify: true) + /// + /// #lorem(10) + /// + /// // Set hyphenation to ten times the normal cost. + /// #set text(costs: (hyphenation: 1000%)) + /// + /// #lorem(10) + /// ``` + #[fold] + pub costs: Costs, + /// Whether to apply kerning. 
/// /// When enabled, specific letter pairings move closer together or further @@ -1184,3 +1229,71 @@ impl Fold for WeightDelta { Self(outer.0 + self.0) } } + +/// Costs that are updated (prioritizing the later value) when folded. +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)] +#[non_exhaustive] // We may add more costs in the future. +pub struct Costs { + pub hyphenation: Option, + pub runt: Option, + pub widow: Option, + pub orphan: Option, +} + +impl Costs { + #[inline] + #[must_use] + pub fn hyphenation(&self) -> Ratio { + self.hyphenation.unwrap_or(Ratio::one()) + } + + #[inline] + #[must_use] + pub fn runt(&self) -> Ratio { + self.runt.unwrap_or(Ratio::one()) + } + + #[inline] + #[must_use] + pub fn widow(&self) -> Ratio { + self.widow.unwrap_or(Ratio::one()) + } + + #[inline] + #[must_use] + pub fn orphan(&self) -> Ratio { + self.orphan.unwrap_or(Ratio::one()) + } +} + +impl Fold for Costs { + #[inline] + fn fold(self, outer: Self) -> Self { + Self { + hyphenation: self.hyphenation.or(outer.hyphenation), + runt: self.runt.or(outer.runt), + widow: self.widow.or(outer.widow), + orphan: self.orphan.or(outer.orphan), + } + } +} + +cast! 
{ + Costs, + self => dict![ + "hyphenation" => self.hyphenation(), + "runt" => self.runt(), + "widow" => self.widow(), + "orphan" => self.orphan(), + ].into_value(), + mut v: Dict => { + let ret = Self { + hyphenation: v.take("hyphenation").ok().map(|v| v.cast()).transpose()?, + runt: v.take("runt").ok().map(|v| v.cast()).transpose()?, + widow: v.take("widow").ok().map(|v| v.cast()).transpose()?, + orphan: v.take("orphan").ok().map(|v| v.cast()).transpose()?, + }; + v.finish(&["hyphenation", "runt", "widow", "orphan"])?; + ret + }, +} diff --git a/tests/ref/costs-hyphenation-avoid.png b/tests/ref/costs-hyphenation-avoid.png new file mode 100644 index 0000000000000000000000000000000000000000..8efaef63cc25d190f630bc0f088811e03e685024 GIT binary patch literal 1642 zcmV-w29^1VP)?)Mku^4MciVKM*3<>NwYFAF6|7Y#cz`Zdpnx2TU_lt}=@v>s1|07Dkjf!g z0R;gOIprvFRW8S2067H)hk==SZ<;j$LJ1j2WYf(13B2+<^8MrU7Ms*tI#>tm;6E$e z_5!VItKxN!8(C6owHEM$5SBIK#G2)w0zfmtbyt>@b4CmJaSzK=vSvLN0K~*l@f}O* zv|I!Jk5!LpNw-x`O`5*~O1|uFwt1PC845s}v(oBD&^a{`NfCXAZ(UF&#OG3&oyc^! 
zUyCwMe3yn`+yB9qmJcf~h|MM3$qdo}be@hq+g@wfGHCtukKw6*VVAE+mhZ1Pn*kh@ zv{g2rD%bA@7&Icp7qh1sKatGgyE7u;{cXf!sL1!4l=zB@wkwa)_4eX8sL%<5hxQ~6+sl`!Ad2Oi6wYBZ2Z6+o_k0#U9qsTyGP2;rGnw>sYhzI?%{ z?14dUq<$iubNy`SQ0c&MhY>xC@D_pSh1ajT#EY6USu^vQnUX((n|I2-4rcr> zBdQ@lwfiQOdJ8Y*0M8HbLn3GD6U|gz81xh9j8tmO!Dp#zah!`E(&?=;MFuK>_{iB( z7vh3j|1mc^3Gl?6+m~T)$m%~=0(Bkm;9|l&K;|5zx#t0pe6hhRaKtrA6Y5^qb}?v|CV>tQ8$%25K z3$0Y;W|8wjTC15x9hH@9sUj(&xaj@l4Z$^^Yf?PfHXt#=sO3j5 z#n!PhC&Dq744ck>hF~RqGzarlQunj{}d72tQ#aM-de4j?$wGmi9U8F2O`Q(;Gd&BuiS&n&E zl(oz?B~Z-yxVlq$%D~tpV!s&>I!wa2jc*&VQDKUvz|eMVqh7dg0Dk=KCLIvhot?T{ zt%G&24%P}FY`^z2y2z^47shCWM^ zeg*pL9M~4SZ;D5>$b5qIgn$XN%oAj!$54?UiY7d?kmS@9`h-5qnXC^p(Er({kf(|M zBiasZi|wMo0?#}3c4W>wM$8o3m;HS@9_#K7^ZC|{@5-dIVW|ncJ=K4ledQj$ zpxwZ>m>z|G1m#2u0hcPgg9oG(Cz+NJ{LeOr*nxrhcVQ*0$Hd|3zsjW3pxwZ>*t_Ij z#Xr4j&#PXQT?tu63@bmvuB2e1^h%TkGPXVIc}KyL1`TJ~$>4srb}*pjz_wV?g(~6I zajyVUT#BVvg!72K?jlM^326)w$<#mjRPfmO_T?s)=8{?B(3$J}Tf39^rS5X+;5Q2o z{>rjCrEduS`8Z2kI3Lf7G)iicVD;wK0Y5ioS>j|?bomkU)2yENy>-E9_FYZ?t{=Tr zYW~nI6LDCWqmp0JZ_4fO3k4{$uS;GYxZ3u8BJHAZ>4;^G;2JL*n{KAR;so3b_zr)4eJ%;ce zw$1FFhgwtMZ(v!|vDbTj@U!Do*uy76Fp+R_G%_By!W2`cRDxWLfAc8sJBIT^O@X^& z`<&_!=jzdy!|;}wS7TqaadIr1FNQpdHT|>N!iiQxVYVX(8^9zm_Q9;X*R(@`+RyXW zgk#{qx!5Z2Jge?9`woG_NNr$Puhd4|Wsw8qeiMsKATC3gc#TK4t)k|29*{V|`1zZ^ oT&C5~^^QPyt97sr*1>D`Uy0?6lw5>7oB#j-07*qoM6N<$f`pqn9smFU literal 0 HcmV?d00001 diff --git a/tests/ref/costs-runt-allow.png b/tests/ref/costs-runt-allow.png new file mode 100644 index 0000000000000000000000000000000000000000..31a348ff6f471f649916d5a7ed2e93b6dd10efce GIT binary patch literal 607 zcmV-l0-*hgP)bz3Fpi)(dgaiXN22&^@5LzBeN>D|Bk`SIrz&vZb3noIHlu0E} zrMpL#@40(T&fqR^Pk{gnumB72{|x@>o6$1P$kY*8Gq1>4h%9P7+WKem-0QF(&63Fz zvTRn#NSMs27N_APnf8)Rvp~jG@*>it6OOJr=NuYoGUX$yW|^DF*hIc501N_7%7U!; z09ywAjI+GSW$`LmC z1FXKPp>TU|yC0ER8@6Hh5q$m=)si)VdQd~A+am(YKN^~_lG6bKFY72>KhB}Pt|2k# z03oyyR2pdI#ufr@XWw>j8_Uj3#8wONF2J!57V}1S_u%vQDOM71cMpE|(a%q@H)6R2 zSbzmsfd3NgHsa|czyd750{rh{{==+r52Wfl 
zlG#I>S7wc)mD5KxGJQxE^)5T+x3!%okICH0c&nHz(lNl$TDn=PLm3;85CCiqaK4No zch4Yue6Fm!Q&RmB%0dMgTw6KwcTff~NKhfICLGg-+{$hhbKSYa_v*vkb#nEf>MC}o z(_=p~87?)u?wYtc?%=vSoFXK(-6>7FMX_;j&&>J5O3_`NPxZTK002ovPDHLkV1mgS4nF_@ literal 0 HcmV?d00001 diff --git a/tests/ref/costs-runt-avoid.png b/tests/ref/costs-runt-avoid.png new file mode 100644 index 0000000000000000000000000000000000000000..e45de59ef9f12e9eb2bdfa1482a7d0198c05e3bc GIT binary patch literal 1265 zcmV<8co?>jQsRr9>t+WMx@d0n|#(5w|&Ob6STTA%7N4K^DoY4O}jFh)0Oan~^_ww(!ib475X`kDM*yM5oe&yaN z&jZlC<&sySKdMZFQ$=7b0W$4SQfMrtOE40`2XaD<-gQ{PhJM+)Bzv-Xd?n2{Yc8~|JkM}{_XC7kVAKBqX6 zOQHP+(0QXw!(kNe1Y%rezU7a%?vcmjBdhR(L-cMyio^Y`m!xMeXK3G+(G@LSCNQVu zuSSrzR`U_KH(rdbcbPrvU2TkRqBH{64kY{qPbfFMgopbHs${ci7T!jk4~KO`OIobM za>`=(nEi2d{-&5-A!HD&-H|~jWG9Y)1`)2{*og1` z7*X8FQFi|l(BOZ%AwJn?6|UdEL>EY~L#udKi|``B6TE49%6_m>#+&x&?FZ+t&PJP) z^-5m&FsMxu_0(=~myBkw$I`Bv-8sFuJ;R=`+PCR}nIjR{6*f8v*ETUeVaRoEV`rE+B!Xhlfb|2U*nQt4gA=?Hv zTMOs}vquTm>tlZ1O0Z#Ivn?|~muw(|0V2R2Jxg|R&iTWg-lJ8z~&8I7_w&Lm6c z;Lv!Fa<}g4U8qvkyG#uqo*ln%J`qdj{3bjLqPvO21^4=kAu`|pT8Q4zS z@q*)bqk>JUa3Ma?6q;6xrL#xtL!x{9(!_yH{7Kjy9;jQQfvxdVfSfzlCbx-29oVD* zBW7kxC}6IsuNHM+`|_Hi>b0_rytD$C+A-pBWQzp(mENv3WxodUqkvSxkE2^aGG^g9k`J=rRExAWPXY9|FqzNleLhU>YC zQP@*xz=pJ{Q0gzqXvV7x=n{T70;5v3Jn~uvb+yc4v}eGunlHEhpj@muZb!E;VQRT zq?{a9e1PIuqv}Yb7VFA5t4l3A3z5ksp7ENH1mr6lb@rWqx1Yfxb!LD|*hqNXc|^?JhEOX6TH~-KR#u8G89{ zKed6Iam9|)KDoHdd}*!7pND3iz$Tawd&@?D-bqqC-p?wXwg%^x>g!_sU+~Bx{#g z<18uy9OUjxA^;e0a=*t_nFmQf#Y(bB3pve`K*u4x{i5<`_;wctISt#bjSg|(RLoP2 z1k)+QS@)kvusFl4r)65o)sM~G^A7`n(QnAL^Z%Lu(#O0{lF2LKXSC zLhaC0GpaY^@5IJybyJ3Wy#3U%iA9yhO;1(wa++Vj-#3F@0KaRXM*TKYzLOO#&dX`l zOaWBMU6z;A_8e~5LdYd{)G%e(Dz1JV7B`tcQnRlAgBIEzYbtx{fju&w-a1B{19Pcb zPx#RH9UpekcG`%isL=dzH*M*L%C4ncv|?k~)Zr}4sKXB=X9_hEuNvx(a84Qwa%J_I(1^~4;g!_V> z3#qoVUciZf?A?(+#>$V}#>*=Jne0sK?wpjv5l0fp4VC^<1Ox=hp2H7UW33(oK(3$u zNR6*u4Z$wUN8vWx^Pkdr&O3B+_+eg&4(mr)$mL7SYVZGs(_!5!2l7&l2~z50^I3bz zF4RW?y?`*|rc{fBb4Wq`wE~M8c 
zg)QUD*D|Z3@{4}ovwVkq12#eTN=o*`FB>RR9!_I6)oZY*lmqzgqCu&5hp&{WhJE!_ zfQ)U`^X^g`w?S!S2dqhK|EOIO$+5~w;R3sOlad^R8>V-yEW76Vx!1SQrx8q;!){TjVpjjp!gU4j683z^tKzpGK>m*4t`T09 z;)6h`<~xAH17f=M@1+{y?n6?@xoV9Erq=@vtNG2txsxIXol)5P-(kcAhxY0wT06%Y zXO(JX7+t5=$t#%)etN~zgClGTzQin!`D7nuO;08pVtZ$FhdZ`xCR<~0E00FuqJ!L( z6wK|iJk3|t<(e$b9+)TBE?`AX;?kzU>A||rxw)K*N0nS{5>!IiUWY32r|#&&?1F6+ z{{h=sCs5zNL8XfO|8>{2Y#}OYtoZ)^;Gb_DntL^ z@&i{A_ZFX#bVWN{Y$i~0Wp6nQYXvK8%_BP{_C*?Zt{$jd>FAzKo76KJEA6t#=)Z1& ztmt|_lPmvY2QjM!EMNf(Sik}nuz&?DU;ztQEH%YD7O;Q?ESj34sVSP8qNypGngSM0 zP0`d8O-<3%6irP5zg2jsoHtB;Hxs6%1pr0K)ZpZANsR@7h-_ZOvx4zy`&SC4pk!Qz zcKHC1_wkg^BRkswpsAM^sU4c|JhGfD^FMtasfhM{Mz_n^h1kd3m!_KK%cYsz@{S`} z7TRj|B<)Q4$$aV}f3+_Hl7iBn!w>bx&LeArY!tmAZarkWsE@)!9u6}Z@$tJqIh>hZ zXu{t1fra=u@p)P8_gIB>ZztiAHrC2RS;|jJ9_(YM+&d6Rv1y#6Fv;MXu`5MMaRPfQ z{7;r1$6*vUC}AEyrz=-IofIAj@>N%S&DeS5qS5n6v*o21Twgkml+-w60w&Ug1jWne zk>sd4^5LC?!e3qS7=`D68mgUGgiQ+D$2PCyqmJHhxa`0JSi$Kl`Oog{ySANHrA{XE zs8z=u>-Ml06H$A{o zW~Ip3JjCluI7R#4 z0qh?T)UB14a)3*YTn~{}tEU?I_-L0fy5}VO{E-(WoU6g^y_yl$(y-%+rRs6|GTP30 zMq=lnRWLm`&^-Bb<;i$dc%aRy#ymEq$p0}Ib)jJk`CI$9^7xM~Ow!ThIqgzMzHX_~ zU|E4;evjr8)|98L=u=D&KIT_fH&(*QQv3Dg63(1QTgFN_w)SIEpB(3 zI~GZj!cSDLyyRI-?|IL3&YG6RCH-{+WO>JXD&fp^6th~u0v51<1uS3z3s}Gc7O;Q? iEMNf(Sil1Q>+>Ixi#XI#-2&eL0000