From 017f2f45666abdc4a1c6a253f0e04a271b5a82c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20Ara=C3=BAjo?= Date: Wed, 15 May 2024 10:42:13 -0300 Subject: [PATCH] Fix hyphen duplication rule for some languages (#4058) --- crates/typst/src/layout/inline/mod.rs | 111 ++++++++++++++++-- crates/typst/src/layout/inline/shaping.rs | 35 ++++-- crates/typst/src/text/lang.rs | 3 + tests/ref/hyphenate-es-captalized-names.png | Bin 0 -> 4238 bytes tests/ref/hyphenate-es-repeat-hyphen.png | Bin 0 -> 3224 bytes tests/ref/hyphenate-pt-dash-emphasis.png | Bin 0 -> 983 bytes tests/ref/hyphenate-pt-no-repeat-hyphen.png | Bin 0 -> 1533 bytes ...at-hyphen-hyphenate-true-with-emphasis.png | Bin 0 -> 1350 bytes ...henate-pt-repeat-hyphen-hyphenate-true.png | Bin 0 -> 1341 bytes ...pt-repeat-hyphen-natural-word-breaking.png | Bin 0 -> 1341 bytes tests/suite/layout/inline/hyphenate.typ | 52 ++++++++ 11 files changed, 179 insertions(+), 22 deletions(-) create mode 100644 tests/ref/hyphenate-es-captalized-names.png create mode 100644 tests/ref/hyphenate-es-repeat-hyphen.png create mode 100644 tests/ref/hyphenate-pt-dash-emphasis.png create mode 100644 tests/ref/hyphenate-pt-no-repeat-hyphen.png create mode 100644 tests/ref/hyphenate-pt-repeat-hyphen-hyphenate-true-with-emphasis.png create mode 100644 tests/ref/hyphenate-pt-repeat-hyphen-hyphenate-true.png create mode 100644 tests/ref/hyphenate-pt-repeat-hyphen-natural-word-breaking.png diff --git a/crates/typst/src/layout/inline/mod.rs b/crates/typst/src/layout/inline/mod.rs index 0b73eef61..f8b17f463 100644 --- a/crates/typst/src/layout/inline/mod.rs +++ b/crates/typst/src/layout/inline/mod.rs @@ -298,6 +298,19 @@ impl SpanMapper { } } +/// A dash at the end of a line. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub(super) enum Dash { + /// A hyphen added to break a word. + SoftHyphen, + /// Regular hyphen, present in a compound word, e.g. beija-flor. + HardHyphen, + /// An em dash. + Long, + /// An en dash. + Short, +} + /// A layouted line, consisting of a sequence of layouted paragraph items that /// are mostly borrowed from the preparation phase. This type enables you to /// measure the size of a line in a range before committing to building the @@ -327,7 +340,7 @@ struct Line<'a> { justify: bool, /// Whether the line ends with a hyphen or dash, either naturally or through /// hyphenation. - dash: bool, + dash: Option, } impl<'a> Line<'a> { @@ -814,8 +827,10 @@ fn linebreak_simple<'a>( let mut last = None; breakpoints(p, |end, breakpoint| { + let prepend_hyphen = lines.last().map(should_repeat_hyphen).unwrap_or(false); + // Compute the line and its size. - let mut attempt = line(engine, p, start..end, breakpoint); + let mut attempt = line(engine, p, start..end, breakpoint, prepend_hyphen); // If the line doesn't fit anymore, we push the last fitting attempt // into the stack and rebuild the line from the attempt's end. The @@ -824,7 +839,7 @@ fn linebreak_simple<'a>( if let Some((last_attempt, last_end)) = last.take() { lines.push(last_attempt); start = last_end; - attempt = line(engine, p, start..end, breakpoint); + attempt = line(engine, p, start..end, breakpoint, prepend_hyphen); } } @@ -894,7 +909,7 @@ fn linebreak_optimized<'a>( let mut table = vec![Entry { pred: 0, total: 0.0, - line: line(engine, p, 0..0, Breakpoint::Mandatory), + line: line(engine, p, 0..0, Breakpoint::Mandatory, false), }]; let em = p.size; @@ -908,8 +923,9 @@ fn linebreak_optimized<'a>( for (i, pred) in table.iter().enumerate().skip(active) { // Layout the line. let start = pred.line.end; + let prepend_hyphen = should_repeat_hyphen(&pred.line); - let attempt = line(engine, p, start..end, breakpoint); + let attempt = line(engine, p, start..end, breakpoint, prepend_hyphen); // Determine how much the line's spaces would need to be stretched // to make it the desired width. @@ -987,7 +1003,7 @@ fn linebreak_optimized<'a>( cost = (0.01 + cost).powi(2); // Penalize two consecutive dashes (not necessarily hyphens) extra. - if attempt.dash && pred.line.dash { + if attempt.dash.is_some() && pred.line.dash.is_some() { cost += CONSECUTIVE_DASH_COST; } @@ -1022,6 +1038,7 @@ fn line<'a>( p: &'a Preparation, mut range: Range, breakpoint: Breakpoint, + prepend_hyphen: bool, ) -> Line<'a> { let end = range.end; let mut justify = @@ -1037,7 +1054,7 @@ fn line<'a>( last: None, width: Abs::zero(), justify, - dash: false, + dash: None, }; } @@ -1047,7 +1064,7 @@ fn line<'a>( // Reshape the last item if it's split in half or hyphenated. let mut last = None; - let mut dash = false; + let mut dash = None; if let Some((Item::Text(shaped), before)) = inner.split_last() { // Compute the range we want to shape, trimming whitespace at the // end of the line. @@ -1062,7 +1079,17 @@ fn line<'a>( // Deal with hyphens, dashes and justification. let shy = trimmed.ends_with('\u{ad}'); let hyphen = breakpoint == Breakpoint::Hyphen; - dash = hyphen || shy || trimmed.ends_with(['-', '–', '—']); + dash = if hyphen || shy { + Some(Dash::SoftHyphen) + } else if trimmed.ends_with('-') { + Some(Dash::HardHyphen) + } else if trimmed.ends_with('–') { + Some(Dash::Short) + } else if trimmed.ends_with('—') { + Some(Dash::Long) + } else { + None + }; justify |= text.ends_with('\u{2028}'); // Deal with CJK punctuation at line ends. @@ -1079,7 +1106,11 @@ fn line<'a>( // need the shaped empty string to make the line the appropriate // height. That is the case exactly if the string is empty and there // are no other items in the line. - if hyphen || start + shaped.text.len() > range.end || maybe_adjust_last_glyph { + if hyphen + || start + shaped.text.len() > range.end + || maybe_adjust_last_glyph + || prepend_hyphen + { if hyphen || start < range.end || before.is_empty() { let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end); if hyphen || shy { @@ -1131,7 +1162,10 @@ fn line<'a>( let end = range.end.min(base + shaped.text.len()); // Reshape if necessary. - if range.start + shaped.text.len() > end || maybe_adjust_first_glyph { + if range.start + shaped.text.len() > end + || maybe_adjust_first_glyph + || prepend_hyphen + { // If the range is empty, we don't want to push an empty text item. if range.start < end { let reshaped = shaped.reshape(engine, &p.spans, range.start..end); @@ -1143,6 +1177,15 @@ fn line<'a>( } } + if prepend_hyphen { + let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut); + if let Some(reshaped) = reshaped { + let width_before = reshaped.width; + reshaped.prepend_hyphen(engine, p.fallback); + width += reshaped.width - width_before; + } + } + if maybe_adjust_first_glyph { let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut); if let Some(reshaped) = reshaped { @@ -1446,3 +1489,49 @@ fn overhang(c: char) -> f64 { _ => 0.0, } } + +/// Whether the hyphen should repeat at the start of the next line. +fn should_repeat_hyphen(pred_line: &Line) -> bool { + // If the predecessor line does not end with a Dash::HardHyphen, we shall + // not place a hyphen at the start of the next line. + if pred_line.dash != Some(Dash::HardHyphen) { + return false; + } + + // If there's a trimmed out space, we needn't repeat the hyphen. That's the + // case of a text like "...kebab é a -melhor- comida que existe", where the + // hyphens are a kind of emphasis marker. + if pred_line.trimmed.end != pred_line.end { + return false; + } + + // The hyphen should repeat only in the languages that require that feature. + // For more information see the discussion at https://github.com/typst/typst/issues/3235 + let Some(Item::Text(shape)) = pred_line.last.as_ref() else { return false }; + + match shape.lang { + // - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3 + // - Czech: see https://prirucka.ujc.cas.cz/?id=164 + // - Croatian: see http://pravopis.hr/pravilo/spojnica/68/ + // - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni + // - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX) + // - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/ + Lang::LOWER_SORBIAN + | Lang::CZECH + | Lang::CROATIAN + | Lang::POLISH + | Lang::PORTUGUESE + | Lang::SLOVAK => true, + // In Spanish the hyphen is required only if the word next to hyphen is + // not capitalized. Otherwise, the hyphen must not be repeated. + // + // See § 4.1.1.1.2.e on the "Ortografía de la lengua española" + // https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea + Lang::SPANISH => pred_line.bidi.text[pred_line.end..] + .chars() + .next() + .map(|c| !c.is_uppercase()) + .unwrap_or(false), + _ => false, + } +} diff --git a/crates/typst/src/layout/inline/shaping.rs b/crates/typst/src/layout/inline/shaping.rs index ff13f7766..57b94230c 100644 --- a/crates/typst/src/layout/inline/shaping.rs +++ b/crates/typst/src/layout/inline/shaping.rs @@ -447,6 +447,15 @@ impl<'a> ShapedText<'a> { /// Push a hyphen to end of the text. pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) { + self.insert_hyphen(engine, fallback, Side::Right) + } + + /// Prepend a hyphen to start of the text. + pub fn prepend_hyphen(&mut self, engine: &Engine, fallback: bool) { + self.insert_hyphen(engine, fallback, Side::Left) + } + + fn insert_hyphen(&mut self, engine: &Engine, fallback: bool, side: Side) { let world = engine.world; let book = world.book(); let fallback_func = if fallback { @@ -464,17 +473,17 @@ impl<'a> ShapedText<'a> { let ttf = font.ttf(); let glyph_id = ttf.glyph_index('-')?; let x_advance = font.to_em(ttf.glyph_hor_advance(glyph_id)?); - let range = self - .glyphs - .last() - .map(|g| g.range.end..g.range.end) - // In the unlikely chance that we hyphenate after an empty line, - // ensure that the glyph range still falls after self.base so - // that subtracting either of the endpoints by self.base doesn't - // underflow. See . - .unwrap_or_else(|| self.base..self.base); + let range = match side { + Side::Left => self.glyphs.first().map(|g| g.range.start..g.range.start), + Side::Right => self.glyphs.last().map(|g| g.range.end..g.range.end), + } + // In the unlikely chance that we hyphenate after an empty line, + // ensure that the glyph range still falls after self.base so + // that subtracting either of the endpoints by self.base doesn't + // underflow. See . + .unwrap_or_else(|| self.base..self.base); self.width += x_advance.at(self.size); - self.glyphs.to_mut().push(ShapedGlyph { + let glyph = ShapedGlyph { font, glyph_id: glyph_id.0, x_advance, @@ -487,7 +496,11 @@ impl<'a> ShapedText<'a> { span: (Span::detached(), 0), is_justifiable: false, script: Script::Common, - }); + }; + match side { + Side::Left => self.glyphs.to_mut().insert(0, glyph), + Side::Right => self.glyphs.to_mut().push(glyph), + } Some(()) }); } diff --git a/crates/typst/src/text/lang.rs b/crates/typst/src/text/lang.rs index 67df0c6e0..6809238aa 100644 --- a/crates/typst/src/text/lang.rs +++ b/crates/typst/src/text/lang.rs @@ -57,6 +57,7 @@ impl Lang { pub const BOKMÅL: Self = Self(*b"nb ", 2); pub const CATALAN: Self = Self(*b"ca ", 2); pub const CHINESE: Self = Self(*b"zh ", 2); + pub const CROATIAN: Self = Self(*b"hr ", 2); pub const CZECH: Self = Self(*b"cs ", 2); pub const DANISH: Self = Self(*b"da ", 2); pub const DUTCH: Self = Self(*b"nl ", 2); @@ -70,12 +71,14 @@ impl Lang { pub const HUNGARIAN: Self = Self(*b"hu ", 2); pub const ITALIAN: Self = Self(*b"it ", 2); pub const JAPANESE: Self = Self(*b"ja ", 2); + pub const LOWER_SORBIAN: Self = Self(*b"dsb", 3); pub const NYNORSK: Self = Self(*b"nn ", 2); pub const POLISH: Self = Self(*b"pl ", 2); pub const PORTUGUESE: Self = Self(*b"pt ", 2); pub const ROMANIAN: Self = Self(*b"ro ", 2); pub const RUSSIAN: Self = Self(*b"ru ", 2); pub const SERBIAN: Self = Self(*b"sr ", 2); + pub const SLOVAK: Self = Self(*b"sk ", 2); pub const SLOVENIAN: Self = Self(*b"sl ", 2); pub const SPANISH: Self = Self(*b"es ", 2); pub const SWEDISH: Self = Self(*b"sv ", 2); diff --git a/tests/ref/hyphenate-es-captalized-names.png b/tests/ref/hyphenate-es-captalized-names.png new file mode 100644 index 0000000000000000000000000000000000000000..803d6795873eda161c8f54f9b1c424f1cee576a1 GIT binary patch literal 4238 zcmV;95OME`P)snxY000nGNkll2o40r}S z1D*lTfZuy?)jeLVEt#TH0OUE3;d$M+Kvh8n(`+=Y?>B+9%(c}oYw&8(yWmB`G5{K9 zbNwQg)u1pC!t=U51XWkfd40WbPV^F3y&P4svs{dXs5(4075EMzm$=KTHR-^MHn#(S z8kv`o0l0*(KTjv6$Li1r2 zI3(1c zJ>B;7mCOog-1_*JzZ=p?usn& zJA3nwjn{!}`A_H(S~SPnNWc#-4%CyS9mVxYXena7N(nG!^(%p@SQ4VM0rK zzXd*(qQ-*qwq6xDJY;D%JXsFRlW;yU^D+)m2w-3R1(!AvrdfR7c_0_P%0Aa%O@KKZ zO`_rm&JnC0A=-lGh)CI+!G1=N^&`Wp(JqM^kFkdF589L+)mwnzZFDGul#AHXG`@tB zseD4B5l8MOYbj4z*RG-b0mzuQst^*|h{&ZZVmx8V+@jTaqnJ-oB zs1R*e#0urX@DCfMI2 z-`S-PcyP*h!pHD2C|xe!!7M0|6l1Z6e~4|yv7GD1<>&0NzZz*Jx$W*D0a}lTTBQ$M zu&W38HItqz5v~zH(J^1;n4NbH*G>8^TPPiz>kU1Xu9vfVja^#KgpOZu^znXlE#0Ud z;*Nz;=8oHqNUP$L;jyj(&NE9l1D*lD=U@e|)|o5gACo;C=Jho+@mm`xoCbKnCCct9 zt(Ne-hv#`Ekx_Cg|5)F$Y5?SuyvY9naLOK3^_aqoCYZ8EbEgK6Xbr3l!m(~ zeLd`>VBq_ckbnYcn84S<+FNXg3>z|*);YA+xMoKCK@~(J(3i8wMt(@gQRPE{_0>*B-w*fLA>vGq?38O;7yy5||?e#Aha`MWjKKQ)+ zP2f|(@F=#ZZWksteB4@hNU^T!tk2`GMuoP6o2vGvrm~>}ut?=l?` zFdMANGr%IYf3q?l*fqpLbeff4_ZpPm=S^UnGa&i8s7V{MGz7G7X0j`7hwVl$VK6%>_m?ZOs zqItvUpZgF$(IWn0^M~Wgff{X+fJ?V<^OG-VxoV6UFm^=afz=tr?0T&Hqyn+>3u?Dx z8k?hzqgnH}^ymO$P0nUb{Dgqg4p2F zNY@$niveS2Ewh3CLv|Jp*mvQq^t$C(!8NBw;I4U`UK6A^TX%8ghV!{cW-g^b!6Bh;cGnfJOsoSPf0yB? zz4Fhsbe=B#Gt)c+{@({rH1kdyZ)Vy}KKTt`zb(AeJa>Ysf_Pq=lIQ(2kKE+91-I_t z9kinlR5j{=s^K19+ZqMWQ)8&_t-(9vF0{}b7C>MKl2~VGF(@@nms=EwjC;ro$)hOTqh+p}Xu!j#8E9PN6Q)ok`*+|FOSPB( z7@!RFFHLpCHIcl~fQDKAJ>4F@9gMhIFXToNg;9AxrBR_tArddYO6(Gi+kOQ+zbERl zWJ~w)q_WTJ#OJRnXV+BeV#U^p;aROoXIUk0UCH{%bf9DtF&)7A#F6O zBL0hW5FlO(3r9W!HKH828~Wuipaz?2KLNem6f<};xOpqN2BQCzGFGJxrpYW&wr(8U z$o}dHH69+4&6TlAo;9!tAnxnxXW&-$bPAnNudzA9gd%fbA?x~>H;9Jx8Q41JMslyl z&)}AC7~2T!3!;awh&w%0(7lw!3dN@qR`ECS*jz$wo*ZD+5TK?+b7E~B``Y47e+}%n zqtiyG$4}QsY_siA|3UL=jV$4oCSBoKmnhA8=UwgDmmMFX8{`2X*Wel0(l?QwBaX+d zM~_^Mdv?*)u%pD-EgNV)9O6&*xv1^+vG#=8S~ml2+s#X`HFMT#I_e*Y=Vwua@y&z`eg93VVC+dfzF*Bdd9B7i-{o)f0ey%yb{&2vi^A)x33HK~sW9SJR!8 z1lfKCKu6+ja=a1V?pboJK^`I$;hAUGVAD*6`=%HQ!bA}1`Z1&i_K*{h3%KRm4z@<5 zN*u}@V+l)_#6~b0ARHUL9W~i%-fo@4$nEG!ii9ZukRDt{gMXZ|&brB} z6reUD2k&WtO*vE7cR(HwT;gcUs3VXQU$ zSZW`LeGFNV3z(-sjm3UpX&l+o3WPh6x;{L#!#qw3Jm0$?=!q8VE| z5HJ#Ee2q)%>HyfA53wc;2v7q7;HwiJIyepR(08nwbc(1PjMoEJDy$M%K%Or9Y7lgx z-4S&i^oo7YU>TZXjeunp%vG#QL($Nar2_~ZC|Cjt`zTyv>vnS^5CLfym`}xl$v=)T ztlfV5D&LEk&yKDn%dRICYmoqkjtm!Vr)LL?6wC)=wt>>!e@bw`*A@CcrYG{w*`@-z zeisZUubJ9?)8ehySbD-URC;u0Tg&<@qgq!^?khSNxwhyhO_z1y6`SntK056f2jjZC zJ2|KC*KfZs^adQfep*MO9$+{2(-&D5>Q5sy9J_DWIPET68Cbd?>zb{@o+>>rzpj-) z*M^JfSh|=umkLtud{qqc!)F6~4;&i*zZwqvXr|k6#Cx!GlHDgHOfwnp1Gv6kI&+rz z{{)sZMRKN}{I%cZOql}Y(*s){0Py8`QNz5P-v*{#?%&~gRsW_ymHv6As`i6^w~f;S zKhgzA1Av2&Gr`MsYVq3iCwR5RuX~S`af#P_=q)EQBWa;E~lPJ{i&8*!GctwxQMwr~RRRun!Tu-KZbpZ^0 zm{q)#(`buX%=jYDgo-V=xTwHq*xTJ^FdY3Ru<XF)+x_|B zK7m%yy_Qa`;g7?-?*r-s>@145CjSu1IV_V8&I$7_D7>!R#?dQ6S5D=D8cY2H!vVDk z)cDRTFhm9J2wT?LzRXSz2;f`MV8pg81fbc4XgeY9D)4aRuG_M8XGh)ywsR%z+RtF? z5CnWy2P}kbPr0=}7o%u`{hoCK=F8Wo0gz!b_`OoQxm0#6L_;T;ZSE9sSogKp7mp; zro2Uhcd&Gs?kP;&;M_aB+9j{%_)?}{@1Lvx=U~|c(@2zG$g3&J%A~wSjrpZBho2en zy94*D|HR8lQ>8nCNR;x5emC}^H%ra<-?S#S@@$U$3FzDNKui1EX8Bp|T#woprYU6pU z@DTQY(Oq_8ct`)|;>?%?C+RMuqPY_#hND}m^Bjw_e@C!M8bkivhF0Sq%=%tK zXJB2wUr5I`F8>S`nS>E%S2waR{P(;i7Ta##-=TJ0sDRK98ux3oes1Ia!GjH3OaJcH zxX3)B>aU(ohJN#lzcmoecJAC3adODo$FR^h_;&-Nn3MwJytWAdWK4ppF<^WWrE+FC kX23Jx8So5v2K*lP-&ArYQad+3inP^UUj~=kv_-C;l+~X@R;xU7#*d z7pNZ@RsNubUtPsxpfM8|e-~=>zh7rw_5GZxlf*mKYz8yMYXOBTp;t=@z|^t%Q2CsB z)wzqGWtYmkC4-q74gsu!YT3-6^P>J8AT7YD2N<;#{V-u0I->iWTM{3D>ym3zy74Y2 zg`3O7+>Z5{J}q|FnI_10RVwikkyPiVo#ic~Ov=36OGuHJ0~YSyM~8?FaT+h*E4F~H zJz3_q1e-NzCiLr-7f0ES%}{%TU8=$l449=R>(JxuUZC`e@SKa_vTcZhYC@s)Z^OU$MQt z1V}!HziWveX$CAS83T?cu!LGIoWqrY@^ueS2h314QbgLEiHk+Cl09wxX<u}KtrB=kfZ%E52ZFfJaE9zL@CM0;Ax`*Jo}U6n0+lYEAZ%n#SKYc;02`I!H)x%sV_+ z)dqRb(ky?Q38+{S$onOn+SL=lCjbIxZ>?J;;R z@}kkpt$)fVBpm>%V?5=7LE&dQfGO{Qyaan3Z<2le4q`9y+4`n`h&tN6{Hg#U*!^j! zNMZH>XbCqT8;SI6dvqbgFn9D@vohBZ>f8)9Y4Y~1n;&(c>LnXxd0iS6yPw1#LOoSj ziw_ph`J$$G6ZXmEpJI(NzrgefyJ`SrTPyWrO4m&Ki-3zcV_`!ZNwCG29g4OuN8|0; zBxU{d#8LqYj3N>nHbJsyiHta?o?KHc**uzCPn2|)SUlH&gAW8829RJuUAiBvmMqE&i& zzUThsS5*08*xU}&hFjWejuH?KG;c8wJ}Z9Fgen8uUsLa!*Keamb4UeLhv!8FdUA)? zmMxMiMy}g)_yvntjf;fE$d=)FOUOU2_;ZJ{<8%t=E9(_7@Rwl*O0|(V0*6CrnO;eV zakCa`U2WS*H(~Rju&j@5N=TWCbLiZGwI^_s!zHRY46ul<&6s2hX(pqe$?~_dc!Y16 zFLl&3IF&G?$Z3lUO?qK@n$J7}MV61W9;RuR7U~tc>a{#NHc zJimUlHs?&G)!FWN{dleFG8>=r4=@24T?=cqKwY4ISZbY;xsx|A&&Jy4NgX-N+zC{N zGgIxSL(JWsujjhuOjiu?Yn>1E=@4_~k-}65%(Qu&xf53Nw(>7CQ=SPe({G_B1SMT; zar0v$Ib?`@Ng$v|3<{2nj&UrmNxaS@r<_YK{p}T`hPF-YI$snNN!K-zv?nQC&X)1< zT0poJ)WF@)B%n1ZN}#~c|6!3C*FzYUh@`9r$PxMH<=GG!!+_Xd2K0#ore)D?jkY^d z<3e%Fq0Z-oNsnm*cf*=yp?3Yf+e06JIMfC1e<~ubs`;DOv4^Dn}e)b4OAb- z$B!>5)?shEnVnKugX2%hah8>^rWh%rrr}q$;sTXBO(H8Zo|THjhTQ|A1sg zCh*X^@dEV+p*nnjI;uQ1!u+0G620j)HQ%spEzlKa;`WLPz?%5XC)lj309wNI!X>t( zIG{quUd5+bnboWY{JjzTp+eQM;s4CasDtkR&dO*U*r6wVMQzg{+V7Dbc_#c&rihPI zorOBEnFvob>h()roRnn`tOSz9xk{?{Bdo*nE=zc<&!u#)TcaDXN+I{m#3!TlZ5xlj z%KD+sdw@n9(%Po7J-~W}D6CcEHC0tsy_J7$MJoRqs676fx?IRVF#Rw6NMYiWVWjC; z#XK;5P4IKASwY$vjunMODrTcbf8S_)wnZcOhE0@qt8RE8@NBQ1ZioBc5-x<=Sl-+6 z!tHRYj!mvf{JyH}V@1~5?Fs7MI8ka|-9)*zmw@7M26R2}4j!C|q=`!eZo2iW0 zDS&&vDz)TAR{zPocK_mnpovw>1O2Acw&trsx~eXNcLe@?LB|_(x0g2=yo>iPo{vN7Tvo<>N4h>^NN~WaL*mJ z(vsz9+dgOs9y*9o{Y~JdfpGao^9ALGf!V2sKZ+&7YbyT*8f6ksI9}O#R@jfVz9dFw zlg_?2pirwK|0nA8EA+2n-6WlR2?%Fr-(;MFF>s`aqqV0j#FC$=9Z?7FDl~NZ0nH}P zrVqy$`MjdaZ}=QRZJ7U1oXdyIxD`D5ItFkL$K!_Rh$LoYcB+*%N=_F5%lgsii}HY@ zW!+bj`tUYD;7Tfcn*r8a0I-xNT6Ai$MoQamL?LYJIaU4SJa8_8{bgoOD1p}9!bZU+ zt#C5Z6wqwsc;l`Si8tR&SmeGu{E9mM4X2-y=z$8kW9vE~_&g3Vv6>*vP94y&iErI$ z2|P0t#ZH_&*wwhzVQ|~&GZkZ6mNC8i%-S|x1Mn3Cq%Td!#@D8`Xq5>6Elnr%n$}19 z+4-w?Leti(+E;qjZEpZR8)}Y+nqJJZbjSfTZ9*cs?BjeqcXf2MS@@170}qe7mYpVw zXUF5V6dH${OvCsdE5pOm;};{Q-f;q=3+cPS&x(-qYtwA{fNK?L^gQ7i|2IDK^C2=sgmeI7lkAPaDj<>ILskll77+*C0*jl!|qHP(@sUsUk zy)^3A1vhTORB@)^_oue*CrZS(qo@FEnAFSXWCbdGCF5*nqBAF6|B%KXi~s$pojVEf zX7dQk0oF;+g>&=;E7;J;xsn7Yj+L{V9pgT+zdsc%djqwCMX~vxK86?n2(Ml1@}0?z zzZDeg2D+iE-t>*|%2yhD;z+%!(#D=2Ws|iv!1Sf=-4O*a~4Vw#w|VK(QzmYANy} z3|U|WVGCmDutO#<9V>_~K%sPD&=zP5l(za^Nw%anNS7@nGkxyP@8q0Ma&q!)VY9O( zCNOuiatL{lcQj72w}eX4k-8fg5GPz>?x?Gj14Q%FK@r}T^%WQH2H>;w?z*ARe96Ci_3lZ!b znx0z|Mck0Lrr{0YO$+)u&HAF_)e-D5i-fIe4!XBJV!At(q~XM&y9o#!G^paDeqQPLVK6|3O&%TLb$|_j*7}zkK-vF@kuSE+$!YCWY z99evbFiE`@*XZ4^Vr8;0Sxdl3s;U)GhEk#i;BhnMsOTy@)Qh5{* zwQ#=8w{c6{AEh6#f$k0cz&zLAhDIqhp&f){*rOk?1T0=U&E)iMZ0{1eI3G!MK4H$R zBBFb;JB>$xjx3+(*~PWAKc2T^1Jesj*jBE5dW+hx%=fIX6N>G*1*}t!0L9hJj2~$& zESepDU(*~vo|Aqru73VxL0+LQw<+;Ac#YVt()9A7D2+}V6O(np!f`le*%&sp>Wg8g4F;3002ovPDHLk FV1oaB-5LM@ literal 0 HcmV?d00001 diff --git a/tests/ref/hyphenate-pt-no-repeat-hyphen.png b/tests/ref/hyphenate-pt-no-repeat-hyphen.png new file mode 100644 index 0000000000000000000000000000000000000000..d0e34c9b9bf27d55658d9782cb8581395a59ae38 GIT binary patch literal 1533 zcmVDSb8M_xdJF^$-(5;8Dte~?+5zVgRwp%R~4}gM2*b0ImauTH#5J5nW5(tFK zS&|CML5&IlQ3MKtQl%V%&{D%85RyOwrCghkb6u3YR90B2xnFNXmJoiMN@ z^;Q-)yK=FA2k`Z@M@dx3xBgW?QRp`=vJ3tgvW1nj#risFJ!F)KBI!uF*ba~`jx*N0 zpNB*kt$Myo5+FGlQW}vy^zq%07_@OY;uVSKh>gbj#m{?yF{_!((|kM@n{dl&ZWjtC z&Q?73;Hu6R-CE#kJV~ij+xWqSdUxy^rnG;a(w0Z!T**3u@8SC3d0m@1anv4&=i*hi z%&4~mY5@f)IQRdra%i}38-q@qyPk6zRJO2j0zxFnk1mL1VQ8&!&|t8Gsz_XZzy`qp ziNgA#T5}>cz4Vq4Rk-lF5j#L>Pfk3aJ_`+HQ1_R~yLPkTS6kL=6#Df_<4&Ob{}kov z{-y2P%=6=H8e!5JBq2CG;e)vd5kVb*Sy2&~agC|)*lxd?a=&gU_3Jx#b|4^s`m5*` zZU^4}P!={DP!%6wMEXr!JcYTLV2d_Mc$dkN zONr8yRAhy5{Z?s1P32;MmkEiAgaS=KYscPsV@C2Sg|;kh2p~UYL!D6~>{?b#CcK$- z4}Vk&xa7~?uC_4jkewkR16#a1*gg$qif9=n1O0Calh&$(QQ& z#Vm*o;!o;c;0`W z^2o@yK2#ooCm~!-Xt|eKRJbQiLgRwK2H>h;=JfMv6 z8uZI8BKMRsA~*g5@c2N&1YxlWArJRUN8A%4Q)(lnp1q*^GAb^jGg#``xzY^~)5Tl} zuXYexi{9bRsX8JlQ}Ajb(br1#zweeaS~mH+5oh||foS&n%C_?Qf)-7@rT8kM7V!Fd zz*Onw6^Pa1tV{DZM8o>iw@>O?OH;$=8VVkfRaOu>{`7AmRjcGCz-V%WfQfWrKTs?# z3L2xKpc!MXsHNEtq*>k1{JO$#;eJR?x#+1`vFRL}xi5culJlNV40~`Hm`P*@jhiQ9 z^00vzJ~05FX@>NB0h4&tPmcs?Mm;>i{^%BHNz$|j?zXMY2Wp6JS{$v>Zh;`#Q&cM)a z2Hjfyu;#D`C9Bzg1ub@ytRXuC&kVE-n-O2`@H*6S=TwPP^U-4}4jgkWQhF30sye@K z#NTT=?CkiFD}I-Oe2?7it!=?+GGNFmKX&w`N3mPYkDq_lBHvc?;$UoD$1Z83Lu2Ru zd&90VcgDUjBSsCN4mDkBHu&(ShIYTz|Cn>Uy0pjip*!A?%6U_ne}DEiB03$Injh!Q jO{jF1O>Pk_;&0Y3ZNIG=-)V0N00000NkvXXu0mjfEvNE0 literal 0 HcmV?d00001 diff --git a/tests/ref/hyphenate-pt-repeat-hyphen-hyphenate-true-with-emphasis.png b/tests/ref/hyphenate-pt-repeat-hyphen-hyphenate-true-with-emphasis.png new file mode 100644 index 0000000000000000000000000000000000000000..0bb23ab1a460feb86daae808248527c8505064ab GIT binary patch literal 1350 zcmV-M1-bf(P)=bdw2l!FkMEJdimBMK{VoDOfHC6FDs{$GC**uz+$6 za0qe;lrljqt#UXJke2%@P}=tfA;g3@AtKolJ`cZ7Udiv5yx(HJ_tt{AATEgiVX?Mg zK1B;Hq+2nA9Wc#_s5li2ZQGf_Mwk=Pzt?O+RLML<`8LW&-xxxC!w=?rdQvC%>jLzC zk`&^iF6pZ_(_bYkG`nVg@_*hi>R|JSgjcs zclD?G<9L@a5%4J z9hWu0OT-{|Ev08;qtDfbes8mH039$ zLr2nsQiUB5;-vF*)87jU?FV$uHIC|{N02ShAPKLUNdXK8;F@|~n`0N6lDp8=|GB8G zoOT*6(|0O(VN+3UcsvJU{LyKr_IC_vFV;3f#^lbs%z-G2oSyA1Wc&hCD}>4A3SV2i z8){Wc-gP^GW@kzg1|x&IN?j193Q{m8h^0WGEF2&`wwOCYD&$@wttuqA2{2uih#F%; zOc!z~m5E<1s#Kl#FJVSj1lv%nv?Uh0^2o!DN1p`TW^+KHfHTi>gQ00}jb$ZtU&CTt zv_y1gY$I)EfFzQ`%G!N|Nwqs`b{4)$>-MWm#)>_ z7NKJRNe)H5pYVW9ja5Hjt;}&PdcO`7E5;0ckmbR0GMsQN>1BAS-e?KL2mK>zzr@>5 zwRptXGZQi1GecnX35yE^;8{3aanxX4S(1m_A3diE9Et(yxp*VAp8M_0J$L(df!z^z z%De*WfYFNqo0Rx@i_Z+iI>d~=nF2Jrfl$xXPF1|=<_vB7H94QDHLVo9X%S2dC7&zN zsF`a~9y9G!AO6ChRlGfy;*K83WZFiX5h!7DYv)4Di7TcwQPQ|b+)|U80t}rm_lYaI zP}SJoR3n#>{pAaOjJxbDL_KY0q|J^D#TQ@t=$BCiQysp)J z(KE7{vqgZHoHfH*b553xy+_3{zEfhZ;ZO^(jB|3l^Bx{T_A|~II;xf#8EQAqM(np0 zrE)x4LZINeeYC4L^d5IrZ_T{Etxj=Ty!)2V-lsQDXzB%$WT41Av&ShS4F5gtPPPMw z?5z{HViz^wqMn}uR7k;oUYpZsnVwuQuOh~-g_19b3*zhVZ&I@CQ+H&qX#fBK07*qo IM6N<$g5L^-y#N3J literal 0 HcmV?d00001 diff --git a/tests/ref/hyphenate-pt-repeat-hyphen-hyphenate-true.png b/tests/ref/hyphenate-pt-repeat-hyphen-hyphenate-true.png new file mode 100644 index 0000000000000000000000000000000000000000..d08859fb2bc639620746fa6a09636a643db5a17c GIT binary patch literal 1341 zcmV-D1;YA?P)!~uwKLtZ=mB_;rFscGkFbzLoK;KE@ zzKU>nZ=lqg*x&EkF|iV;7G3b*xEdQJ{AmpNbP% zw&U-rjSEw1u{*w<5wu z(}Ib7T%`gO(O*(^A@9yGb4NNrZ4(<+xWieSMyh`^A;;Dm{F=9-wpA$`WB`$ zX`iQ9DLiFCjv^7D(5&paL+j%N@syb3BI9pmm>wRB3x5hUr-b(F#l+~q5Tw#*fChh| zsOwj?%AE0dN#Xm%uBzuFdnmIYgk3|E5B8|K<6HJs>@3iWdAc=34@i;$kHqY6fjEs! zJ}ExB%w}#~#X}Bk(DoW)ozEKlrG?+TPs|O8R_)baH8XZgZC*$;4;-(LDGxs_9ngz| z81N#}=`vt=uS^XzM!0!RicMB8t#C*u!wz~jpfkii9|N{0msQT|29{`v}(ghf66rOBrDvl}vsepJXV5FH8J?@MwAF#Sg*DOS0 z?c_)69OJwt0K~UKl)Y@k-7f!ldg;LjvS6uWH?-RX6a(_WSchD*s5awM2a6`IM2Y~) z&xj4FKPx$t)NOky{{t<+Z!HW9V7l>*AAECeS|=yAz;yASlq~J)$0O=$7FtwDXMDYg zfC=98P{CMrLi`-Xy8*2$*fE}=-%wvnJAw1A6Dw48rtDX< zihN5f%dvPqwy9@I3{tSCykw&$P@>(9S-s{^+u{7ViMBqOnIx>863e9~d4Ru?P2=4= zFs;D=)k(ehC0OQ6i9gm7EUNyQq$?a4HS51;azCSEZlbR={I)P72AsTr}`djymhkXg_AfL$TdpuagGc}c%aSMa^To$(|7`Hq6S>m zi?aYvByf6BH>Y7oT5|THis-tRmV8NE66fDPB$Vm4(!d+X00000NkvXXu0mjf-yD7< literal 0 HcmV?d00001 diff --git a/tests/ref/hyphenate-pt-repeat-hyphen-natural-word-breaking.png b/tests/ref/hyphenate-pt-repeat-hyphen-natural-word-breaking.png new file mode 100644 index 0000000000000000000000000000000000000000..d08859fb2bc639620746fa6a09636a643db5a17c GIT binary patch literal 1341 zcmV-D1;YA?P)!~uwKLtZ=mB_;rFscGkFbzLoK;KE@ zzKU>nZ=lqg*x&EkF|iV;7G3b*xEdQJ{AmpNbP% zw&U-rjSEw1u{*w<5wu z(}Ib7T%`gO(O*(^A@9yGb4NNrZ4(<+xWieSMyh`^A;;Dm{F=9-wpA$`WB`$ zX`iQ9DLiFCjv^7D(5&paL+j%N@syb3BI9pmm>wRB3x5hUr-b(F#l+~q5Tw#*fChh| zsOwj?%AE0dN#Xm%uBzuFdnmIYgk3|E5B8|K<6HJs>@3iWdAc=34@i;$kHqY6fjEs! zJ}ExB%w}#~#X}Bk(DoW)ozEKlrG?+TPs|O8R_)baH8XZgZC*$;4;-(LDGxs_9ngz| z81N#}=`vt=uS^XzM!0!RicMB8t#C*u!wz~jpfkii9|N{0msQT|29{`v}(ghf66rOBrDvl}vsepJXV5FH8J?@MwAF#Sg*DOS0 z?c_)69OJwt0K~UKl)Y@k-7f!ldg;LjvS6uWH?-RX6a(_WSchD*s5awM2a6`IM2Y~) z&xj4FKPx$t)NOky{{t<+Z!HW9V7l>*AAECeS|=yAz;yASlq~J)$0O=$7FtwDXMDYg zfC=98P{CMrLi`-Xy8*2$*fE}=-%wvnJAw1A6Dw48rtDX< zihN5f%dvPqwy9@I3{tSCykw&$P@>(9S-s{^+u{7ViMBqOnIx>863e9~d4Ru?P2=4= zFs;D=)k(ehC0OQ6i9gm7EUNyQq$?a4HS51;azCSEZlbR={I)P72AsTr}`djymhkXg_AfL$TdpuagGc}c%aSMa^To$(|7`Hq6S>m zi?aYvByf6BH>Y7oT5|THis-tRmV8NE66fDPB$Vm4(!d+X00000NkvXXu0mjf-yD7< literal 0 HcmV?d00001 diff --git a/tests/suite/layout/inline/hyphenate.typ b/tests/suite/layout/inline/hyphenate.typ index bcad4d93f..c366b38f9 100644 --- a/tests/suite/layout/inline/hyphenate.typ +++ b/tests/suite/layout/inline/hyphenate.typ @@ -50,6 +50,58 @@ It's a #emph[Tree]beard. #set text(hyphenate: true) #h(6pt) networks, the rest. +--- hyphenate-pt-repeat-hyphen-natural-word-breaking --- +// The word breaker naturally breaks arco-da-velha at arco-/-da-velha, +// so we shall repeat the hyphen, even that hyphenate is set to false. +#set page(width: 4cm) +#set text(lang: "pt") + +Alguma coisa no arco-da-velha é algo que está muito longe. + +--- hyphenate-pt-repeat-hyphen-hyphenate-true --- +#set page(width: 4cm) +#set text(lang: "pt", hyphenate: true) + +Alguma coisa no arco-da-velha é algo que está muito longe. + +--- hyphenate-pt-repeat-hyphen-hyphenate-true-with-emphasis --- +#set page(width: 4cm) +#set text(lang: "pt", hyphenate: true) + +Alguma coisa no _arco-da-velha_ é algo que está muito longe. + +--- hyphenate-pt-no-repeat-hyphen --- +#set page(width: 4cm) +#set text(lang: "pt", hyphenate: true) + +Um médico otorrinolaringologista cuida da garganta do paciente. + +--- hyphenate-pt-dash-emphasis --- +// If the hyphen is followed by a space we shall not repeat the hyphen +// at the next line +#set page(width: 4cm) +#set text(lang: "pt", hyphenate: true) + +Quebabe é a -melhor- comida que existe. + +--- hyphenate-es-repeat-hyphen --- +#set page(width: 6cm) +#set text(lang: "es", hyphenate: true) + +Lo que entendemos por nivel léxico-semántico, en cuanto su sentido más +gramatical: es aquel que estudia el origen y forma de las palabras de +un idioma. + +--- hyphenate-es-captalized-names --- +// If the hyphen is followed by a capitalized word we shall not repeat +// the hyphen at the next line +#set page(width: 6.2cm) +#set text(lang: "es", hyphenate: true) + +Tras el estallido de la contienda Ruiz-Giménez fue detenido junto a sus +dos hermanos y puesto bajo custodia por las autoridades republicanas, con +el objetivo de protegerle de las patrullas de milicianos. + --- costs-widow-orphan --- #set page(height: 60pt)