From 4494b443bb34fed2208ee3fc87e9a18e7d14b2ab Mon Sep 17 00:00:00 2001 From: Laurenz Date: Sat, 16 Apr 2022 22:42:49 +0200 Subject: [PATCH] Ellipsis --- src/parse/mod.rs | 1 + src/parse/tokens.rs | 16 +++++++++++----- src/syntax/ast.rs | 1 + src/syntax/highlight.rs | 1 + src/syntax/mod.rs | 5 +++++ tests/ref/text/shorthands.png | Bin 3838 -> 4375 bytes tests/typ/text/shorthands.typ | 4 ++++ 7 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 7536b2ca0..be947170b 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -214,6 +214,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::Shy | NodeKind::EnDash | NodeKind::EmDash + | NodeKind::Ellipsis | NodeKind::Quote(_) | NodeKind::Linebreak(_) | NodeKind::Raw(_) diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index ae3d7b9c5..f6e1f296d 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -140,6 +140,7 @@ impl<'s> Tokens<'s> { // Markup. '~' => NodeKind::NonBreakingSpace, '-' => self.hyph(), + '.' if self.s.eat_if("..") => NodeKind::Ellipsis, '\'' => NodeKind::Quote(false), '"' => NodeKind::Quote(true), '*' if !self.in_word() => NodeKind::Star, @@ -216,7 +217,7 @@ impl<'s> Tokens<'s> { // Comments, parentheses, code. '/' | '[' | ']' | '{' | '}' | '#' | // Markup - '~' | '\'' | '"' | '*' | '_' | '`' | '$' | '-' | '\\' + '~' | '-' | '.' | '\'' | '"' | '*' | '_' | '`' | '$' | '\\' }; loop { @@ -224,12 +225,17 @@ impl<'s> Tokens<'s> { TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace()) }); + // Allow a single space, optionally preceded by . or - if something + // alphanumeric follows directly. This leads to less text nodes, + // which is good for performance. let mut s = self.s; - if !(s.eat_if(' ') && s.at(char::is_alphanumeric)) { + s.eat_if(['.', '-']); + s.eat_if(' '); + if !s.at(char::is_alphanumeric) { break; } - self.s.eat(); + self.s = s; } NodeKind::Text(self.s.from(start).into()) @@ -831,7 +837,7 @@ mod tests { fn test_tokenize_text() { // Test basic text. t!(Markup[" /"]: "hello" => Text("hello")); - t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world")); + t!(Markup[" /"]: "hello-world" => Text("hello-world")); // Test code symbols in text. t!(Markup[" /"]: "a():\"b" => Text("a():"), Quote(true), Text("b")); @@ -897,7 +903,7 @@ mod tests { t!(Markup[" "]: "." => EnumNumbering(None)); t!(Markup[" "]: "1." => EnumNumbering(Some(1))); t!(Markup[" "]: "1.a" => EnumNumbering(Some(1)), Text("a")); - t!(Markup[" /"]: "a1." => Text("a1.")); + t!(Markup[" /"]: "a1." => Text("a1"), EnumNumbering(None)); } #[test] diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index b01eeb47a..608566913 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -69,6 +69,7 @@ impl Markup { NodeKind::Shy => Some(MarkupNode::Text('\u{00AD}'.into())), NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), + NodeKind::Ellipsis => Some(MarkupNode::Text('\u{2026}'.into())), NodeKind::Quote(d) => Some(MarkupNode::Quote(*d)), NodeKind::Strong => node.cast().map(MarkupNode::Strong), NodeKind::Emph => node.cast().map(MarkupNode::Emph), diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 34e5b4a70..004ff9576 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -131,6 +131,7 @@ impl Category { NodeKind::Shy => Some(Category::Shortcut), NodeKind::EnDash => Some(Category::Shortcut), NodeKind::EmDash => Some(Category::Shortcut), + NodeKind::Ellipsis => Some(Category::Shortcut), NodeKind::Escape(_) => Some(Category::Escape), NodeKind::Not => Some(Category::Keyword), NodeKind::And => Some(Category::Keyword), diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 1f02217a1..d18b6a3d5 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -599,6 +599,8 @@ pub enum NodeKind { EnDash, /// An em-dash: `---`. EmDash, + /// An ellipsis: `...`. + Ellipsis, /// A smart quote: `'` (`false`) or `"` (true). Quote(bool), /// A slash and the letter "u" followed by a hexadecimal unicode entity @@ -774,6 +776,7 @@ impl NodeKind { | Self::NonBreakingSpace | Self::EnDash | Self::EmDash + | Self::Ellipsis | Self::Quote(_) | Self::Escape(_) | Self::Strong @@ -869,6 +872,7 @@ impl NodeKind { Self::Shy => "soft hyphen", Self::EnDash => "en dash", Self::EmDash => "em dash", + Self::Ellipsis => "ellipsis", Self::Quote(false) => "single quote", Self::Quote(true) => "double quote", Self::Escape(_) => "escape sequence", @@ -992,6 +996,7 @@ impl Hash for NodeKind { Self::Shy => {} Self::EnDash => {} Self::EmDash => {} + Self::Ellipsis => {} Self::Quote(d) => d.hash(state), Self::Escape(c) => c.hash(state), Self::Strong => {} diff --git a/tests/ref/text/shorthands.png b/tests/ref/text/shorthands.png index ad09967d2d0ebb31afaf844fd3146dae1022128f..c21d49df2e2d5e0457d701a7864d0d18668c4e0e 100644 GIT binary patch literal 4375 zcmZ|TcTm&Kx(D!INPy6TNRti<0#XE&F1fT_*dKr^EUi`~2XGT{Eoo`r~a*CVa@(@U_%!f6i|UX^H} zm|L}VJXiVesZi;S4UeVHv)VUrr zO{6)eV(ME>Jbi+A=QS0u*|o)dEfPZDRYyt+l| zQF|`z3g{A3B!Ta$?gF%NiRPeK(eyk(#t2fIyYMBUY zj6up@Uj9IMs8NFFL%N#^p?-eS{mEK5S!w}`Vi>4`9e_-94$=m6$kJ&B zV*AgawCt-GvRTbW4XH(}LRdE>)7vIkwO=HLw8N3NWD0sI#3n zuS>8EnXFg4Q&mp6F(!>G31)Y=13@$C;HI&5_inna^W+C6_qM(S-zq6oPygpODjzFP z7X3K1)bp#Ny1MnG;UGEpROakFjL9l;a>r6HvsD)yEMkAhFbc(u-PM>EBQL<&wz5^% zQu4g*3X>wpV2V(0D>Q)=UYggw?BRls4_}yA38+34Cs40?4=Vr4{C$rFAk1ce=ihTF01hp~#?rUg2hKpaE0a_^_ac&!)g`bJtA=LF=k`X19{XSpj$ zNta!Wy;&My_9pGxx_Q5whE?=pu0w-q`$4zHS|U(z%derQMRcl-fO^kf`t1HU)=W8m z({p$F&cuZ%gZq2hM%7BFd*(HT&42FHVh~H5#Yz zDajzmHSxdyr9+st4A8mlxVVM1o%McvYju5DUi~WYDQ6ZepTi4sJv?+iNj>d>FxSkO ztlN$1(6A`omZ5Geg@6jS*pOF?s*H7k4wrJq0)tW(Q4L*4M1*BaLVNV^DQl7AP1+KU z07m;SdP@xZ=5x`f_acGj6S#U+LvOQaqcQo$w-&MluOsTgmG#V+arf{944=@VQCG3^ z214aPm56otXQRfsM?JH9fNy_9h8Gv5rz|PybZ+R%1swy>50m9ILJ|*GQehtGUB&$% zDPgi#f4Dh`RE`S3tjy30$6I0SZqCds1RmGac!t0f(;b*Rz?_pblv`LpV-$^-fm*%_ zL^F)XT?0J1K{CJaUFF2KLk#?6pY8*h9#)>gflBksVjm!8(KFiNH zA8ae)h6)0?P9)M(Z5TDHKzFC7g@bjHLQpQCx+dqTzlhWghezX~bbVALkz5agtt~is z4Bgml@S?ykY5e}4I7wuiWy~-RCP>PsAQNRo>!c>-T=tk$OykzxxU-!o%xIuA6F!wk zn@IxN3>L9wU8@`#!By0tesToVz_^S^kHt;Khn;R&RO*|YV;|YdkFYEq%yIX{CGqzg z$i2a9Ih&TU@l_P78?97gX}PlL-jqR|xW%shfv4>?gERGq7T;N=Frcuy3$*)Wi?h~gKCo^%)=Yoto`iN%>+#>9QcP{q ze#N_F!e6hr=HGFOZ$P}Pq_#vnv{l1wf?cpyH|ux20!Vn{>aTIJQnHBS8^;zlyLy}R zpU@~q+T$93R$5TM^U6jIa&Wgy-WgTBgi%fg3^GKzBn6cT&Yf9PVkCNWj#OyckIAg4O$UMcKCs&6@pXKMZuY|YqlqRjNDIGt}YsOgk%^R(PA zGI&@r&~7QyapFoHmVUE%dc1lY%Nmpgnf_h|J;jd0RG~&~a<4G6qd94p#5^&Logv#$h`;+fSWEti+9r~w*o7V(b zW%9t(66u*;riqYo#Nj5WD?kJf-#&O9_OQ0RjFpM$D0R>(K9FEj_IAui4!oj5E&0`P zx)_9%aejL-ks)HF7kM_2{dT=|8$_BVt2GfLJO1ish{>a|7*l)>bcUP&qi-(<;WQr5 zbMH7{nDG2jLz0QMa$Ji;ED4MsIP{*_o#Wfa;SH2YTXUTa_``a$X(6%Esg!~qLV3_T zVuH|Ym;h3@w$rCi4;c|Z!O^Fb#$MCknm=;m=g_tEMwLLOM^^+9S97H68}rNzU(%9C z!;yy0CNmC-ueR_`3A=~&3EgXq6Jv^{e_y#fL!7#MErl>^)#%@Icp|KinT&_z7JwO{ zQnU}w;}X;Z;^AICb4nELxo950c*{`3y-xvDJ-*BoC&4*+WyMZmV5p<4DGzT3ZK(J9 z>nE*9m05NS;P%{X^4ej_`ozYis{w~3MDG!9a+z8Q{qvP@mPj@N+1Yz=Xe3?)w)jY~ zNea4C-@yzmv8nn=R$MBdWjDmO=p2}`D2Bo+27IJwst_k~r7z{oFGJbL;kG@$m2Xo^ zvMueQIPQhw>U9x}O(4xnba9+xyJ-}gCk)@^vXLjB?iX#Og(*sk0GIID%8>8EC(`@( zH5=8A#wO;Qo)Hb9weDb`8Dy2ZTLKYDy&MhHelIFj`n_&r*4boQ!L0rXAl8(^F8m<$eX#=hX@#(zBt6zzj0L;a%}-2*jrQLwWK_gkn% zqV-=n@aORrez^&dJC`jC*5ztJ2zmJkIol)o6_ok%f*>9;pP%&3js%PQa72OPL3hn8 zRvW0TLkf9tjOt=@my&M$==k`QVWFSe_7P2DTzKL1h>mzT!nFHHm zjOUS~h5fce1Y58oDuV8RKAky$5=-&%Jo5?v%1yP_p+?0yu?+S4=2 zq2rGWs;CuEC`0KDYp=CDnXU|O=JpwT3;wB_?3YI4ZZj3s*$>;qiQd=dho{c*g}Xhq zKGM2zLv?b`B(m2+n45VdOMCTX(33?uE=F)_zS_fOf#ZIZ%ef(my#i;u@a_vLD-@-9 zt?!8jH}gE^=bVLbv9{86Fq@6V-bW&K7}qyB?+Z3&vbI~UNH9PqR=dG{+9_1UMD*g6 z5<|jFCiD|HQx(&dBeMOX)k%9POk@<=_Vq7(TN+17qaJNVlk8%b?$CwQUZW(ZyT=b- z3rd)+S%0u>(+ShpM3mYwTD4|fV=2%H2{(5yEqm_bqPbZ*F~_?*+7*-?hZT=dt+B%v*z~Kg{R*&R#+MT@Gm#x(S2GOhbOjL6dSEOThzy- zD@DfX>-|z0TX`%5okkq8g^`D6js6Nlzr`dZ?d)>ye?l-XCfKoYsVbCchwv5q#?M?a zf~l05ZZ`|hfX!^08^Anvc!u1mPRyzRyIb&Q9Z61;@}JP*xI2msmxKC%_GGR5`spv{ z_ns%yr8)m>((^ObaN)c8AVI>bxb?n2dgb^JW1B&1# zw({pt1RKgn{xNGop+&{SdKQ=~m)A0UlH$}cg!;LG4iH%@>%nGOK0{iTS9d_RcQuK# z7@2kltHX~nGh!{UQ~e{=ttX^P6O>X$GvOXnqTrk0g6EC#Ve)N{^I$^@An4uvUi*Kg z4Eg|TNjL0$fFh9n4m+q7p0um9rZJUobl4=z-poM07Z?WkO*yIbq})@>GH1Uh(Z?Ye z)nANq4J9ZjO9^#-ht*2?Jv=jAU|gz;&wC_?PUkJS9M#=mBatipWI;i>$6K^@3$5(G zvYJi$fj%An!P`PS|HfZs27$-~7J*|6WqrKAkgCV&iQ0+Zoxg*YYHumCyIi1W`v-1! z1V+6UTvUG66!d&+=LOWf#@EpRLAc~SB^A;X>hgLq*BUy()`_i(@fef|>+DdZ^Tks#Pmk&&1nI>)DFZb`M7|F-AZ!U=U_^gq0#N~zayCpt~ zxBt>P{FwJUSUGNkj50O$ntx~wk!8U{;(fLC1#g(7Z(nrto=T7+6EA!jhjV=uqRr3H z4L#^3j$Vs0$b*O zf!{SIIUm1_{cZIsBXY**6-i&GKE{Kw$63HHOL93#ftK#K{dd8r4IZCYOM+(~fjLSbjofyNFDB(cs0KG{L>hlE^Tz<)P78f z9V|-iby?Zy;;Z)PuwnMQ(vDjQZC>c53kn__S`6Xgoafu_+tH#RdY(yz7@dEZoPiuU zG-VJzZ}MpLCC|({aUC7Kw%sQ_(b~X)MkW<^(T(ND{T*Na^T!}UxzYea|8HrR_QE1t W;Ven;^7=nN9$ISps^!X-5&r^XpDdgJ literal 3838 zcmZ{nX*3jU8^_0(%$Q<~b*zOf*(qE0bwo^-G_nl}HOg3%b!Z|xk>#-`Tf|^utPK)P zSt3%R5i<5QYw_wi=XpOo=Y4-4?sHxDxzD*iT>o?3|10*21(F5I2L%8CESF3StpET9 z&~ZEtIX(c@p~NfzfVKOQp`K09m*t5Nw9QCo&-%|QhG>n1FIjO)k{PbCypvRtbr3ts zr6g}n*@8zSE>rGlG?%RtpShU?SC_rh5a}c7sph1z>AIz&Jny8ErJlKEYXVJSQ!8TW z^8ll!&sHu^Jn!f#EdO&zO`H4XlpS{UGj_}j!u1bNa!$RJf$9lMf*uyKaYpH1f&4r9 zl?s_~F3*Eh%&Mv;*W^JKU)U~UpVp7uemQCjlHC)z8dAiD|FPcRUBQv;dGeq%869LS z-?=a%!>=-hZ@{Vf?S7pGc<`K-7SQpU3|Q4ia8K$o)i5Ha{X$|CHiO$PJG9gmB^pws z@V5>b`VxiXBfS*0ntf6{u{+{~=4^|Gha^BoEHh4V(%%UQ$yuWHG$+2p6o8uu=m-z;)#9x;I&E_jYAAUCN;qC>h05|4B8yLjhrsE zz18y&%B|ZP`d6DaE`K#e8TGQ;?PbgNX@iOGiYE#@7@Llzv@Q+s&_<+eH<=;v^6t*1 zm{IvF3X7R;^Nb$pfVG@EgS(15W%QoI>2dk?c1FzG+dZGUdVT;f1?P81vR8_p2e?^@ z=LfzDzJF-+8%^6n8o2Y2;jHF@7jC|4XvS@`E!rkt^y{THiH#(th`q`A%gE%N)0x^2 zKXlarlk2ge<>6yc8ji7`;a;yr$;HkCUrjI0f-Yl4**YV#=N+)`g7MaZ%)!D{izj_6 zu#Id0nmu^**1R_Y>~LGc{`LNaesHz6>;m%L;40-XqV8(uV7Wusz%t8?8|n8GdYRmr z2ydbf%zDQ7V$V<&wydnvk~i3f(BtYJ`ajg8E~kOpsphsN?6}itj67&J>+Yl zkk~{(HSf?;V9Uhhj7!_)&7d=Kcg=KL&5(oPl!@i3=ROFWKDkHMu~GG4CW>E}0bjje zly)OZ%Qre?9$fqJZsYi7H}=a0BDLmT!pB7ximnbgqHX;6dih**C3=<^)(Gtg8+SAq ztI70cGV*?@$e*Yr^azW&jk##YTjVIPhMQr|6%B1@sr=Pmk^aG;e*uz0hb7JD@$K=m zh@-_OB7(T-t6TT(G!;mW!s7@k-?QalA8h0U@rz+BBF{obW%bp>CqKX~;y$+iO|hEB zXm*{jM468!o5T}@^iPURD-^ob1}L0;KwS5>3XCy1A12H}vyvUZ>3AZKsZ z*Apxp-AmsL#zsQk&!9G?N3l=FDMKuH+hrfn_Jfh^WL8bNwy7Wkk0veN_;}U9e z8EtzfFctQos?1fZ@8dj@*qgwM%JL^lwL+heTTCOp4}C4hH_KNy0dGb4-|Q}fHw>I2 zYhJt3f_L^5Jk)h(>W@pEj##NbF)?f64}$iw-1*kZVfQKpYrcTmzpFP#@XR^5=u)HD zt^^#^oSAZJd;HBm8RM;Ng!=O9*03!rkrZ}zI)1CrIFkmS&f#yTk4jL2W4(?-=E7Ad zjS``A-`x8%D~gtX7Uf2Mk2Nye-{Sm?c{SY->pU(Z5{DNG?`o&gU3Y;!#nI_pIr*7( zBMHRO)zs81@kHvbOWpj#{Ijtewz~YxUhzkFsP`fJa!klFHMq96 z_TK(M)o|cZMSXg-5R$>HGrxh!tuc*KA;iS{+w#MMpQz5H?QkXfEIZ~JScf;=*G>oV zDd(r5=(w*RD^5}UGGXhGg_3IP{AkRZ!Q(oa4SxFCy`J*l%hEMCp-=MW^4t^jw%xjw zTllEKo}bE@NlzBaOyaiwTH2qJD?IzEYNL`G1I+cEuaU$R_@C48scuH4jy%>jC&`zS zP*ttnVS&488!;jYPrCe)h4$|jI@s95@pYg0P0z6iO7rWEKl>6@7yx{zybzC#fa)hE z{C9riuyFD0fscV8ge;URO23>7g7i8;Q69IzciZV&x*tuwa{!#otc~cGQ|+`Jck(vE zC8vi<=Yvccdpfs!@geqz<~*=#bN_`$m-sqi9a@@Gi|>!z{5&VY2(MG;9?H7%msl>N z^a6u)&Xl^WjvT}zWKVu}Td+ERv|JO+#-svz8vQn+gTAJu)Gdd<7nY=d`Fv&zN^OWX z>FT|LxYlbRkaGV;48>U}hUa%4V6ofx(fq;+;vO<4@8^%C)RWrvduQTElGR{~2jD)y zOdRt?V6joXiTbxnI6dXPGq~37$DK4C19=XBx^G4j#dB}%$pOjI#M_A0XA+{zeRHbe z5}d+c2f@UCeJZMO{l{i~#Jw8hv9s==v8jp`t%4J*yGcwAdG>1lpk+>Vi}y?_VY4NW z5pkyx^-8Me1k zETS-^hAaijGY>l+y71$*rd)hnl$t$q}C2JReWTcBVE5$3ivB&(_) zvv>1M2XO5LdWAd9S}Guc*Y)%kYrmRxyW3aU(u^8~?x!A}EJnR4R@8yY+pK(12kWob z60>SVDa{i0Bj-=VgbeB-6Wj&nu@+cy($*6Vd8Bi3-KIUUhKaVqPL zw>ET72885x$LluT+FTpB@EkJP_z~)FbkMvE?MkHl#y4M(rJdtXkD`}~+KjGGW@os# zHG18@Kr4enz$F8AC?mXJ$4XUnk3GK5B5p!EEyRp>j^^AAiuyE&pZ9r)-7s?^cpF znNHZlDqOn`GEU|v;fh_F1-WwW_ z#b{R9BhR6lp~=0;M551Y{CW%nSs`H1ES`FSuyifvwi|4?M_o%$xI<~HAtOAzk{i?r zg^O;z6hp-hW}T@#$BYnL3^=7K2TJCn3m4dhA z3R0D!Q=;ce9GtHD4YGNO)ci^@ttKu&i1X|29mdX0Ox0&i;fG`Hf|0J-w(Va^&dt`7 zjDRocH@Y{XFvf8#HDd_HZR&(>l1Y?PBr7Hz&6xHHc(bTWO2o@~wFu&9W&H=3 z%|qfl9ak##UYh=6np-M4^!KB7?SVOmQR%F&|Bz#g~(=}$uQk#L=oFWkZpD7&m=^?+Q=HdTmS$7 diff --git a/tests/typ/text/shorthands.typ b/tests/typ/text/shorthands.typ index ef0bf8666..2efd1ad1d 100644 --- a/tests/typ/text/shorthands.typ +++ b/tests/typ/text/shorthands.typ @@ -6,3 +6,7 @@ The non-breaking~space does work. --- - En dash: -- - Em dash: --- + +--- +#set text("Roboto") +A... vs {"A..."}