diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 7536b2ca0..be947170b 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -214,6 +214,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::Shy | NodeKind::EnDash | NodeKind::EmDash + | NodeKind::Ellipsis | NodeKind::Quote(_) | NodeKind::Linebreak(_) | NodeKind::Raw(_) diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index ae3d7b9c5..f6e1f296d 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -140,6 +140,7 @@ impl<'s> Tokens<'s> { // Markup. '~' => NodeKind::NonBreakingSpace, '-' => self.hyph(), + '.' if self.s.eat_if("..") => NodeKind::Ellipsis, '\'' => NodeKind::Quote(false), '"' => NodeKind::Quote(true), '*' if !self.in_word() => NodeKind::Star, @@ -216,7 +217,7 @@ impl<'s> Tokens<'s> { // Comments, parentheses, code. '/' | '[' | ']' | '{' | '}' | '#' | // Markup - '~' | '\'' | '"' | '*' | '_' | '`' | '$' | '-' | '\\' + '~' | '-' | '.' | '\'' | '"' | '*' | '_' | '`' | '$' | '\\' }; loop { @@ -224,12 +225,17 @@ impl<'s> Tokens<'s> { TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace()) }); + // Allow a single space, optionally preceded by . or - if something + // alphanumeric follows directly. This leads to less text nodes, + // which is good for performance. let mut s = self.s; - if !(s.eat_if(' ') && s.at(char::is_alphanumeric)) { + s.eat_if(['.', '-']); + s.eat_if(' '); + if !s.at(char::is_alphanumeric) { break; } - self.s.eat(); + self.s = s; } NodeKind::Text(self.s.from(start).into()) @@ -831,7 +837,7 @@ mod tests { fn test_tokenize_text() { // Test basic text. t!(Markup[" /"]: "hello" => Text("hello")); - t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world")); + t!(Markup[" /"]: "hello-world" => Text("hello-world")); // Test code symbols in text. t!(Markup[" /"]: "a():\"b" => Text("a():"), Quote(true), Text("b")); @@ -897,7 +903,7 @@ mod tests { t!(Markup[" "]: "." => EnumNumbering(None)); t!(Markup[" "]: "1." => EnumNumbering(Some(1))); t!(Markup[" "]: "1.a" => EnumNumbering(Some(1)), Text("a")); - t!(Markup[" /"]: "a1." => Text("a1.")); + t!(Markup[" /"]: "a1." => Text("a1"), EnumNumbering(None)); } #[test] diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index b01eeb47a..608566913 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -69,6 +69,7 @@ impl Markup { NodeKind::Shy => Some(MarkupNode::Text('\u{00AD}'.into())), NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), + NodeKind::Ellipsis => Some(MarkupNode::Text('\u{2026}'.into())), NodeKind::Quote(d) => Some(MarkupNode::Quote(*d)), NodeKind::Strong => node.cast().map(MarkupNode::Strong), NodeKind::Emph => node.cast().map(MarkupNode::Emph), diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs index 34e5b4a70..004ff9576 100644 --- a/src/syntax/highlight.rs +++ b/src/syntax/highlight.rs @@ -131,6 +131,7 @@ impl Category { NodeKind::Shy => Some(Category::Shortcut), NodeKind::EnDash => Some(Category::Shortcut), NodeKind::EmDash => Some(Category::Shortcut), + NodeKind::Ellipsis => Some(Category::Shortcut), NodeKind::Escape(_) => Some(Category::Escape), NodeKind::Not => Some(Category::Keyword), NodeKind::And => Some(Category::Keyword), diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 1f02217a1..d18b6a3d5 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -599,6 +599,8 @@ pub enum NodeKind { EnDash, /// An em-dash: `---`. EmDash, + /// An ellipsis: `...`. + Ellipsis, /// A smart quote: `'` (`false`) or `"` (true). Quote(bool), /// A slash and the letter "u" followed by a hexadecimal unicode entity @@ -774,6 +776,7 @@ impl NodeKind { | Self::NonBreakingSpace | Self::EnDash | Self::EmDash + | Self::Ellipsis | Self::Quote(_) | Self::Escape(_) | Self::Strong @@ -869,6 +872,7 @@ impl NodeKind { Self::Shy => "soft hyphen", Self::EnDash => "en dash", Self::EmDash => "em dash", + Self::Ellipsis => "ellipsis", Self::Quote(false) => "single quote", Self::Quote(true) => "double quote", Self::Escape(_) => "escape sequence", @@ -992,6 +996,7 @@ impl Hash for NodeKind { Self::Shy => {} Self::EnDash => {} Self::EmDash => {} + Self::Ellipsis => {} Self::Quote(d) => d.hash(state), Self::Escape(c) => c.hash(state), Self::Strong => {} diff --git a/tests/ref/text/shorthands.png b/tests/ref/text/shorthands.png index ad09967d2..c21d49df2 100644 Binary files a/tests/ref/text/shorthands.png and b/tests/ref/text/shorthands.png differ diff --git a/tests/typ/text/shorthands.typ b/tests/typ/text/shorthands.typ index ef0bf8666..2efd1ad1d 100644 --- a/tests/typ/text/shorthands.typ +++ b/tests/typ/text/shorthands.typ @@ -6,3 +6,7 @@ The non-breaking~space does work. --- - En dash: -- - Em dash: --- + +--- +#set text("Roboto") +A... vs {"A..."}