This commit is contained in:
Laurenz 2022-04-16 22:42:49 +02:00
parent c5b3f8ee98
commit 4494b443bb
7 changed files with 23 additions and 5 deletions

View File

@ -214,6 +214,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| NodeKind::Shy | NodeKind::Shy
| NodeKind::EnDash | NodeKind::EnDash
| NodeKind::EmDash | NodeKind::EmDash
| NodeKind::Ellipsis
| NodeKind::Quote(_) | NodeKind::Quote(_)
| NodeKind::Linebreak(_) | NodeKind::Linebreak(_)
| NodeKind::Raw(_) | NodeKind::Raw(_)

View File

@ -140,6 +140,7 @@ impl<'s> Tokens<'s> {
// Markup. // Markup.
'~' => NodeKind::NonBreakingSpace, '~' => NodeKind::NonBreakingSpace,
'-' => self.hyph(), '-' => self.hyph(),
'.' if self.s.eat_if("..") => NodeKind::Ellipsis,
'\'' => NodeKind::Quote(false), '\'' => NodeKind::Quote(false),
'"' => NodeKind::Quote(true), '"' => NodeKind::Quote(true),
'*' if !self.in_word() => NodeKind::Star, '*' if !self.in_word() => NodeKind::Star,
@ -216,7 +217,7 @@ impl<'s> Tokens<'s> {
// Comments, parentheses, code. // Comments, parentheses, code.
'/' | '[' | ']' | '{' | '}' | '#' | '/' | '[' | ']' | '{' | '}' | '#' |
// Markup // Markup
'~' | '\'' | '"' | '*' | '_' | '`' | '$' | '-' | '\\' '~' | '-' | '.' | '\'' | '"' | '*' | '_' | '`' | '$' | '\\'
}; };
loop { loop {
@ -224,12 +225,17 @@ impl<'s> Tokens<'s> {
TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace()) TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
}); });
// Allow a single space, optionally preceded by . or - if something
// alphanumeric follows directly. This leads to less text nodes,
// which is good for performance.
let mut s = self.s; let mut s = self.s;
if !(s.eat_if(' ') && s.at(char::is_alphanumeric)) { s.eat_if(['.', '-']);
s.eat_if(' ');
if !s.at(char::is_alphanumeric) {
break; break;
} }
self.s.eat(); self.s = s;
} }
NodeKind::Text(self.s.from(start).into()) NodeKind::Text(self.s.from(start).into())
@ -831,7 +837,7 @@ mod tests {
fn test_tokenize_text() { fn test_tokenize_text() {
// Test basic text. // Test basic text.
t!(Markup[" /"]: "hello" => Text("hello")); t!(Markup[" /"]: "hello" => Text("hello"));
t!(Markup[" /"]: "hello-world" => Text("hello"), Minus, Text("world")); t!(Markup[" /"]: "hello-world" => Text("hello-world"));
// Test code symbols in text. // Test code symbols in text.
t!(Markup[" /"]: "a():\"b" => Text("a():"), Quote(true), Text("b")); t!(Markup[" /"]: "a():\"b" => Text("a():"), Quote(true), Text("b"));
@ -897,7 +903,7 @@ mod tests {
t!(Markup[" "]: "." => EnumNumbering(None)); t!(Markup[" "]: "." => EnumNumbering(None));
t!(Markup[" "]: "1." => EnumNumbering(Some(1))); t!(Markup[" "]: "1." => EnumNumbering(Some(1)));
t!(Markup[" "]: "1.a" => EnumNumbering(Some(1)), Text("a")); t!(Markup[" "]: "1.a" => EnumNumbering(Some(1)), Text("a"));
t!(Markup[" /"]: "a1." => Text("a1.")); t!(Markup[" /"]: "a1." => Text("a1"), EnumNumbering(None));
} }
#[test] #[test]

View File

@ -69,6 +69,7 @@ impl Markup {
NodeKind::Shy => Some(MarkupNode::Text('\u{00AD}'.into())), NodeKind::Shy => Some(MarkupNode::Text('\u{00AD}'.into())),
NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())),
NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())),
NodeKind::Ellipsis => Some(MarkupNode::Text('\u{2026}'.into())),
NodeKind::Quote(d) => Some(MarkupNode::Quote(*d)), NodeKind::Quote(d) => Some(MarkupNode::Quote(*d)),
NodeKind::Strong => node.cast().map(MarkupNode::Strong), NodeKind::Strong => node.cast().map(MarkupNode::Strong),
NodeKind::Emph => node.cast().map(MarkupNode::Emph), NodeKind::Emph => node.cast().map(MarkupNode::Emph),

View File

@ -131,6 +131,7 @@ impl Category {
NodeKind::Shy => Some(Category::Shortcut), NodeKind::Shy => Some(Category::Shortcut),
NodeKind::EnDash => Some(Category::Shortcut), NodeKind::EnDash => Some(Category::Shortcut),
NodeKind::EmDash => Some(Category::Shortcut), NodeKind::EmDash => Some(Category::Shortcut),
NodeKind::Ellipsis => Some(Category::Shortcut),
NodeKind::Escape(_) => Some(Category::Escape), NodeKind::Escape(_) => Some(Category::Escape),
NodeKind::Not => Some(Category::Keyword), NodeKind::Not => Some(Category::Keyword),
NodeKind::And => Some(Category::Keyword), NodeKind::And => Some(Category::Keyword),

View File

@ -599,6 +599,8 @@ pub enum NodeKind {
EnDash, EnDash,
/// An em-dash: `---`. /// An em-dash: `---`.
EmDash, EmDash,
/// An ellipsis: `...`.
Ellipsis,
/// A smart quote: `'` (`false`) or `"` (true). /// A smart quote: `'` (`false`) or `"` (true).
Quote(bool), Quote(bool),
/// A slash and the letter "u" followed by a hexadecimal unicode entity /// A slash and the letter "u" followed by a hexadecimal unicode entity
@ -774,6 +776,7 @@ impl NodeKind {
| Self::NonBreakingSpace | Self::NonBreakingSpace
| Self::EnDash | Self::EnDash
| Self::EmDash | Self::EmDash
| Self::Ellipsis
| Self::Quote(_) | Self::Quote(_)
| Self::Escape(_) | Self::Escape(_)
| Self::Strong | Self::Strong
@ -869,6 +872,7 @@ impl NodeKind {
Self::Shy => "soft hyphen", Self::Shy => "soft hyphen",
Self::EnDash => "en dash", Self::EnDash => "en dash",
Self::EmDash => "em dash", Self::EmDash => "em dash",
Self::Ellipsis => "ellipsis",
Self::Quote(false) => "single quote", Self::Quote(false) => "single quote",
Self::Quote(true) => "double quote", Self::Quote(true) => "double quote",
Self::Escape(_) => "escape sequence", Self::Escape(_) => "escape sequence",
@ -992,6 +996,7 @@ impl Hash for NodeKind {
Self::Shy => {} Self::Shy => {}
Self::EnDash => {} Self::EnDash => {}
Self::EmDash => {} Self::EmDash => {}
Self::Ellipsis => {}
Self::Quote(d) => d.hash(state), Self::Quote(d) => d.hash(state),
Self::Escape(c) => c.hash(state), Self::Escape(c) => c.hash(state),
Self::Strong => {} Self::Strong => {}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.7 KiB

After

Width:  |  Height:  |  Size: 4.3 KiB

View File

@ -6,3 +6,7 @@ The non-breaking~space does work.
--- ---
- En dash: -- - En dash: --
- Em dash: --- - Em dash: ---
---
#set text("Roboto")
A... vs {"A..."}