Add lots of new tokens 🍪

2025-07-19 02:22:53 +08:00 · 2021-01-10 17:52:37 +01:00 · 2021-01-10 17:52:37 +01:00 · 9eac62c31a
commit 9eac62c31a
parent 3b2a28ca8e
5 changed files with 766 additions and 539 deletions
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@ -75,7 +75,7 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Node> {
        Token::Underscore => Node::Emph,
        Token::Tilde => Node::Text("\u{00A0}".into()),
        Token::Backslash => Node::Linebreak,
-        Token::Hashtag => {
+        Token::Hash => {
            if at_start {
                return Some(Node::Heading(heading(p)));
            } else {
@ -98,10 +98,10 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Node> {
 fn heading(p: &mut Parser) -> NodeHeading {
    // Count hashtags.
    let mut level = p.span(|p| {
-        p.eat_assert(Token::Hashtag);
+        p.eat_assert(Token::Hash);
        let mut level = 0u8;
-        while p.eat_if(Token::Hashtag) {
+        while p.eat_if(Token::Hash) {
            level = level.saturating_add(1);
        }
        level
@ -240,7 +240,7 @@ fn bracket_body(p: &mut Parser) -> Tree {
 fn expr(p: &mut Parser) -> Option<Expr> {
    binops(p, term, |token| match token {
        Token::Plus => Some(BinOp::Add),
-        Token::Hyphen => Some(BinOp::Sub),
+        Token::Hyph => Some(BinOp::Sub),
        _ => None,
    })
 }
@ -282,7 +282,7 @@ fn binops(
 /// Parse a factor of the form `-?value`.
 fn factor(p: &mut Parser) -> Option<Expr> {
    let op = |token| match token {
-        Token::Hyphen => Some(UnOp::Neg),
+        Token::Hyph => Some(UnOp::Neg),
        _ => None,
    };
--- a/src/parse/resolve.rs
+++ b/src/parse/resolve.rs
@ -17,6 +17,7 @@ pub fn resolve_string(string: &str) -> String {
            Some('\\') => out.push('\\'),
            Some('"') => out.push('"'),
            Some('n') => out.push('\n'),
            Some('r') => out.push('\r'),
            Some('t') => out.push('\t'),
            Some('u') if s.eat_if('{') => {
                // TODO: Feedback if closing brace is missing.
@ -137,7 +138,7 @@ mod tests {
        test(r#"av\u{6797"#,    "av林");
        test(r#"a\\"#,          "a\\");
        test(r#"a\\\nbc"#,      "a\\\nbc");
-        test(r#"a\tbc"#,        "a\tbc");
+        test(r#"a\t\r\nbc"#,    "a\t\r\nbc");
        test(r"🌎",             "🌎");
        test(r"🌎\",            r"🌎\");
        test(r"\🌎",            r"\🌎");
--- a/src/parse/tests.rs
+++ b/src/parse/tests.rs
@ -226,25 +226,31 @@ fn test_parse_simple_nodes() {
 #[test]
 fn test_parse_headings() {
    // Basics with spans.
-    t!("#a"
+    t!("# a"
-        nodes: [S(0..2, Heading(S(0..1, 0), Content![@S(1..2, Text("a"))]))],
+        nodes: [S(0..3, Heading(S(0..1, 0), Content![
            @S(1..2, Space), S(2..3, Text("a"))
        ]))],
        spans: true);
    // Multiple hashtags.
-    t!("###three"   Heading(2, Content![@Text("three")]));
+    t!("### three"   Heading(2, Content![@Space, Text("three")]));
    t!("###### six" Heading(5, Content![@Space, Text("six")]));
    // Start of heading.
    t!("/**/#"    Heading(0, Content![@]));
-    t!("[f][#ok]" Call!("f", Args![Content![Heading(0, Content![@Text("ok")])]]));
+    t!("[f][# ok]" Call!("f", Args![Content![Heading(0, Content![
        @Space, Text("ok")
    ])]]));
    // End of heading.
-    t!("#a\nb" Heading(0, Content![@Text("a")]), Space, Text("b"));
+    t!("# a\nb" Heading(0, Content![@Space, Text("a")]), Space, Text("b"));
    // Continued heading.
-    t!("#a{\n1\n}b"   Heading(0, Content![@Text("a"), Block(Int(1)), Text("b")]));
+    t!("# a{\n1\n}b"   Heading(0, Content![
-    t!("#a[f][\n\n]d" Heading(0, Content![@
+        @Space, Text("a"), Block(Int(1)), Text("b")
-        Text("a"), Call!("f", Args![Content![Parbreak]]), Text("d"),
+    ]));
    t!("# a[f][\n\n]d" Heading(0, Content![@
        Space, Text("a"), Call!("f", Args![Content![Parbreak]]), Text("d"),
    ]));
    // No heading.
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
--- a/src/syntax/token.rs
+++ b/src/syntax/token.rs
@ -1,68 +1,114 @@
-use crate::geom::LengthUnit;
+use crate::geom::{AngularUnit, LengthUnit};
 /// A minimal semantic entity of source code.
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum Token<'s> {
-    /// A consecutive non-markup string.
+    /// A left square bracket: `[`.
    Text(&'s str),
    /// One or more whitespace characters.
    ///
    /// The contained `usize` denotes the number of newlines that were contained
    /// in the whitespace.
    Space(usize),
    /// A line comment with inner string contents `//<str>\n`.
    LineComment(&'s str),
    /// A block comment with inner string contents `/*<str>*/`.
    ///
    /// The comment can contain nested block comments.
    BlockComment(&'s str),
    /// A left bracket: `[`.
    LeftBracket,
-    /// A right bracket: `]`.
+    /// A right square bracket: `]`.
    RightBracket,
-    /// A left brace: `{`.
+    /// A left curly brace: `{`.
    LeftBrace,
-    /// A right brace: `}`.
+    /// A right curly brace: `}`.
    RightBrace,
-    /// A left parenthesis: `(`.
+    /// A left round parenthesis: `(`.
    LeftParen,
-    /// A right parenthesis: `)`.
+    /// A right round parenthesis: `)`.
    RightParen,
-
+    /// An asterisk: `*`.
    /// A star: `*`.
    Star,
    /// An underscore: `_`.
    Underscore,
    /// A hashtag: `#`.
    Hash,
    /// A tilde: `~`.
    Tilde,
-    /// A backslash followed by whitespace: `\`.
+    /// A backslash followed by nothing or whitespace: `\`.
    Backslash,
    /// A hashtag indicating a section heading: `#`.
    Hashtag,
    /// A raw block: `` `...` ``.
    Raw(TokenRaw<'s>),
    /// A unicode escape sequence: `\u{1F5FA}`.
    UnicodeEscape(TokenUnicodeEscape<'s>),
    /// A colon: `:`.
    Colon,
    /// A comma: `,`.
    Comma,
    /// A colon: `:`.
    Colon,
    /// A pipe: `|`.
    Pipe,
    /// A plus: `+`.
    Plus,
    /// A hyphen: `-`.
-    Hyphen,
+    Hyph,
    /// A slash: `/`.
    Slash,
-
+    /// A single equals sign: `=`.
    Eq,
    /// Two equals signs: `==`.
    EqEq,
    /// An exclamation mark followed by an equals sign: `!=`.
    BangEq,
    /// A less-than sign: `<`.
    Lt,
    /// A less-than sign followed by an equals sign: `<=`.
    LtEq,
    /// A greater-than sign: `>`.
    Gt,
    /// A greater-than sign followed by an equals sign: `>=`.
    GtEq,
    /// A plus followed by an equals sign: `+=`.
    PlusEq,
    /// A hyphen followed by an equals sign: `-=`.
    HyphEq,
    /// An asterisk followed by an equals sign: `*=`.
    StarEq,
    /// A slash followed by an equals sign: `/=`.
    SlashEq,
    /// A question mark: `?`.
    Question,
    /// Two dots: `..`.
    Dots,
    /// An equals sign followed by a greater-than sign: `=>`.
    Arrow,
    /// The `not` operator.
    Not,
    /// The `and` operator.
    And,
    /// The `or` operator.
    Or,
    /// The `let` / `#let` keyword.
    Let,
    /// The `if` / `#if` keyword.
    If,
    /// The `else` / `#else` keyword.
    Else,
    /// The `for` / `#for` keyword.
    For,
    /// The `in` / `#in` keyword.
    In,
    /// The `while` / `#while` keyword.
    While,
    /// The `break` / `#break` keyword.
    Break,
    /// The `continue` / `#continue` keyword.
    Continue,
    /// The `return` / `#return` keyword.
    Return,
    /// The none literal: `none`.
    None,
    /// One or more whitespace characters.
    ///
    /// The contained `usize` denotes the number of newlines that were contained
    /// in the whitespace.
    Space(usize),
    /// A consecutive non-markup string.
    Text(&'s str),
    /// An arbitrary number of backticks followed by inner contents, terminated
    /// with the same number of backticks: `` `...` ``.
    Raw(TokenRaw<'s>),
    /// One or two dollar signs followed by inner contents, terminated with the
    /// same number of dollar signs.
    Math(TokenMath<'s>),
    /// A backslash and the letter "u" followed by a hexadecimal unicode
    /// entity enclosed in curly braces: `\u{1F5FA}`.
    UnicodeEscape(TokenUnicodeEscape<'s>),
    /// An identifier: `center`.
    Ident(&'s str),
    /// A none: `none`.
    None,
    /// A boolean: `true`, `false`.
    Bool(bool),
    /// An integer: `120`.
@ -76,11 +122,20 @@ pub enum Token<'s> {
    /// _Note_: `50%` is stored as `50.0` here, as in the corresponding
    /// [literal](super::Expr::Percent).
    Percent(f64),
    /// An angle: `90deg`.
    Angle(f64, AngularUnit),
    /// A hex value: `#20d82a`.
    Hex(&'s str),
    /// A quoted string: `"..."`.
    Str(TokenStr<'s>),
-
+    /// Two slashes followed by inner contents, terminated with a newline:
    /// `//<str>\n`.
    LineComment(&'s str),
    /// A slash and a star followed by inner contents, terminated with a star
    /// and a slash: `/*<str>*/`.
    ///
    /// The comment can contain nested block comments.
    BlockComment(&'s str),
    /// Things that are not valid tokens.
    Invalid(&'s str),
 }
@ -98,15 +153,6 @@ pub struct TokenStr<'s> {
    pub terminated: bool,
 }
 /// A unicode escape sequence: `\u{1F5FA}`.
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct TokenUnicodeEscape<'s> {
    /// The escape sequence between two braces.
    pub sequence: &'s str,
    /// Whether the closing brace was present.
    pub terminated: bool,
 }
 /// A raw block: `` `...` ``.
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct TokenRaw<'s> {
@ -118,48 +164,91 @@ pub struct TokenRaw<'s> {
    pub terminated: bool,
 }
 /// A math formula: `$2pi + x$`, `$$f'(x) = x^2$$`.
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct TokenMath<'s> {
    /// The formula between the dollars.
    pub formula: &'s str,
    /// Whether the formula was surrounded by one dollar (true) or two dollars
    /// (false).
    pub inline: bool,
    /// Whether the closing dollars were present.
    pub terminated: bool,
 }
 /// A unicode escape sequence: `\u{1F5FA}`.
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct TokenUnicodeEscape<'s> {
    /// The escape sequence between the braces.
    pub sequence: &'s str,
    /// Whether the closing brace was present.
    pub terminated: bool,
 }
 impl<'s> Token<'s> {
-    /// The natural-language name of this token for use in error messages.
+    /// The English name of this token for use in error messages.
    pub fn name(self) -> &'static str {
        match self {
            Self::Text(_) => "text",
            Self::Space(_) => "space",
            Self::LineComment(_) => "line comment",
            Self::BlockComment(_) => "block comment",
            Self::LeftBracket => "opening bracket",
            Self::RightBracket => "closing bracket",
            Self::LeftBrace => "opening brace",
            Self::RightBrace => "closing brace",
            Self::LeftParen => "opening paren",
            Self::RightParen => "closing paren",
            Self::Star => "star",
            Self::Underscore => "underscore",
            Self::Hash => "hashtag",
            Self::Tilde => "tilde",
            Self::Backslash => "backslash",
            Self::Hashtag => "hashtag",
            Self::Tilde => "tidle",
            Self::Raw { .. } => "raw block",
            Self::UnicodeEscape { .. } => "unicode escape sequence",
            Self::Colon => "colon",
            Self::Comma => "comma",
            Self::Colon => "colon",
            Self::Pipe => "pipe",
-            Self::Plus => "plus sign",
+            Self::Plus => "plus",
-            Self::Hyphen => "minus sign",
+            Self::Hyph => "minus",
            Self::Slash => "slash",
-
+            Self::Eq => "assignment operator",
            Self::EqEq => "equality operator",
            Self::BangEq => "inequality operator",
            Self::Lt => "less than operator",
            Self::LtEq => "less than or equal operator",
            Self::Gt => "greater than operator",
            Self::GtEq => "greater than or equal operator",
            Self::PlusEq => "add-assign operator",
            Self::HyphEq => "subtract-assign operator",
            Self::StarEq => "multiply-assign operator",
            Self::SlashEq => "divide-assign operator",
            Self::Question => "question mark",
            Self::Dots => "dots",
            Self::Arrow => "arrow",
            Self::Not => "not operator",
            Self::And => "and operator",
            Self::Or => "or operator",
            Self::Let => "let keyword",
            Self::If => "if keyword",
            Self::Else => "else keyword",
            Self::For => "for keyword",
            Self::In => "in keyword",
            Self::While => "while keyword",
            Self::Break => "break keyword",
            Self::Continue => "continue keyword",
            Self::Return => "return keyword",
            Self::None => "none",
            Self::Space(_) => "space",
            Self::Text(_) => "text",
            Self::Raw(_) => "raw block",
            Self::Math(_) => "math formula",
            Self::UnicodeEscape(_) => "unicode escape sequence",
            Self::Ident(_) => "identifier",
-            Self::Bool(_) => "bool",
+            Self::Bool(_) => "boolean",
            Self::Int(_) => "integer",
            Self::Float(_) => "float",
            Self::Length(..) => "length",
            Self::Angle(..) => "angle",
            Self::Percent(_) => "percentage",
            Self::Hex(_) => "hex value",
-            Self::Str { .. } => "string",
+            Self::Str(_) => "string",
-
+            Self::LineComment(_) => "line comment",
            Self::BlockComment(_) => "block comment",
            Self::Invalid("*/") => "end of block comment",
            Self::Invalid(_) => "invalid token",
        }