Add lots of new tokens 🍪

This commit is contained in:
Laurenz 2021-01-10 17:52:37 +01:00
parent 3b2a28ca8e
commit 9eac62c31a
5 changed files with 766 additions and 539 deletions

View File

@ -75,7 +75,7 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Node> {
Token::Underscore => Node::Emph, Token::Underscore => Node::Emph,
Token::Tilde => Node::Text("\u{00A0}".into()), Token::Tilde => Node::Text("\u{00A0}".into()),
Token::Backslash => Node::Linebreak, Token::Backslash => Node::Linebreak,
Token::Hashtag => { Token::Hash => {
if at_start { if at_start {
return Some(Node::Heading(heading(p))); return Some(Node::Heading(heading(p)));
} else { } else {
@ -98,10 +98,10 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Node> {
fn heading(p: &mut Parser) -> NodeHeading { fn heading(p: &mut Parser) -> NodeHeading {
// Count hashtags. // Count hashtags.
let mut level = p.span(|p| { let mut level = p.span(|p| {
p.eat_assert(Token::Hashtag); p.eat_assert(Token::Hash);
let mut level = 0u8; let mut level = 0u8;
while p.eat_if(Token::Hashtag) { while p.eat_if(Token::Hash) {
level = level.saturating_add(1); level = level.saturating_add(1);
} }
level level
@ -240,7 +240,7 @@ fn bracket_body(p: &mut Parser) -> Tree {
fn expr(p: &mut Parser) -> Option<Expr> { fn expr(p: &mut Parser) -> Option<Expr> {
binops(p, term, |token| match token { binops(p, term, |token| match token {
Token::Plus => Some(BinOp::Add), Token::Plus => Some(BinOp::Add),
Token::Hyphen => Some(BinOp::Sub), Token::Hyph => Some(BinOp::Sub),
_ => None, _ => None,
}) })
} }
@ -282,7 +282,7 @@ fn binops(
/// Parse a factor of the form `-?value`. /// Parse a factor of the form `-?value`.
fn factor(p: &mut Parser) -> Option<Expr> { fn factor(p: &mut Parser) -> Option<Expr> {
let op = |token| match token { let op = |token| match token {
Token::Hyphen => Some(UnOp::Neg), Token::Hyph => Some(UnOp::Neg),
_ => None, _ => None,
}; };

View File

@ -17,6 +17,7 @@ pub fn resolve_string(string: &str) -> String {
Some('\\') => out.push('\\'), Some('\\') => out.push('\\'),
Some('"') => out.push('"'), Some('"') => out.push('"'),
Some('n') => out.push('\n'), Some('n') => out.push('\n'),
Some('r') => out.push('\r'),
Some('t') => out.push('\t'), Some('t') => out.push('\t'),
Some('u') if s.eat_if('{') => { Some('u') if s.eat_if('{') => {
// TODO: Feedback if closing brace is missing. // TODO: Feedback if closing brace is missing.
@ -137,7 +138,7 @@ mod tests {
test(r#"av\u{6797"#, "av林"); test(r#"av\u{6797"#, "av林");
test(r#"a\\"#, "a\\"); test(r#"a\\"#, "a\\");
test(r#"a\\\nbc"#, "a\\\nbc"); test(r#"a\\\nbc"#, "a\\\nbc");
test(r#"a\tbc"#, "a\tbc"); test(r#"a\t\r\nbc"#, "a\t\r\nbc");
test(r"🌎", "🌎"); test(r"🌎", "🌎");
test(r"🌎\", r"🌎\"); test(r"🌎\", r"🌎\");
test(r"\🌎", r"\🌎"); test(r"\🌎", r"\🌎");

View File

@ -227,24 +227,30 @@ fn test_parse_simple_nodes() {
fn test_parse_headings() { fn test_parse_headings() {
// Basics with spans. // Basics with spans.
t!("# a" t!("# a"
nodes: [S(0..2, Heading(S(0..1, 0), Content![@S(1..2, Text("a"))]))], nodes: [S(0..3, Heading(S(0..1, 0), Content![
@S(1..2, Space), S(2..3, Text("a"))
]))],
spans: true); spans: true);
// Multiple hashtags. // Multiple hashtags.
t!("###three" Heading(2, Content![@Text("three")])); t!("### three" Heading(2, Content![@Space, Text("three")]));
t!("###### six" Heading(5, Content![@Space, Text("six")])); t!("###### six" Heading(5, Content![@Space, Text("six")]));
// Start of heading. // Start of heading.
t!("/**/#" Heading(0, Content![@])); t!("/**/#" Heading(0, Content![@]));
t!("[f][#ok]" Call!("f", Args![Content![Heading(0, Content![@Text("ok")])]])); t!("[f][# ok]" Call!("f", Args![Content![Heading(0, Content![
@Space, Text("ok")
])]]));
// End of heading. // End of heading.
t!("#a\nb" Heading(0, Content![@Text("a")]), Space, Text("b")); t!("# a\nb" Heading(0, Content![@Space, Text("a")]), Space, Text("b"));
// Continued heading. // Continued heading.
t!("#a{\n1\n}b" Heading(0, Content![@Text("a"), Block(Int(1)), Text("b")])); t!("# a{\n1\n}b" Heading(0, Content![
@Space, Text("a"), Block(Int(1)), Text("b")
]));
t!("# a[f][\n\n]d" Heading(0, Content![@ t!("# a[f][\n\n]d" Heading(0, Content![@
Text("a"), Call!("f", Args![Content![Parbreak]]), Text("d"), Space, Text("a"), Call!("f", Args![Content![Parbreak]]), Text("d"),
])); ]));
// No heading. // No heading.

File diff suppressed because it is too large Load Diff

View File

@ -1,68 +1,114 @@
use crate::geom::LengthUnit; use crate::geom::{AngularUnit, LengthUnit};
/// A minimal semantic entity of source code. /// A minimal semantic entity of source code.
#[derive(Debug, Copy, Clone, PartialEq)] #[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> { pub enum Token<'s> {
/// A consecutive non-markup string. /// A left square bracket: `[`.
Text(&'s str),
/// One or more whitespace characters.
///
/// The contained `usize` denotes the number of newlines that were contained
/// in the whitespace.
Space(usize),
/// A line comment with inner string contents `//<str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<str>*/`.
///
/// The comment can contain nested block comments.
BlockComment(&'s str),
/// A left bracket: `[`.
LeftBracket, LeftBracket,
/// A right bracket: `]`. /// A right square bracket: `]`.
RightBracket, RightBracket,
/// A left brace: `{`. /// A left curly brace: `{`.
LeftBrace, LeftBrace,
/// A right brace: `}`. /// A right curly brace: `}`.
RightBrace, RightBrace,
/// A left parenthesis: `(`. /// A left round parenthesis: `(`.
LeftParen, LeftParen,
/// A right parenthesis: `)`. /// A right round parenthesis: `)`.
RightParen, RightParen,
/// An asterisk: `*`.
/// A star: `*`.
Star, Star,
/// An underscore: `_`. /// An underscore: `_`.
Underscore, Underscore,
/// A hashtag: `#`.
Hash,
/// A tilde: `~`. /// A tilde: `~`.
Tilde, Tilde,
/// A backslash followed by whitespace: `\`. /// A backslash followed by nothing or whitespace: `\`.
Backslash, Backslash,
/// A hashtag indicating a section heading: `#`.
Hashtag,
/// A raw block: `` `...` ``.
Raw(TokenRaw<'s>),
/// A unicode escape sequence: `\u{1F5FA}`.
UnicodeEscape(TokenUnicodeEscape<'s>),
/// A colon: `:`.
Colon,
/// A comma: `,`. /// A comma: `,`.
Comma, Comma,
/// A colon: `:`.
Colon,
/// A pipe: `|`. /// A pipe: `|`.
Pipe, Pipe,
/// A plus: `+`. /// A plus: `+`.
Plus, Plus,
/// A hyphen: `-`. /// A hyphen: `-`.
Hyphen, Hyph,
/// A slash: `/`. /// A slash: `/`.
Slash, Slash,
/// A single equals sign: `=`.
Eq,
/// Two equals signs: `==`.
EqEq,
/// An exclamation mark followed by an equals sign: `!=`.
BangEq,
/// A less-than sign: `<`.
Lt,
/// A less-than sign followed by an equals sign: `<=`.
LtEq,
/// A greater-than sign: `>`.
Gt,
/// A greater-than sign followed by an equals sign: `>=`.
GtEq,
/// A plus followed by an equals sign: `+=`.
PlusEq,
/// A hyphen followed by an equals sign: `-=`.
HyphEq,
/// An asterisk followed by an equals sign: `*=`.
StarEq,
/// A slash followed by an equals sign: `/=`.
SlashEq,
/// A question mark: `?`.
Question,
/// Two dots: `..`.
Dots,
/// An equals sign followed by a greater-than sign: `=>`.
Arrow,
/// The `not` operator.
Not,
/// The `and` operator.
And,
/// The `or` operator.
Or,
/// The `let` / `#let` keyword.
Let,
/// The `if` / `#if` keyword.
If,
/// The `else` / `#else` keyword.
Else,
/// The `for` / `#for` keyword.
For,
/// The `in` / `#in` keyword.
In,
/// The `while` / `#while` keyword.
While,
/// The `break` / `#break` keyword.
Break,
/// The `continue` / `#continue` keyword.
Continue,
/// The `return` / `#return` keyword.
Return,
/// The none literal: `none`.
None,
/// One or more whitespace characters.
///
/// The contained `usize` denotes the number of newlines that were contained
/// in the whitespace.
Space(usize),
/// A consecutive non-markup string.
Text(&'s str),
/// An arbitrary number of backticks followed by inner contents, terminated
/// with the same number of backticks: `` `...` ``.
Raw(TokenRaw<'s>),
/// One or two dollar signs followed by inner contents, terminated with the
/// same number of dollar signs.
Math(TokenMath<'s>),
/// A backslash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(TokenUnicodeEscape<'s>),
/// An identifier: `center`. /// An identifier: `center`.
Ident(&'s str), Ident(&'s str),
/// A none: `none`.
None,
/// A boolean: `true`, `false`. /// A boolean: `true`, `false`.
Bool(bool), Bool(bool),
/// An integer: `120`. /// An integer: `120`.
@ -76,11 +122,20 @@ pub enum Token<'s> {
/// _Note_: `50%` is stored as `50.0` here, as in the corresponding /// _Note_: `50%` is stored as `50.0` here, as in the corresponding
/// [literal](super::Expr::Percent). /// [literal](super::Expr::Percent).
Percent(f64), Percent(f64),
/// An angle: `90deg`.
Angle(f64, AngularUnit),
/// A hex value: `#20d82a`. /// A hex value: `#20d82a`.
Hex(&'s str), Hex(&'s str),
/// A quoted string: `"..."`. /// A quoted string: `"..."`.
Str(TokenStr<'s>), Str(TokenStr<'s>),
/// Two slashes followed by inner contents, terminated with a newline:
/// `//<str>\n`.
LineComment(&'s str),
/// A slash and a star followed by inner contents, terminated with a star
/// and a slash: `/*<str>*/`.
///
/// The comment can contain nested block comments.
BlockComment(&'s str),
/// Things that are not valid tokens. /// Things that are not valid tokens.
Invalid(&'s str), Invalid(&'s str),
} }
@ -98,15 +153,6 @@ pub struct TokenStr<'s> {
pub terminated: bool, pub terminated: bool,
} }
/// A unicode escape sequence: `\u{1F5FA}`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct TokenUnicodeEscape<'s> {
/// The escape sequence between two braces.
pub sequence: &'s str,
/// Whether the closing brace was present.
pub terminated: bool,
}
/// A raw block: `` `...` ``. /// A raw block: `` `...` ``.
#[derive(Debug, Copy, Clone, PartialEq)] #[derive(Debug, Copy, Clone, PartialEq)]
pub struct TokenRaw<'s> { pub struct TokenRaw<'s> {
@ -118,48 +164,91 @@ pub struct TokenRaw<'s> {
pub terminated: bool, pub terminated: bool,
} }
/// A math formula: `$2pi + x$`, `$$f'(x) = x^2$$`.
///
/// The formula is delimited by one or two dollar signs on each side and is
/// expected to be closed by the same number of dollar signs it was opened
/// with.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct TokenMath<'s> {
/// The formula between the dollars.
pub formula: &'s str,
/// Whether the formula was surrounded by one dollar (`true`) or two dollars
/// (`false`).
pub inline: bool,
/// Whether the closing dollars were present.
pub terminated: bool,
}
/// A unicode escape sequence: `\u{1F5FA}`.
///
/// Written as a backslash and the letter "u" followed by a hexadecimal
/// unicode entity enclosed in curly braces.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct TokenUnicodeEscape<'s> {
/// The escape sequence between the braces (presumably `1F5FA` for
/// `\u{1F5FA}`, i.e. without the braces themselves).
pub sequence: &'s str,
/// Whether the closing brace was present.
pub terminated: bool,
}
impl<'s> Token<'s> { impl<'s> Token<'s> {
/// The natural-language name of this token for use in error messages. /// The English name of this token for use in error messages.
pub fn name(self) -> &'static str { pub fn name(self) -> &'static str {
match self { match self {
Self::Text(_) => "text",
Self::Space(_) => "space",
Self::LineComment(_) => "line comment",
Self::BlockComment(_) => "block comment",
Self::LeftBracket => "opening bracket", Self::LeftBracket => "opening bracket",
Self::RightBracket => "closing bracket", Self::RightBracket => "closing bracket",
Self::LeftBrace => "opening brace", Self::LeftBrace => "opening brace",
Self::RightBrace => "closing brace", Self::RightBrace => "closing brace",
Self::LeftParen => "opening paren", Self::LeftParen => "opening paren",
Self::RightParen => "closing paren", Self::RightParen => "closing paren",
Self::Star => "star", Self::Star => "star",
Self::Underscore => "underscore", Self::Underscore => "underscore",
Self::Hash => "hashtag",
Self::Tilde => "tilde",
Self::Backslash => "backslash", Self::Backslash => "backslash",
Self::Hashtag => "hashtag",
Self::Tilde => "tidle",
Self::Raw { .. } => "raw block",
Self::UnicodeEscape { .. } => "unicode escape sequence",
Self::Colon => "colon",
Self::Comma => "comma", Self::Comma => "comma",
Self::Colon => "colon",
Self::Pipe => "pipe", Self::Pipe => "pipe",
Self::Plus => "plus sign", Self::Plus => "plus",
Self::Hyphen => "minus sign", Self::Hyph => "minus",
Self::Slash => "slash", Self::Slash => "slash",
Self::Eq => "assignment operator",
Self::EqEq => "equality operator",
Self::BangEq => "inequality operator",
Self::Lt => "less than operator",
Self::LtEq => "less than or equal operator",
Self::Gt => "greater than operator",
Self::GtEq => "greater than or equal operator",
Self::PlusEq => "add-assign operator",
Self::HyphEq => "subtract-assign operator",
Self::StarEq => "multiply-assign operator",
Self::SlashEq => "divide-assign operator",
Self::Question => "question mark",
Self::Dots => "dots",
Self::Arrow => "arrow",
Self::Not => "not operator",
Self::And => "and operator",
Self::Or => "or operator",
Self::Let => "let keyword",
Self::If => "if keyword",
Self::Else => "else keyword",
Self::For => "for keyword",
Self::In => "in keyword",
Self::While => "while keyword",
Self::Break => "break keyword",
Self::Continue => "continue keyword",
Self::Return => "return keyword",
Self::None => "none", Self::None => "none",
Self::Space(_) => "space",
Self::Text(_) => "text",
Self::Raw(_) => "raw block",
Self::Math(_) => "math formula",
Self::UnicodeEscape(_) => "unicode escape sequence",
Self::Ident(_) => "identifier", Self::Ident(_) => "identifier",
Self::Bool(_) => "bool", Self::Bool(_) => "boolean",
Self::Int(_) => "integer", Self::Int(_) => "integer",
Self::Float(_) => "float", Self::Float(_) => "float",
Self::Length(..) => "length", Self::Length(..) => "length",
Self::Angle(..) => "angle",
Self::Percent(_) => "percentage", Self::Percent(_) => "percentage",
Self::Hex(_) => "hex value", Self::Hex(_) => "hex value",
Self::Str { .. } => "string", Self::Str(_) => "string",
Self::LineComment(_) => "line comment",
Self::BlockComment(_) => "block comment",
Self::Invalid("*/") => "end of block comment", Self::Invalid("*/") => "end of block comment",
Self::Invalid(_) => "invalid token", Self::Invalid(_) => "invalid token",
} }