mirror of
https://github.com/typst/typst
synced 2025-05-14 17:15:28 +08:00
Add lots of new tokens 🍪
This commit is contained in:
parent
3b2a28ca8e
commit
9eac62c31a
@ -75,7 +75,7 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Node> {
|
|||||||
Token::Underscore => Node::Emph,
|
Token::Underscore => Node::Emph,
|
||||||
Token::Tilde => Node::Text("\u{00A0}".into()),
|
Token::Tilde => Node::Text("\u{00A0}".into()),
|
||||||
Token::Backslash => Node::Linebreak,
|
Token::Backslash => Node::Linebreak,
|
||||||
Token::Hashtag => {
|
Token::Hash => {
|
||||||
if at_start {
|
if at_start {
|
||||||
return Some(Node::Heading(heading(p)));
|
return Some(Node::Heading(heading(p)));
|
||||||
} else {
|
} else {
|
||||||
@ -98,10 +98,10 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Node> {
|
|||||||
fn heading(p: &mut Parser) -> NodeHeading {
|
fn heading(p: &mut Parser) -> NodeHeading {
|
||||||
// Count hashtags.
|
// Count hashtags.
|
||||||
let mut level = p.span(|p| {
|
let mut level = p.span(|p| {
|
||||||
p.eat_assert(Token::Hashtag);
|
p.eat_assert(Token::Hash);
|
||||||
|
|
||||||
let mut level = 0u8;
|
let mut level = 0u8;
|
||||||
while p.eat_if(Token::Hashtag) {
|
while p.eat_if(Token::Hash) {
|
||||||
level = level.saturating_add(1);
|
level = level.saturating_add(1);
|
||||||
}
|
}
|
||||||
level
|
level
|
||||||
@ -240,7 +240,7 @@ fn bracket_body(p: &mut Parser) -> Tree {
|
|||||||
fn expr(p: &mut Parser) -> Option<Expr> {
|
fn expr(p: &mut Parser) -> Option<Expr> {
|
||||||
binops(p, term, |token| match token {
|
binops(p, term, |token| match token {
|
||||||
Token::Plus => Some(BinOp::Add),
|
Token::Plus => Some(BinOp::Add),
|
||||||
Token::Hyphen => Some(BinOp::Sub),
|
Token::Hyph => Some(BinOp::Sub),
|
||||||
_ => None,
|
_ => None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -282,7 +282,7 @@ fn binops(
|
|||||||
/// Parse a factor of the form `-?value`.
|
/// Parse a factor of the form `-?value`.
|
||||||
fn factor(p: &mut Parser) -> Option<Expr> {
|
fn factor(p: &mut Parser) -> Option<Expr> {
|
||||||
let op = |token| match token {
|
let op = |token| match token {
|
||||||
Token::Hyphen => Some(UnOp::Neg),
|
Token::Hyph => Some(UnOp::Neg),
|
||||||
_ => None,
|
_ => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@ pub fn resolve_string(string: &str) -> String {
|
|||||||
Some('\\') => out.push('\\'),
|
Some('\\') => out.push('\\'),
|
||||||
Some('"') => out.push('"'),
|
Some('"') => out.push('"'),
|
||||||
Some('n') => out.push('\n'),
|
Some('n') => out.push('\n'),
|
||||||
|
Some('r') => out.push('\r'),
|
||||||
Some('t') => out.push('\t'),
|
Some('t') => out.push('\t'),
|
||||||
Some('u') if s.eat_if('{') => {
|
Some('u') if s.eat_if('{') => {
|
||||||
// TODO: Feedback if closing brace is missing.
|
// TODO: Feedback if closing brace is missing.
|
||||||
@ -137,7 +138,7 @@ mod tests {
|
|||||||
test(r#"av\u{6797"#, "av林");
|
test(r#"av\u{6797"#, "av林");
|
||||||
test(r#"a\\"#, "a\\");
|
test(r#"a\\"#, "a\\");
|
||||||
test(r#"a\\\nbc"#, "a\\\nbc");
|
test(r#"a\\\nbc"#, "a\\\nbc");
|
||||||
test(r#"a\tbc"#, "a\tbc");
|
test(r#"a\t\r\nbc"#, "a\t\r\nbc");
|
||||||
test(r"🌎", "🌎");
|
test(r"🌎", "🌎");
|
||||||
test(r"🌎\", r"🌎\");
|
test(r"🌎\", r"🌎\");
|
||||||
test(r"\🌎", r"\🌎");
|
test(r"\🌎", r"\🌎");
|
||||||
|
@ -226,25 +226,31 @@ fn test_parse_simple_nodes() {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_parse_headings() {
|
fn test_parse_headings() {
|
||||||
// Basics with spans.
|
// Basics with spans.
|
||||||
t!("#a"
|
t!("# a"
|
||||||
nodes: [S(0..2, Heading(S(0..1, 0), Content![@S(1..2, Text("a"))]))],
|
nodes: [S(0..3, Heading(S(0..1, 0), Content![
|
||||||
|
@S(1..2, Space), S(2..3, Text("a"))
|
||||||
|
]))],
|
||||||
spans: true);
|
spans: true);
|
||||||
|
|
||||||
// Multiple hashtags.
|
// Multiple hashtags.
|
||||||
t!("###three" Heading(2, Content![@Text("three")]));
|
t!("### three" Heading(2, Content![@Space, Text("three")]));
|
||||||
t!("###### six" Heading(5, Content![@Space, Text("six")]));
|
t!("###### six" Heading(5, Content![@Space, Text("six")]));
|
||||||
|
|
||||||
// Start of heading.
|
// Start of heading.
|
||||||
t!("/**/#" Heading(0, Content![@]));
|
t!("/**/#" Heading(0, Content![@]));
|
||||||
t!("[f][#ok]" Call!("f", Args![Content![Heading(0, Content![@Text("ok")])]]));
|
t!("[f][# ok]" Call!("f", Args![Content![Heading(0, Content![
|
||||||
|
@Space, Text("ok")
|
||||||
|
])]]));
|
||||||
|
|
||||||
// End of heading.
|
// End of heading.
|
||||||
t!("#a\nb" Heading(0, Content![@Text("a")]), Space, Text("b"));
|
t!("# a\nb" Heading(0, Content![@Space, Text("a")]), Space, Text("b"));
|
||||||
|
|
||||||
// Continued heading.
|
// Continued heading.
|
||||||
t!("#a{\n1\n}b" Heading(0, Content![@Text("a"), Block(Int(1)), Text("b")]));
|
t!("# a{\n1\n}b" Heading(0, Content![
|
||||||
t!("#a[f][\n\n]d" Heading(0, Content![@
|
@Space, Text("a"), Block(Int(1)), Text("b")
|
||||||
Text("a"), Call!("f", Args![Content![Parbreak]]), Text("d"),
|
]));
|
||||||
|
t!("# a[f][\n\n]d" Heading(0, Content![@
|
||||||
|
Space, Text("a"), Call!("f", Args![Content![Parbreak]]), Text("d"),
|
||||||
]));
|
]));
|
||||||
|
|
||||||
// No heading.
|
// No heading.
|
||||||
|
1033
src/parse/tokens.rs
1033
src/parse/tokens.rs
File diff suppressed because it is too large
Load Diff
@ -1,68 +1,114 @@
|
|||||||
use crate::geom::LengthUnit;
|
use crate::geom::{AngularUnit, LengthUnit};
|
||||||
|
|
||||||
/// A minimal semantic entity of source code.
|
/// A minimal semantic entity of source code.
|
||||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||||
pub enum Token<'s> {
|
pub enum Token<'s> {
|
||||||
/// A consecutive non-markup string.
|
/// A left square bracket: `[`.
|
||||||
Text(&'s str),
|
|
||||||
/// One or more whitespace characters.
|
|
||||||
///
|
|
||||||
/// The contained `usize` denotes the number of newlines that were contained
|
|
||||||
/// in the whitespace.
|
|
||||||
Space(usize),
|
|
||||||
|
|
||||||
/// A line comment with inner string contents `//<str>\n`.
|
|
||||||
LineComment(&'s str),
|
|
||||||
/// A block comment with inner string contents `/*<str>*/`.
|
|
||||||
///
|
|
||||||
/// The comment can contain nested block comments.
|
|
||||||
BlockComment(&'s str),
|
|
||||||
|
|
||||||
/// A left bracket: `[`.
|
|
||||||
LeftBracket,
|
LeftBracket,
|
||||||
/// A right bracket: `]`.
|
/// A right square bracket: `]`.
|
||||||
RightBracket,
|
RightBracket,
|
||||||
/// A left brace: `{`.
|
/// A left curly brace: `{`.
|
||||||
LeftBrace,
|
LeftBrace,
|
||||||
/// A right brace: `}`.
|
/// A right curly brace: `}`.
|
||||||
RightBrace,
|
RightBrace,
|
||||||
/// A left parenthesis: `(`.
|
/// A left round parenthesis: `(`.
|
||||||
LeftParen,
|
LeftParen,
|
||||||
/// A right parenthesis: `)`.
|
/// A right round parenthesis: `)`.
|
||||||
RightParen,
|
RightParen,
|
||||||
|
/// An asterisk: `*`.
|
||||||
/// A star: `*`.
|
|
||||||
Star,
|
Star,
|
||||||
/// An underscore: `_`.
|
/// An underscore: `_`.
|
||||||
Underscore,
|
Underscore,
|
||||||
|
/// A hashtag: `#`.
|
||||||
|
Hash,
|
||||||
/// A tilde: `~`.
|
/// A tilde: `~`.
|
||||||
Tilde,
|
Tilde,
|
||||||
/// A backslash followed by whitespace: `\`.
|
/// A backslash followed by nothing or whitespace: `\`.
|
||||||
Backslash,
|
Backslash,
|
||||||
/// A hashtag indicating a section heading: `#`.
|
|
||||||
Hashtag,
|
|
||||||
/// A raw block: `` `...` ``.
|
|
||||||
Raw(TokenRaw<'s>),
|
|
||||||
/// A unicode escape sequence: `\u{1F5FA}`.
|
|
||||||
UnicodeEscape(TokenUnicodeEscape<'s>),
|
|
||||||
|
|
||||||
/// A colon: `:`.
|
|
||||||
Colon,
|
|
||||||
/// A comma: `,`.
|
/// A comma: `,`.
|
||||||
Comma,
|
Comma,
|
||||||
|
/// A colon: `:`.
|
||||||
|
Colon,
|
||||||
/// A pipe: `|`.
|
/// A pipe: `|`.
|
||||||
Pipe,
|
Pipe,
|
||||||
/// A plus: `+`.
|
/// A plus: `+`.
|
||||||
Plus,
|
Plus,
|
||||||
/// A hyphen: `-`.
|
/// A hyphen: `-`.
|
||||||
Hyphen,
|
Hyph,
|
||||||
/// A slash: `/`.
|
/// A slash: `/`.
|
||||||
Slash,
|
Slash,
|
||||||
|
/// A single equals sign: `=`.
|
||||||
|
Eq,
|
||||||
|
/// Two equals signs: `==`.
|
||||||
|
EqEq,
|
||||||
|
/// An exclamation mark followed by an equals sign: `!=`.
|
||||||
|
BangEq,
|
||||||
|
/// A less-than sign: `<`.
|
||||||
|
Lt,
|
||||||
|
/// A less-than sign followed by an equals sign: `<=`.
|
||||||
|
LtEq,
|
||||||
|
/// A greater-than sign: `>`.
|
||||||
|
Gt,
|
||||||
|
/// A greater-than sign followed by an equals sign: `>=`.
|
||||||
|
GtEq,
|
||||||
|
/// A plus followed by an equals sign: `+=`.
|
||||||
|
PlusEq,
|
||||||
|
/// A hyphen followed by an equals sign: `-=`.
|
||||||
|
HyphEq,
|
||||||
|
/// An asterisk followed by an equals sign: `*=`.
|
||||||
|
StarEq,
|
||||||
|
/// A slash followed by an equals sign: `/=`.
|
||||||
|
SlashEq,
|
||||||
|
/// A question mark: `?`.
|
||||||
|
Question,
|
||||||
|
/// Two dots: `..`.
|
||||||
|
Dots,
|
||||||
|
/// An equals sign followed by a greater-than sign: `=>`.
|
||||||
|
Arrow,
|
||||||
|
/// The `not` operator.
|
||||||
|
Not,
|
||||||
|
/// The `and` operator.
|
||||||
|
And,
|
||||||
|
/// The `or` operator.
|
||||||
|
Or,
|
||||||
|
/// The `let` / `#let` keyword.
|
||||||
|
Let,
|
||||||
|
/// The `if` / `#if` keyword.
|
||||||
|
If,
|
||||||
|
/// The `else` / `#else` keyword.
|
||||||
|
Else,
|
||||||
|
/// The `for` / `#for` keyword.
|
||||||
|
For,
|
||||||
|
/// The `in` / `#in` keyword.
|
||||||
|
In,
|
||||||
|
/// The `while` / `#while` keyword.
|
||||||
|
While,
|
||||||
|
/// The `break` / `#break` keyword.
|
||||||
|
Break,
|
||||||
|
/// The `continue` / `#continue` keyword.
|
||||||
|
Continue,
|
||||||
|
/// The `return` / `#return` keyword.
|
||||||
|
Return,
|
||||||
|
/// The none literal: `none`.
|
||||||
|
None,
|
||||||
|
/// One or more whitespace characters.
|
||||||
|
///
|
||||||
|
/// The contained `usize` denotes the number of newlines that were contained
|
||||||
|
/// in the whitespace.
|
||||||
|
Space(usize),
|
||||||
|
/// A consecutive non-markup string.
|
||||||
|
Text(&'s str),
|
||||||
|
/// An arbitrary number of backticks followed by inner contents, terminated
|
||||||
|
/// with the same number of backticks: `` `...` ``.
|
||||||
|
Raw(TokenRaw<'s>),
|
||||||
|
/// One or two dollar signs followed by inner contents, terminated with the
|
||||||
|
/// same number of dollar signs.
|
||||||
|
Math(TokenMath<'s>),
|
||||||
|
/// A backslash and the letter "u" followed by a hexadecimal unicode entity
|
||||||
|
/// enclosed in curly braces: `\u{1F5FA}`.
|
||||||
|
UnicodeEscape(TokenUnicodeEscape<'s>),
|
||||||
/// An identifier: `center`.
|
/// An identifier: `center`.
|
||||||
Ident(&'s str),
|
Ident(&'s str),
|
||||||
/// A none: `none`.
|
|
||||||
None,
|
|
||||||
/// A boolean: `true`, `false`.
|
/// A boolean: `true`, `false`.
|
||||||
Bool(bool),
|
Bool(bool),
|
||||||
/// An integer: `120`.
|
/// An integer: `120`.
|
||||||
@ -76,11 +122,20 @@ pub enum Token<'s> {
|
|||||||
/// _Note_: `50%` is stored as `50.0` here, as in the corresponding
|
/// _Note_: `50%` is stored as `50.0` here, as in the corresponding
|
||||||
/// [literal](super::Expr::Percent).
|
/// [literal](super::Expr::Percent).
|
||||||
Percent(f64),
|
Percent(f64),
|
||||||
|
/// An angle: `90deg`.
|
||||||
|
Angle(f64, AngularUnit),
|
||||||
/// A hex value: `#20d82a`.
|
/// A hex value: `#20d82a`.
|
||||||
Hex(&'s str),
|
Hex(&'s str),
|
||||||
/// A quoted string: `"..."`.
|
/// A quoted string: `"..."`.
|
||||||
Str(TokenStr<'s>),
|
Str(TokenStr<'s>),
|
||||||
|
/// Two slashes followed by inner contents, terminated with a newline:
|
||||||
|
/// `//<str>\n`.
|
||||||
|
LineComment(&'s str),
|
||||||
|
/// A slash and a star followed by inner contents, terminated with a star
|
||||||
|
/// and a slash: `/*<str>*/`.
|
||||||
|
///
|
||||||
|
/// The comment can contain nested block comments.
|
||||||
|
BlockComment(&'s str),
|
||||||
/// Things that are not valid tokens.
|
/// Things that are not valid tokens.
|
||||||
Invalid(&'s str),
|
Invalid(&'s str),
|
||||||
}
|
}
|
||||||
@ -98,15 +153,6 @@ pub struct TokenStr<'s> {
|
|||||||
pub terminated: bool,
|
pub terminated: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A unicode escape sequence: `\u{1F5FA}`.
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
|
||||||
pub struct TokenUnicodeEscape<'s> {
|
|
||||||
/// The escape sequence between two braces.
|
|
||||||
pub sequence: &'s str,
|
|
||||||
/// Whether the closing brace was present.
|
|
||||||
pub terminated: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A raw block: `` `...` ``.
|
/// A raw block: `` `...` ``.
|
||||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||||
pub struct TokenRaw<'s> {
|
pub struct TokenRaw<'s> {
|
||||||
@ -118,48 +164,91 @@ pub struct TokenRaw<'s> {
|
|||||||
pub terminated: bool,
|
pub terminated: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A math formula: `$2pi + x$`, `$$f'(x) = x^2$$`.
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||||
|
pub struct TokenMath<'s> {
|
||||||
|
/// The formula between the dollars.
|
||||||
|
pub formula: &'s str,
|
||||||
|
/// Whether the formula was surrounded by one dollar (true) or two dollars
|
||||||
|
/// (false).
|
||||||
|
pub inline: bool,
|
||||||
|
/// Whether the closing dollars were present.
|
||||||
|
pub terminated: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A unicode escape sequence: `\u{1F5FA}`.
|
||||||
|
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||||
|
pub struct TokenUnicodeEscape<'s> {
|
||||||
|
/// The escape sequence between the braces.
|
||||||
|
pub sequence: &'s str,
|
||||||
|
/// Whether the closing brace was present.
|
||||||
|
pub terminated: bool,
|
||||||
|
}
|
||||||
|
|
||||||
impl<'s> Token<'s> {
|
impl<'s> Token<'s> {
|
||||||
/// The natural-language name of this token for use in error messages.
|
/// The English name of this token for use in error messages.
|
||||||
pub fn name(self) -> &'static str {
|
pub fn name(self) -> &'static str {
|
||||||
match self {
|
match self {
|
||||||
Self::Text(_) => "text",
|
|
||||||
Self::Space(_) => "space",
|
|
||||||
|
|
||||||
Self::LineComment(_) => "line comment",
|
|
||||||
Self::BlockComment(_) => "block comment",
|
|
||||||
|
|
||||||
Self::LeftBracket => "opening bracket",
|
Self::LeftBracket => "opening bracket",
|
||||||
Self::RightBracket => "closing bracket",
|
Self::RightBracket => "closing bracket",
|
||||||
Self::LeftBrace => "opening brace",
|
Self::LeftBrace => "opening brace",
|
||||||
Self::RightBrace => "closing brace",
|
Self::RightBrace => "closing brace",
|
||||||
Self::LeftParen => "opening paren",
|
Self::LeftParen => "opening paren",
|
||||||
Self::RightParen => "closing paren",
|
Self::RightParen => "closing paren",
|
||||||
|
|
||||||
Self::Star => "star",
|
Self::Star => "star",
|
||||||
Self::Underscore => "underscore",
|
Self::Underscore => "underscore",
|
||||||
|
Self::Hash => "hashtag",
|
||||||
|
Self::Tilde => "tilde",
|
||||||
Self::Backslash => "backslash",
|
Self::Backslash => "backslash",
|
||||||
Self::Hashtag => "hashtag",
|
|
||||||
Self::Tilde => "tidle",
|
|
||||||
Self::Raw { .. } => "raw block",
|
|
||||||
Self::UnicodeEscape { .. } => "unicode escape sequence",
|
|
||||||
|
|
||||||
Self::Colon => "colon",
|
|
||||||
Self::Comma => "comma",
|
Self::Comma => "comma",
|
||||||
|
Self::Colon => "colon",
|
||||||
Self::Pipe => "pipe",
|
Self::Pipe => "pipe",
|
||||||
Self::Plus => "plus sign",
|
Self::Plus => "plus",
|
||||||
Self::Hyphen => "minus sign",
|
Self::Hyph => "minus",
|
||||||
Self::Slash => "slash",
|
Self::Slash => "slash",
|
||||||
|
Self::Eq => "assignment operator",
|
||||||
|
Self::EqEq => "equality operator",
|
||||||
|
Self::BangEq => "inequality operator",
|
||||||
|
Self::Lt => "less than operator",
|
||||||
|
Self::LtEq => "less than or equal operator",
|
||||||
|
Self::Gt => "greater than operator",
|
||||||
|
Self::GtEq => "greater than or equal operator",
|
||||||
|
Self::PlusEq => "add-assign operator",
|
||||||
|
Self::HyphEq => "subtract-assign operator",
|
||||||
|
Self::StarEq => "multiply-assign operator",
|
||||||
|
Self::SlashEq => "divide-assign operator",
|
||||||
|
Self::Question => "question mark",
|
||||||
|
Self::Dots => "dots",
|
||||||
|
Self::Arrow => "arrow",
|
||||||
|
Self::Not => "not operator",
|
||||||
|
Self::And => "and operator",
|
||||||
|
Self::Or => "or operator",
|
||||||
|
Self::Let => "let keyword",
|
||||||
|
Self::If => "if keyword",
|
||||||
|
Self::Else => "else keyword",
|
||||||
|
Self::For => "for keyword",
|
||||||
|
Self::In => "in keyword",
|
||||||
|
Self::While => "while keyword",
|
||||||
|
Self::Break => "break keyword",
|
||||||
|
Self::Continue => "continue keyword",
|
||||||
|
Self::Return => "return keyword",
|
||||||
Self::None => "none",
|
Self::None => "none",
|
||||||
|
Self::Space(_) => "space",
|
||||||
|
Self::Text(_) => "text",
|
||||||
|
Self::Raw(_) => "raw block",
|
||||||
|
Self::Math(_) => "math formula",
|
||||||
|
Self::UnicodeEscape(_) => "unicode escape sequence",
|
||||||
Self::Ident(_) => "identifier",
|
Self::Ident(_) => "identifier",
|
||||||
Self::Bool(_) => "bool",
|
Self::Bool(_) => "boolean",
|
||||||
Self::Int(_) => "integer",
|
Self::Int(_) => "integer",
|
||||||
Self::Float(_) => "float",
|
Self::Float(_) => "float",
|
||||||
Self::Length(..) => "length",
|
Self::Length(..) => "length",
|
||||||
|
Self::Angle(..) => "angle",
|
||||||
Self::Percent(_) => "percentage",
|
Self::Percent(_) => "percentage",
|
||||||
Self::Hex(_) => "hex value",
|
Self::Hex(_) => "hex value",
|
||||||
Self::Str { .. } => "string",
|
Self::Str(_) => "string",
|
||||||
|
Self::LineComment(_) => "line comment",
|
||||||
|
Self::BlockComment(_) => "block comment",
|
||||||
Self::Invalid("*/") => "end of block comment",
|
Self::Invalid("*/") => "end of block comment",
|
||||||
Self::Invalid(_) => "invalid token",
|
Self::Invalid(_) => "invalid token",
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user