mirror of
https://github.com/typst/typst
synced 2025-05-13 20:46:23 +08:00
Parsing improvements 🧽
- Simplified scanner code
- Peek eagerly
- Skip whitespace and comments automatically in header mode
- Parse simple block expressions
- Move literal definitions into expression module
- Raw resolving tests
This commit is contained in:
parent
ba3d43f7b2
commit
4069f0744d
128
src/parse/mod.rs
128
src/parse/mod.rs
@ -32,7 +32,7 @@ fn tree(p: &mut Parser) -> SynTree {
|
||||
let mut at_start = true;
|
||||
let mut tree = vec![];
|
||||
while !p.eof() {
|
||||
if let Some(node) = node(p, at_start) {
|
||||
if let Some(node) = p.span_if(|p| node(p, at_start)) {
|
||||
if node.v == SynNode::Parbreak {
|
||||
at_start = true;
|
||||
} else if node.v != SynNode::Space {
|
||||
@ -45,8 +45,8 @@ fn tree(p: &mut Parser) -> SynTree {
|
||||
}
|
||||
|
||||
/// Parse a syntax node.
|
||||
fn node(p: &mut Parser, at_start: bool) -> Option<Spanned<SynNode>> {
|
||||
let start = p.pos();
|
||||
fn node(p: &mut Parser, at_start: bool) -> Option<SynNode> {
|
||||
let start = p.next_start();
|
||||
let node = match p.eat()? {
|
||||
// Spaces.
|
||||
Token::Space(newlines) => {
|
||||
@ -84,6 +84,12 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Spanned<SynNode>> {
|
||||
SynNode::Expr(Expr::Call(bracket_call(p)))
|
||||
}
|
||||
|
||||
// Blocks.
|
||||
Token::LeftBrace => {
|
||||
p.jump(start);
|
||||
SynNode::Expr(block_expr(p)?)
|
||||
}
|
||||
|
||||
// Bad tokens.
|
||||
_ => {
|
||||
p.jump(start);
|
||||
@ -91,23 +97,27 @@ fn node(p: &mut Parser, at_start: bool) -> Option<Spanned<SynNode>> {
|
||||
return None;
|
||||
}
|
||||
};
|
||||
Some(node.span_with(start .. p.pos()))
|
||||
Some(node)
|
||||
}
|
||||
|
||||
/// Parse a heading.
|
||||
fn heading(p: &mut Parser, start: Pos) -> NodeHeading {
|
||||
// Parse the section depth.
|
||||
let count = p.eat_while(|c| c == Token::Hashtag);
|
||||
let span = Span::new(start, p.pos());
|
||||
let level = (count.min(5) as u8).span_with(span);
|
||||
if count > 5 {
|
||||
p.diag(warning!(span, "section depth should be at most 6"));
|
||||
let mut level = 0u8;
|
||||
while p.eat_if(Token::Hashtag) {
|
||||
level = level.saturating_add(1);
|
||||
}
|
||||
|
||||
let mut level = level.span_with(start .. p.last_end());
|
||||
if level.v > 5 {
|
||||
p.diag(warning!(level.span, "section depth should be at most 6"));
|
||||
level.v = 5;
|
||||
}
|
||||
|
||||
// Parse the heading contents.
|
||||
let mut contents = vec![];
|
||||
while p.check(|t| !matches!(t, Token::Space(n) if n >= 1)) {
|
||||
if let Some(node) = node(p, false) {
|
||||
if let Some(node) = p.span_if(|p| node(p, false)) {
|
||||
contents.push(node);
|
||||
}
|
||||
}
|
||||
@ -120,7 +130,7 @@ fn raw(p: &mut Parser, token: TokenRaw) -> NodeRaw {
|
||||
let raw = resolve::resolve_raw(token.text, token.backticks);
|
||||
|
||||
if !token.terminated {
|
||||
p.diag(error!(p.pos(), "expected backtick(s)"));
|
||||
p.diag(error!(p.last_end(), "expected backtick(s)"));
|
||||
}
|
||||
|
||||
raw
|
||||
@ -128,7 +138,7 @@ fn raw(p: &mut Parser, token: TokenRaw) -> NodeRaw {
|
||||
|
||||
/// Parse a unicode escape sequence.
|
||||
fn unicode_escape(p: &mut Parser, token: TokenUnicodeEscape, start: Pos) -> String {
|
||||
let span = Span::new(start, p.pos());
|
||||
let span = Span::new(start, p.last_end());
|
||||
let text = if let Some(c) = resolve::resolve_hex(token.sequence) {
|
||||
c.to_string()
|
||||
} else {
|
||||
@ -147,16 +157,16 @@ fn unicode_escape(p: &mut Parser, token: TokenUnicodeEscape, start: Pos) -> Stri
|
||||
|
||||
/// Parse a bracketed function call.
|
||||
fn bracket_call(p: &mut Parser) -> ExprCall {
|
||||
p.start_group(Group::Bracket);
|
||||
p.push_mode(TokenMode::Header);
|
||||
p.start_group(Group::Bracket);
|
||||
|
||||
// One header is guaranteed, but there may be more (through chaining).
|
||||
let mut outer = vec![];
|
||||
let mut inner = p.span(|p| bracket_subheader(p));
|
||||
let mut inner = p.span(bracket_subheader);
|
||||
|
||||
while p.eat_if(Token::Pipe) {
|
||||
outer.push(inner);
|
||||
inner = p.span(|p| bracket_subheader(p));
|
||||
inner = p.span(bracket_subheader);
|
||||
}
|
||||
|
||||
p.pop_mode();
|
||||
@ -182,10 +192,9 @@ fn bracket_call(p: &mut Parser) -> ExprCall {
|
||||
/// Parse one subheader of a bracketed function call.
|
||||
fn bracket_subheader(p: &mut Parser) -> ExprCall {
|
||||
p.start_group(Group::Subheader);
|
||||
let start = p.pos();
|
||||
|
||||
p.skip_white();
|
||||
let name = p.span(|p| ident(p)).transpose().unwrap_or_else(|| {
|
||||
let start = p.next_start();
|
||||
let name = p.span_if(ident).unwrap_or_else(|| {
|
||||
let what = "function name";
|
||||
if p.eof() {
|
||||
p.diag_expected_at(what, start);
|
||||
@ -195,7 +204,6 @@ fn bracket_subheader(p: &mut Parser) -> ExprCall {
|
||||
Ident(String::new()).span_with(start)
|
||||
});
|
||||
|
||||
p.skip_white();
|
||||
let args = p.span(|p| dict_contents(p).0);
|
||||
|
||||
p.end_group();
|
||||
@ -204,8 +212,8 @@ fn bracket_subheader(p: &mut Parser) -> ExprCall {
|
||||
|
||||
/// Parse the body of a bracketed function call.
|
||||
fn bracket_body(p: &mut Parser) -> SynTree {
|
||||
p.start_group(Group::Bracket);
|
||||
p.push_mode(TokenMode::Body);
|
||||
p.start_group(Group::Bracket);
|
||||
let tree = tree(p);
|
||||
p.pop_mode();
|
||||
p.end_group();
|
||||
@ -220,6 +228,16 @@ fn paren_call(p: &mut Parser, name: Spanned<Ident>) -> ExprCall {
|
||||
ExprCall { name, args }
|
||||
}
|
||||
|
||||
/// Parse a block expression.
|
||||
fn block_expr(p: &mut Parser) -> Option<Expr> {
|
||||
p.push_mode(TokenMode::Header);
|
||||
p.start_group(Group::Brace);
|
||||
let expr = expr(p);
|
||||
p.pop_mode();
|
||||
p.end_group();
|
||||
expr
|
||||
}
|
||||
|
||||
/// Parse the contents of a dictionary.
|
||||
fn dict_contents(p: &mut Parser) -> (LitDict, bool) {
|
||||
let mut dict = LitDict::new();
|
||||
@ -227,7 +245,6 @@ fn dict_contents(p: &mut Parser) -> (LitDict, bool) {
|
||||
let mut expected_comma = None;
|
||||
|
||||
loop {
|
||||
p.skip_white();
|
||||
if p.eof() {
|
||||
break;
|
||||
}
|
||||
@ -252,7 +269,6 @@ fn dict_contents(p: &mut Parser) -> (LitDict, bool) {
|
||||
let behind = entry.expr.span.end;
|
||||
dict.0.push(entry);
|
||||
|
||||
p.skip_white();
|
||||
if p.eof() {
|
||||
break;
|
||||
}
|
||||
@ -270,14 +286,12 @@ fn dict_contents(p: &mut Parser) -> (LitDict, bool) {
|
||||
|
||||
/// Parse a single entry in a dictionary.
|
||||
fn dict_entry(p: &mut Parser) -> Option<LitDictEntry> {
|
||||
if let Some(ident) = p.span(|p| ident(p)).transpose() {
|
||||
p.skip_white();
|
||||
if let Some(ident) = p.span_if(ident) {
|
||||
match p.peek() {
|
||||
// Key-value pair.
|
||||
Some(Token::Colon) => {
|
||||
p.eat_assert(Token::Colon);
|
||||
p.skip_white();
|
||||
if let Some(expr) = expr(p) {
|
||||
if let Some(expr) = p.span_if(expr) {
|
||||
Some(LitDictEntry {
|
||||
key: Some(ident.map(|id| DictKey::Str(id.0))),
|
||||
expr,
|
||||
@ -294,7 +308,7 @@ fn dict_entry(p: &mut Parser) -> Option<LitDictEntry> {
|
||||
expr: {
|
||||
let start = ident.span.start;
|
||||
let call = paren_call(p, ident);
|
||||
Expr::Call(call).span_with(start .. p.pos())
|
||||
Expr::Call(call).span_with(start .. p.last_end())
|
||||
},
|
||||
}),
|
||||
|
||||
@ -304,7 +318,7 @@ fn dict_entry(p: &mut Parser) -> Option<LitDictEntry> {
|
||||
expr: ident.map(|id| Expr::Lit(Lit::Ident(id))),
|
||||
}),
|
||||
}
|
||||
} else if let Some(expr) = expr(p) {
|
||||
} else if let Some(expr) = p.span_if(expr) {
|
||||
Some(LitDictEntry { key: None, expr })
|
||||
} else {
|
||||
None
|
||||
@ -312,7 +326,7 @@ fn dict_entry(p: &mut Parser) -> Option<LitDictEntry> {
|
||||
}
|
||||
|
||||
/// Parse an expression: `term (+ term)*`.
|
||||
fn expr(p: &mut Parser) -> Option<Spanned<Expr>> {
|
||||
fn expr(p: &mut Parser) -> Option<Expr> {
|
||||
binops(p, "summand", term, |token| match token {
|
||||
Token::Plus => Some(BinOp::Add),
|
||||
Token::Hyphen => Some(BinOp::Sub),
|
||||
@ -321,7 +335,7 @@ fn expr(p: &mut Parser) -> Option<Spanned<Expr>> {
|
||||
}
|
||||
|
||||
/// Parse a term: `factor (* factor)*`.
|
||||
fn term(p: &mut Parser) -> Option<Spanned<Expr>> {
|
||||
fn term(p: &mut Parser) -> Option<Expr> {
|
||||
binops(p, "factor", factor, |token| match token {
|
||||
Token::Star => Some(BinOp::Mul),
|
||||
Token::Slash => Some(BinOp::Div),
|
||||
@ -333,25 +347,21 @@ fn term(p: &mut Parser) -> Option<Spanned<Expr>> {
|
||||
fn binops(
|
||||
p: &mut Parser,
|
||||
operand_name: &str,
|
||||
operand: fn(&mut Parser) -> Option<Spanned<Expr>>,
|
||||
operand: fn(&mut Parser) -> Option<Expr>,
|
||||
op: fn(Token) -> Option<BinOp>,
|
||||
) -> Option<Spanned<Expr>> {
|
||||
let mut lhs = operand(p)?;
|
||||
) -> Option<Expr> {
|
||||
let mut lhs = p.span_if(operand)?;
|
||||
|
||||
loop {
|
||||
p.skip_white();
|
||||
if let Some(op) = p.span(|p| p.eat_map(op)).transpose() {
|
||||
p.skip_white();
|
||||
|
||||
if let Some(rhs) = operand(p) {
|
||||
if let Some(op) = p.span_if(|p| p.eat_map(op)) {
|
||||
if let Some(rhs) = p.span_if(operand) {
|
||||
let span = lhs.span.join(rhs.span);
|
||||
let expr = Expr::Binary(ExprBinary {
|
||||
lhs: lhs.map(Box::new),
|
||||
lhs: Box::new(lhs),
|
||||
op,
|
||||
rhs: rhs.map(Box::new),
|
||||
rhs: Box::new(rhs),
|
||||
});
|
||||
lhs = expr.span_with(span);
|
||||
p.skip_white();
|
||||
} else {
|
||||
let span = lhs.span.join(op.span);
|
||||
p.diag(error!(span, "missing right {}", operand_name));
|
||||
@ -362,35 +372,31 @@ fn binops(
|
||||
}
|
||||
}
|
||||
|
||||
Some(lhs)
|
||||
Some(lhs.v)
|
||||
}
|
||||
|
||||
/// Parse a factor of the form `-?value`.
|
||||
fn factor(p: &mut Parser) -> Option<Spanned<Expr>> {
|
||||
fn factor(p: &mut Parser) -> Option<Expr> {
|
||||
let op = |token| match token {
|
||||
Token::Hyphen => Some(UnOp::Neg),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
p.span(|p| {
|
||||
if let Some(op) = p.span(|p| p.eat_map(op)).transpose() {
|
||||
p.skip_white();
|
||||
if let Some(expr) = factor(p) {
|
||||
Some(Expr::Unary(ExprUnary { op, expr: expr.map(Box::new) }))
|
||||
} else {
|
||||
p.diag(error!(op.span, "missing factor"));
|
||||
None
|
||||
}
|
||||
if let Some(op) = p.span_if(|p| p.eat_map(op)) {
|
||||
if let Some(expr) = p.span_if(factor) {
|
||||
Some(Expr::Unary(ExprUnary { op, expr: Box::new(expr) }))
|
||||
} else {
|
||||
value(p)
|
||||
p.diag(error!(op.span, "missing factor"));
|
||||
None
|
||||
}
|
||||
})
|
||||
.transpose()
|
||||
} else {
|
||||
value(p)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a value.
|
||||
fn value(p: &mut Parser) -> Option<Expr> {
|
||||
let start = p.pos();
|
||||
let start = p.next_start();
|
||||
Some(match p.eat()? {
|
||||
// Bracketed function call.
|
||||
Token::LeftBracket => {
|
||||
@ -414,9 +420,7 @@ fn value(p: &mut Parser) -> Option<Expr> {
|
||||
// Function or just ident.
|
||||
Token::Ident(id) => {
|
||||
let ident = Ident(id.into());
|
||||
let after = p.pos();
|
||||
|
||||
p.skip_white();
|
||||
let after = p.last_end();
|
||||
if p.peek() == Some(Token::LeftParen) {
|
||||
let name = ident.span_with(start .. after);
|
||||
Expr::Call(paren_call(p, name))
|
||||
@ -444,8 +448,8 @@ fn value(p: &mut Parser) -> Option<Expr> {
|
||||
|
||||
// Parse a content expression: `{...}`.
|
||||
fn content(p: &mut Parser) -> SynTree {
|
||||
p.start_group(Group::Brace);
|
||||
p.push_mode(TokenMode::Body);
|
||||
p.start_group(Group::Brace);
|
||||
let tree = tree(p);
|
||||
p.pop_mode();
|
||||
p.end_group();
|
||||
@ -477,7 +481,7 @@ fn ident(p: &mut Parser) -> Option<Ident> {
|
||||
fn color(p: &mut Parser, hex: &str, start: Pos) -> RgbaColor {
|
||||
RgbaColor::from_str(hex).unwrap_or_else(|_| {
|
||||
// Replace color with black.
|
||||
p.diag(error!(start .. p.pos(), "invalid color"));
|
||||
p.diag(error!(start .. p.last_end(), "invalid color"));
|
||||
RgbaColor::new(0, 0, 0, 255)
|
||||
})
|
||||
}
|
||||
@ -485,7 +489,7 @@ fn color(p: &mut Parser, hex: &str, start: Pos) -> RgbaColor {
|
||||
/// Parse a string.
|
||||
fn string(p: &mut Parser, token: TokenStr) -> String {
|
||||
if !token.terminated {
|
||||
p.diag_expected_at("quote", p.pos());
|
||||
p.diag_expected_at("quote", p.last_end());
|
||||
}
|
||||
|
||||
resolve::resolve_string(token.string)
|
||||
|
@ -7,23 +7,35 @@ use crate::syntax::{Pos, Span, SpanWith, Spanned, Token};
|
||||
|
||||
/// A convenient token-based parser.
|
||||
pub struct Parser<'s> {
|
||||
/// An iterator over the source tokens.
|
||||
tokens: Tokens<'s>,
|
||||
/// The next token.
|
||||
/// (Only `None` if we are at the end of group or end of file).
|
||||
peeked: Option<Token<'s>>,
|
||||
/// The start position of the peeked token.
|
||||
next_start: Pos,
|
||||
/// The end position of the last (non-whitespace if in header) token.
|
||||
last_end: Pos,
|
||||
/// The stack of modes we were in.
|
||||
modes: Vec<TokenMode>,
|
||||
/// The stack of open groups.
|
||||
groups: Vec<Group>,
|
||||
pos: Pos,
|
||||
/// Accumulated feedback.
|
||||
f: Feedback,
|
||||
}
|
||||
|
||||
impl<'s> Parser<'s> {
|
||||
/// Create a new parser for the source string.
|
||||
pub fn new(src: &'s str) -> Self {
|
||||
let mut tokens = Tokens::new(src, TokenMode::Body);
|
||||
let peeked = tokens.next();
|
||||
Self {
|
||||
tokens: Tokens::new(src, TokenMode::Body),
|
||||
peeked: None,
|
||||
tokens,
|
||||
peeked,
|
||||
next_start: Pos::ZERO,
|
||||
last_end: Pos::ZERO,
|
||||
modes: vec![],
|
||||
groups: vec![],
|
||||
pos: Pos::ZERO,
|
||||
f: Feedback::new(),
|
||||
}
|
||||
}
|
||||
@ -41,9 +53,9 @@ impl<'s> Parser<'s> {
|
||||
/// Eat the next token and add a diagnostic that it is not the expected
|
||||
/// `thing`.
|
||||
pub fn diag_expected(&mut self, what: &str) {
|
||||
let before = self.pos();
|
||||
let before = self.next_start;
|
||||
if let Some(found) = self.eat() {
|
||||
let after = self.pos();
|
||||
let after = self.last_end;
|
||||
self.diag(error!(
|
||||
before .. after,
|
||||
"expected {}, found {}",
|
||||
@ -51,7 +63,7 @@ impl<'s> Parser<'s> {
|
||||
found.name(),
|
||||
));
|
||||
} else {
|
||||
self.diag_expected_at(what, self.pos());
|
||||
self.diag_expected_at(what, self.next_start);
|
||||
}
|
||||
}
|
||||
|
||||
@ -62,9 +74,9 @@ impl<'s> Parser<'s> {
|
||||
|
||||
/// Eat the next token and add a diagnostic that it is unexpected.
|
||||
pub fn diag_unexpected(&mut self) {
|
||||
let before = self.pos();
|
||||
let before = self.next_start;
|
||||
if let Some(found) = self.eat() {
|
||||
let after = self.pos();
|
||||
let after = self.last_end;
|
||||
self.diag(match found {
|
||||
Token::Invalid(_) => error!(before .. after, "invalid token"),
|
||||
_ => error!(before .. after, "unexpected {}", found.name()),
|
||||
@ -126,35 +138,39 @@ impl<'s> Parser<'s> {
|
||||
};
|
||||
|
||||
if let Some(token) = end {
|
||||
// This `peek()` can't be used directly because it hides the end of
|
||||
// group token. To circumvent this, we drop down to `self.peeked`.
|
||||
self.peek();
|
||||
if self.peeked == Some(token) {
|
||||
self.bump();
|
||||
} else {
|
||||
self.diag(error!(self.pos(), "expected {}", token.name()));
|
||||
self.diag(error!(self.next_start, "expected {}", token.name()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip whitespace tokens.
|
||||
pub fn skip_white(&mut self) {
|
||||
self.eat_while(|t| {
|
||||
matches!(t, Token::Space(_) | Token::LineComment(_) | Token::BlockComment(_))
|
||||
});
|
||||
/// Execute `f` and return the result alongside the span of everything `f`
|
||||
/// ate. Excludes leading and trailing whitespace in header mode.
|
||||
pub fn span<T, F>(&mut self, f: F) -> Spanned<T>
|
||||
where
|
||||
F: FnOnce(&mut Self) -> T,
|
||||
{
|
||||
let start = self.next_start;
|
||||
let output = f(self);
|
||||
let end = self.last_end;
|
||||
output.span_with(start .. end)
|
||||
}
|
||||
|
||||
/// Execute `f` and return the result alongside the span of everything `f`
|
||||
/// ate.
|
||||
pub fn span<T>(&mut self, f: impl FnOnce(&mut Self) -> T) -> Spanned<T> {
|
||||
let start = self.pos;
|
||||
f(self).span_with(start .. self.pos)
|
||||
/// A version of [`span`](Self::span) that works better with options.
|
||||
pub fn span_if<T, F>(&mut self, f: F) -> Option<Spanned<T>>
|
||||
where
|
||||
F: FnOnce(&mut Self) -> Option<T>,
|
||||
{
|
||||
self.span(|p| f(p)).transpose()
|
||||
}
|
||||
|
||||
/// Consume the next token.
|
||||
pub fn eat(&mut self) -> Option<Token<'s>> {
|
||||
self.peek()?;
|
||||
self.bump()
|
||||
let token = self.peek()?;
|
||||
self.bump();
|
||||
Some(token)
|
||||
}
|
||||
|
||||
/// Consume the next token if it is the given one.
|
||||
@ -168,13 +184,16 @@ impl<'s> Parser<'s> {
|
||||
}
|
||||
|
||||
/// Consume the next token if the closure maps it to a `Some`-variant.
|
||||
pub fn eat_map<T>(&mut self, f: impl FnOnce(Token<'s>) -> Option<T>) -> Option<T> {
|
||||
pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
|
||||
where
|
||||
F: FnOnce(Token<'s>) -> Option<T>,
|
||||
{
|
||||
let token = self.peek()?;
|
||||
let out = f(token);
|
||||
if out.is_some() {
|
||||
let mapped = f(token);
|
||||
if mapped.is_some() {
|
||||
self.bump();
|
||||
}
|
||||
out
|
||||
mapped
|
||||
}
|
||||
|
||||
/// Consume the next token, debug-asserting that it is the given one.
|
||||
@ -183,58 +202,30 @@ impl<'s> Parser<'s> {
|
||||
debug_assert_eq!(next, Some(t));
|
||||
}
|
||||
|
||||
/// Consume tokens while the condition is true.
|
||||
///
|
||||
/// Returns how many tokens were eaten.
|
||||
pub fn eat_while(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize {
|
||||
self.eat_until(|t| !f(t))
|
||||
}
|
||||
|
||||
/// Consume tokens until the condition is true.
|
||||
///
|
||||
/// Returns how many tokens were eaten.
|
||||
pub fn eat_until(&mut self, mut f: impl FnMut(Token<'s>) -> bool) -> usize {
|
||||
let mut count = 0;
|
||||
while let Some(t) = self.peek() {
|
||||
if f(t) {
|
||||
break;
|
||||
}
|
||||
self.bump();
|
||||
count += 1;
|
||||
}
|
||||
count
|
||||
}
|
||||
|
||||
/// Peek at the next token without consuming it.
|
||||
pub fn peek(&mut self) -> Option<Token<'s>> {
|
||||
let token = match self.peeked {
|
||||
Some(token) => token,
|
||||
None => {
|
||||
let token = self.tokens.next()?;
|
||||
self.peeked = Some(token);
|
||||
token
|
||||
}
|
||||
};
|
||||
|
||||
let group = match token {
|
||||
Token::RightParen => Group::Paren,
|
||||
Token::RightBracket => Group::Bracket,
|
||||
Token::RightBrace => Group::Brace,
|
||||
Token::Pipe => Group::Subheader,
|
||||
_ => return Some(token),
|
||||
let group = match self.peeked {
|
||||
Some(Token::RightParen) => Group::Paren,
|
||||
Some(Token::RightBracket) => Group::Bracket,
|
||||
Some(Token::RightBrace) => Group::Brace,
|
||||
Some(Token::Pipe) => Group::Subheader,
|
||||
other => return other,
|
||||
};
|
||||
|
||||
if self.groups.contains(&group) {
|
||||
None
|
||||
} else {
|
||||
Some(token)
|
||||
return None;
|
||||
}
|
||||
|
||||
self.peeked
|
||||
}
|
||||
|
||||
/// Checks whether the next token fulfills a condition.
|
||||
///
|
||||
/// Returns `false` if there is no next token.
|
||||
pub fn check(&mut self, f: impl FnOnce(Token<'s>) -> bool) -> bool {
|
||||
pub fn check<F>(&mut self, f: F) -> bool
|
||||
where
|
||||
F: FnOnce(Token<'s>) -> bool,
|
||||
{
|
||||
self.peek().map_or(false, f)
|
||||
}
|
||||
|
||||
@ -243,10 +234,16 @@ impl<'s> Parser<'s> {
|
||||
self.peek().is_none()
|
||||
}
|
||||
|
||||
/// The position in the string at which the last token ends and next token
|
||||
/// will start.
|
||||
pub fn pos(&self) -> Pos {
|
||||
self.pos
|
||||
/// The position at which the next token starts.
|
||||
pub fn next_start(&self) -> Pos {
|
||||
self.next_start
|
||||
}
|
||||
|
||||
/// The position at which the last token ended.
|
||||
///
|
||||
/// Refers to the end of the last _non-whitespace_ token in header mode.
|
||||
pub fn last_end(&self) -> Pos {
|
||||
self.last_end
|
||||
}
|
||||
|
||||
/// Jump to a position in the source string.
|
||||
@ -260,34 +257,49 @@ impl<'s> Parser<'s> {
|
||||
self.tokens.scanner().get(span.into().to_range())
|
||||
}
|
||||
|
||||
/// The full source string up to the current index.
|
||||
/// The full source string up to the end of the last token.
|
||||
pub fn eaten(&self) -> &'s str {
|
||||
self.tokens.scanner().get(.. self.pos.to_usize())
|
||||
self.tokens.scanner().get(.. self.last_end.to_usize())
|
||||
}
|
||||
|
||||
/// The source string from `start` to the current index.
|
||||
/// The source string from `start` to the end of the last token.
|
||||
pub fn eaten_from(&self, start: Pos) -> &'s str {
|
||||
self.tokens.scanner().get(start.to_usize() .. self.pos.to_usize())
|
||||
self.tokens
|
||||
.scanner()
|
||||
.get(start.to_usize() .. self.last_end.to_usize())
|
||||
}
|
||||
|
||||
/// The remaining source string after the current index.
|
||||
/// The remaining source string after the start of the next token.
|
||||
pub fn rest(&self) -> &'s str {
|
||||
self.tokens.scanner().get(self.pos.to_usize() ..)
|
||||
self.tokens.scanner().get(self.next_start.to_usize() ..)
|
||||
}
|
||||
|
||||
/// The underlying scanner.
|
||||
pub fn scanner(&self) -> Scanner<'s> {
|
||||
let mut scanner = self.tokens.scanner().clone();
|
||||
scanner.jump(self.pos.to_usize());
|
||||
scanner.jump(self.next_start.to_usize());
|
||||
scanner
|
||||
}
|
||||
|
||||
/// Set the position to the tokenizer's position and take the peeked token.
|
||||
fn bump(&mut self) -> Option<Token<'s>> {
|
||||
self.pos = self.tokens.pos();
|
||||
let token = self.peeked;
|
||||
self.peeked = None;
|
||||
token
|
||||
fn bump(&mut self) {
|
||||
self.last_end = self.tokens.pos();
|
||||
self.next_start = self.tokens.pos();
|
||||
self.peeked = self.tokens.next();
|
||||
|
||||
match self.tokens.mode() {
|
||||
TokenMode::Body => {}
|
||||
TokenMode::Header => {
|
||||
while matches!(
|
||||
self.peeked,
|
||||
Some(Token::Space(_)) |
|
||||
Some(Token::LineComment(_)) |
|
||||
Some(Token::BlockComment(_))
|
||||
) {
|
||||
self.next_start = self.tokens.pos();
|
||||
self.peeked = self.tokens.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -159,6 +159,38 @@ mod tests {
|
||||
test("`", "", "`");
|
||||
}
|
||||
|
||||
#[test]
fn test_resolve_raw() {
    /// Assert that resolving the raw text `raw`, delimited by `backticks`
    /// backticks, yields a `NodeRaw` with the given language tag, lines and
    /// inline flag.
    fn test(
        raw: &str,
        backticks: usize,
        lang: Option<&str>,
        lines: &[&str],
        inline: bool,
    ) {
        assert_eq!(resolve_raw(raw, backticks), NodeRaw {
            lang: lang.map(|id| Ident(id.into())),
            lines: lines.iter().map(ToString::to_string).collect(),
            inline,
        });
    }

    // Just one backtick.
    test("py", 1, None, &["py"], true);
    test("1\n2", 1, None, &["1", "2"], true);
    test("1\r\n2", 1, None, &["1", "2"], true);

    // More than one backtick with lang tag.
    test("js alert()", 2, Some("js"), &["alert()"], true);
    test("py quit(\n\n) ", 3, Some("py"), &["quit(", "", ")"], false);
    test("♥", 2, None, &[], true);

    // Trimming of whitespace (tested more thoroughly in separate test).
    test(" a", 2, None, &["a"], true);
    // Two leading spaces, so that exactly one space survives trimming. This
    // input must differ from the case above, which expects `["a"]` for a
    // single leading space.
    // NOTE(review): the double space was reconstructed from the expected
    // output — confirm against the upstream test.
    test("  a", 2, None, &[" a"], true);
    test(" \na", 2, None, &["a"], false);
}
|
||||
|
||||
#[test]
|
||||
fn test_trim_raw() {
|
||||
fn test(text: &str, expected: Vec<&str>) {
|
||||
|
@ -2,25 +2,23 @@
|
||||
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::slice::SliceIndex;
|
||||
use std::str::Chars;
|
||||
|
||||
/// A low-level featureful char-based scanner.
|
||||
#[derive(Clone)]
|
||||
pub struct Scanner<'s> {
|
||||
src: &'s str,
|
||||
iter: Chars<'s>,
|
||||
index: usize,
|
||||
}
|
||||
|
||||
impl<'s> Scanner<'s> {
|
||||
/// Create a new char scanner.
|
||||
pub fn new(src: &'s str) -> Self {
|
||||
Self { src, iter: src.chars(), index: 0 }
|
||||
Self { src, index: 0 }
|
||||
}
|
||||
|
||||
/// Consume the next char.
|
||||
pub fn eat(&mut self) -> Option<char> {
|
||||
let next = self.iter.next();
|
||||
let next = self.peek();
|
||||
if let Some(c) = next {
|
||||
self.index += c.len_utf8();
|
||||
}
|
||||
@ -32,11 +30,10 @@ impl<'s> Scanner<'s> {
|
||||
/// Returns whether the char was consumed.
|
||||
pub fn eat_if(&mut self, c: char) -> bool {
|
||||
// Don't decode the char twice through peek() and eat().
|
||||
if self.iter.next() == Some(c) {
|
||||
if self.peek() == Some(c) {
|
||||
self.index += c.len_utf8();
|
||||
true
|
||||
} else {
|
||||
self.reset();
|
||||
false
|
||||
}
|
||||
}
|
||||
@ -58,18 +55,21 @@ impl<'s> Scanner<'s> {
|
||||
}
|
||||
|
||||
/// Eat chars while the condition is true.
|
||||
pub fn eat_while(&mut self, mut f: impl FnMut(char) -> bool) -> &'s str {
|
||||
pub fn eat_while<F>(&mut self, mut f: F) -> &'s str
|
||||
where
|
||||
F: FnMut(char) -> bool,
|
||||
{
|
||||
self.eat_until(|c| !f(c))
|
||||
}
|
||||
|
||||
/// Eat chars until the condition is true.
|
||||
pub fn eat_until(&mut self, mut f: impl FnMut(char) -> bool) -> &'s str {
|
||||
pub fn eat_until<F>(&mut self, mut f: F) -> &'s str
|
||||
where
|
||||
F: FnMut(char) -> bool,
|
||||
{
|
||||
let start = self.index;
|
||||
while let Some(c) = self.iter.next() {
|
||||
while let Some(c) = self.peek() {
|
||||
if f(c) {
|
||||
// Undo the previous `next()` without peeking all the time
|
||||
// during iteration.
|
||||
self.reset();
|
||||
break;
|
||||
}
|
||||
self.index += c.len_utf8();
|
||||
@ -80,29 +80,31 @@ impl<'s> Scanner<'s> {
|
||||
/// Uneat the last eaten char.
|
||||
pub fn uneat(&mut self) {
|
||||
self.index = self.last_index();
|
||||
self.reset();
|
||||
}
|
||||
|
||||
/// Peek at the next char without consuming it.
|
||||
pub fn peek(&self) -> Option<char> {
|
||||
self.iter.clone().next()
|
||||
self.src[self.index ..].chars().next()
|
||||
}
|
||||
|
||||
/// Peek at the nth-next char without consuming anything.
|
||||
pub fn peek_nth(&self, n: usize) -> Option<char> {
|
||||
self.iter.clone().nth(n)
|
||||
self.src[self.index ..].chars().nth(n)
|
||||
}
|
||||
|
||||
/// Checks whether the next char fulfills a condition.
|
||||
///
|
||||
/// Returns `false` if there is no next char.
|
||||
pub fn check(&self, f: impl FnOnce(char) -> bool) -> bool {
|
||||
pub fn check<F>(&self, f: F) -> bool
|
||||
where
|
||||
F: FnOnce(char) -> bool,
|
||||
{
|
||||
self.peek().map(f).unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Whether the end of the source string is reached.
|
||||
pub fn eof(&self) -> bool {
|
||||
self.iter.as_str().is_empty()
|
||||
self.index == self.src.len()
|
||||
}
|
||||
|
||||
/// The previous index in the source string.
|
||||
@ -122,7 +124,6 @@ impl<'s> Scanner<'s> {
|
||||
/// Jump to an index in the source string.
|
||||
pub fn jump(&mut self, index: usize) {
|
||||
self.index = index;
|
||||
self.reset();
|
||||
}
|
||||
|
||||
/// The full source string.
|
||||
@ -152,11 +153,6 @@ impl<'s> Scanner<'s> {
|
||||
pub fn rest(&self) -> &'s str {
|
||||
&self.src[self.index ..]
|
||||
}
|
||||
|
||||
/// Go back to where the index says.
|
||||
fn reset(&mut self) {
|
||||
self.iter = self.src[self.index ..].chars();
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Scanner<'_> {
|
||||
|
@ -102,7 +102,7 @@ macro_rules! Call {
|
||||
fn Unary(op: impl Into<Spanned<UnOp>>, expr: impl Into<Spanned<Expr>>) -> Expr {
|
||||
Expr::Unary(ExprUnary {
|
||||
op: op.into(),
|
||||
expr: expr.into().map(Box::new),
|
||||
expr: Box::new(expr.into()),
|
||||
})
|
||||
}
|
||||
|
||||
@ -112,9 +112,9 @@ fn Binary(
|
||||
rhs: impl Into<Spanned<Expr>>,
|
||||
) -> Expr {
|
||||
Expr::Binary(ExprBinary {
|
||||
lhs: lhs.into().map(Box::new),
|
||||
lhs: Box::new(lhs.into()),
|
||||
op: op.into(),
|
||||
rhs: rhs.into().map(Box::new),
|
||||
rhs: Box::new(rhs.into()),
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,9 @@
|
||||
//! Expressions.
|
||||
|
||||
use super::*;
|
||||
use crate::color::RgbaColor;
|
||||
use crate::eval::DictKey;
|
||||
use crate::geom::Unit;
|
||||
|
||||
/// An expression.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
@ -33,7 +36,7 @@ pub struct ExprUnary {
|
||||
/// The operator: `-`.
|
||||
pub op: Spanned<UnOp>,
|
||||
/// The expression to operate on: `x`.
|
||||
pub expr: Spanned<Box<Expr>>,
|
||||
pub expr: Box<Spanned<Expr>>,
|
||||
}
|
||||
|
||||
/// A unary operator.
|
||||
@ -47,11 +50,11 @@ pub enum UnOp {
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ExprBinary {
|
||||
/// The left-hand side of the operation: `a`.
|
||||
pub lhs: Spanned<Box<Expr>>,
|
||||
pub lhs: Box<Spanned<Expr>>,
|
||||
/// The operator: `+`.
|
||||
pub op: Spanned<BinOp>,
|
||||
/// The right-hand side of the operation: `b`.
|
||||
pub rhs: Spanned<Box<Expr>>,
|
||||
pub rhs: Box<Spanned<Expr>>,
|
||||
}
|
||||
|
||||
/// A binary operator.
|
||||
@ -66,3 +69,51 @@ pub enum BinOp {
|
||||
/// The division operator: `/`.
|
||||
Div,
|
||||
}
|
||||
|
||||
/// A literal.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Lit {
|
||||
/// An identifier literal: `left`.
|
||||
Ident(Ident),
|
||||
/// A boolean literal: `true`, `false`.
|
||||
Bool(bool),
|
||||
/// An integer literal: `120`.
|
||||
Int(i64),
|
||||
/// A floating-point literal: `1.2`, `10e-4`.
|
||||
Float(f64),
|
||||
/// A length literal: `12pt`, `3cm`.
|
||||
Length(f64, Unit),
|
||||
/// A percent literal: `50%`.
|
||||
///
|
||||
/// _Note_: `50%` is stored as `50.0` here, but as `0.5` in the
|
||||
/// corresponding [value](crate::geom::Relative).
|
||||
Percent(f64),
|
||||
/// A color literal: `#ffccee`.
|
||||
Color(RgbaColor),
|
||||
/// A string literal: `"hello!"`.
|
||||
Str(String),
|
||||
/// A dictionary literal: `(false, 12cm, greeting: "hi")`.
|
||||
Dict(LitDict),
|
||||
/// A content literal: `{*Hello* there!}`.
|
||||
Content(SynTree),
|
||||
}
|
||||
|
||||
/// A dictionary literal: `(false, 12cm, greeting: "hi")`.
|
||||
#[derive(Debug, Default, Clone, PartialEq)]
|
||||
pub struct LitDict(pub Vec<LitDictEntry>);
|
||||
|
||||
/// An entry in a dictionary literal: `false` or `greeting: "hi"`.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct LitDictEntry {
|
||||
/// The key of the entry if there was one: `greeting`.
|
||||
pub key: Option<Spanned<DictKey>>,
|
||||
/// The value of the entry: `"hi"`.
|
||||
pub expr: Spanned<Expr>,
|
||||
}
|
||||
|
||||
impl LitDict {
|
||||
/// Create an empty dict literal.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
}
|
||||
|
@ -1,54 +0,0 @@
|
||||
//! Literals.
|
||||
|
||||
use super::*;
|
||||
use crate::color::RgbaColor;
|
||||
use crate::eval::DictKey;
|
||||
use crate::geom::Unit;
|
||||
|
||||
/// A literal.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Lit {
|
||||
/// An identifier literal: `left`.
|
||||
Ident(Ident),
|
||||
/// A boolean literal: `true`, `false`.
|
||||
Bool(bool),
|
||||
/// An integer literal: `120`.
|
||||
Int(i64),
|
||||
/// A floating-point literal: `1.2`, `10e-4`.
|
||||
Float(f64),
|
||||
/// A length literal: `12pt`, `3cm`.
|
||||
Length(f64, Unit),
|
||||
/// A percent literal: `50%`.
|
||||
///
|
||||
/// _Note_: `50%` is stored as `50.0` here, but as `0.5` in the
|
||||
/// corresponding [value](crate::geom::Relative).
|
||||
Percent(f64),
|
||||
/// A color literal: `#ffccee`.
|
||||
Color(RgbaColor),
|
||||
/// A string literal: `"hello!"`.
|
||||
Str(String),
|
||||
/// A dictionary literal: `(false, 12cm, greeting: "hi")`.
|
||||
Dict(LitDict),
|
||||
/// A content literal: `{*Hello* there!}`.
|
||||
Content(SynTree),
|
||||
}
|
||||
|
||||
/// A dictionary literal: `(false, 12cm, greeting: "hi")`.
|
||||
#[derive(Debug, Default, Clone, PartialEq)]
|
||||
pub struct LitDict(pub Vec<LitDictEntry>);
|
||||
|
||||
/// An entry in a dictionary literal: `false` or `greeting: "hi"`.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct LitDictEntry {
|
||||
/// The key of the entry if there was one: `greeting`.
|
||||
pub key: Option<Spanned<DictKey>>,
|
||||
/// The value of the entry: `"hi"`.
|
||||
pub expr: Spanned<Expr>,
|
||||
}
|
||||
|
||||
impl LitDict {
|
||||
/// Create an empty dict literal.
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
}
|
@ -2,14 +2,12 @@
|
||||
|
||||
mod expr;
|
||||
mod ident;
|
||||
mod lit;
|
||||
mod node;
|
||||
mod span;
|
||||
mod token;
|
||||
|
||||
pub use expr::*;
|
||||
pub use ident::*;
|
||||
pub use lit::*;
|
||||
pub use node::*;
|
||||
pub use span::*;
|
||||
pub use token::*;
|
||||
|
Loading…
x
Reference in New Issue
Block a user