typst/src/parse/mod.rs
Laurenz 515fe89c5e Style changes
Co-Authored-By: Martin <mhaug@live.de>
2021-11-05 13:46:42 +01:00

718 lines
19 KiB
Rust

//! Parsing and tokenization.
mod parser;
mod resolve;
mod scanner;
mod tokens;
pub use parser::*;
pub use resolve::*;
pub use scanner::*;
pub use tokens::*;
use std::rc::Rc;
use crate::syntax::ast::{Associativity, BinOp, UnOp};
use crate::syntax::{ErrorPosition, GreenNode, NodeKind};
/// Parse a source file.
///
/// Runs the markup parser over all of `source` and returns the node produced
/// by finishing the parser.
pub fn parse(source: &str) -> Rc<GreenNode> {
    let mut parser = Parser::new(source);
    markup(&mut parser);
    parser.finish()
}
/// Parse markup without any stop condition other than the end of input.
///
/// Parsing starts in the "at start" state, so line-based constructs are
/// allowed for the very first node.
fn markup(p: &mut Parser) {
    // The predicate never asks to stop; only `eof` ends the loop.
    markup_while(p, true, &mut |_| true);
}
/// Parse markup that stays right of the given column.
///
/// Leading spaces without a newline and comments are consumed up front.
/// Afterwards, markup is parsed until a space containing at least one newline
/// is peeked whose following content is at a column smaller than `column`.
fn markup_indented(p: &mut Parser, column: usize) {
    // Skip comments and spaces that do not contain a newline.
    p.eat_while(|kind| match kind {
        NodeKind::LineComment | NodeKind::BlockComment => true,
        NodeKind::Space(newlines) => *newlines == 0,
        _ => false,
    });

    markup_while(p, false, &mut |p| {
        // Only a space with one or more newlines can end the indented markup.
        let breaks_line =
            matches!(p.peek(), Some(NodeKind::Space(newlines)) if *newlines >= 1);
        !breaks_line || p.column(p.next_end()) >= column
    });
}
/// Parse markup nodes for as long as the given predicate holds.
///
/// The predicate `f` is handed the parser before every node and decides
/// whether parsing continues; the end of input always stops it. All parsed
/// nodes are wrapped in a single `NodeKind::Markup` node.
///
/// If `at_start` is true, things like headings that may only appear at the
/// beginning of a line or template are allowed.
fn markup_while<F>(p: &mut Parser, mut at_start: bool, f: &mut F)
where
    F: FnMut(&mut Parser) -> bool,
{
    p.perform(NodeKind::Markup, |p| loop {
        // `eof` is checked first so that `f` is never called at the end.
        if p.eof() || !f(p) {
            break;
        }
        markup_node(p, &mut at_start);
    });
}
/// Parse a markup node.
fn markup_node(p: &mut Parser, at_start: &mut bool) {
let token = match p.peek() {
Some(t) => t,
None => return,
};
match token {
// Whitespace.
NodeKind::Space(newlines) => {
*at_start |= *newlines > 0;
if *newlines < 2 {
p.eat();
} else {
p.convert(NodeKind::Parbreak);
}
return;
}
// Comments.
NodeKind::LineComment | NodeKind::BlockComment => {
p.eat();
return;
}
// Text and markup.
NodeKind::Text(_)
| NodeKind::EnDash
| NodeKind::EmDash
| NodeKind::NonBreakingSpace
| NodeKind::Emph
| NodeKind::Strong
| NodeKind::Linebreak
| NodeKind::Raw(_)
| NodeKind::UnicodeEscape(_) => {
p.eat();
}
NodeKind::Eq if *at_start => heading(p),
NodeKind::ListBullet if *at_start => list_node(p),
NodeKind::EnumNumbering(_) if *at_start => enum_node(p),
// Line-based markup that is not currently at the start of the line.
NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => {
p.convert(NodeKind::Text(p.peek_src().into()));
}
// Hashtag + keyword / identifier.
NodeKind::Ident(_)
| NodeKind::Let
| NodeKind::If
| NodeKind::While
| NodeKind::For
| NodeKind::Import
| NodeKind::Include => {
let stmt = matches!(token, NodeKind::Let | NodeKind::Import);
let group = if stmt { Group::Stmt } else { Group::Expr };
p.start_group(group, TokenMode::Code);
let res = expr_prec(p, true, 0);
if stmt && res.is_ok() && !p.eof() {
p.expected_at("semicolon or line break");
}
p.end_group();
}
// Block and template.
NodeKind::LeftBrace => block(p),
NodeKind::LeftBracket => template(p),
NodeKind::Error(_, _) => p.eat(),
_ => p.unexpected(),
};
*at_start = false;
}
/// Parse a heading: one or more `=` tokens followed by indented markup.
fn heading(p: &mut Parser) {
    p.perform(NodeKind::Heading, |p| {
        // `markup_node` only dispatches here when an `=` is peeked.
        p.eat_assert(&NodeKind::Eq);

        // Consume any further `=` tokens.
        loop {
            if !p.eat_if(&NodeKind::Eq) {
                break;
            }
        }

        // The body must stay right of the column where the last `=` ended.
        let end = p.prev_end();
        let min_column = p.column(end);
        markup_indented(p, min_column);
    });
}
/// Parse a single list item: a bullet followed by indented markup.
fn list_node(p: &mut Parser) {
    p.perform(NodeKind::List, |p| {
        p.eat_assert(&NodeKind::ListBullet);

        // The item's body must stay right of the column where the bullet ended.
        let end = p.prev_end();
        let min_column = p.column(end);
        markup_indented(p, min_column);
    });
}
/// Parse a single enum item: a numbering token followed by indented markup.
fn enum_node(p: &mut Parser) {
    p.perform(NodeKind::Enum, |p| {
        // Consume the current token; `markup_node` dispatches here on an
        // `EnumNumbering` token.
        p.eat();

        // The item's body must stay right of the column where that token ended.
        let end = p.prev_end();
        let min_column = p.column(end);
        markup_indented(p, min_column);
    });
}
/// Parse a full (non-atomic) expression, accepting operators of any
/// precedence.
fn expr(p: &mut Parser) -> ParseResult {
    let atomic = false;
    let min_prec = 0;
    expr_prec(p, atomic, min_prec)
}
/// Parse an expression with operators having at least the minimum precedence.
///
/// If `atomic` is true, this does not parse binary operations and arrow
/// functions, which is exactly what we want in a shorthand expression directly
/// in markup.
///
/// Stops parsing at operations with lower precedence than `min_prec`.
fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult {
    let marker = p.marker();

    // A leading unary operator wraps the operand that follows it; otherwise
    // the expression starts with a primary.
    let unary = p.eat_map(|x| UnOp::from_token(&x));
    if let Some(op) = unary {
        let prec = op.precedence();
        expr_prec(p, atomic, prec)?;
        marker.end(p, NodeKind::Unary);
    } else {
        primary(p, atomic)?;
    }

    loop {
        // A directly following parenthesis or bracket means this is a
        // function call.
        let is_call = match p.peek_direct() {
            Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => true,
            _ => false,
        };
        if is_call {
            call(p, &marker)?;
            continue;
        }

        // Atomic expressions take neither `with` nor binary operators.
        if atomic {
            break;
        }

        let has_with = p.peek() == Some(&NodeKind::With);
        if has_with {
            with_expr(p, &marker)?;
        }

        let op = if let Some(op) = p.peek().and_then(BinOp::from_token) {
            op
        } else {
            break;
        };

        let prec = op.precedence();
        if prec < min_prec {
            break;
        }

        p.eat();

        // For left-associative operators the right-hand side must bind
        // strictly tighter than the operator itself.
        let rhs_prec = match op.associativity() {
            Associativity::Left => prec + 1,
            Associativity::Right => prec,
        };

        marker.perform(p, NodeKind::Binary, |p| expr_prec(p, atomic, rhs_prec))?;
    }

    Ok(())
}
/// Parse a primary expression.
///
/// This is a literal, an identifier (or, unless `atomic`, a closure with a
/// lone parameter), a parenthesized construct, a template, a block or one of
/// the keyword expressions. Anything else is reported as a missing expression.
fn primary(p: &mut Parser, atomic: bool) -> ParseResult {
    if literal(p) {
        return Ok(());
    }

    match p.peek() {
        // Things that start with an identifier.
        Some(NodeKind::Ident(_)) => {
            // Remember the start in case this turns out to be a closure.
            let marker = p.marker();
            p.eat();

            // An arrow makes the identifier a closure's lone parameter.
            let is_closure = !atomic && p.peek() == Some(&NodeKind::Arrow);
            if !is_closure {
                return Ok(());
            }

            marker.end(p, NodeKind::ClosureParams);
            p.eat();
            marker.perform(p, NodeKind::Closure, expr)
        }

        // Structures.
        Some(NodeKind::LeftParen) => parenthesized(p),
        Some(NodeKind::LeftBracket) => {
            template(p);
            Ok(())
        }
        Some(NodeKind::LeftBrace) => {
            block(p);
            Ok(())
        }

        // Keywords.
        Some(NodeKind::Let) => let_expr(p),
        Some(NodeKind::If) => if_expr(p),
        Some(NodeKind::While) => while_expr(p),
        Some(NodeKind::For) => for_expr(p),
        Some(NodeKind::Import) => import_expr(p),
        Some(NodeKind::Include) => include_expr(p),

        // An error token is consumed but still fails the expression.
        Some(NodeKind::Error(_, _)) => {
            p.eat();
            Err(())
        }

        // Nothing that could start an expression.
        _ => {
            p.expected("expression");
            Err(())
        }
    }
}
/// Parse a literal.
///
/// Consumes the peeked token and returns true if it is one of the basic
/// value tokens; otherwise leaves the parser untouched and returns false.
fn literal(p: &mut Parser) -> bool {
    let is_literal = matches!(
        p.peek(),
        Some(
            NodeKind::None
                | NodeKind::Auto
                | NodeKind::Int(_)
                | NodeKind::Float(_)
                | NodeKind::Bool(_)
                | NodeKind::Fraction(_)
                | NodeKind::Length(_, _)
                | NodeKind::Angle(_, _)
                | NodeKind::Percentage(_)
                | NodeKind::Str(_)
        )
    );

    if is_literal {
        p.eat();
    }

    is_literal
}
/// Parse something that starts with a parenthesis, which can be either of:
/// - Array literal
/// - Dictionary literal
/// - Parenthesized expression
/// - Parameter list of closure expression
///
/// The contents are first parsed as a generic collection and only afterwards
/// reinterpreted, depending on what was found and what follows.
fn parenthesized(p: &mut Parser) -> ParseResult {
    let marker = p.marker();

    p.start_group(Group::Paren, TokenMode::Code);
    let colon = p.eat_if(&NodeKind::Colon);
    let (kind, _) = collection(p);
    p.end_group();

    if colon {
        // Leading colon makes this a (empty) dictionary.
        dict(p, &marker);
    } else if p.peek() == Some(&NodeKind::Arrow) {
        // Arrow means this is a closure's parameter list.
        params(p, &marker, true);
        marker.end(p, NodeKind::ClosureParams);
        p.eat_assert(&NodeKind::Arrow);
        return marker.perform(p, NodeKind::Closure, expr);
    } else {
        // Otherwise the collection kind decides what this is.
        match kind {
            CollectionKind::Group => marker.end(p, NodeKind::Group),
            CollectionKind::Positional => array(p, &marker),
            CollectionKind::Named => dict(p, &marker),
        }
    }

    Ok(())
}
/// The type of a collection, as determined by `collection`.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum CollectionKind {
/// The collection is exactly one item that is neither named nor a spread
/// and is not followed by a comma.
Group,
/// The default kind: the first item (if any) is not a named pair and the
/// collection does not qualify as a group.
Positional,
/// The collection starts with a named item.
Named,
}
/// Parse a collection.
///
/// Returns the kind of the collection together with the number of items that
/// were parsed successfully. A missing comma between two items is reported at
/// the position right after the first of them.
fn collection(p: &mut Parser) -> (CollectionKind, usize) {
    let mut count = 0;
    let mut kind = CollectionKind::Positional;
    let mut can_group = true;
    // Position where a comma was missing after the previous item, if any.
    let mut pending_comma: Option<Marker> = None;

    while !p.eof() {
        let item_kind = match item(p) {
            Ok(item_kind) => item_kind,
            // Failed items are not counted.
            Err(_) => continue,
        };

        // Only the first item decides whether the collection is named.
        let named_first = count == 0 && item_kind == NodeKind::Named;
        if named_first {
            kind = CollectionKind::Named;
        }
        if named_first || item_kind == NodeKind::Spread {
            can_group = false;
        }

        count += 1;

        // Another item followed, so the comma before it really was missing.
        if let Some(marker) = pending_comma.take() {
            marker.expected_at(p, "comma");
        }

        if p.eof() {
            break;
        }

        if p.eat_if(&NodeKind::Comma) {
            // A comma rules out a plain parenthesized group.
            can_group = false;
        } else {
            pending_comma = Some(p.marker());
        }
    }

    let kind = if can_group && count == 1 {
        CollectionKind::Group
    } else {
        kind
    };

    (kind, count)
}
/// Parse a collection item: a spread, a named pair or a plain expression.
///
/// Returns `NodeKind::Spread` or `NodeKind::Named` for the respective items
/// and otherwise a clone of the kind of the last child that was produced.
fn item(p: &mut Parser) -> ParseResult<NodeKind> {
    let marker = p.marker();

    // `..expr`
    if p.eat_if(&NodeKind::Dots) {
        marker.perform(p, NodeKind::Spread, expr)?;
        return Ok(NodeKind::Spread);
    }

    expr(p)?;

    // Without a colon this is just the expression parsed above.
    if p.peek() != Some(&NodeKind::Colon) {
        return Ok(p.last_child().unwrap().kind().clone());
    }

    marker.perform(p, NodeKind::Named, |p| {
        let name_is_ident =
            matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_));

        if name_is_ident {
            p.eat();
            return expr(p);
        }

        // The name is not an identifier: turn it into an error, but still
        // parse the value so that parsing can continue behind the pair.
        marker.end(
            p,
            NodeKind::Error(ErrorPosition::Full, "expected identifier".into()),
        );
        p.eat();
        expr(p).ok();
        Err(())
    })?;

    Ok(NodeKind::Named)
}
/// Convert a collection into an array, producing errors for anything other than
/// expressions.
fn array(p: &mut Parser, marker: &Marker) {
marker.filter_children(p, |x| match x.kind() {
NodeKind::Named => Err((
ErrorPosition::Full,
"expected expression, found named pair".into(),
)),
NodeKind::Spread => {
Err((ErrorPosition::Full, "spreading is not allowed here".into()))
}
_ => Ok(()),
});
marker.end(p, NodeKind::Array);
}
/// Convert a collection into a dictionary, producing errors for anything other
/// than named pairs.
///
/// Commas, colons and children for which `is_paren()` holds are let through
/// as well.
fn dict(p: &mut Parser, marker: &Marker) {
    marker.filter_children(p, |child| {
        let message = match child.kind() {
            NodeKind::Named | NodeKind::Comma | NodeKind::Colon => return Ok(()),
            NodeKind::Spread => "spreading is not allowed here",
            other if other.is_paren() => return Ok(()),
            _ => "expected named pair, found expression",
        };
        Err((ErrorPosition::Full, message.into()))
    });

    marker.end(p, NodeKind::Dict);
}
/// Convert a collection into a list of parameters, producing errors for
/// anything other than identifiers, spread operations and named pairs.
///
/// A spread is only accepted if its last child is an identifier. If
/// `allow_parens` is set, children for which `is_paren()` holds are accepted
/// too.
fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) {
    marker.filter_children(p, |child| {
        let accepted = match child.kind() {
            NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => true,
            NodeKind::Spread => {
                let spreads_ident = matches!(
                    child.children().last().map(|x| x.kind()),
                    Some(&NodeKind::Ident(_))
                );
                spreads_ident || (allow_parens && child.kind().is_paren())
            }
            other => allow_parens && other.is_paren(),
        };

        if accepted {
            Ok(())
        } else {
            Err((ErrorPosition::Full, "expected identifier".into()))
        }
    });
}
/// Parse a template block: `[...]`.
///
/// The contents of the bracket group are parsed as markup and wrapped in a
/// `NodeKind::Template` node.
fn template(p: &mut Parser) {
    p.perform(NodeKind::Template, |p| {
        // Inside the brackets the tokenizer runs in markup mode.
        p.start_group(Group::Bracket, TokenMode::Markup);
        markup(p);
        p.end_group();
    });
}
/// Parse a code block: `{...}`.
///
/// The brace group is parsed as a sequence of statement groups, each holding
/// a single expression.
fn block(p: &mut Parser) {
    p.perform(NodeKind::Block, |p| {
        p.start_group(Group::Brace, TokenMode::Code);

        loop {
            if p.eof() {
                break;
            }

            p.start_group(Group::Stmt, TokenMode::Code);
            let parsed = expr(p).is_ok();
            // Anything left in the statement group after a valid expression
            // is missing a separator.
            if parsed && !p.eof() {
                p.expected_at("semicolon or line break");
            }
            p.end_group();

            // Forcefully skip over newlines since the group's contents can't.
            p.eat_while(|kind| match kind {
                NodeKind::Space(_) => true,
                _ => false,
            });
        }

        p.end_group();
    });
}
/// Parse a function call.
///
/// `callee` marks the start of the expression being called; it and the
/// argument list are wrapped in a `NodeKind::Call` node.
fn call(p: &mut Parser, callee: &Marker) -> ParseResult {
    callee.perform(p, NodeKind::Call, |p| {
        let has_args = matches!(
            p.peek_direct(),
            Some(NodeKind::LeftParen | NodeKind::LeftBracket)
        );

        if has_args {
            args(p, true);
            Ok(())
        } else {
            p.expected_at("argument list");
            Err(())
        }
    })
}
/// Parse the arguments to a function call.
///
/// If `allow_template` is false, a parenthesized argument list is always
/// parsed. Otherwise the parenthesized list is only parsed when a `(` follows
/// directly, and any number of directly following `[...]` templates are
/// parsed as additional arguments.
fn args(p: &mut Parser, allow_template: bool) {
    p.perform(NodeKind::CallArgs, |p| {
        let has_parens =
            !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen);
        if has_parens {
            p.start_group(Group::Paren, TokenMode::Code);
            collection(p);
            p.end_group();
        }

        if allow_template {
            while p.peek_direct() == Some(&NodeKind::LeftBracket) {
                template(p);
            }
        }
    })
}
/// Parse a with expression: `with` followed by a parenthesized argument list.
///
/// `marker` marks the start of the expression that the `with` applies to.
fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult {
    marker.perform(p, NodeKind::WithExpr, |p| {
        p.eat_assert(&NodeKind::With);

        // Templates are not accepted here, only a parenthesized list.
        if p.peek() != Some(&NodeKind::LeftParen) {
            p.expected("argument list");
            return Err(());
        }

        args(p, false);
        Ok(())
    })
}
/// Parse a let expression.
///
/// Handles plain bindings with an optional initializer, bindings of a `with`
/// expression and function definitions, whose name, parameters and body are
/// wrapped in a closure node.
fn let_expr(p: &mut Parser) -> ParseResult {
    p.perform(NodeKind::LetExpr, |p| {
        p.eat_assert(&NodeKind::Let);

        // Marks the binding's name, which is where a closure would start.
        let marker = p.marker();
        ident(p)?;

        if p.peek() == Some(&NodeKind::With) {
            return with_expr(p, &marker);
        }

        // If a parenthesis follows, this is a function definition.
        let has_params = p.peek_direct() == Some(&NodeKind::LeftParen);
        if has_params {
            p.perform(NodeKind::ClosureParams, |p| {
                p.start_group(Group::Paren, TokenMode::Code);
                let params_start = p.marker();
                collection(p);
                params(p, &params_start, true);
                p.end_group();
            });
        }

        let has_init = p.eat_if(&NodeKind::Eq);
        if has_init {
            expr(p)?;
        }

        if has_params {
            // Function definitions must have a body.
            if !has_init {
                p.expected_at("body");
            }
            // Rewrite into a closure expression.
            marker.end(p, NodeKind::Closure);
        }

        Ok(())
    })
}
/// Parse an if expression: condition, body and an optional `else` branch,
/// which is either another if expression or a body.
fn if_expr(p: &mut Parser) -> ParseResult {
    p.perform(NodeKind::IfExpr, |p| {
        p.eat_assert(&NodeKind::If);
        expr(p)?;
        body(p)?;

        if !p.eat_if(&NodeKind::Else) {
            return Ok(());
        }

        // `else if` chains recurse; a plain `else` takes a body.
        if p.peek() == Some(&NodeKind::If) {
            if_expr(p)
        } else {
            body(p)
        }
    })
}
/// Parse a while expression: the keyword, a condition and a body.
fn while_expr(p: &mut Parser) -> ParseResult {
    p.perform(NodeKind::WhileExpr, |p| {
        p.eat_assert(&NodeKind::While);
        expr(p)?;
        body(p)
    })
}
/// Parse a for expression: the keyword, a pattern, `in`, the expression to
/// iterate over and a body.
fn for_expr(p: &mut Parser) -> ParseResult {
    p.perform(NodeKind::ForExpr, |p| {
        p.eat_assert(&NodeKind::For);
        for_pattern(p)?;
        p.eat_expect(&NodeKind::In)?;
        expr(p)?;
        body(p)
    })
}
/// Parse a for loop pattern: one identifier, optionally followed by a comma
/// and a second identifier.
fn for_pattern(p: &mut Parser) -> ParseResult {
    p.perform(NodeKind::ForPattern, |p| {
        ident(p)?;
        if p.eat_if(&NodeKind::Comma) {
            ident(p)
        } else {
            Ok(())
        }
    })
}
/// Parse an import expression: `import`, either `*` or a list of items,
/// `from` and the source expression.
fn import_expr(p: &mut Parser) -> ParseResult {
    p.perform(NodeKind::ImportExpr, |p| {
        p.eat_assert(&NodeKind::Import);

        let wildcard = p.eat_if(&NodeKind::Star);
        if !wildcard {
            // This is the list of identifiers scenario.
            p.perform(NodeKind::ImportItems, |p| {
                p.start_group(Group::Imports, TokenMode::Code);
                let marker = p.marker();
                let (_, count) = collection(p);
                if count == 0 {
                    p.expected_at("import items");
                }
                p.end_group();

                // Only identifiers and the commas between them are valid.
                marker.filter_children(p, |node| {
                    if matches!(node.kind(), NodeKind::Ident(_) | NodeKind::Comma) {
                        Ok(())
                    } else {
                        Err((ErrorPosition::Full, "expected identifier".into()))
                    }
                });
            });
        }

        p.eat_expect(&NodeKind::From)?;
        expr(p)
    })
}
/// Parse an include expression: the keyword followed by an expression.
fn include_expr(p: &mut Parser) -> ParseResult {
    p.perform(NodeKind::IncludeExpr, |p| {
        p.eat_assert(&NodeKind::Include);
        expr(p)
    })
}
/// Parse an identifier.
///
/// Consumes the peeked token if it is an identifier; otherwise reports that
/// an identifier was expected and fails.
fn ident(p: &mut Parser) -> ParseResult {
    if matches!(p.peek(), Some(NodeKind::Ident(_))) {
        p.eat();
        Ok(())
    } else {
        p.expected("identifier");
        Err(())
    }
}
/// Parse a control flow body, which is either a template or a block.
///
/// Reports a missing body and fails if neither a `[` nor a `{` is peeked.
fn body(p: &mut Parser) -> ParseResult {
    match p.peek() {
        Some(NodeKind::LeftBracket) => {
            template(p);
            Ok(())
        }
        Some(NodeKind::LeftBrace) => {
            block(p);
            Ok(())
        }
        _ => {
            p.expected_at("body");
            Err(())
        }
    }
}