//! Parsing and tokenization. mod parser; mod resolve; mod scanner; mod tokens; pub use parser::*; pub use resolve::*; pub use scanner::*; pub use tokens::*; use std::rc::Rc; use crate::syntax::ast::{Associativity, BinOp, UnOp}; use crate::syntax::{ErrorPosition, GreenNode, NodeKind}; /// Parse a source file. pub fn parse(source: &str) -> Rc { let mut p = Parser::new(source); markup(&mut p); p.finish() } /// Parse markup. fn markup(p: &mut Parser) { markup_while(p, true, &mut |_| true) } /// Parse markup that stays right of the given column. fn markup_indented(p: &mut Parser, column: usize) { p.eat_while(|t| match t { NodeKind::Space(n) => *n == 0, NodeKind::LineComment | NodeKind::BlockComment => true, _ => false, }); markup_while(p, false, &mut |p| match p.peek() { Some(NodeKind::Space(n)) if *n >= 1 => p.column(p.next_end()) >= column, _ => true, }) } /// Parse a syntax tree while the peeked NodeKind satisifies a condition. /// /// If `at_start` is true, things like headings that may only appear at the /// beginning of a line or template are allowed. fn markup_while(p: &mut Parser, mut at_start: bool, f: &mut F) where F: FnMut(&mut Parser) -> bool, { p.perform(NodeKind::Markup, |p| { while !p.eof() && f(p) { markup_node(p, &mut at_start); } }); } /// Parse a markup node. fn markup_node(p: &mut Parser, at_start: &mut bool) { let token = match p.peek() { Some(t) => t, None => return, }; match token { // Whitespace. NodeKind::Space(newlines) => { *at_start |= *newlines > 0; if *newlines < 2 { p.eat(); } else { p.convert(NodeKind::Parbreak); } return; } // Comments. NodeKind::LineComment | NodeKind::BlockComment => { p.eat(); return; } // Text and markup. NodeKind::Text(_) | NodeKind::EnDash | NodeKind::EmDash | NodeKind::NonBreakingSpace | NodeKind::Emph | NodeKind::Strong | NodeKind::Linebreak | NodeKind::Raw(_) | NodeKind::UnicodeEscape(_) => { p.eat(); } NodeKind::Eq if *at_start => heading(p), NodeKind::ListBullet if *at_start => list_node(p), NodeKind::EnumNumbering(_) if *at_start => enum_node(p), // Line-based markup that is not currently at the start of the line. NodeKind::Eq | NodeKind::ListBullet | NodeKind::EnumNumbering(_) => { p.convert(NodeKind::Text(p.peek_src().into())); } // Hashtag + keyword / identifier. NodeKind::Ident(_) | NodeKind::Let | NodeKind::If | NodeKind::While | NodeKind::For | NodeKind::Import | NodeKind::Include => { let stmt = matches!(token, NodeKind::Let | NodeKind::Import); let group = if stmt { Group::Stmt } else { Group::Expr }; p.start_group(group, TokenMode::Code); let res = expr_prec(p, true, 0); if stmt && res.is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } p.end_group(); } // Block and template. NodeKind::LeftBrace => block(p), NodeKind::LeftBracket => template(p), NodeKind::Error(_, _) => p.eat(), _ => p.unexpected(), }; *at_start = false; } /// Parse a heading. fn heading(p: &mut Parser) { p.perform(NodeKind::Heading, |p| { p.eat_assert(&NodeKind::Eq); while p.eat_if(&NodeKind::Eq) {} let column = p.column(p.prev_end()); markup_indented(p, column); }); } /// Parse a single list item. fn list_node(p: &mut Parser) { p.perform(NodeKind::List, |p| { p.eat_assert(&NodeKind::ListBullet); let column = p.column(p.prev_end()); markup_indented(p, column); }); } /// Parse a single enum item. fn enum_node(p: &mut Parser) { p.perform(NodeKind::Enum, |p| { p.eat(); let column = p.column(p.prev_end()); markup_indented(p, column); }); } /// Parse an expression. fn expr(p: &mut Parser) -> ParseResult { expr_prec(p, false, 0) } /// Parse an expression with operators having at least the minimum precedence. /// /// If `atomic` is true, this does not parse binary operations and arrow /// functions, which is exactly what we want in a shorthand expression directly /// in markup. /// /// Stops parsing at operations with lower precedence than `min_prec`, fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. match p.eat_map(|x| UnOp::from_token(&x)) { Some(op) => { let prec = op.precedence(); expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); } None => primary(p, atomic)?, }; loop { // Exclamation mark, parenthesis or bracket means this is a function // call. if matches!( p.peek_direct(), Some(NodeKind::LeftParen | NodeKind::LeftBracket) ) { call(p, &marker)?; continue; } if atomic { break; } if p.peek() == Some(&NodeKind::With) { with_expr(p, &marker)?; } let op = match p.peek().and_then(BinOp::from_token) { Some(binop) => binop, None => break, }; let mut prec = op.precedence(); if prec < min_prec { break; } p.eat(); match op.associativity() { Associativity::Left => prec += 1, Associativity::Right => {} } marker.perform(p, NodeKind::Binary, |p| expr_prec(p, atomic, prec))?; } Ok(()) } /// Parse a primary expression. fn primary(p: &mut Parser, atomic: bool) -> ParseResult { if literal(p) { return Ok(()); } match p.peek() { // Things that start with an identifier. Some(NodeKind::Ident(_)) => { // Start closure params. let marker = p.marker(); p.eat(); // Arrow means this is a closure's lone parameter. if !atomic && p.peek() == Some(&NodeKind::Arrow) { marker.end(p, NodeKind::ClosureParams); p.eat(); marker.perform(p, NodeKind::Closure, expr) } else { Ok(()) } } // Structures. Some(NodeKind::LeftParen) => parenthesized(p), Some(NodeKind::LeftBracket) => { template(p); Ok(()) } Some(NodeKind::LeftBrace) => { block(p); Ok(()) } // Keywords. Some(NodeKind::Let) => let_expr(p), Some(NodeKind::If) => if_expr(p), Some(NodeKind::While) => while_expr(p), Some(NodeKind::For) => for_expr(p), Some(NodeKind::Import) => import_expr(p), Some(NodeKind::Include) => include_expr(p), Some(NodeKind::Error(_, _)) => { p.eat(); Err(()) } // Nothing. _ => { p.expected("expression"); Err(()) } } } /// Parse a literal. fn literal(p: &mut Parser) -> bool { match p.peek() { // Basic values. Some( NodeKind::None | NodeKind::Auto | NodeKind::Int(_) | NodeKind::Float(_) | NodeKind::Bool(_) | NodeKind::Fraction(_) | NodeKind::Length(_, _) | NodeKind::Angle(_, _) | NodeKind::Percentage(_) | NodeKind::Str(_), ) => { p.eat(); true } _ => false, } } /// Parse something that starts with a parenthesis, which can be either of: /// - Array literal /// - Dictionary literal /// - Parenthesized expression /// - Parameter list of closure expression fn parenthesized(p: &mut Parser) -> ParseResult { let marker = p.marker(); p.start_group(Group::Paren, TokenMode::Code); let colon = p.eat_if(&NodeKind::Colon); let kind = collection(p).0; p.end_group(); // Leading colon makes this a (empty) dictionary. if colon { dict(p, &marker); return Ok(()); } // Arrow means this is a closure's parameter list. if p.peek() == Some(&NodeKind::Arrow) { params(p, &marker, true); marker.end(p, NodeKind::ClosureParams); p.eat_assert(&NodeKind::Arrow); return marker.perform(p, NodeKind::Closure, expr); } // Find out which kind of collection this is. match kind { CollectionKind::Group => marker.end(p, NodeKind::Group), CollectionKind::Positional => array(p, &marker), CollectionKind::Named => dict(p, &marker), } Ok(()) } /// The type of a collection. #[derive(Debug, Copy, Clone, Eq, PartialEq)] enum CollectionKind { /// The collection is only one item and has no comma. Group, /// The collection starts with a positional and has more items or a trailing /// comma. Positional, /// The collection starts with a named item. Named, } /// Parse a collection. /// /// Returns the length of the collection and whether the literal contained any /// commas. fn collection(p: &mut Parser) -> (CollectionKind, usize) { let mut items = 0; let mut kind = CollectionKind::Positional; let mut can_group = true; let mut missing_coma: Option = None; while !p.eof() { if let Ok(item_kind) = item(p) { if items == 0 && item_kind == NodeKind::Named { kind = CollectionKind::Named; can_group = false; } if item_kind == NodeKind::Spread { can_group = false; } items += 1; if let Some(marker) = missing_coma.take() { marker.expected_at(p, "comma"); } if p.eof() { break; } if p.eat_if(&NodeKind::Comma) { can_group = false; } else { missing_coma = Some(p.marker()); } } } if can_group && items == 1 { kind = CollectionKind::Group; } (kind, items) } /// Parse an expression or a named pair. Returns if this is a named pair. fn item(p: &mut Parser) -> ParseResult { let marker = p.marker(); if p.eat_if(&NodeKind::Dots) { marker.perform(p, NodeKind::Spread, expr)?; return Ok(NodeKind::Spread); } expr(p)?; if p.peek() == Some(&NodeKind::Colon) { marker.perform(p, NodeKind::Named, |p| { if matches!(marker.child_at(p).unwrap().kind(), &NodeKind::Ident(_)) { p.eat(); expr(p) } else { marker.end( p, NodeKind::Error(ErrorPosition::Full, "expected identifier".into()), ); p.eat(); expr(p).ok(); Err(()) } })?; Ok(NodeKind::Named) } else { Ok(p.last_child().unwrap().kind().clone()) } } /// Convert a collection into an array, producing errors for anything other than /// expressions. fn array(p: &mut Parser, marker: &Marker) { marker.filter_children(p, |x| match x.kind() { NodeKind::Named => Err(( ErrorPosition::Full, "expected expression, found named pair".into(), )), NodeKind::Spread => { Err((ErrorPosition::Full, "spreading is not allowed here".into())) } _ => Ok(()), }); marker.end(p, NodeKind::Array); } /// Convert a collection into a dictionary, producing errors for anything other /// than named pairs. fn dict(p: &mut Parser, marker: &Marker) { marker.filter_children(p, |x| match x.kind() { NodeKind::Named | NodeKind::Comma | NodeKind::Colon => Ok(()), NodeKind::Spread => { Err((ErrorPosition::Full, "spreading is not allowed here".into())) } _ if x.kind().is_paren() => Ok(()), _ => Err(( ErrorPosition::Full, "expected named pair, found expression".into(), )), }); marker.end(p, NodeKind::Dict); } /// Convert a collection into a list of parameters, producing errors for /// anything other than identifiers, spread operations and named pairs. fn params(p: &mut Parser, marker: &Marker, allow_parens: bool) { marker.filter_children(p, |x| match x.kind() { NodeKind::Named | NodeKind::Comma | NodeKind::Ident(_) => Ok(()), NodeKind::Spread if matches!( x.children().last().map(|x| x.kind()), Some(&NodeKind::Ident(_)) ) => { Ok(()) } _ if allow_parens && x.kind().is_paren() => Ok(()), _ => Err((ErrorPosition::Full, "expected identifier".into())), }); } // Parse a template block: `[...]`. fn template(p: &mut Parser) { p.perform(NodeKind::Template, |p| { p.start_group(Group::Bracket, TokenMode::Markup); markup(p); p.end_group(); }); } /// Parse a code block: `{...}`. fn block(p: &mut Parser) { p.perform(NodeKind::Block, |p| { p.start_group(Group::Brace, TokenMode::Code); while !p.eof() { p.start_group(Group::Stmt, TokenMode::Code); if expr(p).is_ok() && !p.eof() { p.expected_at("semicolon or line break"); } p.end_group(); // Forcefully skip over newlines since the group's contents can't. p.eat_while(|t| matches!(t, NodeKind::Space(_))); } p.end_group(); }); } /// Parse a function call. fn call(p: &mut Parser, callee: &Marker) -> ParseResult { callee.perform(p, NodeKind::Call, |p| match p.peek_direct() { Some(NodeKind::LeftParen) | Some(NodeKind::LeftBracket) => { args(p, true); Ok(()) } _ => { p.expected_at("argument list"); Err(()) } }) } /// Parse the arguments to a function call. fn args(p: &mut Parser, allow_template: bool) { p.perform(NodeKind::CallArgs, |p| { if !allow_template || p.peek_direct() == Some(&NodeKind::LeftParen) { p.start_group(Group::Paren, TokenMode::Code); collection(p); p.end_group(); } while allow_template && p.peek_direct() == Some(&NodeKind::LeftBracket) { template(p); } }) } /// Parse a with expression. fn with_expr(p: &mut Parser, marker: &Marker) -> ParseResult { marker.perform(p, NodeKind::WithExpr, |p| { p.eat_assert(&NodeKind::With); if p.peek() == Some(&NodeKind::LeftParen) { args(p, false); Ok(()) } else { p.expected("argument list"); Err(()) } }) } /// Parse a let expression. fn let_expr(p: &mut Parser) -> ParseResult { p.perform(NodeKind::LetExpr, |p| { p.eat_assert(&NodeKind::Let); let marker = p.marker(); ident(p)?; if p.peek() == Some(&NodeKind::With) { with_expr(p, &marker)?; } else { // If a parenthesis follows, this is a function definition. let has_params = p.peek_direct() == Some(&NodeKind::LeftParen); if has_params { p.perform(NodeKind::ClosureParams, |p| { p.start_group(Group::Paren, TokenMode::Code); let marker = p.marker(); collection(p); params(p, &marker, true); p.end_group(); }); } if p.eat_if(&NodeKind::Eq) { expr(p)?; } else if has_params { // Function definitions must have a body. p.expected_at("body"); } // Rewrite into a closure expression if it's a function definition. if has_params { marker.end(p, NodeKind::Closure); } } Ok(()) }) } /// Parse an if expresion. fn if_expr(p: &mut Parser) -> ParseResult { p.perform(NodeKind::IfExpr, |p| { p.eat_assert(&NodeKind::If); expr(p)?; body(p)?; if p.eat_if(&NodeKind::Else) { if p.peek() == Some(&NodeKind::If) { if_expr(p)?; } else { body(p)?; } } Ok(()) }) } /// Parse a while expresion. fn while_expr(p: &mut Parser) -> ParseResult { p.perform(NodeKind::WhileExpr, |p| { p.eat_assert(&NodeKind::While); expr(p)?; body(p)?; Ok(()) }) } /// Parse a for expression. fn for_expr(p: &mut Parser) -> ParseResult { p.perform(NodeKind::ForExpr, |p| { p.eat_assert(&NodeKind::For); for_pattern(p)?; p.eat_expect(&NodeKind::In)?; expr(p)?; body(p)?; Ok(()) }) } /// Parse a for loop pattern. fn for_pattern(p: &mut Parser) -> ParseResult { p.perform(NodeKind::ForPattern, |p| { ident(p)?; if p.eat_if(&NodeKind::Comma) { ident(p)?; } Ok(()) }) } /// Parse an import expression. fn import_expr(p: &mut Parser) -> ParseResult { p.perform(NodeKind::ImportExpr, |p| { p.eat_assert(&NodeKind::Import); if !p.eat_if(&NodeKind::Star) { // This is the list of identifiers scenario. p.perform(NodeKind::ImportItems, |p| { p.start_group(Group::Imports, TokenMode::Code); let marker = p.marker(); let items = collection(p).1; if items == 0 { p.expected_at("import items"); } p.end_group(); marker.filter_children(p, |n| match n.kind() { NodeKind::Ident(_) | NodeKind::Comma => Ok(()), _ => Err((ErrorPosition::Full, "expected identifier".into())), }); }); }; p.eat_expect(&NodeKind::From)?; expr(p)?; Ok(()) }) } /// Parse an include expression. fn include_expr(p: &mut Parser) -> ParseResult { p.perform(NodeKind::IncludeExpr, |p| { p.eat_assert(&NodeKind::Include); expr(p)?; Ok(()) }) } /// Parse an identifier. fn ident(p: &mut Parser) -> ParseResult { match p.peek() { Some(NodeKind::Ident(_)) => { p.eat(); Ok(()) } _ => { p.expected("identifier"); Err(()) } } } /// Parse a control flow body. fn body(p: &mut Parser) -> ParseResult { match p.peek() { Some(NodeKind::LeftBracket) => template(p), Some(NodeKind::LeftBrace) => block(p), _ => { p.expected_at("body"); return Err(()); } } Ok(()) }