Lists with indent-based parsing

- Unordered lists with indent-based parsing and basic layout using stacks
- Headings are now also indent based
- Removes syntax functions since they will be superseded by select & transform
This commit is contained in:
Laurenz 2021-06-09 00:37:13 +02:00
parent d69dfa84ec
commit 5afb42ad89
35 changed files with 710 additions and 817 deletions

View File

@ -2,7 +2,7 @@ use std::rc::Rc;
use super::{Scope, Scopes, Value};
use crate::syntax::visit::{visit_expr, Visit};
use crate::syntax::{Expr, Ident, Node};
use crate::syntax::{Expr, Ident};
/// A visitor that captures variable slots.
#[derive(Debug)]
@ -26,37 +26,20 @@ impl<'a> CapturesVisitor<'a> {
pub fn finish(self) -> Scope {
self.captures
}
/// Find out whether the name is not locally defined and if so if it can be
/// captured.
fn process(&mut self, name: &str) {
    // Locally defined names shadow external ones and need no capture.
    if self.internal.get(name).is_some() {
        return;
    }

    // Capture the slot if one of the surrounding scopes defines the name.
    if let Some(slot) = self.external.get(name) {
        self.captures.def_slot(name, Rc::clone(slot));
    }
}
}
impl<'ast> Visit<'ast> for CapturesVisitor<'_> {
fn visit_node(&mut self, node: &'ast Node) {
match node {
Node::Text(_) => {}
Node::Space => {}
Node::Linebreak(_) => self.process(Node::LINEBREAK),
Node::Parbreak(_) => self.process(Node::PARBREAK),
Node::Strong(_) => self.process(Node::STRONG),
Node::Emph(_) => self.process(Node::EMPH),
Node::Heading(_) => self.process(Node::HEADING),
Node::Raw(_) => self.process(Node::RAW),
Node::Expr(expr) => self.visit_expr(expr),
}
}
fn visit_expr(&mut self, node: &'ast Expr) {
match node {
Expr::Ident(ident) => self.process(ident),
expr => visit_expr(self, expr),
if let Expr::Ident(ident) = node {
// Find out whether the name is not locally defined and if so if it
// can be captured.
if self.internal.get(ident).is_none() {
if let Some(slot) = self.external.get(ident) {
self.captures.def_slot(ident.as_str(), Rc::clone(slot));
}
}
} else {
visit_expr(self, node);
}
}

View File

@ -218,24 +218,23 @@ pub trait Eval {
}
impl Eval for Tree {
type Output = NodeMap;
type Output = ExprMap;
fn eval(&self, ctx: &mut EvalContext) -> Self::Output {
let mut map = NodeMap::new();
for node in self {
let value = if let Some(call) = node.desugar() {
call.eval(ctx)
} else if let Node::Expr(expr) = node {
expr.eval(ctx)
} else {
continue;
};
map.insert(node as *const _, value);
struct ExprVisitor<'a, 'b> {
ctx: &'a mut EvalContext<'b>,
map: ExprMap,
}
map
impl<'ast> Visit<'ast> for ExprVisitor<'_, '_> {
fn visit_expr(&mut self, node: &'ast Expr) {
self.map.insert(node as *const _, node.eval(self.ctx));
}
}
let mut visitor = ExprVisitor { ctx, map: ExprMap::new() };
visitor.visit_tree(self);
visitor.map
}
}

View File

@ -33,8 +33,7 @@ impl<'a> Scopes<'a> {
/// Exit the topmost scope.
///
/// # Panics
/// Panics if no scope was entered.
/// This panics if no scope was entered.
pub fn exit(&mut self) {
self.top = self.scopes.pop().expect("no pushed scope");
}

View File

@ -9,7 +9,7 @@ use super::EvalContext;
use crate::color::{Color, RgbaColor};
use crate::exec::ExecContext;
use crate::geom::{Angle, Length, Linear, Relative};
use crate::syntax::{Node, Span, Spanned, Tree};
use crate::syntax::{Expr, Span, Spanned, Tree};
/// A computational value.
#[derive(Debug, Clone, PartialEq)]
@ -148,7 +148,7 @@ pub enum TemplateNode {
/// The syntax tree of the corresponding template expression.
tree: Rc<Tree>,
/// The evaluated expressions for the `tree`.
map: NodeMap,
map: ExprMap,
},
/// A template that was converted from a string.
Str(String),
@ -163,13 +163,13 @@ impl PartialEq for TemplateNode {
}
}
/// A map from nodes to the values they evaluated to.
/// A map from expressions to the values they evaluated to.
///
/// The raw pointers point into the nodes contained in some [`Tree`]. Since the
/// lifetime is erased, the tree could go out of scope while the hash map still
/// lives. Although this could lead to lookup panics, it is not unsafe since the
/// pointers are never dereferenced.
pub type NodeMap = HashMap<*const Node, Value>;
/// The raw pointers point into the expressions contained in some [`Tree`].
/// Since the lifetime is erased, the tree could go out of scope while the hash
/// map still lives. Although this could lead to lookup panics, it is not unsafe
/// since the pointers are never dereferenced.
pub type ExprMap = HashMap<*const Expr, Value>;
/// A reference-counted dynamic template node that can implement custom
/// behaviour.

View File

@ -1,13 +1,13 @@
use std::mem;
use super::{Exec, FontFamily, State};
use super::{Exec, ExecWithMap, FontFamily, State};
use crate::diag::{Diag, DiagSet, Pass};
use crate::eval::TemplateValue;
use crate::eval::{ExprMap, TemplateValue};
use crate::geom::{Align, Dir, Gen, GenAxis, Length, Linear, Sides, Size};
use crate::layout::{
AnyNode, PadNode, PageRun, ParChild, ParNode, StackChild, StackNode, Tree,
};
use crate::syntax::Span;
use crate::syntax::{self, Span};
/// The context for execution.
pub struct ExecContext {
@ -48,12 +48,22 @@ impl ExecContext {
}
/// Execute a template and return the result as a stack node.
pub fn exec_template(&mut self, template: &TemplateValue) -> StackNode {
pub fn exec_template_stack(&mut self, template: &TemplateValue) -> StackNode {
self.exec_stack(|ctx| template.exec(ctx))
}
/// Execute a tree with a map and return the result as a stack node.
pub fn exec_tree_stack(&mut self, tree: &syntax::Tree, map: &ExprMap) -> StackNode {
self.exec_stack(|ctx| tree.exec_with_map(ctx, map))
}
/// Execute something and return the result as a stack node.
pub fn exec_stack(&mut self, f: impl FnOnce(&mut Self)) -> StackNode {
let snapshot = self.state.clone();
let page = self.page.take();
let stack = mem::replace(&mut self.stack, StackBuilder::new(&self.state));
template.exec(self);
f(self);
self.state = snapshot;
self.page = page;

View File

@ -9,10 +9,11 @@ pub use state::*;
use std::rc::Rc;
use crate::diag::Pass;
use crate::eval::{NodeMap, TemplateFunc, TemplateNode, TemplateValue, Value};
use crate::layout;
use crate::eval::{ExprMap, TemplateFunc, TemplateNode, TemplateValue, Value};
use crate::geom::{Dir, Gen};
use crate::layout::{self, FixedNode, StackChild, StackNode};
use crate::pretty::pretty;
use crate::syntax::*;
use crate::syntax;
/// Execute a template to produce a layout tree.
pub fn exec(template: &TemplateValue, state: State) -> Pass<layout::Tree> {
@ -33,30 +34,96 @@ pub trait Exec {
fn exec(&self, ctx: &mut ExecContext);
}
/// Execute a node with a node map that applies to it.
/// Execute a node with an expression map that applies to it.
pub trait ExecWithMap {
/// Execute the node.
fn exec_with_map(&self, ctx: &mut ExecContext, map: &NodeMap);
fn exec_with_map(&self, ctx: &mut ExecContext, map: &ExprMap);
}
impl ExecWithMap for Tree {
fn exec_with_map(&self, ctx: &mut ExecContext, map: &NodeMap) {
impl ExecWithMap for syntax::Tree {
fn exec_with_map(&self, ctx: &mut ExecContext, map: &ExprMap) {
for node in self {
node.exec_with_map(ctx, map);
}
}
}
impl ExecWithMap for Node {
fn exec_with_map(&self, ctx: &mut ExecContext, map: &NodeMap) {
impl ExecWithMap for syntax::Node {
fn exec_with_map(&self, ctx: &mut ExecContext, map: &ExprMap) {
match self {
Node::Text(text) => ctx.push_text(text),
Node::Space => ctx.push_word_space(),
_ => map[&(self as *const _)].exec(ctx),
Self::Text(text) => ctx.push_text(text),
Self::Space => ctx.push_word_space(),
Self::Linebreak(_) => ctx.linebreak(),
Self::Parbreak(_) => ctx.parbreak(),
Self::Strong(_) => ctx.state.font.strong ^= true,
Self::Emph(_) => ctx.state.font.emph ^= true,
Self::Raw(raw) => raw.exec(ctx),
Self::Heading(heading) => heading.exec_with_map(ctx, map),
Self::List(list) => list.exec_with_map(ctx, map),
Self::Expr(expr) => map[&(expr as *const _)].exec(ctx),
}
}
}
impl Exec for syntax::RawNode {
    fn exec(&self, ctx: &mut ExecContext) {
        // Block-level raw text is isolated into its own paragraph.
        let own_par = self.block;
        if own_par {
            ctx.parbreak();
        }

        // Typeset the raw text in monospace and then restore the previous
        // font state.
        let saved = ctx.state.clone();
        ctx.set_monospace();
        ctx.push_text(&self.text);
        ctx.state = saved;

        if own_par {
            ctx.parbreak();
        }
    }
}
impl ExecWithMap for syntax::HeadingNode {
    fn exec_with_map(&self, ctx: &mut ExecContext, map: &ExprMap) {
        let saved = ctx.state.clone();

        // Deeper headings are smaller: level 1 scales the font by 1.5,
        // level 6 by 1.0.
        ctx.state.font.scale *= 1.6 - 0.1 * self.level as f64;
        ctx.state.font.strong = true;

        self.body.exec_with_map(ctx, map);

        // Restore the state and close the heading's paragraph.
        ctx.state = saved;
        ctx.parbreak();
    }
}
impl ExecWithMap for syntax::ListNode {
    fn exec_with_map(&self, ctx: &mut ExecContext, map: &ExprMap) {
        ctx.parbreak();

        // Lay out the bullet and the item body as two children of a stack
        // running in the language's direction, separated by half of the
        // current font size.
        //
        // Fix: the bullet text was the empty string, which made list items
        // render without any visible marker — use a bullet glyph.
        let bullet = ctx.exec_stack(|ctx| ctx.push_text("•"));
        let body = ctx.exec_tree_stack(&self.body, map);
        let stack = StackNode {
            dirs: Gen::new(Dir::TTB, ctx.state.lang.dir),
            aspect: None,
            children: vec![
                StackChild::Any(bullet.into(), Gen::default()),
                StackChild::Spacing(ctx.state.font.resolve_size() / 2.0),
                StackChild::Any(body.into(), Gen::default()),
            ],
        };

        // Wrap the stack in a fixed node without fixed dimensions.
        ctx.push(FixedNode {
            width: None,
            height: None,
            child: stack.into(),
        });

        ctx.parbreak();
    }
}
impl Exec for Value {
fn exec(&self, ctx: &mut ExecContext) {
match self {

View File

@ -7,7 +7,7 @@ use crate::geom::*;
use crate::layout::Fill;
use crate::paper::{Paper, PaperClass, PAPER_A4};
/// The evaluation state.
/// The execution state.
#[derive(Debug, Clone, PartialEq)]
pub struct State {
/// The current language-related settings.

View File

@ -1,170 +0,0 @@
use super::*;
use crate::syntax::{HeadingNode, RawNode};
/// `linebreak`: Start a new line.
///
/// # Syntax
/// This function has dedicated syntax:
/// ```typst
/// This line ends here, \
/// And a new one begins.
/// ```
///
/// # Return value
/// A template that inserts a line break.
pub fn linebreak(_: &mut EvalContext, _: &mut FuncArgs) -> Value {
    // The template simply starts a new line when executed.
    Value::template(Node::LINEBREAK, move |ctx| ctx.linebreak())
}
/// `parbreak`: Start a new paragraph.
///
/// # Return value
/// A template that inserts a paragraph break.
pub fn parbreak(_: &mut EvalContext, _: &mut FuncArgs) -> Value {
    // The template simply starts a new paragraph when executed.
    Value::template(Node::PARBREAK, move |ctx| ctx.parbreak())
}
/// `strong`: Strong text.
///
/// # Syntax
/// This function has dedicated syntax.
/// ```typst
/// This is *important*!
/// ```
///
/// # Positional parameters
/// - Body: optional, of type `template`.
///
/// # Return value
/// A template that flips the boldness of text. The effect is scoped to the
/// body if present.
pub fn strong(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
    let body = args.eat::<TemplateValue>(ctx);
    Value::template(Node::STRONG, move |ctx| {
        if let Some(body) = &body {
            // Scope the flip to the body: snapshot the state, flip, execute
            // the body and restore.
            let snapshot = ctx.state.clone();
            ctx.state.font.strong ^= true;
            body.exec(ctx);
            ctx.state = snapshot;
        } else {
            // Without a body the flip persists. Previously the full state
            // was cloned here even though the snapshot was never used.
            ctx.state.font.strong ^= true;
        }
    })
}
/// `emph`: Emphasized text.
///
/// # Syntax
/// This function has dedicated syntax.
/// ```typst
/// I would have _never_ thought so!
/// ```
///
/// # Positional parameters
/// - Body: optional, of type `template`.
///
/// # Return value
/// A template that flips whether text is set in italics. The effect is scoped
/// to the body if present.
pub fn emph(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
    let body = args.eat::<TemplateValue>(ctx);
    Value::template(Node::EMPH, move |ctx| {
        if let Some(body) = &body {
            // Scope the flip to the body: snapshot the state, flip, execute
            // the body and restore.
            let snapshot = ctx.state.clone();
            ctx.state.font.emph ^= true;
            body.exec(ctx);
            ctx.state = snapshot;
        } else {
            // Without a body the flip persists. Previously the full state
            // was cloned here even though the snapshot was never used.
            ctx.state.font.emph ^= true;
        }
    })
}
/// `heading`: A section heading.
///
/// # Syntax
/// This function has dedicated syntax.
/// ```typst
/// = Section
/// ...
///
/// == Subsection
/// ...
/// ```
///
/// # Positional parameters
/// - Body, of type `template`.
///
/// # Named parameters
/// - Section depth: `level`, of type `integer` between 1 and 6.
///
/// # Return value
/// A template that sets the body as a section heading, that is, large and in
/// bold.
pub fn heading(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
    let level = args.eat_named(ctx, HeadingNode::LEVEL).unwrap_or(1);
    let body = args
        .eat_expect::<TemplateValue>(ctx, HeadingNode::BODY)
        .unwrap_or_default();

    Value::template(Node::HEADING, move |ctx| {
        // Scale up the font and embolden it while the heading body executes.
        let saved = ctx.state.clone();
        ctx.state.font.scale *= 1.6 - 0.1 * level as f64;
        ctx.state.font.strong = true;

        body.exec(ctx);

        // Restore the state and close the heading's paragraph.
        ctx.state = saved;
        ctx.parbreak();
    })
}
/// `raw`: Raw text.
///
/// # Syntax
/// This function has dedicated syntax:
/// - For inline-level raw text:
///   ```typst
///   `...`
///   ```
/// - For block-level raw text:
///   ````typst
///   ```rust
///   println!("Hello World!");
///   ```
///   ````
///
/// # Positional parameters
/// - Text, of type `string`.
///
/// # Named parameters
/// - Language for syntax highlighting: `lang`, of type `string`.
/// - Whether the item is block level (split in its own paragraph): `block`, of
///   type `boolean`.
///
/// # Return value
/// A template that sets the text raw, that is, in monospace and optionally with
/// syntax highlighting.
pub fn raw(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
    let text = args.eat_expect::<String>(ctx, RawNode::TEXT).unwrap_or_default();
    let _lang = args.eat_named::<String>(ctx, RawNode::LANG);
    let block = args.eat_named(ctx, RawNode::BLOCK).unwrap_or(false);

    Value::template(Node::RAW, move |ctx| {
        // Block-level raw text lives in its own paragraph.
        if block {
            ctx.parbreak();
        }

        // Set the text in monospace, then restore the previous font state.
        let saved = ctx.state.clone();
        ctx.set_monospace();
        ctx.push_text(&text);
        ctx.state = saved;

        if block {
            ctx.parbreak();
        }
    })
}

View File

@ -8,7 +8,6 @@ mod basic;
mod font;
mod image;
mod lang;
mod markup;
mod math;
mod pad;
mod page;
@ -22,7 +21,6 @@ pub use align::*;
pub use basic::*;
pub use font::*;
pub use lang::*;
pub use markup::*;
pub use math::*;
pub use pad::*;
pub use page::*;
@ -38,20 +36,12 @@ use crate::eval::{EvalContext, FuncArgs, Scope, TemplateValue, Value};
use crate::exec::{Exec, FontFamily};
use crate::font::{FontStyle, FontWeight, VerticalFontMetric};
use crate::geom::*;
use crate::syntax::{Node, Spanned};
use crate::syntax::Spanned;
/// Construct a scope containing all standard library definitions.
pub fn new() -> Scope {
let mut std = Scope::new();
// Syntax functions.
std.def_func(Node::LINEBREAK, linebreak);
std.def_func(Node::PARBREAK, parbreak);
std.def_func(Node::STRONG, strong);
std.def_func(Node::EMPH, emph);
std.def_func(Node::HEADING, heading);
std.def_func(Node::RAW, raw);
// Library functions.
std.def_func("align", align);
std.def_func("circle", circle);

View File

@ -31,7 +31,7 @@ pub fn pad(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
);
Value::template("pad", move |ctx| {
let child = ctx.exec_template(&body).into();
let child = ctx.exec_template_stack(&body).into();
ctx.push(PadNode { padding, child });
})
}

View File

@ -61,7 +61,7 @@ fn rect_impl(
body: TemplateValue,
) -> Value {
Value::template(name, move |ctx| {
let mut stack = ctx.exec_template(&body);
let mut stack = ctx.exec_template_stack(&body);
stack.aspect = aspect;
let fixed = FixedNode { width, height, child: stack.into() };
@ -137,7 +137,7 @@ fn ellipse_impl(
// perfectly into the ellipse.
const PAD: f64 = 0.5 - SQRT_2 / 4.0;
let mut stack = ctx.exec_template(&body);
let mut stack = ctx.exec_template_stack(&body);
stack.aspect = aspect;
let fixed = FixedNode {

View File

@ -26,7 +26,7 @@ pub fn stack(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
let children = children
.iter()
.map(|child| {
let child = ctx.exec_template(child).into();
let child = ctx.exec_template_stack(child).into();
StackChild::Any(child, ctx.state.aligns)
})
.collect();

View File

@ -32,6 +32,8 @@ impl<'s> LineMap<'s> {
let start = self.line_starts.get(line_index)?;
let head = self.src.get(start.to_usize() .. pos.to_usize())?;
// TODO: What about tabs?
let column_index = head.chars().count();
Some(Location {
@ -52,12 +54,14 @@ impl<'s> LineMap<'s> {
let line = self.src.get(line_start.to_usize() .. line_end)?;
// Find the index in the line. For the first column, the index is always zero. For
// other columns, we have to look at which byte the char directly before the
// column in question ends. We can't do `nth(column_idx)` directly since the
// column may be behind the last char.
// Find the index in the line. For the first column, the index is always
// zero. For other columns, we have to look at which byte the char
// directly before the column in question ends. We can't do
// `nth(column_idx)` directly since the column may be behind the last
// char.
let column_idx = location.column.checked_sub(1)? as usize;
let line_offset = if let Some(prev_idx) = column_idx.checked_sub(1) {
// TODO: What about tabs?
let (idx, prev) = line.char_indices().nth(prev_idx)?;
idx + prev.len_utf8()
} else {
@ -68,6 +72,22 @@ impl<'s> LineMap<'s> {
}
}
/// Determine the column at the end of the string.
pub fn search_column(src: &str) -> usize {
    // Walk backwards from the end up to (but excluding) the last line break
    // and sum up the column width of every character on that final line.
    src.chars()
        .rev()
        .take_while(|&c| !is_newline(c))
        // TODO: How many columns per tab?
        .map(|c| if c == '\t' { 2 } else { 1 })
        .sum()
}
/// Whether this character denotes a newline.
pub fn is_newline(character: char) -> bool {
matches!(

View File

@ -25,14 +25,32 @@ pub fn parse(src: &str) -> Pass<Tree> {
/// Parse a syntax tree.
fn tree(p: &mut Parser) -> Tree {
    // The predicate always holds, so this parses until the end of the input.
    tree_while(p, |_| true)
}
/// Parse a syntax tree that stays right of the column at the start of the next
/// non-whitespace token.
fn tree_indented(p: &mut Parser) -> Tree {
    p.skip_white();

    // The column of the first real token fixes the minimum indent for the
    // whole subtree.
    let min_column = p.column(p.next_start());

    tree_while(p, |p| {
        if let Some(Token::Space(newlines)) = p.peek() {
            // Whitespace containing line breaks only continues the tree if
            // what follows it is still indented far enough.
            newlines < 1 || p.column(p.next_end()) >= min_column
        } else {
            true
        }
    })
}
/// Parse a syntax tree.
fn tree_while(p: &mut Parser, mut f: impl FnMut(&mut Parser) -> bool) -> Tree {
// We keep track of whether we are at the start of a block or paragraph
// to know whether headings are allowed.
// to know whether things like headings are allowed.
let mut at_start = true;
let mut tree = vec![];
while !p.eof() {
while !p.eof() && f(p) {
if let Some(node) = node(p, &mut at_start) {
if !matches!(node, Node::Parbreak(_) | Node::Space) {
at_start = false;
match node {
Node::Space => {}
Node::Parbreak(_) => {}
_ => at_start = false,
}
tree.push(node);
}
@ -57,10 +75,16 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
// Text.
Token::Text(text) => Node::Text(text.into()),
Token::Tilde => Node::Text("\u{00A0}".into()),
Token::HyphHyph => Node::Text("\u{2013}".into()),
Token::HyphHyphHyph => Node::Text("\u{2014}".into()),
Token::UnicodeEscape(t) => Node::Text(unicode_escape(p, t)),
// Markup.
Token::Backslash => Node::Linebreak(span),
Token::Star => Node::Strong(span),
Token::Underscore => Node::Emph(span),
Token::Raw(t) => raw(p, t),
Token::Hashtag => {
if *at_start {
return Some(heading(p));
@ -68,10 +92,13 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
Node::Text(p.peek_src().into())
}
}
Token::Tilde => Node::Text("\u{00A0}".into()),
Token::Backslash => Node::Linebreak(span),
Token::Raw(t) => raw(p, t),
Token::UnicodeEscape(t) => Node::Text(unicode_escape(p, t)),
Token::Hyph => {
if *at_start {
return Some(list(p));
} else {
Node::Text(p.peek_src().into())
}
}
// Hashtag + keyword / identifier.
Token::Ident(_)
@ -81,31 +108,27 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
| Token::For
| Token::Import
| Token::Include => {
*at_start = false;
let stmt = matches!(token, Token::Let | Token::Import);
let group = if stmt { Group::Stmt } else { Group::Expr };
p.start_group(group, TokenMode::Code);
let expr = expr_with(p, true, 0);
if stmt && expr.is_some() && !p.eof() {
p.expected_at("semicolon or line break", p.end());
p.expected_at("semicolon or line break", p.prev_end());
}
p.end_group();
// Uneat spaces we might have eaten eagerly.
p.jump(p.end());
return expr.map(Node::Expr);
}
// Block.
Token::LeftBrace => {
*at_start = false;
return Some(Node::Expr(block(p, false)));
}
// Template.
Token::LeftBracket => {
*at_start = false;
return Some(Node::Expr(template(p)));
}
@ -125,33 +148,22 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
Some(node)
}
/// Parse a heading.
fn heading(p: &mut Parser) -> Node {
let start = p.start();
p.assert(Token::Hashtag);
/// Handle a unicode escape sequence.
fn unicode_escape(p: &mut Parser, token: UnicodeEscapeToken) -> String {
let span = p.peek_span();
let text = if let Some(c) = resolve::resolve_hex(token.sequence) {
c.to_string()
} else {
// Print out the escape sequence verbatim if it is invalid.
p.diag(error!(span, "invalid unicode escape sequence"));
p.peek_src().into()
};
// Count depth.
let mut level: usize = 1;
while p.eat_if(Token::Hashtag) {
level += 1;
if !token.terminated {
p.diag(error!(span.end, "expected closing brace"));
}
if level > 6 {
p.diag(warning!(start .. p.end(), "should not exceed depth 6"));
level = 6;
}
// Parse the heading contents.
let mut tree = vec![];
while p.check(|t| !matches!(t, Token::Space(n) if n >= 1)) {
tree.extend(node(p, &mut false));
}
Node::Heading(HeadingNode {
span: p.span(start),
level,
contents: Rc::new(tree),
})
text
}
/// Handle a raw block.
@ -164,22 +176,37 @@ fn raw(p: &mut Parser, token: RawToken) -> Node {
Node::Raw(raw)
}
/// Handle a unicode escape sequence.
fn unicode_escape(p: &mut Parser, token: UnicodeEscapeToken) -> String {
let span = p.peek_span();
let text = if let Some(c) = resolve::resolve_hex(token.sequence) {
c.to_string()
} else {
// Print out the escape sequence verbatim if it is invalid.
p.diag(error!(span, "invalid unicode escape sequence"));
p.get(span).into()
};
/// Parse a heading.
fn heading(p: &mut Parser) -> Node {
let start = p.next_start();
p.assert(Token::Hashtag);
if !token.terminated {
p.diag(error!(span.end, "expected closing brace"));
// Count depth.
let mut level: usize = 1;
while p.eat_if(Token::Hashtag) {
level += 1;
}
text
if level > 6 {
p.diag(warning!(start .. p.prev_end(), "should not exceed depth 6"));
level = 6;
}
let body = tree_indented(p);
Node::Heading(HeadingNode {
span: p.span(start),
level,
body: Rc::new(body),
})
}
/// Parse a single list item.
fn list(p: &mut Parser) -> Node {
    let start = p.next_start();
    // A list item is introduced by a hyphen.
    p.assert(Token::Hyph);
    // The item body is the indented subtree following the hyphen.
    let body = tree_indented(p);
    Node::List(ListNode { span: p.span(start), body })
}
/// Parse an expression.
@ -195,7 +222,7 @@ fn expr(p: &mut Parser) -> Option<Expr> {
///
/// Stops parsing at operations with lower precedence than `min_prec`,
fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option<Expr> {
let start = p.start();
let start = p.next_start();
let mut lhs = match p.eat_map(UnOp::from_token) {
Some(op) => {
let prec = op.precedence();
@ -383,7 +410,7 @@ fn collection(p: &mut Parser) -> (Vec<CallArg>, bool) {
break;
}
let behind = p.end();
let behind = p.prev_end();
if p.eat_if(Token::Comma) {
has_comma = true;
} else {
@ -467,7 +494,7 @@ fn block(p: &mut Parser, scoping: bool) -> Expr {
if let Some(expr) = expr(p) {
exprs.push(expr);
if !p.eof() {
p.expected_at("semicolon or line break", p.end());
p.expected_at("semicolon or line break", p.prev_end());
}
}
p.end_group();
@ -506,14 +533,14 @@ fn call(p: &mut Parser, callee: Expr) -> Expr {
/// Parse the arguments to a function call.
fn args(p: &mut Parser) -> CallArgs {
let start = p.start();
let start = p.next_start();
let items = collection(p).0;
CallArgs { span: p.span(start), items }
}
/// Parse a let expression.
fn expr_let(p: &mut Parser) -> Option<Expr> {
let start = p.start();
let start = p.next_start();
p.assert(Token::Let);
let mut expr_let = None;
@ -532,7 +559,7 @@ fn expr_let(p: &mut Parser) -> Option<Expr> {
init = expr(p);
} else if params.is_some() {
// Function definitions must have a body.
p.expected_at("body", p.end());
p.expected_at("body", p.prev_end());
}
// Rewrite into a closure expression if it's a function definition.
@ -558,7 +585,7 @@ fn expr_let(p: &mut Parser) -> Option<Expr> {
/// Parse an if expresion.
fn expr_if(p: &mut Parser) -> Option<Expr> {
let start = p.start();
let start = p.next_start();
p.assert(Token::If);
let mut expr_if = None;
@ -589,7 +616,7 @@ fn expr_if(p: &mut Parser) -> Option<Expr> {
/// Parse a while expresion.
fn expr_while(p: &mut Parser) -> Option<Expr> {
let start = p.start();
let start = p.next_start();
p.assert(Token::While);
let mut expr_while = None;
@ -608,7 +635,7 @@ fn expr_while(p: &mut Parser) -> Option<Expr> {
/// Parse a for expression.
fn expr_for(p: &mut Parser) -> Option<Expr> {
let start = p.start();
let start = p.next_start();
p.assert(Token::For);
let mut expr_for = None;
@ -643,7 +670,7 @@ fn for_pattern(p: &mut Parser) -> Option<ForPattern> {
/// Parse an import expression.
fn expr_import(p: &mut Parser) -> Option<Expr> {
let start = p.start();
let start = p.next_start();
p.assert(Token::Import);
let mut expr_import = None;
@ -657,7 +684,7 @@ fn expr_import(p: &mut Parser) -> Option<Expr> {
p.start_group(Group::Expr, TokenMode::Code);
let items = collection(p).0;
if items.is_empty() {
p.expected_at("import items", p.end());
p.expected_at("import items", p.prev_end());
}
let idents = idents(p, items);
@ -680,7 +707,7 @@ fn expr_import(p: &mut Parser) -> Option<Expr> {
/// Parse an include expression.
fn expr_include(p: &mut Parser) -> Option<Expr> {
let start = p.start();
let start = p.next_start();
p.assert(Token::Include);
expr(p).map(|path| {
@ -710,7 +737,7 @@ fn body(p: &mut Parser) -> Option<Expr> {
Some(Token::LeftBracket) => Some(template(p)),
Some(Token::LeftBrace) => Some(block(p, true)),
_ => {
p.expected_at("body", p.end());
p.expected_at("body", p.prev_end());
None
}
}

View File

@ -1,6 +1,7 @@
use std::fmt::{self, Debug, Formatter};
use std::ops::Range;
use super::{Scanner, TokenMode, Tokens};
use super::{search_column, TokenMode, Tokens};
use crate::diag::{Diag, DiagSet};
use crate::syntax::{Pos, Span, Token};
@ -17,10 +18,10 @@ pub struct Parser<'s> {
/// The peeked token.
/// (Same as `next` except if we are at the end of group, then `None`).
peeked: Option<Token<'s>>,
/// The start position of the peeked token.
next_start: Pos,
/// The end position of the last (non-whitespace if in code mode) token.
last_end: Pos,
prev_end: usize,
/// The start position of the peeked token.
next_start: usize,
}
/// A logical group of tokens, e.g. `[...]`.
@ -28,7 +29,7 @@ pub struct Parser<'s> {
struct GroupEntry {
/// The start position of the group. Used by `Parser::end_group` to return
/// The group's full span.
pub start: Pos,
pub start: usize,
/// The kind of group this is. This decides which tokens will end the group.
/// For example, a [`Group::Paren`] will be ended by
/// [`Token::RightParen`].
@ -59,12 +60,12 @@ impl<'s> Parser<'s> {
let next = tokens.next();
Self {
diags: DiagSet::new(),
next,
tokens,
last_end: Pos::ZERO,
peeked: next,
next_start: Pos::ZERO,
groups: vec![],
next,
peeked: next,
prev_end: 0,
next_start: 0,
}
}
@ -76,9 +77,9 @@ impl<'s> Parser<'s> {
/// Eat the next token and add a diagnostic that it is not the expected
/// `thing`.
pub fn expected(&mut self, what: &str) {
let before = self.next_start;
let before = self.next_start();
if let Some(found) = self.eat() {
let after = self.last_end;
let after = self.prev_end();
self.diag(error!(
before .. after,
"expected {}, found {}",
@ -86,20 +87,20 @@ impl<'s> Parser<'s> {
found.name(),
));
} else {
self.expected_at(what, self.next_start);
self.expected_at(what, self.next_start());
}
}
/// Add a diagnostic that `what` was expected at the given position.
pub fn expected_at(&mut self, what: &str, pos: Pos) {
self.diag(error!(pos, "expected {}", what));
pub fn expected_at(&mut self, what: &str, pos: impl Into<Pos>) {
self.diag(error!(pos.into(), "expected {}", what));
}
/// Eat the next token and add a diagnostic that it is unexpected.
pub fn unexpected(&mut self) {
let before = self.next_start;
let before = self.next_start();
if let Some(found) = self.eat() {
let after = self.last_end;
let after = self.prev_end();
self.diag(error!(before .. after, "unexpected {}", found.name()));
}
}
@ -110,11 +111,10 @@ impl<'s> Parser<'s> {
/// `eat()` and `peek()` return `None`. Parsing can only continue with
/// a matching call to `end_group`.
///
/// # Panics
/// This panics if the next token does not start the given group.
pub fn start_group(&mut self, kind: Group, mode: TokenMode) {
self.groups.push(GroupEntry {
start: self.next_start,
start: self.next_start(),
kind,
outer_mode: self.tokens.mode(),
});
@ -133,7 +133,6 @@ impl<'s> Parser<'s> {
/// End the parsing of a group.
///
/// # Panics
/// This panics if no group was started.
pub fn end_group(&mut self) -> Span {
let prev_mode = self.tokens.mode();
@ -156,17 +155,16 @@ impl<'s> Parser<'s> {
self.bump();
rescan = false;
} else if required {
self.diag(error!(self.next_start, "expected {}", end.name()));
self.diag(error!(self.next_start(), "expected {}", end.name()));
}
}
// Rescan the peeked token if the mode changed.
if rescan {
self.tokens.jump(self.last_end);
self.bump();
self.jump(self.prev_end());
}
Span::new(group.start, self.last_end)
Span::new(group.start, self.prev_end())
}
/// The tokenization mode outside of the current group.
@ -193,7 +191,7 @@ impl<'s> Parser<'s> {
/// Peek at the next token if it follows immediately after the last one
/// without any whitespace in between.
pub fn peek_direct(&self) -> Option<Token<'s>> {
if self.next_start == self.last_end {
if self.next_start() == self.prev_end() {
self.peeked
} else {
None
@ -204,15 +202,17 @@ impl<'s> Parser<'s> {
///
/// Has length zero if `peek()` returns `None`.
pub fn peek_span(&self) -> Span {
Span::new(
self.next_start,
if self.eof() { self.next_start } else { self.tokens.pos() },
)
self.peek_range().into()
}
/// Peek at the source of the next token.
pub fn peek_src(&self) -> &'s str {
self.get(self.peek_span())
self.tokens.scanner().get(self.peek_range())
}
/// Peek at the source range (start and end index) of the next token.
pub fn peek_range(&self) -> Range<usize> {
self.next_start() .. self.next_end()
}
/// Checks whether the next token fulfills a condition.
@ -255,11 +255,11 @@ impl<'s> Parser<'s> {
mapped
}
/// Eat the next token and return its span.
/// Eat the next token and return its source range.
pub fn eat_span(&mut self) -> Span {
let start = self.next_start;
let start = self.next_start();
self.eat();
Span::new(start, self.last_end)
Span::new(start, self.prev_end())
}
/// Consume the next token if it is the given one and produce a diagnostic
@ -267,7 +267,7 @@ impl<'s> Parser<'s> {
pub fn expect(&mut self, t: Token) -> bool {
let eaten = self.eat_if(t);
if !eaten {
self.expected_at(t.name(), self.last_end);
self.expected_at(t.name(), self.prev_end());
}
eaten
}
@ -290,45 +290,48 @@ impl<'s> Parser<'s> {
}
}
/// The position at which the next token starts.
pub fn start(&self) -> Pos {
/// The index at which the last token ended.
///
/// Refers to the end of the last _non-whitespace_ token in code mode.
pub fn prev_end(&self) -> usize {
self.prev_end
}
/// The index at which the next token starts.
pub fn next_start(&self) -> usize {
self.next_start
}
/// The position at which the last token ended.
/// The index at which the next token will end.
///
/// Refers to the end of the last _non-whitespace_ token in code mode.
pub fn end(&self) -> Pos {
self.last_end
/// Is the same as [`next_start()`][Self::next_start] if `peek()` returns
/// `None`.
pub fn next_end(&self) -> usize {
self.tokens.index()
}
/// The span from `start` to the end of the last token.
pub fn span(&self, start: Pos) -> Span {
Span::new(start, self.last_end)
/// Determine the column for the given index in the source.
pub fn column(&self, index: usize) -> usize {
search_column(self.tokens.scanner().get(.. index))
}
/// Jump to a position in the source string.
pub fn jump(&mut self, pos: Pos) {
self.tokens.jump(pos);
/// The span from `start` to [`self.prev_end()`](Self::prev_end).
pub fn span(&self, start: impl Into<Pos>) -> Span {
Span::new(start, self.prev_end())
}
/// Jump to an index in the string.
///
/// You need to know the correct column.
fn jump(&mut self, index: usize) {
self.tokens.jump(index);
self.bump();
}
/// Slice a part out of the source string.
pub fn get(&self, span: impl Into<Span>) -> &'s str {
self.tokens.scanner().get(span.into().to_range())
}
/// The underlying scanner.
pub fn scanner(&self) -> Scanner<'s> {
let mut scanner = self.tokens.scanner().clone();
scanner.jump(self.next_start.to_usize());
scanner
}
/// Move to the next token.
fn bump(&mut self) {
self.last_end = self.tokens.pos();
self.next_start = self.tokens.pos();
self.prev_end = self.tokens.index();
self.next_start = self.tokens.index();
self.next = self.tokens.next();
if self.tokens.mode() == TokenMode::Code {
@ -339,7 +342,7 @@ impl<'s> Parser<'s> {
Some(Token::BlockComment(_)) => true,
_ => false,
} {
self.next_start = self.tokens.pos();
self.next_start = self.tokens.index();
self.next = self.tokens.next();
}
}
@ -381,7 +384,8 @@ impl<'s> Parser<'s> {
impl Debug for Parser<'_> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let s = self.scanner();
let mut s = self.tokens.scanner();
s.jump(self.next_start());
write!(f, "Parser({}|{})", s.eaten(), s.rest())
}
}

View File

@ -2,7 +2,7 @@ use std::fmt::{self, Debug, Formatter};
use std::slice::SliceIndex;
/// A featureful char-based scanner.
#[derive(Clone)]
#[derive(Copy, Clone)]
pub struct Scanner<'s> {
src: &'s str,
index: usize,

View File

@ -38,20 +38,22 @@ impl<'s> Tokens<'s> {
self.mode = mode;
}
/// The position in the string at which the last token ends and next token
/// The index in the string at which the last token ends and next token
/// will start.
pub fn pos(&self) -> Pos {
self.s.index().into()
pub fn index(&self) -> usize {
self.s.index()
}
/// Jump to the given position.
pub fn jump(&mut self, pos: Pos) {
self.s.jump(pos.to_usize());
/// Jump to the given index in the string.
///
/// You need to know the correct column.
pub fn jump(&mut self, index: usize) {
self.s.jump(index);
}
/// The underlying scanner.
pub fn scanner(&self) -> &Scanner<'s> {
&self.s
pub fn scanner(&self) -> Scanner<'s> {
self.s
}
}
@ -62,11 +64,7 @@ impl<'s> Iterator for Tokens<'s> {
fn next(&mut self) -> Option<Self::Item> {
let start = self.s.index();
let c = self.s.eat()?;
// This never loops. It just exists to allow breaking out of it.
loop {
// Common elements.
return Some(match c {
Some(match c {
// Blocks and templates.
'[' => Token::LeftBracket,
']' => Token::RightBracket,
@ -84,25 +82,34 @@ impl<'s> Iterator for Tokens<'s> {
'/' if self.s.eat_if('*') => self.block_comment(),
'*' if self.s.eat_if('/') => Token::Invalid(self.s.eaten_from(start)),
_ => break,
});
// Other things.
_ => match self.mode {
TokenMode::Markup => self.markup(start, c),
TokenMode::Code => self.code(start, c),
},
})
}
}
Some(match self.mode {
TokenMode::Markup => match c {
impl<'s> Tokens<'s> {
fn markup(&mut self, start: usize, c: char) -> Token<'s> {
match c {
// Markup.
'~' => Token::Tilde,
'*' => Token::Star,
'_' => Token::Underscore,
'~' => Token::Tilde,
'\\' => self.backslash(),
'`' => self.raw(),
'$' => self.math(),
'\\' => self.backslash(),
'-' => self.hyph(start),
// Plain text.
_ => self.text(start),
},
}
}
TokenMode::Code => match c {
fn code(&mut self, start: usize, c: char) -> Token<'s> {
match c {
// Parens.
'(' => Token::LeftParen,
')' => Token::RightParen,
@ -145,43 +152,12 @@ impl<'s> Iterator for Tokens<'s> {
'"' => self.string(),
_ => Token::Invalid(self.s.eaten_from(start)),
},
})
}
}
impl<'s> Tokens<'s> {
fn hash(&mut self, start: usize) -> Token<'s> {
let read = self.s.eat_while(is_id_continue);
match self.mode {
TokenMode::Markup => {
if read.is_empty() {
return Token::Hashtag;
}
if let Some(token) = keyword(read) {
return token;
}
if read.chars().next().map_or(false, is_id_start) {
return Token::Ident(read);
}
}
TokenMode::Code => {
if let Ok(color) = RgbaColor::from_str(read) {
return Token::Color(color);
}
}
}
Token::Invalid(self.s.eaten_from(start))
}
fn whitespace(&mut self, first: char) -> Token<'s> {
// Fast path for just a single space
if first == ' ' && !self.s.check(|c| c.is_whitespace()) {
if first == ' ' && !self.s.check(char::is_whitespace) {
Token::Space(0)
} else {
self.s.uneat();
@ -210,12 +186,13 @@ impl<'s> Tokens<'s> {
c if c.is_whitespace() => true,
// Comments.
'/' if self.s.check(|c| c == '/' || c == '*') => true,
// Parenthesis and hashtag.
'[' | ']' | '{' | '}' | '#' => true,
// Parentheses.
'[' | ']' | '{' | '}' => true,
// Markup.
'*' | '_' | '=' | '~' | '`' | '$' => true,
'#' | '~' | '*' | '_' | '-' | '`' | '$' => true,
// Escaping.
'\\' => true,
// Just text.
_ => false,
} {
self.s.uneat();
@ -226,6 +203,77 @@ impl<'s> Tokens<'s> {
Token::Text(self.s.eaten_from(start))
}
fn backslash(&mut self) -> Token<'s> {
if let Some(c) = self.s.peek() {
match c {
// Backslash and comments.
'\\' | '/' |
// Parenthesis and hashtag.
'[' | ']' | '{' | '}' | '#' |
// Markup.
'*' | '_' | '=' | '~' | '`' | '$' => {
let start = self.s.index();
self.s.eat_assert(c);
Token::Text(&self.s.eaten_from(start))
}
'u' if self.s.peek_nth(1) == Some('{') => {
self.s.eat_assert('u');
self.s.eat_assert('{');
Token::UnicodeEscape(UnicodeEscapeToken {
// Allow more than `ascii_hexdigit` for better error recovery.
sequence: self.s.eat_while(|c| c.is_ascii_alphanumeric()),
terminated: self.s.eat_if('}'),
})
}
c if c.is_whitespace() => Token::Backslash,
_ => Token::Text("\\"),
}
} else {
Token::Backslash
}
}
fn hash(&mut self, start: usize) -> Token<'s> {
match self.mode {
TokenMode::Markup => {
if self.s.check(is_id_start) {
let read = self.s.eat_while(is_id_continue);
if let Some(keyword) = keyword(read) {
keyword
} else {
Token::Ident(read)
}
} else if self.s.check(|c| c != '#' && !c.is_whitespace()) {
Token::Text(self.s.eaten_from(start))
} else {
Token::Hashtag
}
}
TokenMode::Code => {
let read = self.s.eat_while(is_id_continue);
if let Ok(color) = RgbaColor::from_str(read) {
Token::Color(color)
} else {
Token::Invalid(self.s.eaten_from(start))
}
}
}
}
fn hyph(&mut self, start: usize) -> Token<'s> {
if self.s.eat_if('-') {
if self.s.eat_if('-') {
Token::HyphHyphHyph
} else {
Token::HyphHyph
}
} else if self.s.check(|c| !c.is_whitespace()) {
Token::Text(self.s.eaten_from(start))
} else {
Token::Hyph
}
}
fn raw(&mut self) -> Token<'s> {
let mut backticks = 1;
while self.s.eat_if('`') {
@ -295,36 +343,6 @@ impl<'s> Tokens<'s> {
})
}
fn backslash(&mut self) -> Token<'s> {
if let Some(c) = self.s.peek() {
match c {
// Backslash and comments.
'\\' | '/' |
// Parenthesis and hashtag.
'[' | ']' | '{' | '}' | '#' |
// Markup.
'*' | '_' | '=' | '~' | '`' | '$' => {
let start = self.s.index();
self.s.eat_assert(c);
Token::Text(&self.s.eaten_from(start))
}
'u' if self.s.peek_nth(1) == Some('{') => {
self.s.eat_assert('u');
self.s.eat_assert('{');
Token::UnicodeEscape(UnicodeEscapeToken {
// Allow more than `ascii_hexdigit` for better error recovery.
sequence: self.s.eat_while(|c| c.is_ascii_alphanumeric()),
terminated: self.s.eat_if('}'),
})
}
c if c.is_whitespace() => Token::Backslash,
_ => Token::Text("\\"),
}
} else {
Token::Backslash
}
}
fn ident(&mut self, start: usize) -> Token<'s> {
self.s.eat_while(is_id_continue);
match self.s.eaten_from(start) {
@ -474,6 +492,10 @@ mod tests {
use Token::{Ident, *};
use TokenMode::{Code, Markup};
const fn UnicodeEscape(sequence: &str, terminated: bool) -> Token {
Token::UnicodeEscape(UnicodeEscapeToken { sequence, terminated })
}
const fn Raw(text: &str, backticks: usize, terminated: bool) -> Token {
Token::Raw(RawToken { text, backticks, terminated })
}
@ -482,18 +504,14 @@ mod tests {
Token::Math(MathToken { formula, display, terminated })
}
const fn UnicodeEscape(sequence: &str, terminated: bool) -> Token {
Token::UnicodeEscape(UnicodeEscapeToken { sequence, terminated })
const fn Color(r: u8, g: u8, b: u8, a: u8) -> Token<'static> {
Token::Color(RgbaColor { r, g, b, a })
}
const fn Str(string: &str, terminated: bool) -> Token {
Token::Str(StrToken { string, terminated })
}
const fn Color(r: u8, g: u8, b: u8, a: u8) -> Token<'static> {
Token::Color(RgbaColor { r, g, b, a })
}
/// Building blocks for suffix testing.
///
/// We extend each test case with a collection of different suffixes to make
@ -605,6 +623,81 @@ mod tests {
t!(Code: ")" => RightParen);
}
#[test]
fn test_tokenize_whitespace() {
// Test basic whitespace.
t!(Both["a1/"]: "" => );
t!(Both["a1/"]: " " => Space(0));
t!(Both["a1/"]: " " => Space(0));
t!(Both["a1/"]: "\t" => Space(0));
t!(Both["a1/"]: " \t" => Space(0));
t!(Both["a1/"]: "\u{202F}" => Space(0));
// Test newline counting.
t!(Both["a1/"]: "\n" => Space(1));
t!(Both["a1/"]: "\n " => Space(1));
t!(Both["a1/"]: " \n" => Space(1));
t!(Both["a1/"]: " \n " => Space(1));
t!(Both["a1/"]: "\r\n" => Space(1));
t!(Both["a1/"]: " \n\t \n " => Space(2));
t!(Both["a1/"]: "\n\r" => Space(2));
t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
}
#[test]
fn test_tokenize_text() {
// Test basic text.
t!(Markup[" /"]: "hello" => Text("hello"));
t!(Markup[" /"]: "hello-world" => Text("hello"), Text("-"), Text("world"));
// Test code symbols in text.
t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
t!(Markup[" /"]: ";:,|/+" => Text(";:,|/+"));
t!(Markup[" /"]: "#-a" => Text("#"), Text("-"), Text("a"));
t!(Markup[" "]: "#123" => Text("#"), Text("123"));
// Test text ends.
t!(Markup[""]: "hello " => Text("hello"), Space(0));
t!(Markup[""]: "hello~" => Text("hello"), Tilde);
}
#[test]
fn test_tokenize_escape_sequences() {
// Test escapable symbols.
t!(Markup: r"\\" => Text(r"\"));
t!(Markup: r"\/" => Text("/"));
t!(Markup: r"\[" => Text("["));
t!(Markup: r"\]" => Text("]"));
t!(Markup: r"\{" => Text("{"));
t!(Markup: r"\}" => Text("}"));
t!(Markup: r"\*" => Text("*"));
t!(Markup: r"\_" => Text("_"));
t!(Markup: r"\=" => Text("="));
t!(Markup: r"\~" => Text("~"));
t!(Markup: r"\`" => Text("`"));
t!(Markup: r"\$" => Text("$"));
t!(Markup: r"\#" => Text("#"));
// Test unescapable symbols.
t!(Markup[" /"]: r"\a" => Text(r"\"), Text("a"));
t!(Markup[" /"]: r"\u" => Text(r"\"), Text("u"));
t!(Markup[" /"]: r"\1" => Text(r"\"), Text("1"));
t!(Markup[" /"]: r"\:" => Text(r"\"), Text(":"));
t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\""));
// Test basic unicode escapes.
t!(Markup: r"\u{}" => UnicodeEscape("", true));
t!(Markup: r"\u{2603}" => UnicodeEscape("2603", true));
t!(Markup: r"\u{P}" => UnicodeEscape("P", true));
// Test unclosed unicode escapes.
t!(Markup[" /"]: r"\u{" => UnicodeEscape("", false));
t!(Markup[" /"]: r"\u{1" => UnicodeEscape("1", false));
t!(Markup[" /"]: r"\u{26A4" => UnicodeEscape("26A4", false));
t!(Markup[" /"]: r"\u{1Q3P" => UnicodeEscape("1Q3P", false));
t!(Markup: r"\u{1🏕}" => UnicodeEscape("1", false), Text("🏕"), RightBrace);
}
#[test]
fn test_tokenize_markup_symbols() {
// Test markup tokens.
@ -612,8 +705,10 @@ mod tests {
t!(Markup: "_" => Underscore);
t!(Markup[""]: "###" => Hashtag, Hashtag, Hashtag);
t!(Markup["a1/"]: "# " => Hashtag, Space(0));
t!(Markup["a1/"]: "- " => Hyph, Space(0));
t!(Markup: "~" => Tilde);
t!(Markup[" "]: r"\" => Backslash);
t!(Markup["a "]: r"a--" => Text("a"), HyphHyph);
}
#[test]
@ -654,73 +749,34 @@ mod tests {
#[test]
fn test_tokenize_keywords() {
let keywords = [
// A list of a few (not all) keywords.
let list = [
("let", Let),
("if", If),
("else", Else),
("for", For),
("in", In),
("while", While),
("break", Break),
("continue", Continue),
("return", Return),
("import", Import),
];
for &(s, t) in &keywords {
for &(s, t) in &list {
t!(Markup[" "]: format!("#{}", s) => t);
t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
t!(Markup[" /"]: format!("# {}", s) => Token::Hashtag, Space(0), Text(s));
}
for &(s, t) in &keywords {
for &(s, t) in &list {
t!(Code[" "]: s => t);
t!(Markup[" /"]: s => Text(s));
}
// Test simple identifier.
t!(Markup[" "]: "#letter" => Ident("letter"));
t!(Markup[" "]: "#123" => Invalid("#123"));
t!(Code[" /"]: "falser" => Ident("falser"));
t!(Code[" /"]: "None" => Ident("None"));
t!(Code[" /"]: "True" => Ident("True"));
}
#[test]
fn test_tokenize_whitespace() {
// Test basic whitespace.
t!(Both["a1/"]: "" => );
t!(Both["a1/"]: " " => Space(0));
t!(Both["a1/"]: " " => Space(0));
t!(Both["a1/"]: "\t" => Space(0));
t!(Both["a1/"]: " \t" => Space(0));
t!(Both["a1/"]: "\u{202F}" => Space(0));
// Test newline counting.
t!(Both["a1/"]: "\n" => Space(1));
t!(Both["a1/"]: "\n " => Space(1));
t!(Both["a1/"]: " \n" => Space(1));
t!(Both["a1/"]: " \n " => Space(1));
t!(Both["a1/"]: "\r\n" => Space(1));
t!(Both["a1/"]: " \n\t \n " => Space(2));
t!(Both["a1/"]: "\n\r" => Space(2));
t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
}
#[test]
fn test_tokenize_text() {
// Test basic text.
t!(Markup[" /"]: "hello" => Text("hello"));
t!(Markup[" /"]: "hello-world" => Text("hello-world"));
// Test code symbols in text.
t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
t!(Markup[" /"]: ";:,|/+-" => Text(";:,|/+-"));
// Test text ends.
t!(Markup[""]: "hello " => Text("hello"), Space(0));
t!(Markup[""]: "hello~" => Text("hello"), Tilde);
}
#[test]
fn test_tokenize_raw_blocks() {
let empty = Raw("", 1, true);
@ -764,43 +820,6 @@ mod tests {
t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, false));
}
#[test]
fn test_tokenize_escape_sequences() {
// Test escapable symbols.
t!(Markup: r"\\" => Text(r"\"));
t!(Markup: r"\/" => Text("/"));
t!(Markup: r"\[" => Text("["));
t!(Markup: r"\]" => Text("]"));
t!(Markup: r"\{" => Text("{"));
t!(Markup: r"\}" => Text("}"));
t!(Markup: r"\*" => Text("*"));
t!(Markup: r"\_" => Text("_"));
t!(Markup: r"\=" => Text("="));
t!(Markup: r"\~" => Text("~"));
t!(Markup: r"\`" => Text("`"));
t!(Markup: r"\$" => Text("$"));
t!(Markup: r"\#" => Text("#"));
// Test unescapable symbols.
t!(Markup[" /"]: r"\a" => Text(r"\"), Text("a"));
t!(Markup[" /"]: r"\u" => Text(r"\"), Text("u"));
t!(Markup[" /"]: r"\1" => Text(r"\"), Text("1"));
t!(Markup[" /"]: r"\:" => Text(r"\"), Text(":"));
t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\""));
// Test basic unicode escapes.
t!(Markup: r"\u{}" => UnicodeEscape("", true));
t!(Markup: r"\u{2603}" => UnicodeEscape("2603", true));
t!(Markup: r"\u{P}" => UnicodeEscape("P", true));
// Test unclosed unicode escapes.
t!(Markup[" /"]: r"\u{" => UnicodeEscape("", false));
t!(Markup[" /"]: r"\u{1" => UnicodeEscape("1", false));
t!(Markup[" /"]: r"\u{26A4" => UnicodeEscape("26A4", false));
t!(Markup[" /"]: r"\u{1Q3P" => UnicodeEscape("1Q3P", false));
t!(Markup: r"\u{1🏕}" => UnicodeEscape("1", false), Text("🏕"), RightBrace);
}
#[test]
fn test_tokenize_idents() {
// Test valid identifiers.
@ -956,8 +975,7 @@ mod tests {
t!(Code: "1p%" => Invalid("1p"), Invalid("%"));
t!(Code: "1%%" => Percent(1.0), Invalid("%"));
// Test invalid keyword.
t!(Markup[" /"]: "#-" => Invalid("#-"));
// Test invalid color.
t!(Code[" /"]: r"#letter" => Invalid(r"#letter"));
}
}

View File

@ -17,8 +17,8 @@ where
p.finish()
}
/// Pretty print an item with a node map and return the resulting string.
pub fn pretty_with_map<T>(item: &T, map: &NodeMap) -> String
/// Pretty print an item with a expression map and return the resulting string.
pub fn pretty_with_map<T>(item: &T, map: &ExprMap) -> String
where
T: PrettyWithMap + ?Sized,
{
@ -33,10 +33,10 @@ pub trait Pretty {
fn pretty(&self, p: &mut Printer);
}
/// Pretty print an item with a node map that applies to it.
/// Pretty print an item with an expression map that applies to it.
pub trait PrettyWithMap {
/// Pretty print this item into the given printer.
fn pretty_with_map(&self, p: &mut Printer, map: Option<&NodeMap>);
fn pretty_with_map(&self, p: &mut Printer, map: Option<&ExprMap>);
}
impl<T> Pretty for T
@ -104,7 +104,7 @@ impl Write for Printer {
}
impl PrettyWithMap for Tree {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&NodeMap>) {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&ExprMap>) {
for node in self {
node.pretty_with_map(p, map);
}
@ -112,20 +112,21 @@ impl PrettyWithMap for Tree {
}
impl PrettyWithMap for Node {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&NodeMap>) {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&ExprMap>) {
match self {
// TODO: Handle escaping.
Self::Text(text) => p.push_str(text),
Self::Space => p.push(' '),
Self::Strong(_) => p.push('*'),
Self::Emph(_) => p.push('_'),
Self::Linebreak(_) => p.push_str(r"\"),
Self::Parbreak(_) => p.push_str("\n\n"),
Self::Heading(heading) => heading.pretty_with_map(p, map),
Self::Strong(_) => p.push('*'),
Self::Emph(_) => p.push('_'),
Self::Raw(raw) => raw.pretty(p),
Self::Heading(heading) => heading.pretty_with_map(p, map),
Self::List(list) => list.pretty_with_map(p, map),
Self::Expr(expr) => {
if let Some(map) = map {
let value = &map[&(self as *const _)];
let value = &map[&(expr as *const _)];
value.pretty(p);
} else {
if expr.has_short_form() {
@ -138,15 +139,6 @@ impl PrettyWithMap for Node {
}
}
impl PrettyWithMap for HeadingNode {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&NodeMap>) {
for _ in 0 .. self.level {
p.push('#');
}
self.contents.pretty_with_map(p, map);
}
}
impl Pretty for RawNode {
fn pretty(&self, p: &mut Printer) {
// Find out how many backticks we need.
@ -203,6 +195,23 @@ impl Pretty for RawNode {
}
}
impl PrettyWithMap for HeadingNode {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&ExprMap>) {
for _ in 0 .. self.level {
p.push('#');
}
p.push(' ');
self.body.pretty_with_map(p, map);
}
}
impl PrettyWithMap for ListNode {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&ExprMap>) {
p.push_str("- ");
self.body.pretty_with_map(p, map);
}
}
impl Pretty for Expr {
fn pretty(&self, p: &mut Printer) {
match self {
@ -664,9 +673,8 @@ mod tests {
roundtrip("\\ ");
roundtrip("\n\n");
roundtrip("hi");
// Heading.
roundtrip("# *Ok*");
roundtrip("- Ok");
// Raw.
roundtrip("``");

View File

@ -17,70 +17,16 @@ pub enum Node {
Strong(Span),
/// Emphasized text was enabled / disabled: `_`.
Emph(Span),
/// A section heading: `= Introduction`.
Heading(HeadingNode),
/// A raw block with optional syntax highlighting: `` `...` ``.
Raw(RawNode),
/// A section heading: `= Introduction`.
Heading(HeadingNode),
/// A single list item: `- ...`.
List(ListNode),
/// An expression.
Expr(Expr),
}
impl Node {
// The names of the corresponding library functions.
pub const LINEBREAK: &'static str = "linebreak";
pub const PARBREAK: &'static str = "parbreak";
pub const STRONG: &'static str = "strong";
pub const EMPH: &'static str = "emph";
pub const HEADING: &'static str = "heading";
pub const RAW: &'static str = "raw";
/// Desugar markup into a function call.
pub fn desugar(&self) -> Option<CallExpr> {
match *self {
Self::Text(_) => None,
Self::Space => None,
Self::Linebreak(span) => Some(call(span, Self::LINEBREAK)),
Self::Parbreak(span) => Some(call(span, Self::PARBREAK)),
Self::Strong(span) => Some(call(span, Self::STRONG)),
Self::Emph(span) => Some(call(span, Self::EMPH)),
Self::Heading(ref heading) => Some(heading.desugar()),
Self::Raw(ref raw) => Some(raw.desugar()),
Self::Expr(_) => None,
}
}
}
/// A section heading: `= Introduction`.
#[derive(Debug, Clone, PartialEq)]
pub struct HeadingNode {
/// The source code location.
pub span: Span,
/// The section depth (numer of equals signs).
pub level: usize,
/// The contents of the heading.
pub contents: Rc<Tree>,
}
impl HeadingNode {
pub const LEVEL: &'static str = "level";
pub const BODY: &'static str = "body";
/// Desugar into a function call.
pub fn desugar(&self) -> CallExpr {
let Self { span, level, ref contents } = *self;
let mut call = call(span, Node::HEADING);
call.args.items.push(CallArg::Named(Named {
name: ident(span, Self::LEVEL),
expr: Expr::Int(span, level as i64),
}));
call.args.items.push(CallArg::Pos(Expr::Template(TemplateExpr {
span,
tree: Rc::clone(&contents),
})));
call
}
}
/// A raw block with optional syntax highlighting: `` `...` ``.
///
/// Raw blocks start with 1 or 3+ backticks and end with the same number of
@ -158,38 +104,22 @@ pub struct RawNode {
pub block: bool,
}
impl RawNode {
pub const LANG: &'static str = "lang";
pub const BLOCK: &'static str = "block";
pub const TEXT: &'static str = "text";
/// Desugar into a function call.
pub fn desugar(&self) -> CallExpr {
let Self { span, ref lang, ref text, block } = *self;
let mut call = call(span, Node::RAW);
if let Some(lang) = lang {
call.args.items.push(CallArg::Named(Named {
name: ident(span, Self::LANG),
expr: Expr::Str(span, lang.string.clone()),
}));
}
call.args.items.push(CallArg::Named(Named {
name: ident(span, Self::BLOCK),
expr: Expr::Bool(span, block),
}));
call.args.items.push(CallArg::Pos(Expr::Str(span, text.clone())));
call
}
/// A section heading: `= Introduction`.
#[derive(Debug, Clone, PartialEq)]
pub struct HeadingNode {
/// The source code location.
pub span: Span,
/// The section depth (numer of equals signs).
pub level: usize,
/// The contents of the heading.
pub body: Rc<Tree>,
}
fn call(span: Span, name: &str) -> CallExpr {
CallExpr {
span,
callee: Box::new(Expr::Ident(Ident { span, string: name.into() })),
args: CallArgs { span, items: vec![] },
}
}
fn ident(span: Span, string: &str) -> Ident {
Ident { span, string: string.into() }
/// A single list item: `- ...`.
#[derive(Debug, Clone, PartialEq)]
pub struct ListNode {
/// The source code location.
pub span: Span,
/// The contents of the list item.
pub body: Tree,
}

View File

@ -24,6 +24,10 @@ pub enum Token<'s> {
Hashtag,
/// A tilde: `~`.
Tilde,
/// Two hyphens: `--`.
HyphHyph,
/// Three hyphens: `---`.
HyphHyphHyph,
/// A backslash followed by nothing or whitespace: `\`.
Backslash,
/// A comma: `,`.
@ -103,15 +107,15 @@ pub enum Token<'s> {
Space(usize),
/// A consecutive non-markup string.
Text(&'s str),
/// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(UnicodeEscapeToken<'s>),
/// An arbitrary number of backticks followed by inner contents, terminated
/// with the same number of backticks: `` `...` ``.
Raw(RawToken<'s>),
/// One or two dollar signs followed by inner contents, terminated with the
/// same number of dollar signs.
Math(MathToken<'s>),
/// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(UnicodeEscapeToken<'s>),
/// An identifier: `center`.
Ident(&'s str),
/// A boolean: `true`, `false`.
@ -204,6 +208,8 @@ impl<'s> Token<'s> {
Self::Underscore => "underscore",
Self::Hashtag => "hashtag",
Self::Tilde => "tilde",
Self::HyphHyph => "en dash",
Self::HyphHyphHyph => "em dash",
Self::Backslash => "backslash",
Self::Comma => "comma",
Self::Semicolon => "semicolon",
@ -242,9 +248,9 @@ impl<'s> Token<'s> {
Self::Using => "keyword `using`",
Self::Space(_) => "space",
Self::Text(_) => "text",
Self::UnicodeEscape(_) => "unicode escape sequence",
Self::Raw(_) => "raw block",
Self::Math(_) => "math formula",
Self::UnicodeEscape(_) => "unicode escape sequence",
Self::Ident(_) => "identifier",
Self::Bool(_) => "boolean",
Self::Int(_) => "integer",

View File

@ -52,16 +52,25 @@ visit! {
match node {
Node::Text(_) => {}
Node::Space => {}
Node::Strong(_) => {}
Node::Linebreak(_) => {}
Node::Parbreak(_) => {}
Node::Strong(_) => {}
Node::Emph(_) => {}
Node::Heading(heading) => v.visit_tree(&heading.contents),
Node::Raw(_) => {}
Node::Expr(expr) => v.visit_expr(expr),
Node::Heading(n) => v.visit_heading(n),
Node::List(n) => v.visit_list(n),
Node::Expr(n) => v.visit_expr(n),
}
}
fn visit_heading(v, node: &HeadingNode) {
v.visit_tree(&node.body);
}
fn visit_list(v, node: &ListNode) {
v.visit_tree(&node.body);
}
fn visit_expr(v, node: &Expr) {
match node {
Expr::None(_) => {}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.7 KiB

After

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.3 KiB

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

After

Width:  |  Height:  |  Size: 7.3 KiB

BIN
tests/ref/markup/lists.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.0 KiB

After

Width:  |  Height:  |  Size: 7.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.3 KiB

After

Width:  |  Height:  |  Size: 2.3 KiB

View File

@ -13,7 +13,7 @@
// Expression as a file name.
#let chap2 = include "import" + "able/chap" + "2.typ"
_ -- Intermission -- _
-- _Intermission_ --
#chap2
{

View File

@ -1,20 +1,10 @@
// Test basic markup.
---
#let linebreak() = [
// Inside the old line break definition is still active.
#square(length: 3pt, fill: black) \
]
A \ B \ C
---
// Paragraph breaks don't exist!
#let parbreak() = [ ]
No more
paragraph breaks
Paragraph breaks
for you!

View File

@ -9,15 +9,3 @@ Partly em_phas_ized.
// Scoped to body.
#rect[_Scoped] to body.
---
#let emph = strong
_Strong_
#let emph() = "Hi"
_, _!
#let emph = "hi"
// Error: 1-2 expected function, found string
_

View File

@ -4,38 +4,21 @@
// Different number of hashtags.
// Valid levels.
# 1
### 2
###### 6
# Level 1
### Level 2
###### Level 6
// Too many hashtags.
// Warning: 1-8 should not exceed depth 6
####### 7
---
// Heading continuation over linebreak.
// Code blocks continue heading.
# A{
"B"
}
// Function call continues heading.
# #rect[
A
] B
// Without some kind of block, headings end at a line break.
# A
B
####### Level 7
---
// Heading vs. no heading.
// Parsed as headings if at start of the context.
/**/ # Ok
{[## Ok]}
#rect[### Ok]
/**/ # Level 1
{[## Level 2]}
#rect[### Level 3]
// Not at the start of the context.
No # heading
@ -44,9 +27,16 @@ No # heading
\# No heading
---
// Make small, but double heading.
#let heading(contents) = heading(contents + contents, level: 6)
// While indented at least as much as the start, the heading continues.
// The new heading's argument list doesn't contain `level`.
// Error: 1-11 unexpected argument
### Twice.
# This
is
indented.
# This
is not.
// Code blocks continue heading.
# A {
"B"
}

View File

@ -0,0 +1,45 @@
// Test lists.
---
_Shopping list_
- Apples
- Potatoes
- Juice
---
- First level.
- Second level.
There are multiple paragraphs.
- Third level.
Still the same bullet point.
- Still level 2.
- At the top.
---
- Works
- Also with four spaces
- Or two tabs
---
- Top-level indent
- is fine.
---
Tightly
- surrounded
- by two
paragraphs.
---
- A
- B
- C
- D
---
- Level 1
- Level [
2 through template
]

View File

@ -45,14 +45,6 @@ def hi():
print("Hi!")
```
---
// Make everything block-level.
#let raw(text) = raw(text, block: true)
// The new raw's argument list doesn't contain `block`.
// Error: 6-10 unexpected argument
This `is` block-level.
---
// Unterminated.
// Error: 2:1 expected backtick(s)

View File

@ -9,15 +9,3 @@ Partly str*ength*ened.
// Scoped to body.
#rect[*Scoped] to body.
---
#let strong = emph
*Emph*
#let strong() = "Bye"
*, *!
#let strong = 123
// Error: 1-2 expected function, found integer
*