Lists with indent-based parsing

- Unordered lists with indent-based parsing and basic layout using stacks
- Headings are now also indent based
- Removes syntax functions since they will be superseded by select & transform
This commit is contained in:
Laurenz 2021-06-09 00:37:13 +02:00
parent d69dfa84ec
commit 5afb42ad89
35 changed files with 710 additions and 817 deletions

View File

@ -2,7 +2,7 @@ use std::rc::Rc;
use super::{Scope, Scopes, Value}; use super::{Scope, Scopes, Value};
use crate::syntax::visit::{visit_expr, Visit}; use crate::syntax::visit::{visit_expr, Visit};
use crate::syntax::{Expr, Ident, Node}; use crate::syntax::{Expr, Ident};
/// A visitor that captures variable slots. /// A visitor that captures variable slots.
#[derive(Debug)] #[derive(Debug)]
@ -26,37 +26,20 @@ impl<'a> CapturesVisitor<'a> {
pub fn finish(self) -> Scope { pub fn finish(self) -> Scope {
self.captures self.captures
} }
/// Find out whether the name is not locally defined and if so if it can be
/// captured.
fn process(&mut self, name: &str) {
if self.internal.get(name).is_none() {
if let Some(slot) = self.external.get(name) {
self.captures.def_slot(name, Rc::clone(slot));
}
}
}
} }
impl<'ast> Visit<'ast> for CapturesVisitor<'_> { impl<'ast> Visit<'ast> for CapturesVisitor<'_> {
fn visit_node(&mut self, node: &'ast Node) {
match node {
Node::Text(_) => {}
Node::Space => {}
Node::Linebreak(_) => self.process(Node::LINEBREAK),
Node::Parbreak(_) => self.process(Node::PARBREAK),
Node::Strong(_) => self.process(Node::STRONG),
Node::Emph(_) => self.process(Node::EMPH),
Node::Heading(_) => self.process(Node::HEADING),
Node::Raw(_) => self.process(Node::RAW),
Node::Expr(expr) => self.visit_expr(expr),
}
}
fn visit_expr(&mut self, node: &'ast Expr) { fn visit_expr(&mut self, node: &'ast Expr) {
match node { if let Expr::Ident(ident) = node {
Expr::Ident(ident) => self.process(ident), // Find out whether the name is not locally defined and if so if it
expr => visit_expr(self, expr), // can be captured.
if self.internal.get(ident).is_none() {
if let Some(slot) = self.external.get(ident) {
self.captures.def_slot(ident.as_str(), Rc::clone(slot));
}
}
} else {
visit_expr(self, node);
} }
} }

View File

@ -218,24 +218,23 @@ pub trait Eval {
} }
impl Eval for Tree { impl Eval for Tree {
type Output = NodeMap; type Output = ExprMap;
fn eval(&self, ctx: &mut EvalContext) -> Self::Output { fn eval(&self, ctx: &mut EvalContext) -> Self::Output {
let mut map = NodeMap::new(); struct ExprVisitor<'a, 'b> {
ctx: &'a mut EvalContext<'b>,
for node in self { map: ExprMap,
let value = if let Some(call) = node.desugar() {
call.eval(ctx)
} else if let Node::Expr(expr) = node {
expr.eval(ctx)
} else {
continue;
};
map.insert(node as *const _, value);
} }
map impl<'ast> Visit<'ast> for ExprVisitor<'_, '_> {
fn visit_expr(&mut self, node: &'ast Expr) {
self.map.insert(node as *const _, node.eval(self.ctx));
}
}
let mut visitor = ExprVisitor { ctx, map: ExprMap::new() };
visitor.visit_tree(self);
visitor.map
} }
} }

View File

@ -33,8 +33,7 @@ impl<'a> Scopes<'a> {
/// Exit the topmost scope. /// Exit the topmost scope.
/// ///
/// # Panics /// This panics if no scope was entered.
/// Panics if no scope was entered.
pub fn exit(&mut self) { pub fn exit(&mut self) {
self.top = self.scopes.pop().expect("no pushed scope"); self.top = self.scopes.pop().expect("no pushed scope");
} }

View File

@ -9,7 +9,7 @@ use super::EvalContext;
use crate::color::{Color, RgbaColor}; use crate::color::{Color, RgbaColor};
use crate::exec::ExecContext; use crate::exec::ExecContext;
use crate::geom::{Angle, Length, Linear, Relative}; use crate::geom::{Angle, Length, Linear, Relative};
use crate::syntax::{Node, Span, Spanned, Tree}; use crate::syntax::{Expr, Span, Spanned, Tree};
/// A computational value. /// A computational value.
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
@ -148,7 +148,7 @@ pub enum TemplateNode {
/// The syntax tree of the corresponding template expression. /// The syntax tree of the corresponding template expression.
tree: Rc<Tree>, tree: Rc<Tree>,
/// The evaluated expressions for the `tree`. /// The evaluated expressions for the `tree`.
map: NodeMap, map: ExprMap,
}, },
/// A template that was converted from a string. /// A template that was converted from a string.
Str(String), Str(String),
@ -163,13 +163,13 @@ impl PartialEq for TemplateNode {
} }
} }
/// A map from nodes to the values they evaluated to. /// A map from expressions to the values they evaluated to.
/// ///
/// The raw pointers point into the nodes contained in some [`Tree`]. Since the /// The raw pointers point into the expressions contained in some [`Tree`].
/// lifetime is erased, the tree could go out of scope while the hash map still /// Since the lifetime is erased, the tree could go out of scope while the hash
/// lives. Although this could lead to lookup panics, it is not unsafe since the /// map still lives. Although this could lead to lookup panics, it is not unsafe
/// pointers are never dereferenced. /// since the pointers are never dereferenced.
pub type NodeMap = HashMap<*const Node, Value>; pub type ExprMap = HashMap<*const Expr, Value>;
/// A reference-counted dynamic template node that can implement custom /// A reference-counted dynamic template node that can implement custom
/// behaviour. /// behaviour.

View File

@ -1,13 +1,13 @@
use std::mem; use std::mem;
use super::{Exec, FontFamily, State}; use super::{Exec, ExecWithMap, FontFamily, State};
use crate::diag::{Diag, DiagSet, Pass}; use crate::diag::{Diag, DiagSet, Pass};
use crate::eval::TemplateValue; use crate::eval::{ExprMap, TemplateValue};
use crate::geom::{Align, Dir, Gen, GenAxis, Length, Linear, Sides, Size}; use crate::geom::{Align, Dir, Gen, GenAxis, Length, Linear, Sides, Size};
use crate::layout::{ use crate::layout::{
AnyNode, PadNode, PageRun, ParChild, ParNode, StackChild, StackNode, Tree, AnyNode, PadNode, PageRun, ParChild, ParNode, StackChild, StackNode, Tree,
}; };
use crate::syntax::Span; use crate::syntax::{self, Span};
/// The context for execution. /// The context for execution.
pub struct ExecContext { pub struct ExecContext {
@ -48,12 +48,22 @@ impl ExecContext {
} }
/// Execute a template and return the result as a stack node. /// Execute a template and return the result as a stack node.
pub fn exec_template(&mut self, template: &TemplateValue) -> StackNode { pub fn exec_template_stack(&mut self, template: &TemplateValue) -> StackNode {
self.exec_stack(|ctx| template.exec(ctx))
}
/// Execute a tree with a map and return the result as a stack node.
pub fn exec_tree_stack(&mut self, tree: &syntax::Tree, map: &ExprMap) -> StackNode {
self.exec_stack(|ctx| tree.exec_with_map(ctx, map))
}
/// Execute something and return the result as a stack node.
pub fn exec_stack(&mut self, f: impl FnOnce(&mut Self)) -> StackNode {
let snapshot = self.state.clone(); let snapshot = self.state.clone();
let page = self.page.take(); let page = self.page.take();
let stack = mem::replace(&mut self.stack, StackBuilder::new(&self.state)); let stack = mem::replace(&mut self.stack, StackBuilder::new(&self.state));
template.exec(self); f(self);
self.state = snapshot; self.state = snapshot;
self.page = page; self.page = page;

View File

@ -9,10 +9,11 @@ pub use state::*;
use std::rc::Rc; use std::rc::Rc;
use crate::diag::Pass; use crate::diag::Pass;
use crate::eval::{NodeMap, TemplateFunc, TemplateNode, TemplateValue, Value}; use crate::eval::{ExprMap, TemplateFunc, TemplateNode, TemplateValue, Value};
use crate::layout; use crate::geom::{Dir, Gen};
use crate::layout::{self, FixedNode, StackChild, StackNode};
use crate::pretty::pretty; use crate::pretty::pretty;
use crate::syntax::*; use crate::syntax;
/// Execute a template to produce a layout tree. /// Execute a template to produce a layout tree.
pub fn exec(template: &TemplateValue, state: State) -> Pass<layout::Tree> { pub fn exec(template: &TemplateValue, state: State) -> Pass<layout::Tree> {
@ -33,30 +34,96 @@ pub trait Exec {
fn exec(&self, ctx: &mut ExecContext); fn exec(&self, ctx: &mut ExecContext);
} }
/// Execute a node with a node map that applies to it. /// Execute a node with an expression map that applies to it.
pub trait ExecWithMap { pub trait ExecWithMap {
/// Execute the node. /// Execute the node.
fn exec_with_map(&self, ctx: &mut ExecContext, map: &NodeMap); fn exec_with_map(&self, ctx: &mut ExecContext, map: &ExprMap);
} }
impl ExecWithMap for Tree { impl ExecWithMap for syntax::Tree {
fn exec_with_map(&self, ctx: &mut ExecContext, map: &NodeMap) { fn exec_with_map(&self, ctx: &mut ExecContext, map: &ExprMap) {
for node in self { for node in self {
node.exec_with_map(ctx, map); node.exec_with_map(ctx, map);
} }
} }
} }
impl ExecWithMap for Node { impl ExecWithMap for syntax::Node {
fn exec_with_map(&self, ctx: &mut ExecContext, map: &NodeMap) { fn exec_with_map(&self, ctx: &mut ExecContext, map: &ExprMap) {
match self { match self {
Node::Text(text) => ctx.push_text(text), Self::Text(text) => ctx.push_text(text),
Node::Space => ctx.push_word_space(), Self::Space => ctx.push_word_space(),
_ => map[&(self as *const _)].exec(ctx), Self::Linebreak(_) => ctx.linebreak(),
Self::Parbreak(_) => ctx.parbreak(),
Self::Strong(_) => ctx.state.font.strong ^= true,
Self::Emph(_) => ctx.state.font.emph ^= true,
Self::Raw(raw) => raw.exec(ctx),
Self::Heading(heading) => heading.exec_with_map(ctx, map),
Self::List(list) => list.exec_with_map(ctx, map),
Self::Expr(expr) => map[&(expr as *const _)].exec(ctx),
} }
} }
} }
impl Exec for syntax::RawNode {
    /// Push the raw text into the context in monospace, separating it into
    /// its own paragraph when it is a block-level raw node.
    fn exec(&self, ctx: &mut ExecContext) {
        // A block-level raw node is isolated into its own paragraph.
        if self.block {
            ctx.parbreak();
        }

        // Scope the monospace font setting to just this node's text by
        // snapshotting the state and restoring it afterwards.
        let snapshot = ctx.state.clone();
        ctx.set_monospace();
        ctx.push_text(&self.text);
        ctx.state = snapshot;

        if self.block {
            ctx.parbreak();
        }
    }
}
impl ExecWithMap for syntax::HeadingNode {
    /// Execute the heading: render its body enlarged and bold, then end the
    /// paragraph.
    fn exec_with_map(&self, ctx: &mut ExecContext, map: &ExprMap) {
        // Scope the font changes to the heading body via a state snapshot.
        let snapshot = ctx.state.clone();

        // Deeper headings are scaled down: level 1 -> 1.5x, level 2 -> 1.4x, ...
        let upscale = 1.6 - 0.1 * self.level as f64;
        ctx.state.font.scale *= upscale;
        ctx.state.font.strong = true;

        self.body.exec_with_map(ctx, map);
        ctx.state = snapshot;
        ctx.parbreak();
    }
}
impl ExecWithMap for syntax::ListNode {
    /// Execute a single list item: lay out a bullet and the item's body side
    /// by side in a horizontal stack.
    fn exec_with_map(&self, ctx: &mut ExecContext, map: &ExprMap) {
        ctx.parbreak();

        // Execute the bullet and the item body into separate stack nodes.
        // NOTE(review): the bullet text is an empty string here — this looks
        // like it was lost in extraction (likely a bullet glyph upstream);
        // confirm against the original commit.
        let bullet = ctx.exec_stack(|ctx| ctx.push_text(""));
        let body = ctx.exec_tree_stack(&self.body, map);

        // Stack bullet and body along the cross axis (top-to-bottom main
        // direction, language-dependent cross direction), with half a font
        // size of spacing between them.
        let stack = StackNode {
            dirs: Gen::new(Dir::TTB, ctx.state.lang.dir),
            aspect: None,
            children: vec![
                StackChild::Any(bullet.into(), Gen::default()),
                StackChild::Spacing(ctx.state.font.resolve_size() / 2.0),
                StackChild::Any(body.into(), Gen::default()),
            ],
        };

        // Wrap in an unconstrained fixed node so the stack sizes itself.
        ctx.push(FixedNode {
            width: None,
            height: None,
            child: stack.into(),
        });

        ctx.parbreak();
    }
}
impl Exec for Value { impl Exec for Value {
fn exec(&self, ctx: &mut ExecContext) { fn exec(&self, ctx: &mut ExecContext) {
match self { match self {

View File

@ -7,7 +7,7 @@ use crate::geom::*;
use crate::layout::Fill; use crate::layout::Fill;
use crate::paper::{Paper, PaperClass, PAPER_A4}; use crate::paper::{Paper, PaperClass, PAPER_A4};
/// The evaluation state. /// The execution state.
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub struct State { pub struct State {
/// The current language-related settings. /// The current language-related settings.

View File

@ -1,170 +0,0 @@
use super::*;
use crate::syntax::{HeadingNode, RawNode};
/// `linebreak`: Start a new line.
///
/// # Syntax
/// This function has dedicated syntax:
/// ```typst
/// This line ends here, \
/// And a new one begins.
/// ```
///
/// # Return value
/// A template that inserts a line break.
pub fn linebreak(_: &mut EvalContext, _: &mut FuncArgs) -> Value {
    // Registered under the same name as the dedicated syntax node so the
    // parser-produced node and the function share one implementation.
    Value::template(Node::LINEBREAK, move |ctx| {
        ctx.linebreak();
    })
}
/// `parbreak`: Start a new paragraph.
///
/// # Return value
/// A template that inserts a paragraph break.
pub fn parbreak(_: &mut EvalContext, _: &mut FuncArgs) -> Value {
    // Registered under the same name as the dedicated syntax node so the
    // parser-produced node and the function share one implementation.
    Value::template(Node::PARBREAK, move |ctx| {
        ctx.parbreak();
    })
}
/// `strong`: Strong text.
///
/// # Syntax
/// This function has dedicated syntax.
/// ```typst
/// This is *important*!
/// ```
///
/// # Positional parameters
/// - Body: optional, of type `template`.
///
/// # Return value
/// A template that flips the boldness of text. The effect is scoped to the
/// body if present.
pub fn strong(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
    let body = args.eat::<TemplateValue>(ctx);
    Value::template(Node::STRONG, move |ctx| {
        // Toggle boldness. With a body, the change applies only to the body
        // and the previous state is restored afterwards; without one, the
        // toggle persists for all following content.
        let previous = ctx.state.clone();
        ctx.state.font.strong = !ctx.state.font.strong;

        if let Some(template) = &body {
            template.exec(ctx);
            ctx.state = previous;
        }
    })
}
/// `emph`: Emphasized text.
///
/// # Syntax
/// This function has dedicated syntax.
/// ```typst
/// I would have _never_ thought so!
/// ```
///
/// # Positional parameters
/// - Body: optional, of type `template`.
///
/// # Return value
/// A template that flips whether text is set in italics. The effect is scoped
/// to the body if present.
pub fn emph(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
    let body = args.eat::<TemplateValue>(ctx);
    Value::template(Node::EMPH, move |ctx| {
        // Toggle italics. With a body, the change applies only to the body
        // and the previous state is restored afterwards; without one, the
        // toggle persists for all following content.
        let previous = ctx.state.clone();
        ctx.state.font.emph = !ctx.state.font.emph;

        if let Some(template) = &body {
            template.exec(ctx);
            ctx.state = previous;
        }
    })
}
/// `heading`: A section heading.
///
/// # Syntax
/// This function has dedicated syntax.
/// ```typst
/// = Section
/// ...
///
/// == Subsection
/// ...
/// ```
///
/// # Positional parameters
/// - Body, of type `template`.
///
/// # Named parameters
/// - Section depth: `level`, of type `integer` between 1 and 6.
///
/// # Return value
/// A template that sets the body as a section heading, that is, large and in
/// bold.
pub fn heading(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
    // Default to a top-level heading when no `level` is given.
    let level = args.eat_named(ctx, HeadingNode::LEVEL).unwrap_or(1);
    let body = args
        .eat_expect::<TemplateValue>(ctx, HeadingNode::BODY)
        .unwrap_or_default();

    Value::template(Node::HEADING, move |ctx| {
        // Scope the font changes to the heading body via a state snapshot.
        let snapshot = ctx.state.clone();

        // Deeper headings are scaled down: level 1 -> 1.5x, level 2 -> 1.4x, ...
        let upscale = 1.6 - 0.1 * level as f64;
        ctx.state.font.scale *= upscale;
        ctx.state.font.strong = true;

        body.exec(ctx);
        ctx.state = snapshot;
        ctx.parbreak();
    })
}
/// `raw`: Raw text.
///
/// # Syntax
/// This function has dedicated syntax:
/// - For inline-level raw text:
///   ```typst
///   `...`
///   ```
/// - For block-level raw text:
///   ````typst
///   ```rust
///   println!("Hello World!");
///   ```
///   ````
///
/// # Positional parameters
/// - Text, of type `string`.
///
/// # Named parameters
/// - Language for syntax highlighting: `lang`, of type `string`.
/// - Whether the item is block level (split in its own paragraph): `block`, of
///   type `boolean`.
///
/// # Return value
/// A template that sets the text raw, that is, in monospace and optionally with
/// syntax highlighting.
pub fn raw(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
    let text = args.eat_expect::<String>(ctx, RawNode::TEXT).unwrap_or_default();
    // The language is accepted but unused here (note the leading underscore) —
    // highlighting is not applied in this template.
    let _lang = args.eat_named::<String>(ctx, RawNode::LANG);
    let block = args.eat_named(ctx, RawNode::BLOCK).unwrap_or(false);

    Value::template(Node::RAW, move |ctx| {
        // Block-level raw text is isolated into its own paragraph.
        if block {
            ctx.parbreak();
        }

        // Scope the monospace font setting to just this text by snapshotting
        // the state and restoring it afterwards.
        let snapshot = ctx.state.clone();
        ctx.set_monospace();
        ctx.push_text(&text);
        ctx.state = snapshot;

        if block {
            ctx.parbreak();
        }
    })
}

View File

@ -8,7 +8,6 @@ mod basic;
mod font; mod font;
mod image; mod image;
mod lang; mod lang;
mod markup;
mod math; mod math;
mod pad; mod pad;
mod page; mod page;
@ -22,7 +21,6 @@ pub use align::*;
pub use basic::*; pub use basic::*;
pub use font::*; pub use font::*;
pub use lang::*; pub use lang::*;
pub use markup::*;
pub use math::*; pub use math::*;
pub use pad::*; pub use pad::*;
pub use page::*; pub use page::*;
@ -38,20 +36,12 @@ use crate::eval::{EvalContext, FuncArgs, Scope, TemplateValue, Value};
use crate::exec::{Exec, FontFamily}; use crate::exec::{Exec, FontFamily};
use crate::font::{FontStyle, FontWeight, VerticalFontMetric}; use crate::font::{FontStyle, FontWeight, VerticalFontMetric};
use crate::geom::*; use crate::geom::*;
use crate::syntax::{Node, Spanned}; use crate::syntax::Spanned;
/// Construct a scope containing all standard library definitions. /// Construct a scope containing all standard library definitions.
pub fn new() -> Scope { pub fn new() -> Scope {
let mut std = Scope::new(); let mut std = Scope::new();
// Syntax functions.
std.def_func(Node::LINEBREAK, linebreak);
std.def_func(Node::PARBREAK, parbreak);
std.def_func(Node::STRONG, strong);
std.def_func(Node::EMPH, emph);
std.def_func(Node::HEADING, heading);
std.def_func(Node::RAW, raw);
// Library functions. // Library functions.
std.def_func("align", align); std.def_func("align", align);
std.def_func("circle", circle); std.def_func("circle", circle);

View File

@ -31,7 +31,7 @@ pub fn pad(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
); );
Value::template("pad", move |ctx| { Value::template("pad", move |ctx| {
let child = ctx.exec_template(&body).into(); let child = ctx.exec_template_stack(&body).into();
ctx.push(PadNode { padding, child }); ctx.push(PadNode { padding, child });
}) })
} }

View File

@ -61,7 +61,7 @@ fn rect_impl(
body: TemplateValue, body: TemplateValue,
) -> Value { ) -> Value {
Value::template(name, move |ctx| { Value::template(name, move |ctx| {
let mut stack = ctx.exec_template(&body); let mut stack = ctx.exec_template_stack(&body);
stack.aspect = aspect; stack.aspect = aspect;
let fixed = FixedNode { width, height, child: stack.into() }; let fixed = FixedNode { width, height, child: stack.into() };
@ -137,7 +137,7 @@ fn ellipse_impl(
// perfectly into the ellipse. // perfectly into the ellipse.
const PAD: f64 = 0.5 - SQRT_2 / 4.0; const PAD: f64 = 0.5 - SQRT_2 / 4.0;
let mut stack = ctx.exec_template(&body); let mut stack = ctx.exec_template_stack(&body);
stack.aspect = aspect; stack.aspect = aspect;
let fixed = FixedNode { let fixed = FixedNode {

View File

@ -26,7 +26,7 @@ pub fn stack(ctx: &mut EvalContext, args: &mut FuncArgs) -> Value {
let children = children let children = children
.iter() .iter()
.map(|child| { .map(|child| {
let child = ctx.exec_template(child).into(); let child = ctx.exec_template_stack(child).into();
StackChild::Any(child, ctx.state.aligns) StackChild::Any(child, ctx.state.aligns)
}) })
.collect(); .collect();

View File

@ -32,6 +32,8 @@ impl<'s> LineMap<'s> {
let start = self.line_starts.get(line_index)?; let start = self.line_starts.get(line_index)?;
let head = self.src.get(start.to_usize() .. pos.to_usize())?; let head = self.src.get(start.to_usize() .. pos.to_usize())?;
// TODO: What about tabs?
let column_index = head.chars().count(); let column_index = head.chars().count();
Some(Location { Some(Location {
@ -52,12 +54,14 @@ impl<'s> LineMap<'s> {
let line = self.src.get(line_start.to_usize() .. line_end)?; let line = self.src.get(line_start.to_usize() .. line_end)?;
// Find the index in the line. For the first column, the index is always zero. For // Find the index in the line. For the first column, the index is always
// other columns, we have to look at which byte the char directly before the // zero. For other columns, we have to look at which byte the char
// column in question ends. We can't do `nth(column_idx)` directly since the // directly before the column in question ends. We can't do
// column may be behind the last char. // `nth(column_idx)` directly since the column may be behind the last
// char.
let column_idx = location.column.checked_sub(1)? as usize; let column_idx = location.column.checked_sub(1)? as usize;
let line_offset = if let Some(prev_idx) = column_idx.checked_sub(1) { let line_offset = if let Some(prev_idx) = column_idx.checked_sub(1) {
// TODO: What about tabs?
let (idx, prev) = line.char_indices().nth(prev_idx)?; let (idx, prev) = line.char_indices().nth(prev_idx)?;
idx + prev.len_utf8() idx + prev.len_utf8()
} else { } else {
@ -68,6 +72,22 @@ impl<'s> LineMap<'s> {
} }
} }
/// Determine the column at the end of the string.
///
/// Walks backwards from the end of `src` until the nearest newline, summing
/// up column widths (a tab counts as two columns, everything else as one).
pub fn search_column(src: &str) -> usize {
    src.chars()
        .rev()
        .take_while(|&c| !is_newline(c))
        // TODO: How many columns per tab?
        .map(|c| if c == '\t' { 2 } else { 1 })
        .sum()
}
/// Whether this character denotes a newline. /// Whether this character denotes a newline.
pub fn is_newline(character: char) -> bool { pub fn is_newline(character: char) -> bool {
matches!( matches!(

View File

@ -25,14 +25,32 @@ pub fn parse(src: &str) -> Pass<Tree> {
/// Parse a syntax tree. /// Parse a syntax tree.
fn tree(p: &mut Parser) -> Tree { fn tree(p: &mut Parser) -> Tree {
tree_while(p, |_| true)
}
/// Parse a syntax tree that stays right of the column at the start of the next
/// non-whitespace token.
fn tree_indented(p: &mut Parser) -> Tree {
p.skip_white();
let column = p.column(p.next_start());
tree_while(p, |p| match p.peek() {
Some(Token::Space(n)) if n >= 1 => p.column(p.next_end()) >= column,
_ => true,
})
}
/// Parse a syntax tree.
fn tree_while(p: &mut Parser, mut f: impl FnMut(&mut Parser) -> bool) -> Tree {
// We keep track of whether we are at the start of a block or paragraph // We keep track of whether we are at the start of a block or paragraph
// to know whether headings are allowed. // to know whether things like headings are allowed.
let mut at_start = true; let mut at_start = true;
let mut tree = vec![]; let mut tree = vec![];
while !p.eof() { while !p.eof() && f(p) {
if let Some(node) = node(p, &mut at_start) { if let Some(node) = node(p, &mut at_start) {
if !matches!(node, Node::Parbreak(_) | Node::Space) { match node {
at_start = false; Node::Space => {}
Node::Parbreak(_) => {}
_ => at_start = false,
} }
tree.push(node); tree.push(node);
} }
@ -57,10 +75,16 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
// Text. // Text.
Token::Text(text) => Node::Text(text.into()), Token::Text(text) => Node::Text(text.into()),
Token::Tilde => Node::Text("\u{00A0}".into()),
Token::HyphHyph => Node::Text("\u{2013}".into()),
Token::HyphHyphHyph => Node::Text("\u{2014}".into()),
Token::UnicodeEscape(t) => Node::Text(unicode_escape(p, t)),
// Markup. // Markup.
Token::Backslash => Node::Linebreak(span),
Token::Star => Node::Strong(span), Token::Star => Node::Strong(span),
Token::Underscore => Node::Emph(span), Token::Underscore => Node::Emph(span),
Token::Raw(t) => raw(p, t),
Token::Hashtag => { Token::Hashtag => {
if *at_start { if *at_start {
return Some(heading(p)); return Some(heading(p));
@ -68,10 +92,13 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
Node::Text(p.peek_src().into()) Node::Text(p.peek_src().into())
} }
} }
Token::Tilde => Node::Text("\u{00A0}".into()), Token::Hyph => {
Token::Backslash => Node::Linebreak(span), if *at_start {
Token::Raw(t) => raw(p, t), return Some(list(p));
Token::UnicodeEscape(t) => Node::Text(unicode_escape(p, t)), } else {
Node::Text(p.peek_src().into())
}
}
// Hashtag + keyword / identifier. // Hashtag + keyword / identifier.
Token::Ident(_) Token::Ident(_)
@ -81,31 +108,27 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
| Token::For | Token::For
| Token::Import | Token::Import
| Token::Include => { | Token::Include => {
*at_start = false;
let stmt = matches!(token, Token::Let | Token::Import); let stmt = matches!(token, Token::Let | Token::Import);
let group = if stmt { Group::Stmt } else { Group::Expr }; let group = if stmt { Group::Stmt } else { Group::Expr };
p.start_group(group, TokenMode::Code); p.start_group(group, TokenMode::Code);
let expr = expr_with(p, true, 0); let expr = expr_with(p, true, 0);
if stmt && expr.is_some() && !p.eof() { if stmt && expr.is_some() && !p.eof() {
p.expected_at("semicolon or line break", p.end()); p.expected_at("semicolon or line break", p.prev_end());
} }
p.end_group(); p.end_group();
// Uneat spaces we might have eaten eagerly. // Uneat spaces we might have eaten eagerly.
p.jump(p.end());
return expr.map(Node::Expr); return expr.map(Node::Expr);
} }
// Block. // Block.
Token::LeftBrace => { Token::LeftBrace => {
*at_start = false;
return Some(Node::Expr(block(p, false))); return Some(Node::Expr(block(p, false)));
} }
// Template. // Template.
Token::LeftBracket => { Token::LeftBracket => {
*at_start = false;
return Some(Node::Expr(template(p))); return Some(Node::Expr(template(p)));
} }
@ -125,33 +148,22 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option<Node> {
Some(node) Some(node)
} }
/// Parse a heading. /// Handle a unicode escape sequence.
fn heading(p: &mut Parser) -> Node { fn unicode_escape(p: &mut Parser, token: UnicodeEscapeToken) -> String {
let start = p.start(); let span = p.peek_span();
p.assert(Token::Hashtag); let text = if let Some(c) = resolve::resolve_hex(token.sequence) {
c.to_string()
} else {
// Print out the escape sequence verbatim if it is invalid.
p.diag(error!(span, "invalid unicode escape sequence"));
p.peek_src().into()
};
// Count depth. if !token.terminated {
let mut level: usize = 1; p.diag(error!(span.end, "expected closing brace"));
while p.eat_if(Token::Hashtag) {
level += 1;
} }
if level > 6 { text
p.diag(warning!(start .. p.end(), "should not exceed depth 6"));
level = 6;
}
// Parse the heading contents.
let mut tree = vec![];
while p.check(|t| !matches!(t, Token::Space(n) if n >= 1)) {
tree.extend(node(p, &mut false));
}
Node::Heading(HeadingNode {
span: p.span(start),
level,
contents: Rc::new(tree),
})
} }
/// Handle a raw block. /// Handle a raw block.
@ -164,22 +176,37 @@ fn raw(p: &mut Parser, token: RawToken) -> Node {
Node::Raw(raw) Node::Raw(raw)
} }
/// Handle a unicode escape sequence. /// Parse a heading.
fn unicode_escape(p: &mut Parser, token: UnicodeEscapeToken) -> String { fn heading(p: &mut Parser) -> Node {
let span = p.peek_span(); let start = p.next_start();
let text = if let Some(c) = resolve::resolve_hex(token.sequence) { p.assert(Token::Hashtag);
c.to_string()
} else {
// Print out the escape sequence verbatim if it is invalid.
p.diag(error!(span, "invalid unicode escape sequence"));
p.get(span).into()
};
if !token.terminated { // Count depth.
p.diag(error!(span.end, "expected closing brace")); let mut level: usize = 1;
while p.eat_if(Token::Hashtag) {
level += 1;
} }
text if level > 6 {
p.diag(warning!(start .. p.prev_end(), "should not exceed depth 6"));
level = 6;
}
let body = tree_indented(p);
Node::Heading(HeadingNode {
span: p.span(start),
level,
body: Rc::new(body),
})
}
/// Parse a single list item.
fn list(p: &mut Parser) -> Node {
let start = p.next_start();
p.assert(Token::Hyph);
let body = tree_indented(p);
Node::List(ListNode { span: p.span(start), body })
} }
/// Parse an expression. /// Parse an expression.
@ -195,7 +222,7 @@ fn expr(p: &mut Parser) -> Option<Expr> {
/// ///
/// Stops parsing at operations with lower precedence than `min_prec`, /// Stops parsing at operations with lower precedence than `min_prec`,
fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option<Expr> { fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option<Expr> {
let start = p.start(); let start = p.next_start();
let mut lhs = match p.eat_map(UnOp::from_token) { let mut lhs = match p.eat_map(UnOp::from_token) {
Some(op) => { Some(op) => {
let prec = op.precedence(); let prec = op.precedence();
@ -383,7 +410,7 @@ fn collection(p: &mut Parser) -> (Vec<CallArg>, bool) {
break; break;
} }
let behind = p.end(); let behind = p.prev_end();
if p.eat_if(Token::Comma) { if p.eat_if(Token::Comma) {
has_comma = true; has_comma = true;
} else { } else {
@ -467,7 +494,7 @@ fn block(p: &mut Parser, scoping: bool) -> Expr {
if let Some(expr) = expr(p) { if let Some(expr) = expr(p) {
exprs.push(expr); exprs.push(expr);
if !p.eof() { if !p.eof() {
p.expected_at("semicolon or line break", p.end()); p.expected_at("semicolon or line break", p.prev_end());
} }
} }
p.end_group(); p.end_group();
@ -506,14 +533,14 @@ fn call(p: &mut Parser, callee: Expr) -> Expr {
/// Parse the arguments to a function call. /// Parse the arguments to a function call.
fn args(p: &mut Parser) -> CallArgs { fn args(p: &mut Parser) -> CallArgs {
let start = p.start(); let start = p.next_start();
let items = collection(p).0; let items = collection(p).0;
CallArgs { span: p.span(start), items } CallArgs { span: p.span(start), items }
} }
/// Parse a let expression. /// Parse a let expression.
fn expr_let(p: &mut Parser) -> Option<Expr> { fn expr_let(p: &mut Parser) -> Option<Expr> {
let start = p.start(); let start = p.next_start();
p.assert(Token::Let); p.assert(Token::Let);
let mut expr_let = None; let mut expr_let = None;
@ -532,7 +559,7 @@ fn expr_let(p: &mut Parser) -> Option<Expr> {
init = expr(p); init = expr(p);
} else if params.is_some() { } else if params.is_some() {
// Function definitions must have a body. // Function definitions must have a body.
p.expected_at("body", p.end()); p.expected_at("body", p.prev_end());
} }
// Rewrite into a closure expression if it's a function definition. // Rewrite into a closure expression if it's a function definition.
@ -558,7 +585,7 @@ fn expr_let(p: &mut Parser) -> Option<Expr> {
/// Parse an if expresion. /// Parse an if expresion.
fn expr_if(p: &mut Parser) -> Option<Expr> { fn expr_if(p: &mut Parser) -> Option<Expr> {
let start = p.start(); let start = p.next_start();
p.assert(Token::If); p.assert(Token::If);
let mut expr_if = None; let mut expr_if = None;
@ -589,7 +616,7 @@ fn expr_if(p: &mut Parser) -> Option<Expr> {
/// Parse a while expresion. /// Parse a while expresion.
fn expr_while(p: &mut Parser) -> Option<Expr> { fn expr_while(p: &mut Parser) -> Option<Expr> {
let start = p.start(); let start = p.next_start();
p.assert(Token::While); p.assert(Token::While);
let mut expr_while = None; let mut expr_while = None;
@ -608,7 +635,7 @@ fn expr_while(p: &mut Parser) -> Option<Expr> {
/// Parse a for expression. /// Parse a for expression.
fn expr_for(p: &mut Parser) -> Option<Expr> { fn expr_for(p: &mut Parser) -> Option<Expr> {
let start = p.start(); let start = p.next_start();
p.assert(Token::For); p.assert(Token::For);
let mut expr_for = None; let mut expr_for = None;
@ -643,7 +670,7 @@ fn for_pattern(p: &mut Parser) -> Option<ForPattern> {
/// Parse an import expression. /// Parse an import expression.
fn expr_import(p: &mut Parser) -> Option<Expr> { fn expr_import(p: &mut Parser) -> Option<Expr> {
let start = p.start(); let start = p.next_start();
p.assert(Token::Import); p.assert(Token::Import);
let mut expr_import = None; let mut expr_import = None;
@ -657,7 +684,7 @@ fn expr_import(p: &mut Parser) -> Option<Expr> {
p.start_group(Group::Expr, TokenMode::Code); p.start_group(Group::Expr, TokenMode::Code);
let items = collection(p).0; let items = collection(p).0;
if items.is_empty() { if items.is_empty() {
p.expected_at("import items", p.end()); p.expected_at("import items", p.prev_end());
} }
let idents = idents(p, items); let idents = idents(p, items);
@ -680,7 +707,7 @@ fn expr_import(p: &mut Parser) -> Option<Expr> {
/// Parse an include expression. /// Parse an include expression.
fn expr_include(p: &mut Parser) -> Option<Expr> { fn expr_include(p: &mut Parser) -> Option<Expr> {
let start = p.start(); let start = p.next_start();
p.assert(Token::Include); p.assert(Token::Include);
expr(p).map(|path| { expr(p).map(|path| {
@ -710,7 +737,7 @@ fn body(p: &mut Parser) -> Option<Expr> {
Some(Token::LeftBracket) => Some(template(p)), Some(Token::LeftBracket) => Some(template(p)),
Some(Token::LeftBrace) => Some(block(p, true)), Some(Token::LeftBrace) => Some(block(p, true)),
_ => { _ => {
p.expected_at("body", p.end()); p.expected_at("body", p.prev_end());
None None
} }
} }

View File

@ -1,6 +1,7 @@
use std::fmt::{self, Debug, Formatter}; use std::fmt::{self, Debug, Formatter};
use std::ops::Range;
use super::{Scanner, TokenMode, Tokens}; use super::{search_column, TokenMode, Tokens};
use crate::diag::{Diag, DiagSet}; use crate::diag::{Diag, DiagSet};
use crate::syntax::{Pos, Span, Token}; use crate::syntax::{Pos, Span, Token};
@ -17,10 +18,10 @@ pub struct Parser<'s> {
/// The peeked token. /// The peeked token.
/// (Same as `next` except if we are at the end of group, then `None`). /// (Same as `next` except if we are at the end of group, then `None`).
peeked: Option<Token<'s>>, peeked: Option<Token<'s>>,
/// The start position of the peeked token.
next_start: Pos,
/// The end position of the last (non-whitespace if in code mode) token. /// The end position of the last (non-whitespace if in code mode) token.
last_end: Pos, prev_end: usize,
/// The start position of the peeked token.
next_start: usize,
} }
/// A logical group of tokens, e.g. `[...]`. /// A logical group of tokens, e.g. `[...]`.
@ -28,7 +29,7 @@ pub struct Parser<'s> {
struct GroupEntry { struct GroupEntry {
/// The start position of the group. Used by `Parser::end_group` to return /// The start position of the group. Used by `Parser::end_group` to return
/// The group's full span. /// The group's full span.
pub start: Pos, pub start: usize,
/// The kind of group this is. This decides which tokens will end the group. /// The kind of group this is. This decides which tokens will end the group.
/// For example, a [`Group::Paren`] will be ended by /// For example, a [`Group::Paren`] will be ended by
/// [`Token::RightParen`]. /// [`Token::RightParen`].
@ -59,12 +60,12 @@ impl<'s> Parser<'s> {
let next = tokens.next(); let next = tokens.next();
Self { Self {
diags: DiagSet::new(), diags: DiagSet::new(),
next,
tokens, tokens,
last_end: Pos::ZERO,
peeked: next,
next_start: Pos::ZERO,
groups: vec![], groups: vec![],
next,
peeked: next,
prev_end: 0,
next_start: 0,
} }
} }
@ -76,9 +77,9 @@ impl<'s> Parser<'s> {
/// Eat the next token and add a diagnostic that it is not the expected /// Eat the next token and add a diagnostic that it is not the expected
/// `thing`. /// `thing`.
pub fn expected(&mut self, what: &str) { pub fn expected(&mut self, what: &str) {
let before = self.next_start; let before = self.next_start();
if let Some(found) = self.eat() { if let Some(found) = self.eat() {
let after = self.last_end; let after = self.prev_end();
self.diag(error!( self.diag(error!(
before .. after, before .. after,
"expected {}, found {}", "expected {}, found {}",
@ -86,20 +87,20 @@ impl<'s> Parser<'s> {
found.name(), found.name(),
)); ));
} else { } else {
self.expected_at(what, self.next_start); self.expected_at(what, self.next_start());
} }
} }
/// Add a diagnostic that `what` was expected at the given position. /// Add a diagnostic that `what` was expected at the given position.
pub fn expected_at(&mut self, what: &str, pos: Pos) { pub fn expected_at(&mut self, what: &str, pos: impl Into<Pos>) {
self.diag(error!(pos, "expected {}", what)); self.diag(error!(pos.into(), "expected {}", what));
} }
/// Eat the next token and add a diagnostic that it is unexpected. /// Eat the next token and add a diagnostic that it is unexpected.
pub fn unexpected(&mut self) { pub fn unexpected(&mut self) {
let before = self.next_start; let before = self.next_start();
if let Some(found) = self.eat() { if let Some(found) = self.eat() {
let after = self.last_end; let after = self.prev_end();
self.diag(error!(before .. after, "unexpected {}", found.name())); self.diag(error!(before .. after, "unexpected {}", found.name()));
} }
} }
@ -110,11 +111,10 @@ impl<'s> Parser<'s> {
/// `eat()` and `peek()` return `None`. Parsing can only continue with /// `eat()` and `peek()` return `None`. Parsing can only continue with
/// a matching call to `end_group`. /// a matching call to `end_group`.
/// ///
/// # Panics
/// This panics if the next token does not start the given group. /// This panics if the next token does not start the given group.
pub fn start_group(&mut self, kind: Group, mode: TokenMode) { pub fn start_group(&mut self, kind: Group, mode: TokenMode) {
self.groups.push(GroupEntry { self.groups.push(GroupEntry {
start: self.next_start, start: self.next_start(),
kind, kind,
outer_mode: self.tokens.mode(), outer_mode: self.tokens.mode(),
}); });
@ -133,7 +133,6 @@ impl<'s> Parser<'s> {
/// End the parsing of a group. /// End the parsing of a group.
/// ///
/// # Panics
/// This panics if no group was started. /// This panics if no group was started.
pub fn end_group(&mut self) -> Span { pub fn end_group(&mut self) -> Span {
let prev_mode = self.tokens.mode(); let prev_mode = self.tokens.mode();
@ -156,17 +155,16 @@ impl<'s> Parser<'s> {
self.bump(); self.bump();
rescan = false; rescan = false;
} else if required { } else if required {
self.diag(error!(self.next_start, "expected {}", end.name())); self.diag(error!(self.next_start(), "expected {}", end.name()));
} }
} }
// Rescan the peeked token if the mode changed. // Rescan the peeked token if the mode changed.
if rescan { if rescan {
self.tokens.jump(self.last_end); self.jump(self.prev_end());
self.bump();
} }
Span::new(group.start, self.last_end) Span::new(group.start, self.prev_end())
} }
/// The tokenization mode outside of the current group. /// The tokenization mode outside of the current group.
@ -193,7 +191,7 @@ impl<'s> Parser<'s> {
/// Peek at the next token if it follows immediately after the last one /// Peek at the next token if it follows immediately after the last one
/// without any whitespace in between. /// without any whitespace in between.
pub fn peek_direct(&self) -> Option<Token<'s>> { pub fn peek_direct(&self) -> Option<Token<'s>> {
if self.next_start == self.last_end { if self.next_start() == self.prev_end() {
self.peeked self.peeked
} else { } else {
None None
@ -204,15 +202,17 @@ impl<'s> Parser<'s> {
/// ///
/// Has length zero if `peek()` returns `None`. /// Has length zero if `peek()` returns `None`.
pub fn peek_span(&self) -> Span { pub fn peek_span(&self) -> Span {
Span::new( self.peek_range().into()
self.next_start,
if self.eof() { self.next_start } else { self.tokens.pos() },
)
} }
/// Peek at the source of the next token. /// Peek at the source of the next token.
pub fn peek_src(&self) -> &'s str { pub fn peek_src(&self) -> &'s str {
self.get(self.peek_span()) self.tokens.scanner().get(self.peek_range())
}
/// Peek at the source range (start and end index) of the next token.
pub fn peek_range(&self) -> Range<usize> {
self.next_start() .. self.next_end()
} }
/// Checks whether the next token fulfills a condition. /// Checks whether the next token fulfills a condition.
@ -255,11 +255,11 @@ impl<'s> Parser<'s> {
mapped mapped
} }
/// Eat the next token and return its span. /// Eat the next token and return its source range.
pub fn eat_span(&mut self) -> Span { pub fn eat_span(&mut self) -> Span {
let start = self.next_start; let start = self.next_start();
self.eat(); self.eat();
Span::new(start, self.last_end) Span::new(start, self.prev_end())
} }
/// Consume the next token if it is the given one and produce a diagnostic /// Consume the next token if it is the given one and produce a diagnostic
@ -267,7 +267,7 @@ impl<'s> Parser<'s> {
pub fn expect(&mut self, t: Token) -> bool { pub fn expect(&mut self, t: Token) -> bool {
let eaten = self.eat_if(t); let eaten = self.eat_if(t);
if !eaten { if !eaten {
self.expected_at(t.name(), self.last_end); self.expected_at(t.name(), self.prev_end());
} }
eaten eaten
} }
@ -290,45 +290,48 @@ impl<'s> Parser<'s> {
} }
} }
/// The position at which the next token starts. /// The index at which the last token ended.
pub fn start(&self) -> Pos { ///
/// Refers to the end of the last _non-whitespace_ token in code mode.
pub fn prev_end(&self) -> usize {
self.prev_end
}
/// The index at which the next token starts.
pub fn next_start(&self) -> usize {
self.next_start self.next_start
} }
/// The position at which the last token ended. /// The index at which the next token will end.
/// ///
/// Refers to the end of the last _non-whitespace_ token in code mode. /// Is the same as [`next_start()`][Self::next_start] if `peek()` returns
pub fn end(&self) -> Pos { /// `None`.
self.last_end pub fn next_end(&self) -> usize {
self.tokens.index()
} }
/// The span from `start` to the end of the last token. /// Determine the column for the given index in the source.
pub fn span(&self, start: Pos) -> Span { pub fn column(&self, index: usize) -> usize {
Span::new(start, self.last_end) search_column(self.tokens.scanner().get(.. index))
} }
/// Jump to a position in the source string. /// The span from `start` to [`self.prev_end()`](Self::prev_end).
pub fn jump(&mut self, pos: Pos) { pub fn span(&self, start: impl Into<Pos>) -> Span {
self.tokens.jump(pos); Span::new(start, self.prev_end())
}
/// Jump to an index in the string.
///
/// You need to know the correct column.
fn jump(&mut self, index: usize) {
self.tokens.jump(index);
self.bump(); self.bump();
} }
/// Slice a part out of the source string.
pub fn get(&self, span: impl Into<Span>) -> &'s str {
self.tokens.scanner().get(span.into().to_range())
}
/// The underlying scanner.
pub fn scanner(&self) -> Scanner<'s> {
let mut scanner = self.tokens.scanner().clone();
scanner.jump(self.next_start.to_usize());
scanner
}
/// Move to the next token. /// Move to the next token.
fn bump(&mut self) { fn bump(&mut self) {
self.last_end = self.tokens.pos(); self.prev_end = self.tokens.index();
self.next_start = self.tokens.pos(); self.next_start = self.tokens.index();
self.next = self.tokens.next(); self.next = self.tokens.next();
if self.tokens.mode() == TokenMode::Code { if self.tokens.mode() == TokenMode::Code {
@ -339,7 +342,7 @@ impl<'s> Parser<'s> {
Some(Token::BlockComment(_)) => true, Some(Token::BlockComment(_)) => true,
_ => false, _ => false,
} { } {
self.next_start = self.tokens.pos(); self.next_start = self.tokens.index();
self.next = self.tokens.next(); self.next = self.tokens.next();
} }
} }
@ -381,7 +384,8 @@ impl<'s> Parser<'s> {
impl Debug for Parser<'_> { impl Debug for Parser<'_> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let s = self.scanner(); let mut s = self.tokens.scanner();
s.jump(self.next_start());
write!(f, "Parser({}|{})", s.eaten(), s.rest()) write!(f, "Parser({}|{})", s.eaten(), s.rest())
} }
} }

View File

@ -2,7 +2,7 @@ use std::fmt::{self, Debug, Formatter};
use std::slice::SliceIndex; use std::slice::SliceIndex;
/// A featureful char-based scanner. /// A featureful char-based scanner.
#[derive(Clone)] #[derive(Copy, Clone)]
pub struct Scanner<'s> { pub struct Scanner<'s> {
src: &'s str, src: &'s str,
index: usize, index: usize,

View File

@ -38,20 +38,22 @@ impl<'s> Tokens<'s> {
self.mode = mode; self.mode = mode;
} }
/// The position in the string at which the last token ends and next token /// The index in the string at which the last token ends and next token
/// will start. /// will start.
pub fn pos(&self) -> Pos { pub fn index(&self) -> usize {
self.s.index().into() self.s.index()
} }
/// Jump to the given position. /// Jump to the given index in the string.
pub fn jump(&mut self, pos: Pos) { ///
self.s.jump(pos.to_usize()); /// You need to know the correct column.
pub fn jump(&mut self, index: usize) {
self.s.jump(index);
} }
/// The underlying scanner. /// The underlying scanner.
pub fn scanner(&self) -> &Scanner<'s> { pub fn scanner(&self) -> Scanner<'s> {
&self.s self.s
} }
} }
@ -62,126 +64,100 @@ impl<'s> Iterator for Tokens<'s> {
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
let start = self.s.index(); let start = self.s.index();
let c = self.s.eat()?; let c = self.s.eat()?;
Some(match c {
// Blocks and templates.
'[' => Token::LeftBracket,
']' => Token::RightBracket,
'{' => Token::LeftBrace,
'}' => Token::RightBrace,
// This never loops. It just exists to allow breaking out of it. // Headings, keywords, identifiers, colors.
loop { '#' => self.hash(start),
// Common elements.
return Some(match c {
// Blocks and templates.
'[' => Token::LeftBracket,
']' => Token::RightBracket,
'{' => Token::LeftBrace,
'}' => Token::RightBrace,
// Headings, keywords, identifiers, colors. // Whitespace.
'#' => self.hash(start), c if c.is_whitespace() => self.whitespace(c),
// Whitespace. // Comments.
c if c.is_whitespace() => self.whitespace(c), '/' if self.s.eat_if('/') => self.line_comment(),
'/' if self.s.eat_if('*') => self.block_comment(),
'*' if self.s.eat_if('/') => Token::Invalid(self.s.eaten_from(start)),
// Comments. // Other things.
'/' if self.s.eat_if('/') => self.line_comment(), _ => match self.mode {
'/' if self.s.eat_if('*') => self.block_comment(), TokenMode::Markup => self.markup(start, c),
'*' if self.s.eat_if('/') => Token::Invalid(self.s.eaten_from(start)), TokenMode::Code => self.code(start, c),
_ => break,
});
}
Some(match self.mode {
TokenMode::Markup => match c {
// Markup.
'*' => Token::Star,
'_' => Token::Underscore,
'~' => Token::Tilde,
'`' => self.raw(),
'$' => self.math(),
'\\' => self.backslash(),
// Plain text.
_ => self.text(start),
},
TokenMode::Code => match c {
// Parens.
'(' => Token::LeftParen,
')' => Token::RightParen,
// Length two.
'=' if self.s.eat_if('=') => Token::EqEq,
'!' if self.s.eat_if('=') => Token::BangEq,
'<' if self.s.eat_if('=') => Token::LtEq,
'>' if self.s.eat_if('=') => Token::GtEq,
'+' if self.s.eat_if('=') => Token::PlusEq,
'-' if self.s.eat_if('=') => Token::HyphEq,
'*' if self.s.eat_if('=') => Token::StarEq,
'/' if self.s.eat_if('=') => Token::SlashEq,
'.' if self.s.eat_if('.') => Token::Dots,
'=' if self.s.eat_if('>') => Token::Arrow,
// Length one.
',' => Token::Comma,
';' => Token::Semicolon,
':' => Token::Colon,
'+' => Token::Plus,
'-' => Token::Hyph,
'*' => Token::Star,
'/' => Token::Slash,
'=' => Token::Eq,
'<' => Token::Lt,
'>' => Token::Gt,
// Identifiers.
c if is_id_start(c) => self.ident(start),
// Numbers.
c if c.is_ascii_digit()
|| (c == '.' && self.s.check(|n| n.is_ascii_digit())) =>
{
self.number(start, c)
}
// Strings.
'"' => self.string(),
_ => Token::Invalid(self.s.eaten_from(start)),
}, },
}) })
} }
} }
impl<'s> Tokens<'s> { impl<'s> Tokens<'s> {
fn hash(&mut self, start: usize) -> Token<'s> { fn markup(&mut self, start: usize, c: char) -> Token<'s> {
let read = self.s.eat_while(is_id_continue); match c {
// Markup.
'~' => Token::Tilde,
'*' => Token::Star,
'_' => Token::Underscore,
'\\' => self.backslash(),
'`' => self.raw(),
'$' => self.math(),
'-' => self.hyph(start),
match self.mode { // Plain text.
TokenMode::Markup => { _ => self.text(start),
if read.is_empty() {
return Token::Hashtag;
}
if let Some(token) = keyword(read) {
return token;
}
if read.chars().next().map_or(false, is_id_start) {
return Token::Ident(read);
}
}
TokenMode::Code => {
if let Ok(color) = RgbaColor::from_str(read) {
return Token::Color(color);
}
}
} }
}
Token::Invalid(self.s.eaten_from(start)) fn code(&mut self, start: usize, c: char) -> Token<'s> {
match c {
// Parens.
'(' => Token::LeftParen,
')' => Token::RightParen,
// Length two.
'=' if self.s.eat_if('=') => Token::EqEq,
'!' if self.s.eat_if('=') => Token::BangEq,
'<' if self.s.eat_if('=') => Token::LtEq,
'>' if self.s.eat_if('=') => Token::GtEq,
'+' if self.s.eat_if('=') => Token::PlusEq,
'-' if self.s.eat_if('=') => Token::HyphEq,
'*' if self.s.eat_if('=') => Token::StarEq,
'/' if self.s.eat_if('=') => Token::SlashEq,
'.' if self.s.eat_if('.') => Token::Dots,
'=' if self.s.eat_if('>') => Token::Arrow,
// Length one.
',' => Token::Comma,
';' => Token::Semicolon,
':' => Token::Colon,
'+' => Token::Plus,
'-' => Token::Hyph,
'*' => Token::Star,
'/' => Token::Slash,
'=' => Token::Eq,
'<' => Token::Lt,
'>' => Token::Gt,
// Identifiers.
c if is_id_start(c) => self.ident(start),
// Numbers.
c if c.is_ascii_digit()
|| (c == '.' && self.s.check(|n| n.is_ascii_digit())) =>
{
self.number(start, c)
}
// Strings.
'"' => self.string(),
_ => Token::Invalid(self.s.eaten_from(start)),
}
} }
fn whitespace(&mut self, first: char) -> Token<'s> { fn whitespace(&mut self, first: char) -> Token<'s> {
// Fast path for just a single space // Fast path for just a single space
if first == ' ' && !self.s.check(|c| c.is_whitespace()) { if first == ' ' && !self.s.check(char::is_whitespace) {
Token::Space(0) Token::Space(0)
} else { } else {
self.s.uneat(); self.s.uneat();
@ -210,12 +186,13 @@ impl<'s> Tokens<'s> {
c if c.is_whitespace() => true, c if c.is_whitespace() => true,
// Comments. // Comments.
'/' if self.s.check(|c| c == '/' || c == '*') => true, '/' if self.s.check(|c| c == '/' || c == '*') => true,
// Parenthesis and hashtag. // Parentheses.
'[' | ']' | '{' | '}' | '#' => true, '[' | ']' | '{' | '}' => true,
// Markup. // Markup.
'*' | '_' | '=' | '~' | '`' | '$' => true, '#' | '~' | '*' | '_' | '-' | '`' | '$' => true,
// Escaping. // Escaping.
'\\' => true, '\\' => true,
// Just text.
_ => false, _ => false,
} { } {
self.s.uneat(); self.s.uneat();
@ -226,6 +203,77 @@ impl<'s> Tokens<'s> {
Token::Text(self.s.eaten_from(start)) Token::Text(self.s.eaten_from(start))
} }
fn backslash(&mut self) -> Token<'s> {
if let Some(c) = self.s.peek() {
match c {
// Backslash and comments.
'\\' | '/' |
// Parenthesis and hashtag.
'[' | ']' | '{' | '}' | '#' |
// Markup.
'*' | '_' | '=' | '~' | '`' | '$' => {
let start = self.s.index();
self.s.eat_assert(c);
Token::Text(&self.s.eaten_from(start))
}
'u' if self.s.peek_nth(1) == Some('{') => {
self.s.eat_assert('u');
self.s.eat_assert('{');
Token::UnicodeEscape(UnicodeEscapeToken {
// Allow more than `ascii_hexdigit` for better error recovery.
sequence: self.s.eat_while(|c| c.is_ascii_alphanumeric()),
terminated: self.s.eat_if('}'),
})
}
c if c.is_whitespace() => Token::Backslash,
_ => Token::Text("\\"),
}
} else {
Token::Backslash
}
}
fn hash(&mut self, start: usize) -> Token<'s> {
match self.mode {
TokenMode::Markup => {
if self.s.check(is_id_start) {
let read = self.s.eat_while(is_id_continue);
if let Some(keyword) = keyword(read) {
keyword
} else {
Token::Ident(read)
}
} else if self.s.check(|c| c != '#' && !c.is_whitespace()) {
Token::Text(self.s.eaten_from(start))
} else {
Token::Hashtag
}
}
TokenMode::Code => {
let read = self.s.eat_while(is_id_continue);
if let Ok(color) = RgbaColor::from_str(read) {
Token::Color(color)
} else {
Token::Invalid(self.s.eaten_from(start))
}
}
}
}
fn hyph(&mut self, start: usize) -> Token<'s> {
if self.s.eat_if('-') {
if self.s.eat_if('-') {
Token::HyphHyphHyph
} else {
Token::HyphHyph
}
} else if self.s.check(|c| !c.is_whitespace()) {
Token::Text(self.s.eaten_from(start))
} else {
Token::Hyph
}
}
fn raw(&mut self) -> Token<'s> { fn raw(&mut self) -> Token<'s> {
let mut backticks = 1; let mut backticks = 1;
while self.s.eat_if('`') { while self.s.eat_if('`') {
@ -295,36 +343,6 @@ impl<'s> Tokens<'s> {
}) })
} }
fn backslash(&mut self) -> Token<'s> {
if let Some(c) = self.s.peek() {
match c {
// Backslash and comments.
'\\' | '/' |
// Parenthesis and hashtag.
'[' | ']' | '{' | '}' | '#' |
// Markup.
'*' | '_' | '=' | '~' | '`' | '$' => {
let start = self.s.index();
self.s.eat_assert(c);
Token::Text(&self.s.eaten_from(start))
}
'u' if self.s.peek_nth(1) == Some('{') => {
self.s.eat_assert('u');
self.s.eat_assert('{');
Token::UnicodeEscape(UnicodeEscapeToken {
// Allow more than `ascii_hexdigit` for better error recovery.
sequence: self.s.eat_while(|c| c.is_ascii_alphanumeric()),
terminated: self.s.eat_if('}'),
})
}
c if c.is_whitespace() => Token::Backslash,
_ => Token::Text("\\"),
}
} else {
Token::Backslash
}
}
fn ident(&mut self, start: usize) -> Token<'s> { fn ident(&mut self, start: usize) -> Token<'s> {
self.s.eat_while(is_id_continue); self.s.eat_while(is_id_continue);
match self.s.eaten_from(start) { match self.s.eaten_from(start) {
@ -474,6 +492,10 @@ mod tests {
use Token::{Ident, *}; use Token::{Ident, *};
use TokenMode::{Code, Markup}; use TokenMode::{Code, Markup};
const fn UnicodeEscape(sequence: &str, terminated: bool) -> Token {
Token::UnicodeEscape(UnicodeEscapeToken { sequence, terminated })
}
const fn Raw(text: &str, backticks: usize, terminated: bool) -> Token { const fn Raw(text: &str, backticks: usize, terminated: bool) -> Token {
Token::Raw(RawToken { text, backticks, terminated }) Token::Raw(RawToken { text, backticks, terminated })
} }
@ -482,18 +504,14 @@ mod tests {
Token::Math(MathToken { formula, display, terminated }) Token::Math(MathToken { formula, display, terminated })
} }
const fn UnicodeEscape(sequence: &str, terminated: bool) -> Token { const fn Color(r: u8, g: u8, b: u8, a: u8) -> Token<'static> {
Token::UnicodeEscape(UnicodeEscapeToken { sequence, terminated }) Token::Color(RgbaColor { r, g, b, a })
} }
const fn Str(string: &str, terminated: bool) -> Token { const fn Str(string: &str, terminated: bool) -> Token {
Token::Str(StrToken { string, terminated }) Token::Str(StrToken { string, terminated })
} }
const fn Color(r: u8, g: u8, b: u8, a: u8) -> Token<'static> {
Token::Color(RgbaColor { r, g, b, a })
}
/// Building blocks for suffix testing. /// Building blocks for suffix testing.
/// ///
/// We extend each test case with a collection of different suffixes to make /// We extend each test case with a collection of different suffixes to make
@ -605,15 +623,92 @@ mod tests {
t!(Code: ")" => RightParen); t!(Code: ")" => RightParen);
} }
#[test]
fn test_tokenize_whitespace() {
// Test basic whitespace.
t!(Both["a1/"]: "" => );
t!(Both["a1/"]: " " => Space(0));
t!(Both["a1/"]: " " => Space(0));
t!(Both["a1/"]: "\t" => Space(0));
t!(Both["a1/"]: " \t" => Space(0));
t!(Both["a1/"]: "\u{202F}" => Space(0));
// Test newline counting.
t!(Both["a1/"]: "\n" => Space(1));
t!(Both["a1/"]: "\n " => Space(1));
t!(Both["a1/"]: " \n" => Space(1));
t!(Both["a1/"]: " \n " => Space(1));
t!(Both["a1/"]: "\r\n" => Space(1));
t!(Both["a1/"]: " \n\t \n " => Space(2));
t!(Both["a1/"]: "\n\r" => Space(2));
t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
}
#[test]
fn test_tokenize_text() {
// Test basic text.
t!(Markup[" /"]: "hello" => Text("hello"));
t!(Markup[" /"]: "hello-world" => Text("hello"), Text("-"), Text("world"));
// Test code symbols in text.
t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
t!(Markup[" /"]: ";:,|/+" => Text(";:,|/+"));
t!(Markup[" /"]: "#-a" => Text("#"), Text("-"), Text("a"));
t!(Markup[" "]: "#123" => Text("#"), Text("123"));
// Test text ends.
t!(Markup[""]: "hello " => Text("hello"), Space(0));
t!(Markup[""]: "hello~" => Text("hello"), Tilde);
}
#[test]
fn test_tokenize_escape_sequences() {
// Test escapable symbols.
t!(Markup: r"\\" => Text(r"\"));
t!(Markup: r"\/" => Text("/"));
t!(Markup: r"\[" => Text("["));
t!(Markup: r"\]" => Text("]"));
t!(Markup: r"\{" => Text("{"));
t!(Markup: r"\}" => Text("}"));
t!(Markup: r"\*" => Text("*"));
t!(Markup: r"\_" => Text("_"));
t!(Markup: r"\=" => Text("="));
t!(Markup: r"\~" => Text("~"));
t!(Markup: r"\`" => Text("`"));
t!(Markup: r"\$" => Text("$"));
t!(Markup: r"\#" => Text("#"));
// Test unescapable symbols.
t!(Markup[" /"]: r"\a" => Text(r"\"), Text("a"));
t!(Markup[" /"]: r"\u" => Text(r"\"), Text("u"));
t!(Markup[" /"]: r"\1" => Text(r"\"), Text("1"));
t!(Markup[" /"]: r"\:" => Text(r"\"), Text(":"));
t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\""));
// Test basic unicode escapes.
t!(Markup: r"\u{}" => UnicodeEscape("", true));
t!(Markup: r"\u{2603}" => UnicodeEscape("2603", true));
t!(Markup: r"\u{P}" => UnicodeEscape("P", true));
// Test unclosed unicode escapes.
t!(Markup[" /"]: r"\u{" => UnicodeEscape("", false));
t!(Markup[" /"]: r"\u{1" => UnicodeEscape("1", false));
t!(Markup[" /"]: r"\u{26A4" => UnicodeEscape("26A4", false));
t!(Markup[" /"]: r"\u{1Q3P" => UnicodeEscape("1Q3P", false));
t!(Markup: r"\u{1🏕}" => UnicodeEscape("1", false), Text("🏕"), RightBrace);
}
#[test] #[test]
fn test_tokenize_markup_symbols() { fn test_tokenize_markup_symbols() {
// Test markup tokens. // Test markup tokens.
t!(Markup[" a1"]: "*" => Star); t!(Markup[" a1"]: "*" => Star);
t!(Markup: "_" => Underscore); t!(Markup: "_" => Underscore);
t!(Markup[""]: "###" => Hashtag, Hashtag, Hashtag); t!(Markup[""]: "###" => Hashtag, Hashtag, Hashtag);
t!(Markup["a1/"]: "# " => Hashtag, Space(0)); t!(Markup["a1/"]: "# " => Hashtag, Space(0));
t!(Markup: "~" => Tilde); t!(Markup["a1/"]: "- " => Hyph, Space(0));
t!(Markup[" "]: r"\" => Backslash); t!(Markup: "~" => Tilde);
t!(Markup[" "]: r"\" => Backslash);
t!(Markup["a "]: r"a--" => Text("a"), HyphHyph);
} }
#[test] #[test]
@ -654,71 +749,32 @@ mod tests {
#[test] #[test]
fn test_tokenize_keywords() { fn test_tokenize_keywords() {
let keywords = [ // A list of a few (not all) keywords.
let list = [
("let", Let), ("let", Let),
("if", If), ("if", If),
("else", Else), ("else", Else),
("for", For), ("for", For),
("in", In), ("in", In),
("while", While), ("import", Import),
("break", Break),
("continue", Continue),
("return", Return),
]; ];
for &(s, t) in &keywords { for &(s, t) in &list {
t!(Markup[" "]: format!("#{}", s) => t); t!(Markup[" "]: format!("#{}", s) => t);
t!(Markup[" "]: format!("#{0}#{0}", s) => t, t); t!(Markup[" "]: format!("#{0}#{0}", s) => t, t);
t!(Markup[" /"]: format!("# {}", s) => Token::Hashtag, Space(0), Text(s)); t!(Markup[" /"]: format!("# {}", s) => Token::Hashtag, Space(0), Text(s));
} }
for &(s, t) in &keywords { for &(s, t) in &list {
t!(Code[" "]: s => t); t!(Code[" "]: s => t);
t!(Markup[" /"]: s => Text(s)); t!(Markup[" /"]: s => Text(s));
} }
// Test simple identifier. // Test simple identifier.
t!(Markup[" "]: "#letter" => Ident("letter")); t!(Markup[" "]: "#letter" => Ident("letter"));
t!(Markup[" "]: "#123" => Invalid("#123")); t!(Code[" /"]: "falser" => Ident("falser"));
t!(Code[" /"]: "falser" => Ident("falser")); t!(Code[" /"]: "None" => Ident("None"));
t!(Code[" /"]: "None" => Ident("None")); t!(Code[" /"]: "True" => Ident("True"));
t!(Code[" /"]: "True" => Ident("True"));
}
#[test]
fn test_tokenize_whitespace() {
// Test basic whitespace.
t!(Both["a1/"]: "" => );
t!(Both["a1/"]: " " => Space(0));
t!(Both["a1/"]: " " => Space(0));
t!(Both["a1/"]: "\t" => Space(0));
t!(Both["a1/"]: " \t" => Space(0));
t!(Both["a1/"]: "\u{202F}" => Space(0));
// Test newline counting.
t!(Both["a1/"]: "\n" => Space(1));
t!(Both["a1/"]: "\n " => Space(1));
t!(Both["a1/"]: " \n" => Space(1));
t!(Both["a1/"]: " \n " => Space(1));
t!(Both["a1/"]: "\r\n" => Space(1));
t!(Both["a1/"]: " \n\t \n " => Space(2));
t!(Both["a1/"]: "\n\r" => Space(2));
t!(Both["a1/"]: " \r\r\n \x0D" => Space(3));
}
#[test]
fn test_tokenize_text() {
// Test basic text.
t!(Markup[" /"]: "hello" => Text("hello"));
t!(Markup[" /"]: "hello-world" => Text("hello-world"));
// Test code symbols in text.
t!(Markup[" /"]: "a():\"b" => Text("a():\"b"));
t!(Markup[" /"]: ";:,|/+-" => Text(";:,|/+-"));
// Test text ends.
t!(Markup[""]: "hello " => Text("hello"), Space(0));
t!(Markup[""]: "hello~" => Text("hello"), Tilde);
} }
#[test] #[test]
@ -764,43 +820,6 @@ mod tests {
t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, false)); t!(Markup[""]: r"$[ ]\\$" => Math(r" ]\\$", true, false));
} }
#[test]
fn test_tokenize_escape_sequences() {
// Test escapable symbols.
t!(Markup: r"\\" => Text(r"\"));
t!(Markup: r"\/" => Text("/"));
t!(Markup: r"\[" => Text("["));
t!(Markup: r"\]" => Text("]"));
t!(Markup: r"\{" => Text("{"));
t!(Markup: r"\}" => Text("}"));
t!(Markup: r"\*" => Text("*"));
t!(Markup: r"\_" => Text("_"));
t!(Markup: r"\=" => Text("="));
t!(Markup: r"\~" => Text("~"));
t!(Markup: r"\`" => Text("`"));
t!(Markup: r"\$" => Text("$"));
t!(Markup: r"\#" => Text("#"));
// Test unescapable symbols.
t!(Markup[" /"]: r"\a" => Text(r"\"), Text("a"));
t!(Markup[" /"]: r"\u" => Text(r"\"), Text("u"));
t!(Markup[" /"]: r"\1" => Text(r"\"), Text("1"));
t!(Markup[" /"]: r"\:" => Text(r"\"), Text(":"));
t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\""));
// Test basic unicode escapes.
t!(Markup: r"\u{}" => UnicodeEscape("", true));
t!(Markup: r"\u{2603}" => UnicodeEscape("2603", true));
t!(Markup: r"\u{P}" => UnicodeEscape("P", true));
// Test unclosed unicode escapes.
t!(Markup[" /"]: r"\u{" => UnicodeEscape("", false));
t!(Markup[" /"]: r"\u{1" => UnicodeEscape("1", false));
t!(Markup[" /"]: r"\u{26A4" => UnicodeEscape("26A4", false));
t!(Markup[" /"]: r"\u{1Q3P" => UnicodeEscape("1Q3P", false));
t!(Markup: r"\u{1🏕}" => UnicodeEscape("1", false), Text("🏕"), RightBrace);
}
#[test] #[test]
fn test_tokenize_idents() { fn test_tokenize_idents() {
// Test valid identifiers. // Test valid identifiers.
@ -956,8 +975,7 @@ mod tests {
t!(Code: "1p%" => Invalid("1p"), Invalid("%")); t!(Code: "1p%" => Invalid("1p"), Invalid("%"));
t!(Code: "1%%" => Percent(1.0), Invalid("%")); t!(Code: "1%%" => Percent(1.0), Invalid("%"));
// Test invalid keyword. // Test invalid color.
t!(Markup[" /"]: "#-" => Invalid("#-"));
t!(Code[" /"]: r"#letter" => Invalid(r"#letter")); t!(Code[" /"]: r"#letter" => Invalid(r"#letter"));
} }
} }

View File

@ -17,8 +17,8 @@ where
p.finish() p.finish()
} }
/// Pretty print an item with a node map and return the resulting string. /// Pretty print an item with a expression map and return the resulting string.
pub fn pretty_with_map<T>(item: &T, map: &NodeMap) -> String pub fn pretty_with_map<T>(item: &T, map: &ExprMap) -> String
where where
T: PrettyWithMap + ?Sized, T: PrettyWithMap + ?Sized,
{ {
@ -33,10 +33,10 @@ pub trait Pretty {
fn pretty(&self, p: &mut Printer); fn pretty(&self, p: &mut Printer);
} }
/// Pretty print an item with a node map that applies to it. /// Pretty print an item with an expression map that applies to it.
pub trait PrettyWithMap { pub trait PrettyWithMap {
/// Pretty print this item into the given printer. /// Pretty print this item into the given printer.
fn pretty_with_map(&self, p: &mut Printer, map: Option<&NodeMap>); fn pretty_with_map(&self, p: &mut Printer, map: Option<&ExprMap>);
} }
impl<T> Pretty for T impl<T> Pretty for T
@ -104,7 +104,7 @@ impl Write for Printer {
} }
impl PrettyWithMap for Tree { impl PrettyWithMap for Tree {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&NodeMap>) { fn pretty_with_map(&self, p: &mut Printer, map: Option<&ExprMap>) {
for node in self { for node in self {
node.pretty_with_map(p, map); node.pretty_with_map(p, map);
} }
@ -112,20 +112,21 @@ impl PrettyWithMap for Tree {
} }
impl PrettyWithMap for Node { impl PrettyWithMap for Node {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&NodeMap>) { fn pretty_with_map(&self, p: &mut Printer, map: Option<&ExprMap>) {
match self { match self {
// TODO: Handle escaping. // TODO: Handle escaping.
Self::Text(text) => p.push_str(text), Self::Text(text) => p.push_str(text),
Self::Space => p.push(' '), Self::Space => p.push(' '),
Self::Strong(_) => p.push('*'),
Self::Emph(_) => p.push('_'),
Self::Linebreak(_) => p.push_str(r"\"), Self::Linebreak(_) => p.push_str(r"\"),
Self::Parbreak(_) => p.push_str("\n\n"), Self::Parbreak(_) => p.push_str("\n\n"),
Self::Heading(heading) => heading.pretty_with_map(p, map), Self::Strong(_) => p.push('*'),
Self::Emph(_) => p.push('_'),
Self::Raw(raw) => raw.pretty(p), Self::Raw(raw) => raw.pretty(p),
Self::Heading(heading) => heading.pretty_with_map(p, map),
Self::List(list) => list.pretty_with_map(p, map),
Self::Expr(expr) => { Self::Expr(expr) => {
if let Some(map) = map { if let Some(map) = map {
let value = &map[&(self as *const _)]; let value = &map[&(expr as *const _)];
value.pretty(p); value.pretty(p);
} else { } else {
if expr.has_short_form() { if expr.has_short_form() {
@ -138,15 +139,6 @@ impl PrettyWithMap for Node {
} }
} }
impl PrettyWithMap for HeadingNode {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&NodeMap>) {
for _ in 0 .. self.level {
p.push('#');
}
self.contents.pretty_with_map(p, map);
}
}
impl Pretty for RawNode { impl Pretty for RawNode {
fn pretty(&self, p: &mut Printer) { fn pretty(&self, p: &mut Printer) {
// Find out how many backticks we need. // Find out how many backticks we need.
@ -203,6 +195,23 @@ impl Pretty for RawNode {
} }
} }
impl PrettyWithMap for HeadingNode {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&ExprMap>) {
for _ in 0 .. self.level {
p.push('#');
}
p.push(' ');
self.body.pretty_with_map(p, map);
}
}
impl PrettyWithMap for ListNode {
fn pretty_with_map(&self, p: &mut Printer, map: Option<&ExprMap>) {
p.push_str("- ");
self.body.pretty_with_map(p, map);
}
}
impl Pretty for Expr { impl Pretty for Expr {
fn pretty(&self, p: &mut Printer) { fn pretty(&self, p: &mut Printer) {
match self { match self {
@ -664,9 +673,8 @@ mod tests {
roundtrip("\\ "); roundtrip("\\ ");
roundtrip("\n\n"); roundtrip("\n\n");
roundtrip("hi"); roundtrip("hi");
// Heading.
roundtrip("# *Ok*"); roundtrip("# *Ok*");
roundtrip("- Ok");
// Raw. // Raw.
roundtrip("``"); roundtrip("``");

View File

@ -17,70 +17,16 @@ pub enum Node {
Strong(Span), Strong(Span),
/// Emphasized text was enabled / disabled: `_`. /// Emphasized text was enabled / disabled: `_`.
Emph(Span), Emph(Span),
/// A section heading: `= Introduction`.
Heading(HeadingNode),
/// A raw block with optional syntax highlighting: `` `...` ``. /// A raw block with optional syntax highlighting: `` `...` ``.
Raw(RawNode), Raw(RawNode),
/// A section heading: `= Introduction`.
Heading(HeadingNode),
/// A single list item: `- ...`.
List(ListNode),
/// An expression. /// An expression.
Expr(Expr), Expr(Expr),
} }
impl Node {
// The names of the corresponding library functions.
pub const LINEBREAK: &'static str = "linebreak";
pub const PARBREAK: &'static str = "parbreak";
pub const STRONG: &'static str = "strong";
pub const EMPH: &'static str = "emph";
pub const HEADING: &'static str = "heading";
pub const RAW: &'static str = "raw";
/// Desugar markup into a function call.
pub fn desugar(&self) -> Option<CallExpr> {
match *self {
Self::Text(_) => None,
Self::Space => None,
Self::Linebreak(span) => Some(call(span, Self::LINEBREAK)),
Self::Parbreak(span) => Some(call(span, Self::PARBREAK)),
Self::Strong(span) => Some(call(span, Self::STRONG)),
Self::Emph(span) => Some(call(span, Self::EMPH)),
Self::Heading(ref heading) => Some(heading.desugar()),
Self::Raw(ref raw) => Some(raw.desugar()),
Self::Expr(_) => None,
}
}
}
/// A section heading: `= Introduction`.
#[derive(Debug, Clone, PartialEq)]
pub struct HeadingNode {
/// The source code location.
pub span: Span,
/// The section depth (numer of equals signs).
pub level: usize,
/// The contents of the heading.
pub contents: Rc<Tree>,
}
impl HeadingNode {
pub const LEVEL: &'static str = "level";
pub const BODY: &'static str = "body";
/// Desugar into a function call.
pub fn desugar(&self) -> CallExpr {
let Self { span, level, ref contents } = *self;
let mut call = call(span, Node::HEADING);
call.args.items.push(CallArg::Named(Named {
name: ident(span, Self::LEVEL),
expr: Expr::Int(span, level as i64),
}));
call.args.items.push(CallArg::Pos(Expr::Template(TemplateExpr {
span,
tree: Rc::clone(&contents),
})));
call
}
}
/// A raw block with optional syntax highlighting: `` `...` ``. /// A raw block with optional syntax highlighting: `` `...` ``.
/// ///
/// Raw blocks start with 1 or 3+ backticks and end with the same number of /// Raw blocks start with 1 or 3+ backticks and end with the same number of
@ -158,38 +104,22 @@ pub struct RawNode {
pub block: bool, pub block: bool,
} }
impl RawNode { /// A section heading: `= Introduction`.
pub const LANG: &'static str = "lang"; #[derive(Debug, Clone, PartialEq)]
pub const BLOCK: &'static str = "block"; pub struct HeadingNode {
pub const TEXT: &'static str = "text"; /// The source code location.
pub span: Span,
/// Desugar into a function call. /// The section depth (numer of equals signs).
pub fn desugar(&self) -> CallExpr { pub level: usize,
let Self { span, ref lang, ref text, block } = *self; /// The contents of the heading.
let mut call = call(span, Node::RAW); pub body: Rc<Tree>,
if let Some(lang) = lang {
call.args.items.push(CallArg::Named(Named {
name: ident(span, Self::LANG),
expr: Expr::Str(span, lang.string.clone()),
}));
}
call.args.items.push(CallArg::Named(Named {
name: ident(span, Self::BLOCK),
expr: Expr::Bool(span, block),
}));
call.args.items.push(CallArg::Pos(Expr::Str(span, text.clone())));
call
}
} }
fn call(span: Span, name: &str) -> CallExpr { /// A single list item: `- ...`.
CallExpr { #[derive(Debug, Clone, PartialEq)]
span, pub struct ListNode {
callee: Box::new(Expr::Ident(Ident { span, string: name.into() })), /// The source code location.
args: CallArgs { span, items: vec![] }, pub span: Span,
} /// The contents of the list item.
} pub body: Tree,
fn ident(span: Span, string: &str) -> Ident {
Ident { span, string: string.into() }
} }

View File

@ -24,6 +24,10 @@ pub enum Token<'s> {
Hashtag, Hashtag,
/// A tilde: `~`. /// A tilde: `~`.
Tilde, Tilde,
/// Two hyphens: `--`.
HyphHyph,
/// Three hyphens: `---`.
HyphHyphHyph,
/// A backslash followed by nothing or whitespace: `\`. /// A backslash followed by nothing or whitespace: `\`.
Backslash, Backslash,
/// A comma: `,`. /// A comma: `,`.
@ -103,15 +107,15 @@ pub enum Token<'s> {
Space(usize), Space(usize),
/// A consecutive non-markup string. /// A consecutive non-markup string.
Text(&'s str), Text(&'s str),
/// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(UnicodeEscapeToken<'s>),
/// An arbitrary number of backticks followed by inner contents, terminated /// An arbitrary number of backticks followed by inner contents, terminated
/// with the same number of backticks: `` `...` ``. /// with the same number of backticks: `` `...` ``.
Raw(RawToken<'s>), Raw(RawToken<'s>),
/// One or two dollar signs followed by inner contents, terminated with the /// One or two dollar signs followed by inner contents, terminated with the
/// same number of dollar signs. /// same number of dollar signs.
Math(MathToken<'s>), Math(MathToken<'s>),
/// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(UnicodeEscapeToken<'s>),
/// An identifier: `center`. /// An identifier: `center`.
Ident(&'s str), Ident(&'s str),
/// A boolean: `true`, `false`. /// A boolean: `true`, `false`.
@ -204,6 +208,8 @@ impl<'s> Token<'s> {
Self::Underscore => "underscore", Self::Underscore => "underscore",
Self::Hashtag => "hashtag", Self::Hashtag => "hashtag",
Self::Tilde => "tilde", Self::Tilde => "tilde",
Self::HyphHyph => "en dash",
Self::HyphHyphHyph => "em dash",
Self::Backslash => "backslash", Self::Backslash => "backslash",
Self::Comma => "comma", Self::Comma => "comma",
Self::Semicolon => "semicolon", Self::Semicolon => "semicolon",
@ -242,9 +248,9 @@ impl<'s> Token<'s> {
Self::Using => "keyword `using`", Self::Using => "keyword `using`",
Self::Space(_) => "space", Self::Space(_) => "space",
Self::Text(_) => "text", Self::Text(_) => "text",
Self::UnicodeEscape(_) => "unicode escape sequence",
Self::Raw(_) => "raw block", Self::Raw(_) => "raw block",
Self::Math(_) => "math formula", Self::Math(_) => "math formula",
Self::UnicodeEscape(_) => "unicode escape sequence",
Self::Ident(_) => "identifier", Self::Ident(_) => "identifier",
Self::Bool(_) => "boolean", Self::Bool(_) => "boolean",
Self::Int(_) => "integer", Self::Int(_) => "integer",

View File

@ -52,16 +52,25 @@ visit! {
match node { match node {
Node::Text(_) => {} Node::Text(_) => {}
Node::Space => {} Node::Space => {}
Node::Strong(_) => {}
Node::Linebreak(_) => {} Node::Linebreak(_) => {}
Node::Parbreak(_) => {} Node::Parbreak(_) => {}
Node::Strong(_) => {}
Node::Emph(_) => {} Node::Emph(_) => {}
Node::Heading(heading) => v.visit_tree(&heading.contents),
Node::Raw(_) => {} Node::Raw(_) => {}
Node::Expr(expr) => v.visit_expr(expr), Node::Heading(n) => v.visit_heading(n),
Node::List(n) => v.visit_list(n),
Node::Expr(n) => v.visit_expr(n),
} }
} }
fn visit_heading(v, node: &HeadingNode) {
v.visit_tree(&node.body);
}
fn visit_list(v, node: &ListNode) {
v.visit_tree(&node.body);
}
fn visit_expr(v, node: &Expr) { fn visit_expr(v, node: &Expr) {
match node { match node {
Expr::None(_) => {} Expr::None(_) => {}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.7 KiB

After

Width:  |  Height:  |  Size: 3.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.3 KiB

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

After

Width:  |  Height:  |  Size: 7.3 KiB

BIN
tests/ref/markup/lists.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.0 KiB

After

Width:  |  Height:  |  Size: 7.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.3 KiB

After

Width:  |  Height:  |  Size: 2.3 KiB

View File

@ -13,7 +13,7 @@
// Expression as a file name. // Expression as a file name.
#let chap2 = include "import" + "able/chap" + "2.typ" #let chap2 = include "import" + "able/chap" + "2.typ"
_ -- Intermission -- _ -- _Intermission_ --
#chap2 #chap2
{ {

View File

@ -1,20 +1,10 @@
// Test basic markup. // Test basic markup.
--- ---
#let linebreak() = [
// Inside the old line break definition is still active.
#square(length: 3pt, fill: black) \
]
A \ B \ C A \ B \ C
--- ---
// Paragraph breaks don't exist! Paragraph breaks
#let parbreak() = [ ]
No more
paragraph breaks
for you! for you!

View File

@ -9,15 +9,3 @@ Partly em_phas_ized.
// Scoped to body. // Scoped to body.
#rect[_Scoped] to body. #rect[_Scoped] to body.
---
#let emph = strong
_Strong_
#let emph() = "Hi"
_, _!
#let emph = "hi"
// Error: 1-2 expected function, found string
_

View File

@ -4,38 +4,21 @@
// Different number of hashtags. // Different number of hashtags.
// Valid levels. // Valid levels.
# 1 # Level 1
### 2 ### Level 2
###### 6 ###### Level 6
// Too many hashtags. // Too many hashtags.
// Warning: 1-8 should not exceed depth 6 // Warning: 1-8 should not exceed depth 6
####### 7 ####### Level 7
---
// Heading continuation over linebreak.
// Code blocks continue heading.
# A{
"B"
}
// Function call continues heading.
# #rect[
A
] B
// Without some kind of block, headings end at a line break.
# A
B
--- ---
// Heading vs. no heading. // Heading vs. no heading.
// Parsed as headings if at start of the context. // Parsed as headings if at start of the context.
/**/ # Ok /**/ # Level 1
{[## Ok]} {[## Level 2]}
#rect[### Ok] #rect[### Level 3]
// Not at the start of the context. // Not at the start of the context.
No # heading No # heading
@ -44,9 +27,16 @@ No # heading
\# No heading \# No heading
--- ---
// Make small, but double heading. // While indented at least as much as the start, the heading continues.
#let heading(contents) = heading(contents + contents, level: 6)
// The new heading's argument list doesn't contain `level`. # This
// Error: 1-11 unexpected argument is
### Twice. indented.
# This
is not.
// Code blocks continue heading.
# A {
"B"
}

View File

@ -0,0 +1,45 @@
// Test lists.
---
_Shopping list_
- Apples
- Potatoes
- Juice
---
- First level.
- Second level.
There are multiple paragraphs.
- Third level.
Still the same bullet point.
- Still level 2.
- At the top.
---
- Works
- Also with four spaces
- Or two tabs
---
- Top-level indent
- is fine.
---
Tightly
- surrounded
- by two
paragraphs.
---
- A
- B
- C
- D
---
- Level 1
- Level [
2 through template
]

View File

@ -45,14 +45,6 @@ def hi():
print("Hi!") print("Hi!")
``` ```
---
// Make everything block-level.
#let raw(text) = raw(text, block: true)
// The new raw's argument list doesn't contain `block`.
// Error: 6-10 unexpected argument
This `is` block-level.
--- ---
// Unterminated. // Unterminated.
// Error: 2:1 expected backtick(s) // Error: 2:1 expected backtick(s)

View File

@ -9,15 +9,3 @@ Partly str*ength*ened.
// Scoped to body. // Scoped to body.
#rect[*Scoped] to body. #rect[*Scoped] to body.
---
#let strong = emph
*Emph*
#let strong() = "Bye"
*, *!
#let strong = 123
// Error: 1-2 expected function, found integer
*