Minor refactorings

- Reorder parser methods and use `Pos` everywhere
- Remove tab special handling for columns and adapt heading/list/enum indent handling
- Don't panic when a file has an empty path
This commit is contained in:
Laurenz 2021-08-10 11:28:12 +02:00
parent 3932bb2cb9
commit 8207c31aec
9 changed files with 292 additions and 294 deletions

View File

@ -23,7 +23,7 @@ pub use value::*;
use std::collections::HashMap; use std::collections::HashMap;
use std::io; use std::io;
use std::mem; use std::mem;
use std::path::{Path, PathBuf}; use std::path::PathBuf;
use std::rc::Rc; use std::rc::Rc;
use crate::diag::{Error, StrResult, Tracepoint, TypResult}; use crate::diag::{Error, StrResult, Tracepoint, TypResult};
@ -107,7 +107,7 @@ impl<'a> EvalContext<'a> {
/// Process an import of a module relative to the current location. /// Process an import of a module relative to the current location.
pub fn import(&mut self, path: &str, span: Span) -> TypResult<SourceId> { pub fn import(&mut self, path: &str, span: Span) -> TypResult<SourceId> {
// Load the source file. // Load the source file.
let full = self.relpath(path); let full = self.make_path(path);
let id = self.sources.load(&full).map_err(|err| { let id = self.sources.load(&full).map_err(|err| {
Error::boxed(self.source, span, match err.kind() { Error::boxed(self.source, span, match err.kind() {
io::ErrorKind::NotFound => "file not found".into(), io::ErrorKind::NotFound => "file not found".into(),
@ -157,15 +157,14 @@ impl<'a> EvalContext<'a> {
Ok(id) Ok(id)
} }
/// Complete a path that is relative to the current file to be relative to /// Complete a user-entered path (relative to the source file) to be
/// the environment's current directory. /// relative to the compilation environment's root.
pub fn relpath(&self, path: impl AsRef<Path>) -> PathBuf { pub fn make_path(&self, path: &str) -> PathBuf {
self.sources if let Some(dir) = self.sources.get(self.source).path().parent() {
.get(self.source) dir.join(path)
.path() } else {
.parent() path.into()
.expect("is a file") }
.join(path)
} }
} }

View File

@ -15,7 +15,7 @@ pub fn image(ctx: &mut EvalContext, args: &mut FuncArgs) -> TypResult<Value> {
let width = args.named("width")?; let width = args.named("width")?;
let height = args.named("height")?; let height = args.named("height")?;
let full = ctx.relpath(path.v.as_str()); let full = ctx.make_path(&path.v);
let id = ctx.images.load(&full).map_err(|err| { let id = ctx.images.load(&full).map_err(|err| {
Error::boxed(args.source, path.span, match err.kind() { Error::boxed(args.source, path.span, match err.kind() {
io::ErrorKind::NotFound => "file not found".into(), io::ErrorKind::NotFound => "file not found".into(),

View File

@ -34,18 +34,16 @@ fn tree(p: &mut Parser) -> SyntaxTree {
tree_while(p, true, &mut |_| true) tree_while(p, true, &mut |_| true)
} }
/// Parse a syntax tree that stays right of the column at the start of the next /// Parse a syntax tree that stays right of the given column.
/// non-whitespace token. fn tree_indented(p: &mut Parser, column: usize) -> SyntaxTree {
fn tree_indented(p: &mut Parser) -> SyntaxTree {
p.eat_while(|t| match t { p.eat_while(|t| match t {
Token::Space(n) => n == 0, Token::Space(n) => n == 0,
Token::LineComment(_) | Token::BlockComment(_) => true, Token::LineComment(_) | Token::BlockComment(_) => true,
_ => false, _ => false,
}); });
let column = p.column(p.next_start());
tree_while(p, false, &mut |p| match p.peek() { tree_while(p, false, &mut |p| match p.peek() {
Some(Token::Space(n)) if n >= 1 => p.column(p.next_end()) >= column, Some(Token::Space(n)) if n >= 1 => p.column(p.next_end()) > column,
_ => true, _ => true,
}) })
} }
@ -68,7 +66,7 @@ where
let start = p.next_start(); let start = p.next_start();
let tree = tree_while(p, true, f); let tree = tree_while(p, true, f);
call.args.items.push(CallArg::Pos(Expr::Template(TemplateExpr { call.args.items.push(CallArg::Pos(Expr::Template(TemplateExpr {
span: p.span(start), span: p.span_from(start),
tree: Rc::new(tree), tree: Rc::new(tree),
}))); })));
} }
@ -189,7 +187,8 @@ fn raw(p: &mut Parser, token: RawToken) -> SyntaxNode {
/// Parse a heading. /// Parse a heading.
fn heading(p: &mut Parser) -> SyntaxNode { fn heading(p: &mut Parser) -> SyntaxNode {
let start = p.next_start(); let start = p.next_start();
p.assert(Token::Eq); let column = p.column(start);
p.eat_assert(Token::Eq);
// Count depth. // Count depth.
let mut level: usize = 1; let mut level: usize = 1;
@ -198,28 +197,29 @@ fn heading(p: &mut Parser) -> SyntaxNode {
} }
if level > 6 { if level > 6 {
return SyntaxNode::Text(p.eaten_from(start).into()); return SyntaxNode::Text(p.get(start .. p.prev_end()).into());
} }
let body = tree_indented(p); let body = tree_indented(p, column);
SyntaxNode::Heading(HeadingNode { span: p.span_from(start), level, body })
SyntaxNode::Heading(HeadingNode { span: p.span(start), level, body })
} }
/// Parse a single list item. /// Parse a single list item.
fn list_item(p: &mut Parser) -> SyntaxNode { fn list_item(p: &mut Parser) -> SyntaxNode {
let start = p.next_start(); let start = p.next_start();
p.assert(Token::Hyph); let column = p.column(start);
let body = tree_indented(p); p.eat_assert(Token::Hyph);
SyntaxNode::List(ListItem { span: p.span(start), body }) let body = tree_indented(p, column);
SyntaxNode::List(ListItem { span: p.span_from(start), body })
} }
/// Parse a single enum item. /// Parse a single enum item.
fn enum_item(p: &mut Parser, number: Option<usize>) -> SyntaxNode { fn enum_item(p: &mut Parser, number: Option<usize>) -> SyntaxNode {
let start = p.next_start(); let start = p.next_start();
p.assert(Token::Numbering(number)); let column = p.column(start);
let body = tree_indented(p); p.eat_assert(Token::Numbering(number));
SyntaxNode::Enum(EnumItem { span: p.span(start), number, body }) let body = tree_indented(p, column);
SyntaxNode::Enum(EnumItem { span: p.span_from(start), number, body })
} }
/// Parse an expression. /// Parse an expression.
@ -240,7 +240,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option<Expr> {
Some(op) => { Some(op) => {
let prec = op.precedence(); let prec = op.precedence();
let expr = Box::new(expr_with(p, atomic, prec)?); let expr = Box::new(expr_with(p, atomic, prec)?);
Expr::Unary(UnaryExpr { span: p.span(start), op, expr }) Expr::Unary(UnaryExpr { span: p.span_from(start), op, expr })
} }
None => primary(p, atomic)?, None => primary(p, atomic)?,
}; };
@ -529,7 +529,7 @@ fn block(p: &mut Parser, scoping: bool) -> Expr {
fn call(p: &mut Parser, callee: Expr) -> Option<Expr> { fn call(p: &mut Parser, callee: Expr) -> Option<Expr> {
let mut wide = p.eat_if(Token::Excl); let mut wide = p.eat_if(Token::Excl);
if wide && p.outer_mode() == TokenMode::Code { if wide && p.outer_mode() == TokenMode::Code {
let span = p.span(callee.span().start); let span = p.span_from(callee.span().start);
p.error(span, "wide calls are only allowed directly in templates"); p.error(span, "wide calls are only allowed directly in templates");
wide = false; wide = false;
} }
@ -552,7 +552,7 @@ fn call(p: &mut Parser, callee: Expr) -> Option<Expr> {
} }
Some(Expr::Call(CallExpr { Some(Expr::Call(CallExpr {
span: p.span(callee.span().start), span: p.span_from(callee.span().start),
callee: Box::new(callee), callee: Box::new(callee),
wide, wide,
args, args,
@ -571,7 +571,7 @@ fn args(p: &mut Parser) -> CallArgs {
fn with_expr(p: &mut Parser, callee: Expr) -> Option<Expr> { fn with_expr(p: &mut Parser, callee: Expr) -> Option<Expr> {
if p.peek() == Some(Token::LeftParen) { if p.peek() == Some(Token::LeftParen) {
Some(Expr::With(WithExpr { Some(Expr::With(WithExpr {
span: p.span(callee.span().start), span: p.span_from(callee.span().start),
callee: Box::new(callee), callee: Box::new(callee),
args: args(p), args: args(p),
})) }))
@ -584,7 +584,7 @@ fn with_expr(p: &mut Parser, callee: Expr) -> Option<Expr> {
/// Parse a let expression. /// Parse a let expression.
fn let_expr(p: &mut Parser) -> Option<Expr> { fn let_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start(); let start = p.next_start();
p.assert(Token::Let); p.eat_assert(Token::Let);
let mut let_expr = None; let mut let_expr = None;
if let Some(binding) = ident(p) { if let Some(binding) = ident(p) {
@ -622,7 +622,7 @@ fn let_expr(p: &mut Parser) -> Option<Expr> {
} }
let_expr = Some(Expr::Let(LetExpr { let_expr = Some(Expr::Let(LetExpr {
span: p.span(start), span: p.span_from(start),
binding, binding,
init: init.map(Box::new), init: init.map(Box::new),
})); }));
@ -634,7 +634,7 @@ fn let_expr(p: &mut Parser) -> Option<Expr> {
/// Parse an if expression. /// Parse an if expression.
fn if_expr(p: &mut Parser) -> Option<Expr> { fn if_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start(); let start = p.next_start();
p.assert(Token::If); p.eat_assert(Token::If);
let mut if_expr = None; let mut if_expr = None;
if let Some(condition) = expr(p) { if let Some(condition) = expr(p) {
@ -650,7 +650,7 @@ fn if_expr(p: &mut Parser) -> Option<Expr> {
} }
if_expr = Some(Expr::If(IfExpr { if_expr = Some(Expr::If(IfExpr {
span: p.span(start), span: p.span_from(start),
condition: Box::new(condition), condition: Box::new(condition),
if_body: Box::new(if_body), if_body: Box::new(if_body),
else_body: else_body.map(Box::new), else_body: else_body.map(Box::new),
@ -664,13 +664,13 @@ fn if_expr(p: &mut Parser) -> Option<Expr> {
/// Parse a while expression. /// Parse a while expression.
fn while_expr(p: &mut Parser) -> Option<Expr> { fn while_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start(); let start = p.next_start();
p.assert(Token::While); p.eat_assert(Token::While);
let mut while_expr = None; let mut while_expr = None;
if let Some(condition) = expr(p) { if let Some(condition) = expr(p) {
if let Some(body) = body(p) { if let Some(body) = body(p) {
while_expr = Some(Expr::While(WhileExpr { while_expr = Some(Expr::While(WhileExpr {
span: p.span(start), span: p.span_from(start),
condition: Box::new(condition), condition: Box::new(condition),
body: Box::new(body), body: Box::new(body),
})); }));
@ -683,15 +683,15 @@ fn while_expr(p: &mut Parser) -> Option<Expr> {
/// Parse a for expression. /// Parse a for expression.
fn for_expr(p: &mut Parser) -> Option<Expr> { fn for_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start(); let start = p.next_start();
p.assert(Token::For); p.eat_assert(Token::For);
let mut for_expr = None; let mut for_expr = None;
if let Some(pattern) = for_pattern(p) { if let Some(pattern) = for_pattern(p) {
if p.expect(Token::In) { if p.eat_expect(Token::In) {
if let Some(iter) = expr(p) { if let Some(iter) = expr(p) {
if let Some(body) = body(p) { if let Some(body) = body(p) {
for_expr = Some(Expr::For(ForExpr { for_expr = Some(Expr::For(ForExpr {
span: p.span(start), span: p.span_from(start),
pattern, pattern,
iter: Box::new(iter), iter: Box::new(iter),
body: Box::new(body), body: Box::new(body),
@ -718,7 +718,7 @@ fn for_pattern(p: &mut Parser) -> Option<ForPattern> {
/// Parse an import expression. /// Parse an import expression.
fn import_expr(p: &mut Parser) -> Option<Expr> { fn import_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start(); let start = p.next_start();
p.assert(Token::Import); p.eat_assert(Token::Import);
let imports = if p.eat_if(Token::Star) { let imports = if p.eat_if(Token::Star) {
// This is the wildcard scenario. // This is the wildcard scenario.
@ -735,10 +735,10 @@ fn import_expr(p: &mut Parser) -> Option<Expr> {
}; };
let mut import_expr = None; let mut import_expr = None;
if p.expect(Token::From) { if p.eat_expect(Token::From) {
if let Some(path) = expr(p) { if let Some(path) = expr(p) {
import_expr = Some(Expr::Import(ImportExpr { import_expr = Some(Expr::Import(ImportExpr {
span: p.span(start), span: p.span_from(start),
imports, imports,
path: Box::new(path), path: Box::new(path),
})); }));
@ -751,11 +751,11 @@ fn import_expr(p: &mut Parser) -> Option<Expr> {
/// Parse an include expression. /// Parse an include expression.
fn include_expr(p: &mut Parser) -> Option<Expr> { fn include_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start(); let start = p.next_start();
p.assert(Token::Include); p.eat_assert(Token::Include);
expr(p).map(|path| { expr(p).map(|path| {
Expr::Include(IncludeExpr { Expr::Include(IncludeExpr {
span: p.span(start), span: p.span_from(start),
path: Box::new(path), path: Box::new(path),
}) })
}) })

View File

@ -1,5 +1,4 @@
use std::fmt::{self, Debug, Formatter}; use std::fmt::{self, Debug, Formatter};
use std::ops::Range;
use super::{TokenMode, Tokens}; use super::{TokenMode, Tokens};
use crate::diag::Error; use crate::diag::Error;
@ -22,9 +21,9 @@ pub struct Parser<'s> {
/// (Same as `next` except if we are at the end of group, then `None`). /// (Same as `next` except if we are at the end of group, then `None`).
peeked: Option<Token<'s>>, peeked: Option<Token<'s>>,
/// The end position of the last (non-whitespace if in code mode) token. /// The end position of the last (non-whitespace if in code mode) token.
prev_end: usize, prev_end: Pos,
/// The start position of the peeked token. /// The start position of the peeked token.
next_start: usize, next_start: Pos,
} }
/// A logical group of tokens, e.g. `[...]`. /// A logical group of tokens, e.g. `[...]`.
@ -32,7 +31,7 @@ pub struct Parser<'s> {
struct GroupEntry { struct GroupEntry {
/// The start position of the group. Used by `Parser::end_group` to return /// The start position of the group. Used by `Parser::end_group` to return
/// the group's full span. /// the group's full span.
pub start: usize, pub start: Pos,
/// The kind of group this is. This decides which tokens will end the group. /// The kind of group this is. This decides which tokens will end the group.
/// For example, a [`Group::Paren`] will be ended by /// For example, a [`Group::Paren`] will be ended by
/// [`Token::RightParen`]. /// [`Token::RightParen`].
@ -70,8 +69,8 @@ impl<'s> Parser<'s> {
groups: vec![], groups: vec![],
next, next,
peeked: next, peeked: next,
prev_end: 0, prev_end: Pos::ZERO,
next_start: 0, next_start: Pos::ZERO,
} }
} }
@ -80,37 +79,144 @@ impl<'s> Parser<'s> {
self.errors self.errors
} }
/// Add an error with location and message. /// Whether the end of the source string or group is reached.
pub fn error(&mut self, span: impl Into<Span>, message: impl Into<String>) { pub fn eof(&self) -> bool {
self.errors.push(Error::new(self.source.id(), span, message)); self.peek().is_none()
} }
/// Eat the next token and add an error that it is not the expected `what`. /// Consume the next token.
pub fn expected(&mut self, what: &str) { pub fn eat(&mut self) -> Option<Token<'s>> {
let before = self.next_start(); let token = self.peek()?;
if let Some(found) = self.eat() { self.bump();
let after = self.prev_end(); Some(token)
self.error( }
before .. after,
format!("expected {}, found {}", what, found.name()), /// Eat the next token and return its source range.
); pub fn eat_span(&mut self) -> Span {
let start = self.next_start();
self.eat();
Span::new(start, self.prev_end())
}
/// Consume the next token if it is the given one.
pub fn eat_if(&mut self, t: Token) -> bool {
if self.peek() == Some(t) {
self.bump();
true
} else { } else {
self.expected_at(self.next_start(), what); false
} }
} }
/// Add an error that `what` was expected at the given position. /// Consume the next token if the closure maps it to a `Some`-variant.
pub fn expected_at(&mut self, pos: impl Into<Pos>, what: &str) { pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
self.error(pos.into(), format!("expected {}", what)); where
F: FnOnce(Token<'s>) -> Option<T>,
{
let token = self.peek()?;
let mapped = f(token);
if mapped.is_some() {
self.bump();
}
mapped
} }
/// Eat the next token and add an error that it is unexpected. /// Consume the next token if it is the given one and produce an error if
pub fn unexpected(&mut self) { /// not.
let before = self.next_start(); pub fn eat_expect(&mut self, t: Token) -> bool {
if let Some(found) = self.eat() { let eaten = self.eat_if(t);
let after = self.prev_end(); if !eaten {
self.error(before .. after, format!("unexpected {}", found.name())); self.expected_at(self.prev_end(), t.name());
} }
eaten
}
/// Consume the next token, debug-asserting that it is the given one.
pub fn eat_assert(&mut self, t: Token) {
let next = self.eat();
debug_assert_eq!(next, Some(t));
}
/// Consume tokens while the condition is true.
pub fn eat_while<F>(&mut self, mut f: F)
where
F: FnMut(Token<'s>) -> bool,
{
while self.peek().map_or(false, |t| f(t)) {
self.eat();
}
}
/// Peek at the next token without consuming it.
pub fn peek(&self) -> Option<Token<'s>> {
self.peeked
}
/// Peek at the next token if it follows immediately after the last one
/// without any whitespace in between.
pub fn peek_direct(&self) -> Option<Token<'s>> {
if self.next_start() == self.prev_end() {
self.peeked
} else {
None
}
}
/// Peek at the span of the next token.
///
/// Has length zero if `peek()` returns `None`.
pub fn peek_span(&self) -> Span {
Span::new(self.next_start(), self.next_end())
}
/// Peek at the source of the next token.
pub fn peek_src(&self) -> &'s str {
self.get(self.peek_span())
}
/// Checks whether the next token fulfills a condition.
///
/// Returns `false` if there is no next token.
pub fn check<F>(&self, f: F) -> bool
where
F: FnOnce(Token<'s>) -> bool,
{
self.peek().map_or(false, f)
}
/// The byte position at which the last token ended.
///
/// Refers to the end of the last _non-whitespace_ token in code mode.
pub fn prev_end(&self) -> Pos {
self.prev_end.into()
}
/// The byte position at which the next token starts.
pub fn next_start(&self) -> Pos {
self.next_start.into()
}
/// The byte position at which the next token will end.
///
/// Is the same as [`next_start()`][Self::next_start] if `peek()` returns
/// `None`.
pub fn next_end(&self) -> Pos {
self.tokens.index().into()
}
/// The span from `start` to [`self.prev_end()`](Self::prev_end).
pub fn span_from(&self, start: Pos) -> Span {
Span::new(start, self.prev_end())
}
/// Determine the column index for the given byte position.
pub fn column(&self, pos: Pos) -> usize {
self.source.pos_to_column(pos).unwrap()
}
/// Slice out part of the source string.
pub fn get(&self, span: impl Into<Span>) -> &'s str {
self.tokens.scanner().get(span.into().to_range())
} }
/// Continue parsing in a group. /// Continue parsing in a group.
@ -131,9 +237,9 @@ impl<'s> Parser<'s> {
self.repeek(); self.repeek();
match kind { match kind {
Group::Paren => self.assert(Token::LeftParen), Group::Paren => self.eat_assert(Token::LeftParen),
Group::Bracket => self.assert(Token::LeftBracket), Group::Bracket => self.eat_assert(Token::LeftBracket),
Group::Brace => self.assert(Token::LeftBrace), Group::Brace => self.eat_assert(Token::LeftBrace),
Group::Stmt => {} Group::Stmt => {}
Group::Expr => {} Group::Expr => {}
Group::Imports => {} Group::Imports => {}
@ -171,7 +277,8 @@ impl<'s> Parser<'s> {
// Rescan the peeked token if the mode changed. // Rescan the peeked token if the mode changed.
if rescan { if rescan {
self.jump(self.prev_end()); self.tokens.jump(self.prev_end().to_usize());
self.bump();
} }
Span::new(group.start, self.prev_end()) Span::new(group.start, self.prev_end())
@ -188,163 +295,43 @@ impl<'s> Parser<'s> {
self.groups.last().map_or(TokenMode::Markup, |group| group.outer_mode) self.groups.last().map_or(TokenMode::Markup, |group| group.outer_mode)
} }
/// Whether the end of the source string or group is reached. /// Add an error with location and message.
pub fn eof(&self) -> bool { pub fn error(&mut self, span: impl Into<Span>, message: impl Into<String>) {
self.peek().is_none() self.errors.push(Error::new(self.source.id(), span, message));
} }
/// Peek at the next token without consuming it. /// Eat the next token and add an error that it is not the expected `what`.
pub fn peek(&self) -> Option<Token<'s>> { pub fn expected(&mut self, what: &str) {
self.peeked let before = self.next_start();
} if let Some(found) = self.eat() {
let after = self.prev_end();
/// Peek at the next token if it follows immediately after the last one self.error(
/// without any whitespace in between. before .. after,
pub fn peek_direct(&self) -> Option<Token<'s>> { format!("expected {}, found {}", what, found.name()),
if self.next_start() == self.prev_end() { );
self.peeked
} else { } else {
None self.expected_at(self.next_start(), what);
} }
} }
/// Peek at the span of the next token. /// Add an error that `what` was expected at the given position.
/// pub fn expected_at(&mut self, pos: Pos, what: &str) {
/// Has length zero if `peek()` returns `None`. self.error(pos, format!("expected {}", what));
pub fn peek_span(&self) -> Span {
self.peek_range().into()
} }
/// Peek at the source of the next token. /// Eat the next token and add an error that it is unexpected.
pub fn peek_src(&self) -> &'s str { pub fn unexpected(&mut self) {
self.tokens.scanner().get(self.peek_range()) let before = self.next_start();
if let Some(found) = self.eat() {
let after = self.prev_end();
self.error(before .. after, format!("unexpected {}", found.name()));
} }
/// Peek at the source range (start and end index) of the next token.
pub fn peek_range(&self) -> Range<usize> {
self.next_start() .. self.next_end()
}
/// Checks whether the next token fulfills a condition.
///
/// Returns `false` if there is no next token.
pub fn check<F>(&self, f: F) -> bool
where
F: FnOnce(Token<'s>) -> bool,
{
self.peek().map_or(false, f)
}
/// Consume the next token.
pub fn eat(&mut self) -> Option<Token<'s>> {
let token = self.peek()?;
self.bump();
Some(token)
}
/// Consume the next token if it is the given one.
pub fn eat_if(&mut self, t: Token) -> bool {
if self.peek() == Some(t) {
self.bump();
true
} else {
false
}
}
/// Consume tokens while the condition is true.
pub fn eat_while<F>(&mut self, mut f: F)
where
F: FnMut(Token<'s>) -> bool,
{
while self.peek().map_or(false, |t| f(t)) {
self.eat();
}
}
/// Consume the next token if the closure maps it to a `Some`-variant.
pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
where
F: FnOnce(Token<'s>) -> Option<T>,
{
let token = self.peek()?;
let mapped = f(token);
if mapped.is_some() {
self.bump();
}
mapped
}
/// Eat the next token and return its source range.
pub fn eat_span(&mut self) -> Span {
let start = self.next_start();
self.eat();
Span::new(start, self.prev_end())
}
/// Consume the next token if it is the given one and produce an error if
/// not.
pub fn expect(&mut self, t: Token) -> bool {
let eaten = self.eat_if(t);
if !eaten {
self.expected_at(self.prev_end(), t.name());
}
eaten
}
/// Consume the next token, debug-asserting that it is the given one.
pub fn assert(&mut self, t: Token) {
let next = self.eat();
debug_assert_eq!(next, Some(t));
}
/// The index at which the last token ended.
///
/// Refers to the end of the last _non-whitespace_ token in code mode.
pub fn prev_end(&self) -> usize {
self.prev_end
}
/// The index at which the next token starts.
pub fn next_start(&self) -> usize {
self.next_start
}
/// The index at which the next token will end.
///
/// Is the same as [`next_start()`][Self::next_start] if `peek()` returns
/// `None`.
pub fn next_end(&self) -> usize {
self.tokens.index()
}
/// Determine the column for the given index in the source.
pub fn column(&self, index: usize) -> usize {
self.source.pos_to_column(index.into()).unwrap()
}
/// The span from `start` to [`self.prev_end()`](Self::prev_end).
pub fn span(&self, start: impl Into<Pos>) -> Span {
Span::new(start, self.prev_end())
}
/// Return the source string from `start` to the end of the previous token.
pub fn eaten_from(&self, start: usize) -> &'s str {
self.tokens.scanner().get(start .. self.prev_end())
}
/// Jump to an index in the string.
///
/// You need to know the correct column.
fn jump(&mut self, index: usize) {
self.tokens.jump(index);
self.bump();
} }
/// Move to the next token. /// Move to the next token.
fn bump(&mut self) { fn bump(&mut self) {
self.prev_end = self.tokens.index(); self.prev_end = self.tokens.index().into();
self.next_start = self.tokens.index(); self.next_start = self.tokens.index().into();
self.next = self.tokens.next(); self.next = self.tokens.next();
if self.tokens.mode() == TokenMode::Code { if self.tokens.mode() == TokenMode::Code {
@ -355,7 +342,7 @@ impl<'s> Parser<'s> {
Some(Token::BlockComment(_)) => true, Some(Token::BlockComment(_)) => true,
_ => false, _ => false,
} { } {
self.next_start = self.tokens.index(); self.next_start = self.tokens.index().into();
self.next = self.tokens.next(); self.next = self.tokens.next();
} }
} }
@ -399,7 +386,7 @@ impl<'s> Parser<'s> {
impl Debug for Parser<'_> { impl Debug for Parser<'_> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let mut s = self.tokens.scanner(); let mut s = self.tokens.scanner();
s.jump(self.next_start()); s.jump(self.next_start().to_usize());
write!(f, "Parser({}|{})", s.eaten(), s.rest()) write!(f, "Parser({}|{})", s.eaten(), s.rest())
} }
} }

View File

@ -63,7 +63,7 @@ impl SourceStore {
io::Error::new(io::ErrorKind::InvalidData, "file is not valid utf-8") io::Error::new(io::ErrorKind::InvalidData, "file is not valid utf-8")
})?; })?;
Ok(self.insert(Some(hash), path, src)) Ok(self.insert(path, src, Some(hash)))
} }
/// Directly provide a source file. /// Directly provide a source file.
@ -82,16 +82,16 @@ impl SourceStore {
id id
} else { } else {
// Not loaded yet. // Not loaded yet.
self.insert(Some(hash), path, src) self.insert(path, src, Some(hash))
} }
} else { } else {
// Not known to the loader. // Not known to the loader.
self.insert(None, path, src) self.insert(path, src, None)
} }
} }
/// Insert a new source file. /// Insert a new source file.
fn insert(&mut self, hash: Option<FileHash>, path: &Path, src: String) -> SourceId { fn insert(&mut self, path: &Path, src: String, hash: Option<FileHash>) -> SourceId {
let id = SourceId(self.sources.len() as u32); let id = SourceId(self.sources.len() as u32);
if let Some(hash) = hash { if let Some(hash) = hash {
self.files.insert(hash, id); self.files.insert(hash, id);
@ -112,6 +112,9 @@ impl SourceStore {
} }
/// A single source file. /// A single source file.
///
/// _Note_: All line and column indices start at zero, just like byte indices.
/// Only for user-facing display, you should add 1 to them.
pub struct SourceFile { pub struct SourceFile {
id: SourceId, id: SourceId,
path: PathBuf, path: PathBuf,
@ -120,7 +123,8 @@ pub struct SourceFile {
} }
impl SourceFile { impl SourceFile {
fn new(id: SourceId, path: &Path, src: String) -> Self { /// Create a new source file.
pub fn new(id: SourceId, path: &Path, src: String) -> Self {
let mut line_starts = vec![Pos::ZERO]; let mut line_starts = vec![Pos::ZERO];
let mut s = Scanner::new(&src); let mut s = Scanner::new(&src);
@ -151,7 +155,7 @@ impl SourceFile {
self.id self.id
} }
/// The path to the source file. /// The normalized path to the source file.
pub fn path(&self) -> &Path { pub fn path(&self) -> &Path {
&self.path &self.path
} }
@ -161,6 +165,11 @@ impl SourceFile {
&self.src &self.src
} }
/// Slice out the part of the source code enclosed by the span.
pub fn get(&self, span: impl Into<Span>) -> Option<&str> {
self.src.get(span.into().to_range())
}
/// Get the length of the file in bytes. /// Get the length of the file in bytes.
pub fn len_bytes(&self) -> usize { pub fn len_bytes(&self) -> usize {
self.src.len() self.src.len()
@ -171,11 +180,6 @@ impl SourceFile {
self.line_starts.len() self.line_starts.len()
} }
/// Slice out the part of the source code enclosed by the span.
pub fn get(&self, span: Span) -> Option<&str> {
self.src.get(span.to_range())
}
/// Return the index of the line that contains the given byte position. /// Return the index of the line that contains the given byte position.
pub fn pos_to_line(&self, byte_pos: Pos) -> Option<usize> { pub fn pos_to_line(&self, byte_pos: Pos) -> Option<usize> {
(byte_pos.to_usize() <= self.src.len()).then(|| { (byte_pos.to_usize() <= self.src.len()).then(|| {
@ -186,14 +190,15 @@ impl SourceFile {
}) })
} }
/// Return the column of the byte index. /// Return the index of the column at the byte index.
/// ///
/// Tabs are counted as occupying two columns. /// The column is defined as the number of characters in the line before the
/// byte position.
pub fn pos_to_column(&self, byte_pos: Pos) -> Option<usize> { pub fn pos_to_column(&self, byte_pos: Pos) -> Option<usize> {
let line = self.pos_to_line(byte_pos)?; let line = self.pos_to_line(byte_pos)?;
let start = self.line_to_pos(line)?; let start = self.line_to_pos(line)?;
let head = self.get(Span::new(start, byte_pos))?; let head = self.get(Span::new(start, byte_pos))?;
Some(head.chars().map(width).sum()) Some(head.chars().count())
} }
/// Return the byte position at which the given line starts. /// Return the byte position at which the given line starts.
@ -210,30 +215,17 @@ impl SourceFile {
/// Return the byte position of the given (line, column) pair. /// Return the byte position of the given (line, column) pair.
/// ///
/// Tabs are counted as occupying two columns. /// The column defines the number of characters to go beyond the start of
/// the line.
pub fn line_column_to_pos(&self, line_idx: usize, column_idx: usize) -> Option<Pos> { pub fn line_column_to_pos(&self, line_idx: usize, column_idx: usize) -> Option<Pos> {
let span = self.line_to_span(line_idx)?; let span = self.line_to_span(line_idx)?;
let line = self.get(span)?; let line = self.get(span)?;
let mut chars = line.chars();
if column_idx == 0 { for _ in 0 .. column_idx {
return Some(span.start); chars.next();
} }
Some(span.start + (line.len() - chars.as_str().len()))
let mut column = 0;
for (i, c) in line.char_indices() {
column += width(c);
if column >= column_idx {
return Some(span.start + Pos::from(i + c.len_utf8()));
} }
}
None
}
}
/// The display width of the character.
fn width(c: char) -> usize {
if c == '\t' { 2 } else { 1 }
} }
impl AsRef<str> for SourceFile { impl AsRef<str> for SourceFile {
@ -256,14 +248,34 @@ impl<'a> Files<'a> for SourceStore {
Ok(self.get(id)) Ok(self.get(id))
} }
fn line_index( fn line_index(&'a self, id: SourceId, given: usize) -> Result<usize, files::Error> {
let source = self.get(id);
source
.pos_to_line(given.into())
.ok_or_else(|| files::Error::IndexTooLarge { given, max: source.len_bytes() })
}
fn line_range(
&'a self, &'a self,
id: SourceId, id: SourceId,
byte_index: usize, given: usize,
) -> Result<std::ops::Range<usize>, files::Error> {
let source = self.get(id);
source
.line_to_span(given)
.map(Span::to_range)
.ok_or_else(|| files::Error::LineTooLarge { given, max: source.len_lines() })
}
fn column_number(
&'a self,
id: SourceId,
_: usize,
given: usize,
) -> Result<usize, files::Error> { ) -> Result<usize, files::Error> {
let source = self.get(id); let source = self.get(id);
source.pos_to_line(byte_index.into()).ok_or_else(|| { source.pos_to_column(given.into()).ok_or_else(|| {
let (given, max) = (byte_index, source.len_bytes()); let max = source.len_bytes();
if given <= max { if given <= max {
files::Error::InvalidCharBoundary { given } files::Error::InvalidCharBoundary { given }
} else { } else {
@ -271,28 +283,13 @@ impl<'a> Files<'a> for SourceStore {
} }
}) })
} }
fn line_range(
&'a self,
id: SourceId,
line_index: usize,
) -> Result<std::ops::Range<usize>, files::Error> {
let source = self.get(id);
match source.line_to_span(line_index) {
Some(span) => Ok(span.to_range()),
None => Err(files::Error::LineTooLarge {
given: line_index,
max: source.len_lines(),
}),
}
}
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl"; const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
#[test] #[test]
fn test_source_file_new() { fn test_source_file_new() {
@ -313,6 +310,17 @@ mod tests {
assert_eq!(source.pos_to_line(Pos(22)), None); assert_eq!(source.pos_to_line(Pos(22)), None);
} }
#[test]
fn test_source_file_pos_to_column() {
let source = SourceFile::detached(TEST);
assert_eq!(source.pos_to_column(Pos(0)), Some(0));
assert_eq!(source.pos_to_column(Pos(2)), Some(1));
assert_eq!(source.pos_to_column(Pos(6)), Some(5));
assert_eq!(source.pos_to_column(Pos(7)), Some(0));
assert_eq!(source.pos_to_column(Pos(8)), Some(1));
assert_eq!(source.pos_to_column(Pos(12)), Some(2));
}
#[test] #[test]
fn test_source_file_roundtrip() { fn test_source_file_roundtrip() {
#[track_caller] #[track_caller]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View File

@ -8,4 +8,4 @@
--- ---
1. First. 1. First.
2. Second. 2. Second.
1. Back to first. 1. Back to first.

View File

@ -33,7 +33,7 @@ No = heading
indented. indented.
= This = This
is not. is not.
// Code blocks continue heading. // Code blocks continue heading.
= A { = A {

View File

@ -6,6 +6,12 @@ _Shopping list_
- Potatoes - Potatoes
- Juice - Juice
---
Tightly
- surrounded
- by two
paragraphs.
--- ---
- First level. - First level.
- Second level. - Second level.
@ -18,28 +24,26 @@ _Shopping list_
- At the top. - At the top.
--- ---
- Works - Level 1
- Also with four spaces - Level [
- Or two tabs 2 through template
]
--- ---
- Top-level indent - Top-level indent
- is fine. - is fine.
---
Tightly
- surrounded
- by two
paragraphs.
--- ---
- A - A
- B - B
- C - C
- D - D
--- ---
- Level 1 // This works because tabs are used consistently.
- Level [ - A with 1 tab
2 through template - B with 2 tabs
]
// This doesn't work because of mixed tabs and spaces.
- A with 2 spaces
- B with 2 tabs