Minor refactorings

- Reorder parser methods and use `Pos` everywhere
- Remove tab special handling for columns and adapt heading/list/enum indent handling
- Don't panic when a file has an empty path
This commit is contained in:
Laurenz 2021-08-10 11:28:12 +02:00
parent 3932bb2cb9
commit 8207c31aec
9 changed files with 292 additions and 294 deletions

View File

@ -23,7 +23,7 @@ pub use value::*;
use std::collections::HashMap;
use std::io;
use std::mem;
use std::path::{Path, PathBuf};
use std::path::PathBuf;
use std::rc::Rc;
use crate::diag::{Error, StrResult, Tracepoint, TypResult};
@ -107,7 +107,7 @@ impl<'a> EvalContext<'a> {
/// Process an import of a module relative to the current location.
pub fn import(&mut self, path: &str, span: Span) -> TypResult<SourceId> {
// Load the source file.
let full = self.relpath(path);
let full = self.make_path(path);
let id = self.sources.load(&full).map_err(|err| {
Error::boxed(self.source, span, match err.kind() {
io::ErrorKind::NotFound => "file not found".into(),
@ -157,15 +157,14 @@ impl<'a> EvalContext<'a> {
Ok(id)
}
/// Complete a path that is relative to the current file to be relative to
/// the environment's current directory.
pub fn relpath(&self, path: impl AsRef<Path>) -> PathBuf {
self.sources
.get(self.source)
.path()
.parent()
.expect("is a file")
.join(path)
/// Complete a user-entered path (relative to the source file) to be
/// relative to the compilation environment's root.
pub fn make_path(&self, path: &str) -> PathBuf {
if let Some(dir) = self.sources.get(self.source).path().parent() {
dir.join(path)
} else {
path.into()
}
}
}

View File

@ -15,7 +15,7 @@ pub fn image(ctx: &mut EvalContext, args: &mut FuncArgs) -> TypResult<Value> {
let width = args.named("width")?;
let height = args.named("height")?;
let full = ctx.relpath(path.v.as_str());
let full = ctx.make_path(&path.v);
let id = ctx.images.load(&full).map_err(|err| {
Error::boxed(args.source, path.span, match err.kind() {
io::ErrorKind::NotFound => "file not found".into(),

View File

@ -34,18 +34,16 @@ fn tree(p: &mut Parser) -> SyntaxTree {
tree_while(p, true, &mut |_| true)
}
/// Parse a syntax tree that stays right of the column at the start of the next
/// non-whitespace token.
fn tree_indented(p: &mut Parser) -> SyntaxTree {
/// Parse a syntax tree that stays right of the given column.
fn tree_indented(p: &mut Parser, column: usize) -> SyntaxTree {
p.eat_while(|t| match t {
Token::Space(n) => n == 0,
Token::LineComment(_) | Token::BlockComment(_) => true,
_ => false,
});
let column = p.column(p.next_start());
tree_while(p, false, &mut |p| match p.peek() {
Some(Token::Space(n)) if n >= 1 => p.column(p.next_end()) >= column,
Some(Token::Space(n)) if n >= 1 => p.column(p.next_end()) > column,
_ => true,
})
}
@ -68,7 +66,7 @@ where
let start = p.next_start();
let tree = tree_while(p, true, f);
call.args.items.push(CallArg::Pos(Expr::Template(TemplateExpr {
span: p.span(start),
span: p.span_from(start),
tree: Rc::new(tree),
})));
}
@ -189,7 +187,8 @@ fn raw(p: &mut Parser, token: RawToken) -> SyntaxNode {
/// Parse a heading.
fn heading(p: &mut Parser) -> SyntaxNode {
let start = p.next_start();
p.assert(Token::Eq);
let column = p.column(start);
p.eat_assert(Token::Eq);
// Count depth.
let mut level: usize = 1;
@ -198,28 +197,29 @@ fn heading(p: &mut Parser) -> SyntaxNode {
}
if level > 6 {
return SyntaxNode::Text(p.eaten_from(start).into());
return SyntaxNode::Text(p.get(start .. p.prev_end()).into());
}
let body = tree_indented(p);
SyntaxNode::Heading(HeadingNode { span: p.span(start), level, body })
let body = tree_indented(p, column);
SyntaxNode::Heading(HeadingNode { span: p.span_from(start), level, body })
}
/// Parse a single list item.
fn list_item(p: &mut Parser) -> SyntaxNode {
let start = p.next_start();
p.assert(Token::Hyph);
let body = tree_indented(p);
SyntaxNode::List(ListItem { span: p.span(start), body })
let column = p.column(start);
p.eat_assert(Token::Hyph);
let body = tree_indented(p, column);
SyntaxNode::List(ListItem { span: p.span_from(start), body })
}
/// Parse a single enum item.
fn enum_item(p: &mut Parser, number: Option<usize>) -> SyntaxNode {
let start = p.next_start();
p.assert(Token::Numbering(number));
let body = tree_indented(p);
SyntaxNode::Enum(EnumItem { span: p.span(start), number, body })
let column = p.column(start);
p.eat_assert(Token::Numbering(number));
let body = tree_indented(p, column);
SyntaxNode::Enum(EnumItem { span: p.span_from(start), number, body })
}
/// Parse an expression.
@ -240,7 +240,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option<Expr> {
Some(op) => {
let prec = op.precedence();
let expr = Box::new(expr_with(p, atomic, prec)?);
Expr::Unary(UnaryExpr { span: p.span(start), op, expr })
Expr::Unary(UnaryExpr { span: p.span_from(start), op, expr })
}
None => primary(p, atomic)?,
};
@ -529,7 +529,7 @@ fn block(p: &mut Parser, scoping: bool) -> Expr {
fn call(p: &mut Parser, callee: Expr) -> Option<Expr> {
let mut wide = p.eat_if(Token::Excl);
if wide && p.outer_mode() == TokenMode::Code {
let span = p.span(callee.span().start);
let span = p.span_from(callee.span().start);
p.error(span, "wide calls are only allowed directly in templates");
wide = false;
}
@ -552,7 +552,7 @@ fn call(p: &mut Parser, callee: Expr) -> Option<Expr> {
}
Some(Expr::Call(CallExpr {
span: p.span(callee.span().start),
span: p.span_from(callee.span().start),
callee: Box::new(callee),
wide,
args,
@ -571,7 +571,7 @@ fn args(p: &mut Parser) -> CallArgs {
fn with_expr(p: &mut Parser, callee: Expr) -> Option<Expr> {
if p.peek() == Some(Token::LeftParen) {
Some(Expr::With(WithExpr {
span: p.span(callee.span().start),
span: p.span_from(callee.span().start),
callee: Box::new(callee),
args: args(p),
}))
@ -584,7 +584,7 @@ fn with_expr(p: &mut Parser, callee: Expr) -> Option<Expr> {
/// Parse a let expression.
fn let_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start();
p.assert(Token::Let);
p.eat_assert(Token::Let);
let mut let_expr = None;
if let Some(binding) = ident(p) {
@ -622,7 +622,7 @@ fn let_expr(p: &mut Parser) -> Option<Expr> {
}
let_expr = Some(Expr::Let(LetExpr {
span: p.span(start),
span: p.span_from(start),
binding,
init: init.map(Box::new),
}));
@ -634,7 +634,7 @@ fn let_expr(p: &mut Parser) -> Option<Expr> {
/// Parse an if expression.
fn if_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start();
p.assert(Token::If);
p.eat_assert(Token::If);
let mut if_expr = None;
if let Some(condition) = expr(p) {
@ -650,7 +650,7 @@ fn if_expr(p: &mut Parser) -> Option<Expr> {
}
if_expr = Some(Expr::If(IfExpr {
span: p.span(start),
span: p.span_from(start),
condition: Box::new(condition),
if_body: Box::new(if_body),
else_body: else_body.map(Box::new),
@ -664,13 +664,13 @@ fn if_expr(p: &mut Parser) -> Option<Expr> {
/// Parse a while expression.
fn while_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start();
p.assert(Token::While);
p.eat_assert(Token::While);
let mut while_expr = None;
if let Some(condition) = expr(p) {
if let Some(body) = body(p) {
while_expr = Some(Expr::While(WhileExpr {
span: p.span(start),
span: p.span_from(start),
condition: Box::new(condition),
body: Box::new(body),
}));
@ -683,15 +683,15 @@ fn while_expr(p: &mut Parser) -> Option<Expr> {
/// Parse a for expression.
fn for_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start();
p.assert(Token::For);
p.eat_assert(Token::For);
let mut for_expr = None;
if let Some(pattern) = for_pattern(p) {
if p.expect(Token::In) {
if p.eat_expect(Token::In) {
if let Some(iter) = expr(p) {
if let Some(body) = body(p) {
for_expr = Some(Expr::For(ForExpr {
span: p.span(start),
span: p.span_from(start),
pattern,
iter: Box::new(iter),
body: Box::new(body),
@ -718,7 +718,7 @@ fn for_pattern(p: &mut Parser) -> Option<ForPattern> {
/// Parse an import expression.
fn import_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start();
p.assert(Token::Import);
p.eat_assert(Token::Import);
let imports = if p.eat_if(Token::Star) {
// This is the wildcard scenario.
@ -735,10 +735,10 @@ fn import_expr(p: &mut Parser) -> Option<Expr> {
};
let mut import_expr = None;
if p.expect(Token::From) {
if p.eat_expect(Token::From) {
if let Some(path) = expr(p) {
import_expr = Some(Expr::Import(ImportExpr {
span: p.span(start),
span: p.span_from(start),
imports,
path: Box::new(path),
}));
@ -751,11 +751,11 @@ fn import_expr(p: &mut Parser) -> Option<Expr> {
/// Parse an include expression.
fn include_expr(p: &mut Parser) -> Option<Expr> {
let start = p.next_start();
p.assert(Token::Include);
p.eat_assert(Token::Include);
expr(p).map(|path| {
Expr::Include(IncludeExpr {
span: p.span(start),
span: p.span_from(start),
path: Box::new(path),
})
})

View File

@ -1,5 +1,4 @@
use std::fmt::{self, Debug, Formatter};
use std::ops::Range;
use super::{TokenMode, Tokens};
use crate::diag::Error;
@ -22,9 +21,9 @@ pub struct Parser<'s> {
/// (Same as `next` except if we are at the end of group, then `None`).
peeked: Option<Token<'s>>,
/// The end position of the last (non-whitespace if in code mode) token.
prev_end: usize,
prev_end: Pos,
/// The start position of the peeked token.
next_start: usize,
next_start: Pos,
}
/// A logical group of tokens, e.g. `[...]`.
@ -32,7 +31,7 @@ pub struct Parser<'s> {
struct GroupEntry {
/// The start position of the group. Used by `Parser::end_group` to return
/// the group's full span.
pub start: usize,
pub start: Pos,
/// The kind of group this is. This decides which tokens will end the group.
/// For example, a [`Group::Paren`] will be ended by
/// [`Token::RightParen`].
@ -70,8 +69,8 @@ impl<'s> Parser<'s> {
groups: vec![],
next,
peeked: next,
prev_end: 0,
next_start: 0,
prev_end: Pos::ZERO,
next_start: Pos::ZERO,
}
}
@ -80,37 +79,144 @@ impl<'s> Parser<'s> {
self.errors
}
/// Add an error with location and message.
pub fn error(&mut self, span: impl Into<Span>, message: impl Into<String>) {
self.errors.push(Error::new(self.source.id(), span, message));
/// Whether the end of the source string or group is reached.
pub fn eof(&self) -> bool {
self.peek().is_none()
}
/// Eat the next token and add an error that it is not the expected `thing`.
pub fn expected(&mut self, what: &str) {
let before = self.next_start();
if let Some(found) = self.eat() {
let after = self.prev_end();
self.error(
before .. after,
format!("expected {}, found {}", what, found.name()),
);
/// Consume the next token.
pub fn eat(&mut self) -> Option<Token<'s>> {
let token = self.peek()?;
self.bump();
Some(token)
}
/// Eat the next token and return its source range.
pub fn eat_span(&mut self) -> Span {
let start = self.next_start();
self.eat();
Span::new(start, self.prev_end())
}
/// Consume the next token if it is the given one.
pub fn eat_if(&mut self, t: Token) -> bool {
if self.peek() == Some(t) {
self.bump();
true
} else {
self.expected_at(self.next_start(), what);
false
}
}
/// Add an error that `what` was expected at the given position.
pub fn expected_at(&mut self, pos: impl Into<Pos>, what: &str) {
self.error(pos.into(), format!("expected {}", what));
/// Consume the next token if the closure maps it to a `Some`-variant.
pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
where
F: FnOnce(Token<'s>) -> Option<T>,
{
let token = self.peek()?;
let mapped = f(token);
if mapped.is_some() {
self.bump();
}
mapped
}
/// Eat the next token and add an error that it is unexpected.
pub fn unexpected(&mut self) {
let before = self.next_start();
if let Some(found) = self.eat() {
let after = self.prev_end();
self.error(before .. after, format!("unexpected {}", found.name()));
/// Consume the next token if it is the given one and produce an error if
/// not.
pub fn eat_expect(&mut self, t: Token) -> bool {
let eaten = self.eat_if(t);
if !eaten {
self.expected_at(self.prev_end(), t.name());
}
eaten
}
/// Consume the next token, debug-asserting that it is the given one.
pub fn eat_assert(&mut self, t: Token) {
let next = self.eat();
debug_assert_eq!(next, Some(t));
}
/// Consume tokens while the condition is true.
pub fn eat_while<F>(&mut self, mut f: F)
where
F: FnMut(Token<'s>) -> bool,
{
while self.peek().map_or(false, |t| f(t)) {
self.eat();
}
}
/// Peek at the next token without consuming it.
pub fn peek(&self) -> Option<Token<'s>> {
self.peeked
}
/// Peek at the next token if it follows immediately after the last one
/// without any whitespace in between.
pub fn peek_direct(&self) -> Option<Token<'s>> {
if self.next_start() == self.prev_end() {
self.peeked
} else {
None
}
}
/// Peek at the span of the next token.
///
/// Has length zero if `peek()` returns `None`.
pub fn peek_span(&self) -> Span {
Span::new(self.next_start(), self.next_end())
}
/// Peek at the source of the next token.
pub fn peek_src(&self) -> &'s str {
self.get(self.peek_span())
}
/// Checks whether the next token fulfills a condition.
///
/// Returns `false` if there is no next token.
pub fn check<F>(&self, f: F) -> bool
where
F: FnOnce(Token<'s>) -> bool,
{
self.peek().map_or(false, f)
}
/// The byte position at which the last token ended.
///
/// Refers to the end of the last _non-whitespace_ token in code mode.
pub fn prev_end(&self) -> Pos {
self.prev_end.into()
}
/// The byte position at which the next token starts.
pub fn next_start(&self) -> Pos {
self.next_start.into()
}
/// The byte position at which the next token will end.
///
/// Is the same as [`next_start()`][Self::next_start] if `peek()` returns
/// `None`.
pub fn next_end(&self) -> Pos {
self.tokens.index().into()
}
/// The span from `start` to [`self.prev_end()`](Self::prev_end).
pub fn span_from(&self, start: Pos) -> Span {
Span::new(start, self.prev_end())
}
/// Determine the column index for the given byte position.
pub fn column(&self, pos: Pos) -> usize {
self.source.pos_to_column(pos).unwrap()
}
/// Slice out part of the source string.
pub fn get(&self, span: impl Into<Span>) -> &'s str {
self.tokens.scanner().get(span.into().to_range())
}
/// Continue parsing in a group.
@ -131,9 +237,9 @@ impl<'s> Parser<'s> {
self.repeek();
match kind {
Group::Paren => self.assert(Token::LeftParen),
Group::Bracket => self.assert(Token::LeftBracket),
Group::Brace => self.assert(Token::LeftBrace),
Group::Paren => self.eat_assert(Token::LeftParen),
Group::Bracket => self.eat_assert(Token::LeftBracket),
Group::Brace => self.eat_assert(Token::LeftBrace),
Group::Stmt => {}
Group::Expr => {}
Group::Imports => {}
@ -171,7 +277,8 @@ impl<'s> Parser<'s> {
// Rescan the peeked token if the mode changed.
if rescan {
self.jump(self.prev_end());
self.tokens.jump(self.prev_end().to_usize());
self.bump();
}
Span::new(group.start, self.prev_end())
@ -188,163 +295,43 @@ impl<'s> Parser<'s> {
self.groups.last().map_or(TokenMode::Markup, |group| group.outer_mode)
}
/// Whether the end of the source string or group is reached.
pub fn eof(&self) -> bool {
self.peek().is_none()
/// Add an error with location and message.
pub fn error(&mut self, span: impl Into<Span>, message: impl Into<String>) {
self.errors.push(Error::new(self.source.id(), span, message));
}
/// Peek at the next token without consuming it.
pub fn peek(&self) -> Option<Token<'s>> {
self.peeked
}
/// Peek at the next token if it follows immediately after the last one
/// without any whitespace in between.
pub fn peek_direct(&self) -> Option<Token<'s>> {
if self.next_start() == self.prev_end() {
self.peeked
/// Eat the next token and add an error that it is not the expected `thing`.
pub fn expected(&mut self, what: &str) {
let before = self.next_start();
if let Some(found) = self.eat() {
let after = self.prev_end();
self.error(
before .. after,
format!("expected {}, found {}", what, found.name()),
);
} else {
None
self.expected_at(self.next_start(), what);
}
}
/// Peek at the span of the next token.
///
/// Has length zero if `peek()` returns `None`.
pub fn peek_span(&self) -> Span {
self.peek_range().into()
/// Add an error that `what` was expected at the given position.
pub fn expected_at(&mut self, pos: Pos, what: &str) {
self.error(pos, format!("expected {}", what));
}
/// Peek at the source of the next token.
pub fn peek_src(&self) -> &'s str {
self.tokens.scanner().get(self.peek_range())
}
/// Peek at the source range (start and end index) of the next token.
pub fn peek_range(&self) -> Range<usize> {
self.next_start() .. self.next_end()
}
/// Checks whether the next token fulfills a condition.
///
/// Returns `false` if there is no next token.
pub fn check<F>(&self, f: F) -> bool
where
F: FnOnce(Token<'s>) -> bool,
{
self.peek().map_or(false, f)
}
/// Consume the next token.
pub fn eat(&mut self) -> Option<Token<'s>> {
let token = self.peek()?;
self.bump();
Some(token)
}
/// Consume the next token if it is the given one.
pub fn eat_if(&mut self, t: Token) -> bool {
if self.peek() == Some(t) {
self.bump();
true
} else {
false
/// Eat the next token and add an error that it is unexpected.
pub fn unexpected(&mut self) {
let before = self.next_start();
if let Some(found) = self.eat() {
let after = self.prev_end();
self.error(before .. after, format!("unexpected {}", found.name()));
}
}
/// Consume tokens while the condition is true.
pub fn eat_while<F>(&mut self, mut f: F)
where
F: FnMut(Token<'s>) -> bool,
{
while self.peek().map_or(false, |t| f(t)) {
self.eat();
}
}
/// Consume the next token if the closure maps it to a `Some`-variant.
pub fn eat_map<T, F>(&mut self, f: F) -> Option<T>
where
F: FnOnce(Token<'s>) -> Option<T>,
{
let token = self.peek()?;
let mapped = f(token);
if mapped.is_some() {
self.bump();
}
mapped
}
/// Eat the next token and return its source range.
pub fn eat_span(&mut self) -> Span {
let start = self.next_start();
self.eat();
Span::new(start, self.prev_end())
}
/// Consume the next token if it is the given one and produce an error if
/// not.
pub fn expect(&mut self, t: Token) -> bool {
let eaten = self.eat_if(t);
if !eaten {
self.expected_at(self.prev_end(), t.name());
}
eaten
}
/// Consume the next token, debug-asserting that it is the given one.
pub fn assert(&mut self, t: Token) {
let next = self.eat();
debug_assert_eq!(next, Some(t));
}
/// The index at which the last token ended.
///
/// Refers to the end of the last _non-whitespace_ token in code mode.
pub fn prev_end(&self) -> usize {
self.prev_end
}
/// The index at which the next token starts.
pub fn next_start(&self) -> usize {
self.next_start
}
/// The index at which the next token will end.
///
/// Is the same as [`next_start()`][Self::next_start] if `peek()` returns
/// `None`.
pub fn next_end(&self) -> usize {
self.tokens.index()
}
/// Determine the column for the given index in the source.
pub fn column(&self, index: usize) -> usize {
self.source.pos_to_column(index.into()).unwrap()
}
/// The span from `start` to [`self.prev_end()`](Self::prev_end).
pub fn span(&self, start: impl Into<Pos>) -> Span {
Span::new(start, self.prev_end())
}
/// Return the source string from `start` to the end of the previous token.
pub fn eaten_from(&self, start: usize) -> &'s str {
self.tokens.scanner().get(start .. self.prev_end())
}
/// Jump to an index in the string.
///
/// You need to know the correct column.
fn jump(&mut self, index: usize) {
self.tokens.jump(index);
self.bump();
}
/// Move to the next token.
fn bump(&mut self) {
self.prev_end = self.tokens.index();
self.next_start = self.tokens.index();
self.prev_end = self.tokens.index().into();
self.next_start = self.tokens.index().into();
self.next = self.tokens.next();
if self.tokens.mode() == TokenMode::Code {
@ -355,7 +342,7 @@ impl<'s> Parser<'s> {
Some(Token::BlockComment(_)) => true,
_ => false,
} {
self.next_start = self.tokens.index();
self.next_start = self.tokens.index().into();
self.next = self.tokens.next();
}
}
@ -399,7 +386,7 @@ impl<'s> Parser<'s> {
impl Debug for Parser<'_> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let mut s = self.tokens.scanner();
s.jump(self.next_start());
s.jump(self.next_start().to_usize());
write!(f, "Parser({}|{})", s.eaten(), s.rest())
}
}

View File

@ -63,7 +63,7 @@ impl SourceStore {
io::Error::new(io::ErrorKind::InvalidData, "file is not valid utf-8")
})?;
Ok(self.insert(Some(hash), path, src))
Ok(self.insert(path, src, Some(hash)))
}
/// Directly provide a source file.
@ -82,16 +82,16 @@ impl SourceStore {
id
} else {
// Not loaded yet.
self.insert(Some(hash), path, src)
self.insert(path, src, Some(hash))
}
} else {
// Not known to the loader.
self.insert(None, path, src)
self.insert(path, src, None)
}
}
/// Insert a new source file.
fn insert(&mut self, hash: Option<FileHash>, path: &Path, src: String) -> SourceId {
fn insert(&mut self, path: &Path, src: String, hash: Option<FileHash>) -> SourceId {
let id = SourceId(self.sources.len() as u32);
if let Some(hash) = hash {
self.files.insert(hash, id);
@ -112,6 +112,9 @@ impl SourceStore {
}
/// A single source file.
///
/// _Note_: All line and column indices start at zero, just like byte indices.
/// Only for user-facing display, you should add 1 to them.
pub struct SourceFile {
id: SourceId,
path: PathBuf,
@ -120,7 +123,8 @@ pub struct SourceFile {
}
impl SourceFile {
fn new(id: SourceId, path: &Path, src: String) -> Self {
/// Create a new source file.
pub fn new(id: SourceId, path: &Path, src: String) -> Self {
let mut line_starts = vec![Pos::ZERO];
let mut s = Scanner::new(&src);
@ -151,7 +155,7 @@ impl SourceFile {
self.id
}
/// The path to the source file.
/// The normalized path to the source file.
pub fn path(&self) -> &Path {
&self.path
}
@ -161,6 +165,11 @@ impl SourceFile {
&self.src
}
/// Slice out the part of the source code enclosed by the span.
pub fn get(&self, span: impl Into<Span>) -> Option<&str> {
self.src.get(span.into().to_range())
}
/// Get the length of the file in bytes.
pub fn len_bytes(&self) -> usize {
self.src.len()
@ -171,11 +180,6 @@ impl SourceFile {
self.line_starts.len()
}
/// Slice out the part of the source code enclosed by the span.
pub fn get(&self, span: Span) -> Option<&str> {
self.src.get(span.to_range())
}
/// Return the index of the line that contains the given byte position.
pub fn pos_to_line(&self, byte_pos: Pos) -> Option<usize> {
(byte_pos.to_usize() <= self.src.len()).then(|| {
@ -186,14 +190,15 @@ impl SourceFile {
})
}
/// Return the column of the byte index.
/// Return the index of the column at the byte index.
///
/// Tabs are counted as occupying two columns.
/// The column is defined as the number of characters in the line before the
/// byte position.
pub fn pos_to_column(&self, byte_pos: Pos) -> Option<usize> {
let line = self.pos_to_line(byte_pos)?;
let start = self.line_to_pos(line)?;
let head = self.get(Span::new(start, byte_pos))?;
Some(head.chars().map(width).sum())
Some(head.chars().count())
}
/// Return the byte position at which the given line starts.
@ -210,32 +215,19 @@ impl SourceFile {
/// Return the byte position of the given (line, column) pair.
///
/// Tabs are counted as occupying two columns.
/// The column defines the number of characters to go beyond the start of
/// the line.
pub fn line_column_to_pos(&self, line_idx: usize, column_idx: usize) -> Option<Pos> {
let span = self.line_to_span(line_idx)?;
let line = self.get(span)?;
if column_idx == 0 {
return Some(span.start);
let mut chars = line.chars();
for _ in 0 .. column_idx {
chars.next();
}
let mut column = 0;
for (i, c) in line.char_indices() {
column += width(c);
if column >= column_idx {
return Some(span.start + Pos::from(i + c.len_utf8()));
}
}
None
Some(span.start + (line.len() - chars.as_str().len()))
}
}
/// The display width of the character.
fn width(c: char) -> usize {
if c == '\t' { 2 } else { 1 }
}
impl AsRef<str> for SourceFile {
fn as_ref(&self) -> &str {
&self.src
@ -256,14 +248,34 @@ impl<'a> Files<'a> for SourceStore {
Ok(self.get(id))
}
fn line_index(
fn line_index(&'a self, id: SourceId, given: usize) -> Result<usize, files::Error> {
let source = self.get(id);
source
.pos_to_line(given.into())
.ok_or_else(|| files::Error::IndexTooLarge { given, max: source.len_bytes() })
}
fn line_range(
&'a self,
id: SourceId,
byte_index: usize,
given: usize,
) -> Result<std::ops::Range<usize>, files::Error> {
let source = self.get(id);
source
.line_to_span(given)
.map(Span::to_range)
.ok_or_else(|| files::Error::LineTooLarge { given, max: source.len_lines() })
}
fn column_number(
&'a self,
id: SourceId,
_: usize,
given: usize,
) -> Result<usize, files::Error> {
let source = self.get(id);
source.pos_to_line(byte_index.into()).ok_or_else(|| {
let (given, max) = (byte_index, source.len_bytes());
source.pos_to_column(given.into()).ok_or_else(|| {
let max = source.len_bytes();
if given <= max {
files::Error::InvalidCharBoundary { given }
} else {
@ -271,28 +283,13 @@ impl<'a> Files<'a> for SourceStore {
}
})
}
fn line_range(
&'a self,
id: SourceId,
line_index: usize,
) -> Result<std::ops::Range<usize>, files::Error> {
let source = self.get(id);
match source.line_to_span(line_index) {
Some(span) => Ok(span.to_range()),
None => Err(files::Error::LineTooLarge {
given: line_index,
max: source.len_lines(),
}),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl";
const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
#[test]
fn test_source_file_new() {
@ -313,6 +310,17 @@ mod tests {
assert_eq!(source.pos_to_line(Pos(22)), None);
}
#[test]
fn test_source_file_pos_to_column() {
let source = SourceFile::detached(TEST);
assert_eq!(source.pos_to_column(Pos(0)), Some(0));
assert_eq!(source.pos_to_column(Pos(2)), Some(1));
assert_eq!(source.pos_to_column(Pos(6)), Some(5));
assert_eq!(source.pos_to_column(Pos(7)), Some(0));
assert_eq!(source.pos_to_column(Pos(8)), Some(1));
assert_eq!(source.pos_to_column(Pos(12)), Some(2));
}
#[test]
fn test_source_file_roundtrip() {
#[track_caller]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View File

@ -8,4 +8,4 @@
---
1. First.
2. Second.
1. Back to first.
1. Back to first.

View File

@ -33,7 +33,7 @@ No = heading
indented.
= This
is not.
is not.
// Code blocks continue heading.
= A {

View File

@ -6,6 +6,12 @@ _Shopping list_
- Potatoes
- Juice
---
Tightly
- surrounded
- by two
paragraphs.
---
- First level.
- Second level.
@ -18,28 +24,26 @@ _Shopping list_
- At the top.
---
- Works
- Also with four spaces
- Or two tabs
- Level 1
- Level [
2 through template
]
---
- Top-level indent
- is fine.
---
Tightly
- surrounded
- by two
paragraphs.
---
- A
- B
- C
- D
- D
---
- Level 1
- Level [
2 through template
]
// This works because tabs are used consistently.
- A with 1 tab
- B with 2 tabs
// This doesn't work because of mixed tabs and spaces.
- A with 2 spaces
- B with 2 tabs