Pretty-printed diagnostics with traceback

Laurenz 2021-07-31 22:59:14 +02:00
parent fbd3d19113
commit 3c92bad9a7
25 changed files with 572 additions and 386 deletions

View File

@ -6,7 +6,7 @@ edition = "2018"
[features]
default = ["cli", "fs", "layout-cache"]
cli = ["anyhow", "fs", "same-file"]
cli = ["anyhow", "codespan-reporting", "fs", "same-file"]
fs = ["dirs", "memmap2", "same-file", "walkdir"]
layout-cache = []
@ -34,6 +34,7 @@ unicode-bidi = "0.3.5"
unicode-xid = "0.2"
xi-unicode = "0.3"
anyhow = { version = "1", optional = true }
codespan-reporting = { version = "0.11", optional = true }
dirs = { version = "3", optional = true }
memmap2 = { version = "0.3", optional = true }
same-file = { version = "1", optional = true }

View File

@ -4,12 +4,14 @@ use std::rc::Rc;
use criterion::{criterion_group, criterion_main, Criterion};
use typst::diag::TypResult;
use typst::eval::{eval, Module};
use typst::exec::exec;
use typst::export::pdf;
use typst::layout::{layout, Frame, LayoutTree};
use typst::loading::{FileId, FsLoader};
use typst::loading::FsLoader;
use typst::parse::parse;
use typst::source::SourceFile;
use typst::syntax::SyntaxTree;
use typst::Context;
@ -26,7 +28,8 @@ fn benchmarks(c: &mut Criterion) {
let name = path.file_stem().unwrap().to_string_lossy();
let file = loader.resolve(&path).unwrap();
let src = std::fs::read_to_string(&path).unwrap();
let case = Case::new(file, src, ctx.clone());
let source = SourceFile::new(file, src);
let case = Case::new(ctx.clone(), source);
macro_rules! bench {
($step:literal, setup = |$ctx:ident| $setup:expr, code = $code:expr $(,)?) => {
@ -79,8 +82,7 @@ fn benchmarks(c: &mut Criterion) {
/// A test case with prepared intermediate results.
struct Case {
ctx: Rc<RefCell<Context>>,
file: FileId,
src: String,
source: SourceFile,
ast: Rc<SyntaxTree>,
module: Module,
tree: LayoutTree,
@ -88,30 +90,26 @@ struct Case {
}
impl Case {
fn new(file: FileId, src: String, ctx: Rc<RefCell<Context>>) -> Self {
fn new(ctx: Rc<RefCell<Context>>, source: SourceFile) -> Self {
let mut borrowed = ctx.borrow_mut();
let ast = Rc::new(parse(file, &src).unwrap());
let module = eval(&mut borrowed, file, Rc::clone(&ast)).unwrap();
let ast = Rc::new(parse(&source).unwrap());
let module = eval(&mut borrowed, source.file(), Rc::clone(&ast)).unwrap();
let tree = exec(&mut borrowed, &module.template);
let frames = layout(&mut borrowed, &tree);
drop(borrowed);
Self {
ctx,
file,
src,
ast,
module,
tree,
frames,
}
Self { ctx, source, ast, module, tree, frames }
}
fn parse(&self) -> SyntaxTree {
parse(self.file, &self.src).unwrap()
parse(&self.source).unwrap()
}
fn eval(&self) -> Module {
eval(&mut self.ctx.borrow_mut(), self.file, Rc::clone(&self.ast)).unwrap()
fn eval(&self) -> TypResult<Module> {
eval(
&mut self.ctx.borrow_mut(),
self.source.file(),
Rc::clone(&self.ast),
)
}
fn exec(&self) -> LayoutTree {
@ -122,8 +120,8 @@ impl Case {
layout(&mut self.ctx.borrow_mut(), &self.tree)
}
fn typeset(&self) -> Vec<Rc<Frame>> {
self.ctx.borrow_mut().typeset(self.file, &self.src).unwrap()
fn typeset(&self) -> TypResult<Vec<Rc<Frame>>> {
self.ctx.borrow_mut().typeset(&self.source)
}
fn pdf(&self) -> Vec<u8> {

View File

@ -3,6 +3,7 @@ use iai::{black_box, main};
use typst::diag::TypResult;
use typst::loading::FileId;
use typst::parse::{parse, Scanner, TokenMode, Tokens};
use typst::source::SourceFile;
use typst::syntax::SyntaxTree;
const SRC: &str = include_str!("../../tests/typ/coma.typ");
@ -32,7 +33,8 @@ fn bench_tokenize() -> usize {
}
fn bench_parse() -> TypResult<SyntaxTree> {
parse(FileId::from_raw(0), black_box(SRC))
let source = SourceFile::new(FileId::from_raw(0), black_box(SRC).into());
parse(&source)
}
main!(bench_decode, bench_scan, bench_tokenize, bench_parse);

View File

@ -16,10 +16,21 @@ pub type StrResult<T> = Result<T, String>;
pub struct Error {
/// The file that contains the error.
pub file: FileId,
/// The erronous location in the source code.
/// The erroneous location in the source code.
pub span: Span,
/// A diagnostic message describing the problem.
pub message: String,
/// The trace of function calls leading to the error.
pub trace: Vec<(FileId, Span, Tracepoint)>,
}
/// A part of an error's [trace](Error::trace).
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub enum Tracepoint {
/// A function call.
Call(Option<String>),
/// A module import.
Import,
}
impl Error {
@ -28,6 +39,7 @@ impl Error {
Self {
file,
span: span.into(),
trace: vec![],
message: message.into(),
}
}
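
A minimal sketch of how an error's trace is filled in as it propagates (not part of the commit; the file id, spans, message, and function name are placeholders):

use typst::diag::{Error, Tracepoint};
use typst::loading::FileId;
use typst::syntax::Span;

fn example() -> Error {
    let file = FileId::from_raw(0);
    let mut error = Error::new(file, Span::ZERO, "unknown variable");
    // Each frame that forwards the error records the call or import it
    // crossed, innermost first.
    error.trace.push((file, Span::ZERO, Tracepoint::Call(Some("f".into()))));
    error.trace.push((file, Span::ZERO, Tracepoint::Import));
    error
}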

View File

@ -43,8 +43,8 @@ impl<'ast> Visit<'ast> for CapturesVisitor<'_> {
}
}
fn visit_binding(&mut self, id: &'ast Ident) {
self.internal.def_mut(id.as_str(), Value::None);
fn visit_binding(&mut self, ident: &'ast Ident) {
self.internal.def_mut(ident.as_str(), Value::None);
}
fn visit_enter(&mut self) {

View File

@ -25,11 +25,12 @@ use std::mem;
use std::path::Path;
use std::rc::Rc;
use crate::diag::{Error, StrResult, TypResult};
use crate::diag::{Error, StrResult, Tracepoint, TypResult};
use crate::geom::{Angle, Fractional, Length, Relative};
use crate::image::ImageCache;
use crate::loading::{FileId, Loader};
use crate::parse::parse;
use crate::source::{SourceFile, SourceMap};
use crate::syntax::visit::Visit;
use crate::syntax::*;
use crate::util::EcoString;
@ -67,6 +68,8 @@ pub trait Eval {
pub struct EvalContext<'a> {
/// The loader from which resources (files and images) are loaded.
pub loader: &'a dyn Loader,
/// The store for source files.
pub sources: &'a mut SourceMap,
/// The cache for decoded images.
pub images: &'a mut ImageCache,
/// The cache for loaded modules.
@ -86,6 +89,7 @@ impl<'a> EvalContext<'a> {
pub fn new(ctx: &'a mut Context, file: FileId) -> Self {
Self {
loader: ctx.loader.as_ref(),
sources: &mut ctx.sources,
images: &mut ctx.images,
modules: &mut ctx.modules,
scopes: Scopes::new(Some(&ctx.std)),
@ -106,49 +110,58 @@ impl<'a> EvalContext<'a> {
/// Process an import of a module relative to the current location.
pub fn import(&mut self, path: &str, span: Span) -> TypResult<FileId> {
let id = self.resolve(path, span)?;
let file = self.resolve(path, span)?;
// Prevent cyclic importing.
if self.file == id || self.route.contains(&id) {
if self.file == file || self.route.contains(&file) {
bail!(self.file, span, "cyclic import");
}
// Check whether the module was already loaded.
if self.modules.get(&id).is_some() {
return Ok(id);
if self.modules.get(&file).is_some() {
return Ok(file);
}
// Load the source file.
let buffer = self
.loader
.load_file(id)
.load_file(file)
.map_err(|_| Error::boxed(self.file, span, "failed to load file"))?;
// Decode UTF-8.
let string = std::str::from_utf8(&buffer)
let string = String::from_utf8(buffer)
.map_err(|_| Error::boxed(self.file, span, "file is not valid utf-8"))?;
// Parse the file.
let ast = parse(id, string)?;
let source = self.sources.insert(SourceFile::new(file, string));
let ast = parse(&source)?;
// Prepare the new context.
let new_scopes = Scopes::new(self.scopes.base);
let old_scopes = mem::replace(&mut self.scopes, new_scopes);
self.route.push(self.file);
self.file = id;
self.file = file;
// Evaluate the module.
let template = Rc::new(ast).eval(self)?;
let result = Rc::new(ast).eval(self);
// Restore the old context.
let new_scopes = mem::replace(&mut self.scopes, old_scopes);
self.file = self.route.pop().unwrap();
// Add a tracepoint to the errors.
let template = result.map_err(|mut errors| {
for error in errors.iter_mut() {
error.trace.push((self.file, span, Tracepoint::Import));
}
errors
})?;
// Save the evaluated module.
let module = Module { scope: new_scopes.top, template };
self.modules.insert(id, module);
self.modules.insert(file, module);
Ok(id)
Ok(file)
}
}
@ -399,7 +412,22 @@ impl Eval for CallExpr {
.map_err(Error::partial(ctx.file, self.callee.span()))?;
let mut args = self.args.eval(ctx)?;
let returned = callee(ctx, &mut args)?;
let returned = callee(ctx, &mut args).map_err(|mut errors| {
for error in errors.iter_mut() {
// Skip errors directly related to arguments.
if error.file == ctx.file && self.span.contains(error.span) {
continue;
}
error.trace.push((
ctx.file,
self.span,
Tracepoint::Call(callee.name().map(Into::into)),
));
}
errors
})?;
args.finish()?;
Ok(returned)
@ -445,6 +473,7 @@ impl Eval for ClosureExpr {
type Output = Value;
fn eval(&self, ctx: &mut EvalContext) -> TypResult<Self::Output> {
let file = ctx.file;
let params = Rc::clone(&self.params);
let body = Rc::clone(&self.body);
@ -459,7 +488,8 @@ impl Eval for ClosureExpr {
let func = Function::new(name, move |ctx, args| {
// Don't leak the scopes from the call site. Instead, we use the
// scope of captured variables we collected earlier.
let prev = mem::take(&mut ctx.scopes);
let prev_scopes = mem::take(&mut ctx.scopes);
let prev_file = mem::replace(&mut ctx.file, file);
ctx.scopes.top = captured.clone();
for param in params.iter() {
@ -468,7 +498,8 @@ impl Eval for ClosureExpr {
}
let result = body.eval(ctx);
ctx.scopes = prev;
ctx.scopes = prev_scopes;
ctx.file = prev_file;
result
});
@ -630,8 +661,8 @@ impl Eval for ImportExpr {
.cast::<EcoString>()
.map_err(Error::partial(ctx.file, self.path.span()))?;
let id = ctx.import(&path, self.path.span())?;
let module = &ctx.modules[&id];
let file = ctx.import(&path, self.path.span())?;
let module = &ctx.modules[&file];
match &self.imports {
Imports::Wildcard => {
@ -664,8 +695,8 @@ impl Eval for IncludeExpr {
.cast::<EcoString>()
.map_err(Error::partial(ctx.file, self.path.span()))?;
let id = ctx.import(&path, self.path.span())?;
let module = &ctx.modules[&id];
let file = ctx.import(&path, self.path.span())?;
let module = &ctx.modules[&file];
Ok(Value::Template(module.template.clone()))
}
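
The two map_err blocks above (imports and calls) share the same shape; a sketch of that pattern factored into a hypothetical helper, trace_errors, which is not part of the commit:

use typst::diag::{Tracepoint, TypResult};
use typst::loading::FileId;
use typst::syntax::Span;

/// Push the same tracepoint onto every error in a failed result.
fn trace_errors<T>(
    result: TypResult<T>,
    file: FileId,
    span: Span,
    point: Tracepoint,
) -> TypResult<T> {
    result.map_err(|mut errors| {
        for error in errors.iter_mut() {
            error.trace.push((file, span, point.clone()));
        }
        errors
    })
}

Note that the call-site variant in the diff additionally skips errors whose span lies within the call's own arguments; this helper omits that filtering.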

View File

@ -325,12 +325,12 @@ impl FaceId {
///
/// This should only be called with values returned by
/// [`into_raw`](Self::into_raw).
pub fn from_raw(v: u64) -> Self {
pub const fn from_raw(v: u64) -> Self {
Self(v)
}
/// Convert into the raw underlying value.
pub fn into_raw(self) -> u64 {
pub const fn into_raw(self) -> u64 {
self.0
}
}

View File

@ -112,12 +112,12 @@ impl ImageId {
///
/// This should only be called with values returned by
/// [`into_raw`](Self::into_raw).
pub fn from_raw(v: u64) -> Self {
pub const fn from_raw(v: u64) -> Self {
Self(v)
}
/// Convert into the raw underlying value.
pub fn into_raw(self) -> u64 {
pub const fn into_raw(self) -> u64 {
self.0
}
}

View File

@ -146,9 +146,8 @@ impl Text {
pub fn encode_glyphs_be(&self) -> Vec<u8> {
let mut bytes = Vec::with_capacity(2 * self.glyphs.len());
for glyph in &self.glyphs {
let id = glyph.id;
bytes.push((id >> 8) as u8);
bytes.push((id & 0xff) as u8);
bytes.push((glyph.id >> 8) as u8);
bytes.push((glyph.id & 0xff) as u8);
}
bytes
}

View File

@ -8,7 +8,7 @@ use super::*;
///
/// _This is only available when the `layout-cache` feature is enabled._
#[cfg(feature = "layout-cache")]
#[derive(Debug, Default, Clone)]
#[derive(Default, Clone)]
pub struct LayoutCache {
/// Maps from node hashes to the resulting frames and regions in which the
/// frames are valid. The right hand side of the hash map is a vector of
@ -70,9 +70,9 @@ impl LayoutCache {
) {
let entry = FramesEntry::new(frames, level);
match self.frames.entry(hash) {
Entry::Occupied(o) => o.into_mut().push(entry),
Entry::Vacant(v) => {
v.insert(vec![entry]);
Entry::Occupied(occupied) => occupied.into_mut().push(entry),
Entry::Vacant(vacant) => {
vacant.insert(vec![entry]);
}
}
}

View File

@ -44,6 +44,7 @@ pub mod loading;
pub mod paper;
pub mod parse;
pub mod pretty;
pub mod source;
pub mod syntax;
pub mod util;
@ -57,18 +58,21 @@ use crate::image::ImageCache;
use crate::layout::Frame;
#[cfg(feature = "layout-cache")]
use crate::layout::LayoutCache;
use crate::loading::{FileId, Loader};
use crate::loading::Loader;
use crate::source::{SourceFile, SourceMap};
/// The core context which holds the loader, configuration and cached artifacts.
pub struct Context {
/// The loader the context was created with.
pub loader: Rc<dyn Loader>,
/// Stores loaded source files.
pub sources: SourceMap,
/// Caches evaluated modules.
pub modules: ModuleCache,
/// Caches parsed font faces.
pub fonts: FontCache,
/// Caches decoded images.
pub images: ImageCache,
/// Caches evaluated modules.
pub modules: ModuleCache,
/// Caches layouting artifacts.
#[cfg(feature = "layout-cache")]
pub layouts: LayoutCache,
@ -97,15 +101,12 @@ impl Context {
/// Typeset a source file into a collection of layouted frames.
///
/// The `file` identifies the source file and is used to resolve relative
/// paths (for importing and image loading).
///
/// Returns either a vector of frames representing individual pages or
/// diagnostics in the form of a vector of error messages with file and span
/// information.
pub fn typeset(&mut self, file: FileId, src: &str) -> TypResult<Vec<Rc<Frame>>> {
let ast = parse::parse(file, src)?;
let module = eval::eval(self, file, Rc::new(ast))?;
pub fn typeset(&mut self, source: &SourceFile) -> TypResult<Vec<Rc<Frame>>> {
let ast = parse::parse(source)?;
let module = eval::eval(self, source.file(), Rc::new(ast))?;
let tree = exec::exec(self, &module.template);
let frames = layout::layout(self, &tree);
Ok(frames)
@ -140,6 +141,7 @@ impl ContextBuilder {
pub fn build(self, loader: Rc<dyn Loader>) -> Context {
Context {
loader: Rc::clone(&loader),
sources: SourceMap::new(),
fonts: FontCache::new(Rc::clone(&loader)),
images: ImageCache::new(loader),
modules: ModuleCache::new(),

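
A minimal usage sketch of the new typeset signature (not part of the commit); it assumes a Context and SourceFile were built elsewhere and only prints error messages instead of rendering full diagnostics:

use std::rc::Rc;
use typst::layout::Frame;
use typst::source::SourceFile;
use typst::Context;

fn compile(ctx: &mut Context, source: SourceFile) -> Vec<Rc<Frame>> {
    match ctx.typeset(&source) {
        Ok(frames) => frames,
        Err(errors) => {
            // Keep the file around so a reporter could later slice out the
            // source text behind each error span.
            ctx.sources.insert(source);
            for error in errors.iter() {
                eprintln!("error: {}", error.message);
            }
            vec![]
        }
    }
}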
View File

@ -1,4 +1,4 @@
use std::cell::RefCell;
use std::cell::{Ref, RefCell};
use std::collections::HashMap;
use std::fs::{self, File};
use std::io;
@ -92,6 +92,11 @@ impl FsLoader {
}
}
/// Return the path of a resolved file.
pub fn path(&self, id: FileId) -> Ref<Path> {
Ref::map(self.paths.borrow(), |paths| paths[&id].as_path())
}
#[cfg(all(unix, not(target_os = "macos")))]
fn search_system_impl(&mut self) {
self.search_path("/usr/share/fonts");

View File

@ -40,12 +40,12 @@ pub struct FileId(u64);
impl FileId {
/// Create a file id from a raw value.
pub fn from_raw(v: u64) -> Self {
pub const fn from_raw(v: u64) -> Self {
Self(v)
}
/// Convert into the raw underlying value.
pub fn into_raw(self) -> u64 {
pub const fn into_raw(self) -> u64 {
self.0
}
}

View File

@ -1,14 +1,33 @@
use std::fs;
use std::io::{self, Write};
use std::ops::Range;
use std::path::{Path, PathBuf};
use std::process;
use anyhow::{anyhow, bail, Context};
use codespan_reporting::diagnostic::{Diagnostic, Label};
use codespan_reporting::files::{self, Files};
use codespan_reporting::term::{self, termcolor, Config, Styles};
use same_file::is_same_file;
use termcolor::{ColorChoice, StandardStream, WriteColor};
fn main() -> anyhow::Result<()> {
use typst::diag::{Error, Tracepoint};
use typst::loading::{FileId, FsLoader};
use typst::source::{SourceFile, SourceMap};
fn main() {
if let Err(error) = try_main() {
print_error(error).unwrap();
process::exit(1);
}
}
/// The main compiler logic.
fn try_main() -> anyhow::Result<()> {
let args: Vec<_> = std::env::args().collect();
if args.len() < 2 || args.len() > 3 {
println!("usage: typst src.typ [out.pdf]");
return Ok(());
print_usage().unwrap();
process::exit(2);
}
// Determine source and destination path.
@ -36,12 +55,12 @@ fn main() -> anyhow::Result<()> {
// Resolve the file id of the source file and read the file.
let file = loader.resolve(src_path).context("source file not found")?;
let src = fs::read_to_string(&src_path)
.map_err(|_| anyhow!("failed to read source file"))?;
let string = fs::read_to_string(&src_path).context("failed to read source file")?;
let source = SourceFile::new(file, string);
// Typeset.
let mut ctx = typst::Context::new(loader);
match ctx.typeset(file, &src) {
let mut ctx = typst::Context::new(loader.clone());
match ctx.typeset(&source) {
// Export the PDF.
Ok(document) => {
let buffer = typst::export::pdf(&ctx, &document);
@ -50,20 +69,132 @@ fn main() -> anyhow::Result<()> {
// Print diagnostics.
Err(errors) => {
let map = typst::parse::LineMap::new(&src);
for error in errors.iter() {
let start = map.location(error.span.start).unwrap();
let end = map.location(error.span.end).unwrap();
println!(
"Error: {}:{}-{}: {}",
src_path.display(),
start,
end,
error.message,
);
}
ctx.sources.insert(source);
print_diagnostics(&loader, &ctx.sources, *errors)
.context("failed to print diagnostics")?;
}
}
Ok(())
}
/// Print a usage message.
fn print_usage() -> io::Result<()> {
let mut writer = StandardStream::stderr(ColorChoice::Always);
let styles = Styles::default();
writer.set_color(&styles.header_help)?;
write!(writer, "usage")?;
writer.set_color(&styles.header_message)?;
writeln!(writer, ": typst document.typ [output.pdf]")?;
writer.reset()
}
/// Print an error outside of a source file.
fn print_error(error: anyhow::Error) -> io::Result<()> {
let mut writer = StandardStream::stderr(ColorChoice::Always);
let styles = Styles::default();
for (i, cause) in error.chain().enumerate() {
writer.set_color(&styles.header_error)?;
write!(writer, "{}", if i == 0 { "error" } else { "cause" })?;
writer.set_color(&styles.header_message)?;
writeln!(writer, ": {}", cause)?;
}
writer.reset()
}
/// Print diagnostic messages to the terminal.
fn print_diagnostics(
loader: &FsLoader,
sources: &SourceMap,
errors: Vec<Error>,
) -> Result<(), files::Error> {
let mut writer = StandardStream::stderr(ColorChoice::Always);
let config = Config { tab_width: 2, ..Default::default() };
let files = FilesImpl(loader, sources);
for error in errors {
// The main diagnostic.
let main = Diagnostic::error()
.with_message(error.message)
.with_labels(vec![Label::primary(error.file, error.span.to_range())]);
term::emit(&mut writer, &config, &files, &main)?;
// Stacktrace-like helper diagnostics.
for (file, span, point) in error.trace {
let message = match point {
Tracepoint::Call(Some(name)) => {
format!("error occured in this call of function `{}`", name)
}
Tracepoint::Call(None) => "error occured in this function call".into(),
Tracepoint::Import => "error occured while importing this module".into(),
};
let help = Diagnostic::help()
.with_message(message)
.with_labels(vec![Label::primary(file, span.to_range())]);
term::emit(&mut writer, &config, &files, &help)?;
}
}
Ok(())
}
/// Required for error message formatting with codespan-reporting.
struct FilesImpl<'a>(&'a FsLoader, &'a SourceMap);
impl FilesImpl<'_> {
fn source(&self, id: FileId) -> Result<&SourceFile, files::Error> {
self.1.get(id).ok_or(files::Error::FileMissing)
}
}
impl<'a> Files<'a> for FilesImpl<'a> {
type FileId = FileId;
type Name = String;
type Source = &'a str;
fn name(&'a self, id: FileId) -> Result<Self::Name, files::Error> {
Ok(self.0.path(id).display().to_string())
}
fn source(&'a self, id: FileId) -> Result<Self::Source, files::Error> {
Ok(self.source(id)?.src())
}
fn line_index(
&'a self,
id: FileId,
byte_index: usize,
) -> Result<usize, files::Error> {
let source = self.source(id)?;
source.pos_to_line(byte_index.into()).ok_or_else(|| {
let (given, max) = (byte_index, source.len_bytes());
if given <= max {
files::Error::InvalidCharBoundary { given }
} else {
files::Error::IndexTooLarge { given, max }
}
})
}
fn line_range(
&'a self,
id: FileId,
line_index: usize,
) -> Result<Range<usize>, files::Error> {
let source = self.source(id)?;
let span = source.line_to_span(line_index).ok_or(files::Error::LineTooLarge {
given: line_index,
max: source.len_lines(),
})?;
Ok(span.to_range())
}
}
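
For reference, a self-contained sketch of what the Files implementation above feeds into, using codespan-reporting's built-in SimpleFile in place of Typst's loader and source map (the file name, source text, and byte range are placeholders):

use codespan_reporting::diagnostic::{Diagnostic, Label};
use codespan_reporting::files::SimpleFile;
use codespan_reporting::term::termcolor::{ColorChoice, StandardStream};
use codespan_reporting::term::{self, Config};

fn demo() -> Result<(), codespan_reporting::files::Error> {
    // SimpleFile uses `()` as its file id type.
    let file = SimpleFile::new("demo.typ", "#let x = y\n");
    let diagnostic = Diagnostic::error()
        .with_message("unknown variable")
        .with_labels(vec![Label::primary((), 9 .. 10)]);
    let mut writer = StandardStream::stderr(ColorChoice::Always);
    let config = Config { tab_width: 2, ..Default::default() };
    term::emit(&mut writer, &config, &file, &diagnostic)
}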

View File

@ -1,145 +0,0 @@
// FIXME:
// Both `LineMap::location` and `search_column` can lead to quadratic compile
// times for very long lines. We probably need some smart acceleration structure
// to determine columns.
use super::Scanner;
use crate::syntax::{Location, Pos};
/// Enables conversion of byte position to locations.
pub struct LineMap<'s> {
src: &'s str,
line_starts: Vec<Pos>,
}
impl<'s> LineMap<'s> {
/// Create a new line map for a source string.
pub fn new(src: &'s str) -> Self {
let mut line_starts = vec![Pos::ZERO];
let mut s = Scanner::new(src);
while let Some(c) = s.eat_merging_crlf() {
if is_newline(c) {
line_starts.push(s.index().into());
}
}
Self { src, line_starts }
}
/// Convert a byte position to a location.
pub fn location(&self, pos: Pos) -> Option<Location> {
// Find the line which contains the position.
let line_index = match self.line_starts.binary_search(&pos) {
Ok(i) => i,
Err(i) => i - 1,
};
let start = self.line_starts.get(line_index)?;
let head = self.src.get(start.to_usize() .. pos.to_usize())?;
// TODO: What about tabs?
let column_index = head.chars().count();
Some(Location {
line: 1 + line_index as u32,
column: 1 + column_index as u32,
})
}
/// Convert a location to a byte position.
pub fn pos(&self, location: Location) -> Option<Pos> {
// Determine the boundaries of the line.
let line_idx = location.line.checked_sub(1)? as usize;
let line_start = *self.line_starts.get(line_idx)?;
let line_end = self
.line_starts
.get(location.line as usize)
.map_or(self.src.len(), |pos| pos.to_usize());
let line = self.src.get(line_start.to_usize() .. line_end)?;
// Find the index in the line. For the first column, the index is always
// zero. For other columns, we have to look at which byte the char
// directly before the column in question ends. We can't do
// `nth(column_idx)` directly since the column may be behind the last
// char.
let column_idx = location.column.checked_sub(1)? as usize;
let line_offset = if let Some(prev_idx) = column_idx.checked_sub(1) {
// TODO: What about tabs?
let (idx, prev) = line.char_indices().nth(prev_idx)?;
idx + prev.len_utf8()
} else {
0
};
Some(line_start + line_offset)
}
}
/// Count how many column the string would fill.
pub fn count_columns(src: &str) -> usize {
let mut column = 0;
for c in src.chars().rev() {
if is_newline(c) {
break;
} else if c == '\t' {
// TODO: How many columns per tab?
column += 2;
} else {
column += 1;
}
}
column
}
/// Whether this character denotes a newline.
#[inline]
pub fn is_newline(character: char) -> bool {
matches!(
character,
// Line Feed, Vertical Tab, Form Feed, Carriage Return.
'\n' | '\x0B' | '\x0C' | '\r' |
// Next Line, Line Separator, Paragraph Separator.
'\u{0085}' | '\u{2028}' | '\u{2029}'
)
}
#[cfg(test)]
mod tests {
use super::*;
const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl";
#[test]
fn test_line_map_new() {
let map = LineMap::new(TEST);
assert_eq!(map.line_starts, vec![Pos(0), Pos(7), Pos(15), Pos(18)]);
}
#[test]
fn test_line_map_location() {
let map = LineMap::new(TEST);
assert_eq!(map.location(Pos(0)), Some(Location::new(1, 1)));
assert_eq!(map.location(Pos(2)), Some(Location::new(1, 2)));
assert_eq!(map.location(Pos(6)), Some(Location::new(1, 6)));
assert_eq!(map.location(Pos(7)), Some(Location::new(2, 1)));
assert_eq!(map.location(Pos(8)), Some(Location::new(2, 2)));
assert_eq!(map.location(Pos(12)), Some(Location::new(2, 3)));
assert_eq!(map.location(Pos(21)), Some(Location::new(4, 4)));
assert_eq!(map.location(Pos(22)), None);
}
#[test]
fn test_line_map_pos() {
fn assert_round_trip(map: &LineMap, pos: Pos) {
assert_eq!(map.location(pos).and_then(|loc| map.pos(loc)), Some(pos));
}
let map = LineMap::new(TEST);
assert_round_trip(&map, Pos(0));
assert_round_trip(&map, Pos(7));
assert_round_trip(&map, Pos(12));
assert_round_trip(&map, Pos(21));
}
}

View File

@ -1,12 +1,10 @@
//! Parsing and tokenization.
mod lines;
mod parser;
mod resolve;
mod scanner;
mod tokens;
pub use lines::*;
pub use parser::*;
pub use resolve::*;
pub use scanner::*;
@ -15,13 +13,13 @@ pub use tokens::*;
use std::rc::Rc;
use crate::diag::TypResult;
use crate::loading::FileId;
use crate::source::SourceFile;
use crate::syntax::*;
use crate::util::EcoString;
/// Parse a string of source code.
pub fn parse(file: FileId, src: &str) -> TypResult<SyntaxTree> {
let mut p = Parser::new(file, src);
pub fn parse(source: &SourceFile) -> TypResult<SyntaxTree> {
let mut p = Parser::new(source);
let tree = tree(&mut p);
let errors = p.finish();
if errors.is_empty() {

View File

@ -1,15 +1,15 @@
use std::fmt::{self, Debug, Formatter};
use std::ops::Range;
use super::{count_columns, TokenMode, Tokens};
use super::{TokenMode, Tokens};
use crate::diag::Error;
use crate::loading::FileId;
use crate::source::SourceFile;
use crate::syntax::{Pos, Span, Token};
/// A convenient token-based parser.
pub struct Parser<'s> {
/// The id of the parsed file.
file: FileId,
source: &'s SourceFile,
/// Parsing errors.
errors: Vec<Error>,
/// An iterator over the source tokens.
@ -60,11 +60,11 @@ pub enum Group {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
pub fn new(file: FileId, src: &'s str) -> Self {
let mut tokens = Tokens::new(src, TokenMode::Markup);
pub fn new(source: &'s SourceFile) -> Self {
let mut tokens = Tokens::new(source.src(), TokenMode::Markup);
let next = tokens.next();
Self {
file,
source,
errors: vec![],
tokens,
groups: vec![],
@ -82,11 +82,7 @@ impl<'s> Parser<'s> {
/// Add an error with location and message.
pub fn error(&mut self, span: impl Into<Span>, message: impl Into<String>) {
self.errors.push(Error {
file: self.file,
span: span.into(),
message: message.into(),
});
self.errors.push(Error::new(self.source.file(), span, message));
}
/// Eat the next token and add an error that it is not the expected `thing`.
@ -324,7 +320,7 @@ impl<'s> Parser<'s> {
/// Determine the column for the given index in the source.
pub fn column(&self, index: usize) -> usize {
count_columns(self.tokens.scanner().get(.. index))
self.source.pos_to_column(index.into()).unwrap()
}
/// The span from `start` to [`self.prev_end()`](Self::prev_end).

View File

@ -109,8 +109,11 @@ fn split_lines(text: &str) -> Vec<String> {
let mut line = String::new();
let mut lines = Vec::new();
while let Some(c) = s.eat_merging_crlf() {
while let Some(c) = s.eat() {
if is_newline(c) {
if c == '\r' {
s.eat_if('\n');
}
lines.push(std::mem::take(&mut line));
} else {
line.push(c);
@ -173,14 +176,10 @@ mod tests {
text: &str,
block: bool,
) {
Span::without_cmp(|| {
assert_eq!(resolve_raw(Span::ZERO, raw, backticks), RawNode {
span: Span::ZERO,
lang: lang.and_then(|id| Ident::new(id, 0)),
text: text.into(),
block,
});
});
let node = resolve_raw(Span::ZERO, raw, backticks);
assert_eq!(node.lang.as_deref(), lang);
assert_eq!(node.text, text);
assert_eq!(node.block, block);
}
// Just one backtick.

View File

@ -47,17 +47,6 @@ impl<'s> Scanner<'s> {
debug_assert_eq!(next, Some(c));
}
/// Consume the next char, coalescing `\r\n` to just `\n`.
#[inline]
pub fn eat_merging_crlf(&mut self) -> Option<char> {
if self.rest().starts_with("\r\n") {
self.index += 2;
Some('\n')
} else {
self.eat()
}
}
/// Eat chars while the condition is true.
#[inline]
pub fn eat_while<F>(&mut self, mut f: F) -> &'s str
@ -168,3 +157,15 @@ impl Debug for Scanner<'_> {
write!(f, "Scanner({}|{})", self.eaten(), self.rest())
}
}
/// Whether this character denotes a newline.
#[inline]
pub fn is_newline(character: char) -> bool {
matches!(
character,
// Line Feed, Vertical Tab, Form Feed, Carriage Return.
'\n' | '\x0B' | '\x0C' | '\r' |
// Next Line, Line Separator, Paragraph Separator.
'\u{0085}' | '\u{2028}' | '\u{2029}'
)
}

View File

@ -198,13 +198,16 @@ impl<'s> Tokens<'s> {
// Count the number of newlines.
let mut newlines = 0;
while let Some(c) = self.s.eat_merging_crlf() {
while let Some(c) = self.s.eat() {
if !c.is_whitespace() {
self.s.uneat();
break;
}
if is_newline(c) {
if c == '\r' {
self.s.eat_if('\n');
}
newlines += 1;
}
}
@ -484,8 +487,8 @@ impl Debug for Tokens<'_> {
}
}
fn keyword(id: &str) -> Option<Token<'static>> {
Some(match id {
fn keyword(ident: &str) -> Option<Token<'static>> {
Some(match ident {
"not" => Token::Not,
"and" => Token::And,
"or" => Token::Or,

View File

@ -610,6 +610,7 @@ mod tests {
use super::*;
use crate::loading::FileId;
use crate::parse::parse;
use crate::source::SourceFile;
#[track_caller]
fn roundtrip(src: &str) {
@ -618,7 +619,8 @@ mod tests {
#[track_caller]
fn test_parse(src: &str, exp: &str) {
let ast = parse(FileId::from_raw(0), src).unwrap();
let source = SourceFile::new(FileId::from_raw(0), src.into());
let ast = parse(&source).unwrap();
let found = pretty(&ast);
if exp != found {
println!("tree: {:#?}", ast);

src/source.rs (new file, 195 lines)
View File

@ -0,0 +1,195 @@
//! Source files.
use std::collections::{hash_map::Entry, HashMap};
use crate::loading::FileId;
use crate::parse::{is_newline, Scanner};
use crate::syntax::{Pos, Span};
/// A store for loaded source files.
#[derive(Default)]
pub struct SourceMap {
sources: HashMap<FileId, SourceFile>,
}
impl SourceMap {
/// Create a new, empty source map.
pub fn new() -> Self {
Self::default()
}
/// Get a source file by id.
pub fn get(&self, file: FileId) -> Option<&SourceFile> {
self.sources.get(&file)
}
/// Insert a source file.
pub fn insert(&mut self, source: SourceFile) -> &SourceFile {
match self.sources.entry(source.file) {
Entry::Occupied(mut entry) => {
entry.insert(source);
entry.into_mut()
}
Entry::Vacant(entry) => entry.insert(source),
}
}
/// Remove all sources.
pub fn clear(&mut self) {
self.sources.clear();
}
}
/// A single source file.
pub struct SourceFile {
file: FileId,
src: String,
line_starts: Vec<Pos>,
}
impl SourceFile {
/// Create a new source file from string.
pub fn new(file: FileId, src: String) -> Self {
let mut line_starts = vec![Pos::ZERO];
let mut s = Scanner::new(&src);
while let Some(c) = s.eat() {
if is_newline(c) {
if c == '\r' {
s.eat_if('\n');
}
line_starts.push(s.index().into());
}
}
Self { file, src, line_starts }
}
/// The file id.
pub fn file(&self) -> FileId {
self.file
}
/// The whole source as a string slice.
pub fn src(&self) -> &str {
&self.src
}
/// Get the length of the file in bytes.
pub fn len_bytes(&self) -> usize {
self.src.len()
}
/// Get the length of the file in lines.
pub fn len_lines(&self) -> usize {
self.line_starts.len()
}
/// Slice out the part of the source code enclosed by the span.
pub fn get(&self, span: Span) -> Option<&str> {
self.src.get(span.to_range())
}
/// Return the index of the line that contains the given byte position.
pub fn pos_to_line(&self, byte_pos: Pos) -> Option<usize> {
(byte_pos.to_usize() <= self.src.len()).then(|| {
match self.line_starts.binary_search(&byte_pos) {
Ok(i) => i,
Err(i) => i - 1,
}
})
}
/// Return the column of the byte index.
///
/// Tabs are counted as occupying two columns.
pub fn pos_to_column(&self, byte_pos: Pos) -> Option<usize> {
let line = self.pos_to_line(byte_pos)?;
let start = self.line_to_pos(line)?;
let head = self.get(Span::new(start, byte_pos))?;
Some(head.chars().map(width).sum())
}
/// Return the byte position at which the given line starts.
pub fn line_to_pos(&self, line_idx: usize) -> Option<Pos> {
self.line_starts.get(line_idx).copied()
}
/// Return the span which encloses the given line.
pub fn line_to_span(&self, line_idx: usize) -> Option<Span> {
let start = self.line_to_pos(line_idx)?;
let end = self.line_to_pos(line_idx + 1).unwrap_or(self.src.len().into());
Some(Span::new(start, end))
}
/// Return the byte position of the given (line, column) pair.
///
/// Tabs are counted as occupying two columns.
pub fn line_column_to_pos(&self, line_idx: usize, column_idx: usize) -> Option<Pos> {
let span = self.line_to_span(line_idx)?;
let line = self.get(span)?;
if column_idx == 0 {
return Some(span.start);
}
let mut column = 0;
for (i, c) in line.char_indices() {
column += width(c);
if column >= column_idx {
return Some(span.start + Pos::from(i + c.len_utf8()));
}
}
None
}
}
/// The display width of the character.
fn width(c: char) -> usize {
if c == '\t' { 2 } else { 1 }
}
#[cfg(test)]
mod tests {
use super::*;
const ID: FileId = FileId::from_raw(0);
const TEST: &str = "äbcde\nf💛g\r\nhi\rjkl";
#[test]
fn test_source_file_new() {
let source = SourceFile::new(ID, TEST.into());
assert_eq!(source.line_starts, vec![Pos(0), Pos(7), Pos(15), Pos(18)]);
}
#[test]
fn test_source_file_pos_to_line() {
let source = SourceFile::new(ID, TEST.into());
assert_eq!(source.pos_to_line(Pos(0)), Some(0));
assert_eq!(source.pos_to_line(Pos(2)), Some(0));
assert_eq!(source.pos_to_line(Pos(6)), Some(0));
assert_eq!(source.pos_to_line(Pos(7)), Some(1));
assert_eq!(source.pos_to_line(Pos(8)), Some(1));
assert_eq!(source.pos_to_line(Pos(12)), Some(1));
assert_eq!(source.pos_to_line(Pos(21)), Some(3));
assert_eq!(source.pos_to_line(Pos(22)), None);
}
#[test]
fn test_source_file_roundtrip() {
#[track_caller]
fn roundtrip(source: &SourceFile, byte_pos: Pos) {
let line = source.pos_to_line(byte_pos).unwrap();
let column = source.pos_to_column(byte_pos).unwrap();
let result = source.line_column_to_pos(line, column).unwrap();
assert_eq!(result, byte_pos);
}
let source = SourceFile::new(ID, TEST.into());
roundtrip(&source, Pos(0));
roundtrip(&source, Pos(7));
roundtrip(&source, Pos(12));
roundtrip(&source, Pos(21));
}
}
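
A small usage sketch of the position API defined above (not part of the commit; the file id and source text are placeholders). Line and column indices are zero-based and tabs count as two columns:

use typst::loading::FileId;
use typst::source::SourceFile;
use typst::syntax::Pos;

fn demo() {
    let source = SourceFile::new(FileId::from_raw(0), "hello\n\tworld".into());
    // Byte position 7 is the `w` on the second line, right after the tab.
    assert_eq!(source.pos_to_line(Pos(7)), Some(1));
    assert_eq!(source.pos_to_column(Pos(7)), Some(2));
    assert_eq!(source.line_column_to_pos(1, 2), Some(Pos(7)));
}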

View File

@ -1,13 +1,8 @@
use std::cell::Cell;
use std::fmt::{self, Debug, Display, Formatter};
use std::fmt::{self, Debug, Formatter};
use std::ops::{Add, Range};
use serde::{Deserialize, Serialize};
thread_local! {
static CMP_SPANS: Cell<bool> = Cell::new(true);
}
/// A value with the span it corresponds to in the source code.
#[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
#[derive(Serialize, Deserialize)]
@ -47,15 +42,17 @@ impl<T: Debug> Debug for Spanned<T> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
self.v.fmt(f)?;
if f.alternate() {
f.write_str(" ")?;
f.write_str(" <")?;
self.span.fmt(f)?;
f.write_str(">")?;
}
Ok(())
}
}
/// Bounds of a slice of source code.
#[derive(Copy, Clone, Ord, PartialOrd, Serialize, Deserialize)]
#[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
#[derive(Serialize, Deserialize)]
pub struct Span {
/// The inclusive start position.
pub start: Pos,
@ -90,34 +87,15 @@ impl Span {
*self = self.join(other)
}
/// Test whether one span completely contains the other span.
pub fn contains(self, other: Self) -> bool {
self.start <= other.start && self.end >= other.end
}
/// Convert to a `Range<usize>` for indexing.
pub fn to_range(self) -> Range<usize> {
self.start.to_usize() .. self.end.to_usize()
}
/// Run some code with span comparisons disabled.
pub fn without_cmp<F, T>(f: F) -> T
where
F: FnOnce() -> T,
{
let prev = Self::cmp();
Self::set_cmp(false);
let val = f();
Self::set_cmp(prev);
val
}
/// Whether spans will currently be compared.
fn cmp() -> bool {
CMP_SPANS.with(Cell::get)
}
/// Whether spans should be compared.
///
/// When set to `false` comparisons with `PartialEq` ignore spans.
fn set_cmp(cmp: bool) {
CMP_SPANS.with(|cell| cell.set(cmp));
}
}
impl<T> From<T> for Span
@ -138,28 +116,15 @@ where
}
}
impl Default for Span {
fn default() -> Self {
Span::ZERO
}
}
impl Debug for Span {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "<{:?}-{:?}>", self.start, self.end)
}
}
impl Eq for Span {}
impl PartialEq for Span {
fn eq(&self, other: &Self) -> bool {
!Self::cmp() || (self.start == other.start && self.end == other.end)
write!(f, "{:?}-{:?}", self.start, self.end)
}
}
/// A byte position in source code.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
#[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
#[derive(Serialize, Deserialize)]
pub struct Pos(pub u32);
impl Pos {
@ -178,12 +143,6 @@ impl From<u32> for Pos {
}
}
impl From<i32> for Pos {
fn from(index: i32) -> Self {
Self(index as u32)
}
}
impl From<usize> for Pos {
fn from(index: usize) -> Self {
Self(index as u32)
@ -206,31 +165,3 @@ where
Pos(self.0 + rhs.into().0)
}
}
/// A one-indexed line-column position in source code.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct Location {
/// The one-indexed line.
pub line: u32,
/// The one-indexed column.
pub column: u32,
}
impl Location {
/// Create a new location from line and column.
pub fn new(line: u32, column: u32) -> Self {
Self { line, column }
}
}
impl Display for Location {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{}:{}", self.line, self.column)
}
}
impl Debug for Location {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
Display::fmt(self, f)
}
}
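
A tiny sketch of the new contains helper together with to_range (not part of the commit); it assumes Span::new is public, as its use in src/source.rs above suggests, and the positions are arbitrary:

use typst::syntax::{Pos, Span};

fn demo() {
    let outer = Span::new(Pos(2), Pos(10));
    let inner = Span::new(Pos(4), Pos(6));
    // `contains` requires full enclosure of both endpoints.
    assert!(outer.contains(inner));
    assert_eq!(inner.to_range(), 4 .. 6);
}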

View File

@ -193,6 +193,24 @@ impl From<&String> for EcoString {
}
}
impl From<EcoString> for String {
fn from(s: EcoString) -> Self {
match s.0 {
Repr::Small { .. } => s.as_str().to_owned(),
Repr::Large(rc) => match Rc::try_unwrap(rc) {
Ok(string) => string,
Err(rc) => (*rc).clone(),
},
}
}
}
impl From<&EcoString> for String {
fn from(s: &EcoString) -> Self {
s.as_str().to_owned()
}
}
impl Deref for EcoString {
type Target = str;

View File

@ -17,8 +17,9 @@ use typst::geom::{self, Length, PathElement, Point, Sides, Size};
use typst::image::ImageId;
use typst::layout::{layout, Element, Frame, Geometry, LayoutTree, Paint, Text};
use typst::loading::{FileId, FsLoader};
use typst::parse::{parse, LineMap, Scanner};
use typst::syntax::{Location, Pos};
use typst::parse::{parse, Scanner};
use typst::source::SourceFile;
use typst::syntax::Pos;
use typst::Context;
const TYP_DIR: &str = "./typ";
@ -157,12 +158,12 @@ fn test(
let mut ok = true;
let mut frames = vec![];
let mut lines = 0;
let mut line = 0;
let mut compare_ref = true;
let mut compare_ever = false;
let parts: Vec<_> = src.split("\n---").collect();
for (i, part) in parts.iter().enumerate() {
for (i, &part) in parts.iter().enumerate() {
let is_header = i == 0
&& parts.len() > 1
&& part
@ -177,13 +178,13 @@ fn test(
}
} else {
let (part_ok, compare_here, part_frames) =
test_part(ctx, file, part, i, compare_ref, lines);
test_part(ctx, file, part, i, compare_ref, line);
ok &= part_ok;
compare_ever |= compare_here;
frames.extend(part_frames);
}
lines += part.lines().count() as u32 + 1;
line += part.lines().count() + 1;
}
if compare_ever {
@ -221,15 +222,15 @@ fn test_part(
src: &str,
i: usize,
compare_ref: bool,
lines: u32,
line: usize,
) -> (bool, bool, Vec<Rc<Frame>>) {
let map = LineMap::new(src);
let (local_compare_ref, mut ref_errors) = parse_metadata(file, src, &map);
let source = SourceFile::new(file, src.into());
let (local_compare_ref, mut ref_errors) = parse_metadata(&source);
let compare_ref = local_compare_ref.unwrap_or(compare_ref);
let mut ok = true;
let result = typeset(ctx, file, src);
let result = typeset(ctx, &source);
let (frames, mut errors) = match result {
#[allow(unused_variables)]
Ok((tree, mut frames)) => {
@ -246,7 +247,11 @@ fn test_part(
};
// TODO: Also handle errors from other files.
errors.retain(|error| error.file == file);
errors.retain(|error| error.file == source.file());
for error in &mut errors {
error.trace.clear();
}
ref_errors.sort();
errors.sort();
@ -257,14 +262,14 @@ fn test_part(
for error in errors.iter() {
if error.file == file && !ref_errors.contains(error) {
print!(" Not annotated | ");
print_error(error, &map, lines);
print_error(&source, line, error);
}
}
for error in ref_errors.iter() {
if !errors.contains(error) {
print!(" Not emitted | ");
print_error(error, &map, lines);
print_error(&source, line, error);
}
}
}
@ -318,11 +323,11 @@ fn test_incremental(
ok
}
fn parse_metadata(file: FileId, src: &str, map: &LineMap) -> (Option<bool>, Vec<Error>) {
fn parse_metadata(source: &SourceFile) -> (Option<bool>, Vec<Error>) {
let mut compare_ref = None;
let mut errors = vec![];
let lines: Vec<_> = src.lines().map(str::trim).collect();
let lines: Vec<_> = source.src().lines().map(str::trim).collect();
for (i, line) in lines.iter().enumerate() {
if line.starts_with("// Ref: false") {
compare_ref = Some(false);
@ -338,7 +343,7 @@ fn parse_metadata(file: FileId, src: &str, map: &LineMap) -> (Option<bool>, Vec<
continue;
};
fn num(s: &mut Scanner) -> u32 {
fn num(s: &mut Scanner) -> usize {
s.eat_while(|c| c.is_numeric()).parse().unwrap()
}
@ -346,18 +351,18 @@ fn parse_metadata(file: FileId, src: &str, map: &LineMap) -> (Option<bool>, Vec<
lines[i ..].iter().take_while(|line| line.starts_with("//")).count();
let pos = |s: &mut Scanner| -> Pos {
let first = num(s);
let first = num(s) - 1;
let (delta, column) =
if s.eat_if(':') { (first, num(s)) } else { (1, first) };
let line = (i + comments) as u32 + delta;
map.pos(Location::new(line, column)).unwrap()
if s.eat_if(':') { (first, num(s) - 1) } else { (0, first) };
let line = (i + comments) + delta;
source.line_column_to_pos(line, column).unwrap()
};
let mut s = Scanner::new(rest);
let start = pos(&mut s);
let end = if s.eat_if('-') { pos(&mut s) } else { start };
errors.push(Error::new(file, start .. end, s.rest().trim()));
errors.push(Error::new(source.file(), start .. end, s.rest().trim()));
}
(compare_ref, errors)
@ -365,22 +370,24 @@ fn parse_metadata(file: FileId, src: &str, map: &LineMap) -> (Option<bool>, Vec<
fn typeset(
ctx: &mut Context,
file: FileId,
src: &str,
source: &SourceFile,
) -> TypResult<(LayoutTree, Vec<Rc<Frame>>)> {
let ast = parse(file, src)?;
let module = eval(ctx, file, Rc::new(ast))?;
let ast = parse(source)?;
let module = eval(ctx, source.file(), Rc::new(ast))?;
let tree = exec(ctx, &module.template);
let frames = layout(ctx, &tree);
Ok((tree, frames))
}
fn print_error(error: &Error, map: &LineMap, lines: u32) {
let mut start = map.location(error.span.start).unwrap();
let mut end = map.location(error.span.end).unwrap();
start.line += lines;
end.line += lines;
println!("Error: {}-{}: {}", start, end, error.message);
fn print_error(source: &SourceFile, line: usize, error: &Error) {
let start_line = line + source.pos_to_line(error.span.start).unwrap();
let start_col = source.pos_to_column(error.span.start).unwrap();
let end_line = line + source.pos_to_line(error.span.end).unwrap();
let end_col = source.pos_to_column(error.span.end).unwrap();
println!(
"Error: {}:{}-{}:{}: {}",
start_line, start_col, end_line, end_col, error.message
);
}
fn draw(ctx: &Context, frames: &[Rc<Frame>], dpi: f32) -> sk::Pixmap {