refactor: factor out a general Lines struct from Source
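
This moves the line-metadata bookkeeping (byte/UTF-16/line/column
conversions) out of `Source` into a new, generic `Lines<S>` type in
`typst-syntax`. `Source` now stores a `Lines<String>` internally and exposes
it via `Source::lines()`, while the memoized `Lines::from_bytes` makes the
same conversions available for arbitrary UTF-8 files, replacing the manual
`utf8_iter`-based byte math in `typst::loading`. The `LineCol` and
`ReportPos` error-reporting helpers move to `typst::diag`, and the CLI's
codespan `Files` impl and `SystemWorld::lookup` now operate on
`Lines<String>`.

Roughly, call sites change like this (illustrative sketch, not part of the
diff below):

    // Before: conversions lived on `Source`.
    let line = source.byte_to_line(range.start)?;

    // After: they live on `Lines`, shared by sources and plain files.
    let line = source.lines().byte_to_line(range.start)?;
    let lines = Lines::from_bytes(&bytes).expect("valid utf-8");
    let (line, col) = lines.byte_to_line_column(pos)?;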

Tobias Schmitz 2025-05-19 13:14:44 +02:00
parent 2e2f646f2a
commit e5d8f02554
21 changed files with 645 additions and 525 deletions

Cargo.lock generated
View File

@ -3112,7 +3112,6 @@ dependencies = [
"unicode-segmentation",
"unscanny",
"usvg",
"utf8_iter",
"wasmi",
"xmlwriter",
]
@ -3201,6 +3200,7 @@ dependencies = [
name = "typst-syntax"
version = "0.13.1"
dependencies = [
"comemo",
"ecow",
"serde",
"toml",

View File

@ -135,7 +135,6 @@ unicode-segmentation = "1"
unscanny = "0.1"
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
usvg = { version = "0.45", default-features = false, features = ["text"] }
utf8_iter = "1.0.4"
walkdir = "2"
wasmi = "0.40.0"
web-sys = "0.3"

View File

@ -16,7 +16,7 @@ use typst::diag::{
use typst::foundations::{Datetime, Smart};
use typst::html::HtmlDocument;
use typst::layout::{Frame, Page, PageRanges, PagedDocument};
use typst::syntax::{FileId, Source, Span};
use typst::syntax::{FileId, Lines, Span};
use typst::WorldExt;
use typst_pdf::{PdfOptions, PdfStandards, Timestamp};
@ -696,7 +696,7 @@ fn label(world: &SystemWorld, span: Span) -> Option<Label<FileId>> {
impl<'a> codespan_reporting::files::Files<'a> for SystemWorld {
type FileId = FileId;
type Name = String;
type Source = Source;
type Source = Lines<String>;
fn name(&'a self, id: FileId) -> CodespanResult<Self::Name> {
let vpath = id.vpath();

View File

@ -85,6 +85,6 @@ fn resolve_span(world: &SystemWorld, span: Span) -> Option<(String, u32)> {
let id = span.id()?;
let source = world.source(id).ok()?;
let range = source.range(span)?;
let line = source.byte_to_line(range.start)?;
let line = source.lines().byte_to_line(range.start)?;
Some((format!("{id:?}"), line as u32 + 1))
}

View File

@ -9,7 +9,7 @@ use ecow::{eco_format, EcoString};
use parking_lot::Mutex;
use typst::diag::{FileError, FileResult};
use typst::foundations::{Bytes, Datetime, Dict, IntoValue};
use typst::syntax::{FileId, Source, VirtualPath};
use typst::syntax::{FileId, Lines, Source, VirtualPath};
use typst::text::{Font, FontBook};
use typst::utils::LazyHash;
use typst::{Library, World};
@ -183,8 +183,18 @@ impl SystemWorld {
/// Lookup a source file by id.
#[track_caller]
pub fn lookup(&self, id: FileId) -> Source {
self.source(id).expect("file id does not point to any source file")
pub fn lookup(&self, id: FileId) -> Lines<String> {
self.slot(id, |slot| {
if let Some(source) = slot.source.get() {
let source = source.as_ref().expect("file is not valid");
source.lines()
} else if let Some(bytes) = slot.file.get() {
let bytes = bytes.as_ref().expect("file is not valid");
Lines::from_bytes(bytes.as_slice()).expect("file is not valid utf-8")
} else {
panic!("file id does not point to any source file");
}
})
}
}
@ -339,6 +349,11 @@ impl<T: Clone> SlotCell<T> {
self.accessed = false;
}
/// Gets the contents of the cell.
fn get(&self) -> Option<&FileResult<T>> {
self.data.as_ref()
}
/// Gets the contents of the cell or initialize them.
fn get_or_init(
&mut self,

View File

@ -66,7 +66,6 @@ unicode-normalization = { workspace = true }
unicode-segmentation = { workspace = true }
unscanny = { workspace = true }
usvg = { workspace = true }
utf8_iter = { workspace = true }
wasmi = { workspace = true }
xmlwriter = { workspace = true }

View File

@ -9,10 +9,10 @@ use std::string::FromUtf8Error;
use comemo::Tracked;
use ecow::{eco_vec, EcoVec};
use typst_syntax::package::{PackageSpec, PackageVersion};
use typst_syntax::{Span, Spanned, SyntaxError};
use typst_syntax::{Lines, Span, Spanned, SyntaxError};
use crate::engine::Engine;
use crate::loading::{Loaded, LineCol};
use crate::loading::{LoadSource, Loaded};
use crate::{World, WorldExt};
/// Early-return with a [`StrResult`] or [`SourceResult`].
@ -569,6 +569,144 @@ impl From<PackageError> for EcoString {
}
}
impl Loaded {
/// Report an error, possibly in an external file.
pub fn err_in_text(
&self,
pos: impl Into<ReportPos>,
msg: impl std::fmt::Display,
error: impl std::fmt::Display,
) -> EcoVec<SourceDiagnostic> {
let lines = Lines::from_bytes(&self.bytes);
match (self.source.v, lines) {
// Only report an error in an external file
// if it is human readable (valid utf-8).
(LoadSource::Path(file_id), Ok(lines)) => {
let pos = pos.into();
if let Some(range) = pos.range(&lines) {
let span = Span::from_range(file_id, range);
return eco_vec!(error!(span, "{msg} ({error})"));
}
// Either `ReportPos::None` was provided, or resolving the range
// from the line/column failed. If a line/column is available,
// report it in the error message anyway, even though it may be wrong.
let span = Span::from_range(file_id, 0..self.bytes.len());
let error = if let Some(pair) = pos.line_col(&lines) {
let (line, col) = pair.numbers();
error!(span, "{msg} ({error} at {line}:{col})")
} else {
error!(span, "{msg} ({error})")
};
eco_vec![error]
}
_ => self.err_in_bytes(pos, msg, error),
}
}
/// Report an error at the span of the load call, optionally with a
/// line/column computed from the loaded bytes.
pub fn err_in_bytes(
&self,
pos: impl Into<ReportPos>,
msg: impl std::fmt::Display,
error: impl std::fmt::Display,
) -> EcoVec<SourceDiagnostic> {
let pos = pos.into();
let result = Lines::from_bytes(&self.bytes).ok().and_then(|l| pos.line_col(&l));
let error = if let Some(pair) = result {
let (line, col) = pair.numbers();
error!(self.source.span, "{msg} ({error} at {line}:{col})")
} else {
error!(self.source.span, "{msg} ({error})")
};
eco_vec![error]
}
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum ReportPos {
/// Contains the range, and the 0-based line/column.
Full(std::ops::Range<usize>, LineCol),
/// Contains the range.
Range(std::ops::Range<usize>),
/// Contains the 0-based line/column.
LineCol(LineCol),
#[default]
None,
}
impl From<std::ops::Range<usize>> for ReportPos {
fn from(value: std::ops::Range<usize>) -> Self {
Self::Range(value)
}
}
impl From<LineCol> for ReportPos {
fn from(value: LineCol) -> Self {
Self::LineCol(value)
}
}
impl ReportPos {
fn range(&self, lines: &Lines<String>) -> Option<std::ops::Range<usize>> {
match self {
ReportPos::Full(range, _) => Some(range.clone()),
ReportPos::Range(range) => Some(range.clone()),
&ReportPos::LineCol(pair) => {
let i = lines.line_column_to_byte(pair.line, pair.col)?;
Some(i..i)
}
ReportPos::None => None,
}
}
fn line_col(&self, lines: &Lines<String>) -> Option<LineCol> {
match self {
&ReportPos::Full(_, pair) => Some(pair),
ReportPos::Range(range) => {
let (line, col) = lines.byte_to_line_column(range.start)?;
Some(LineCol::zero_based(line, col))
}
&ReportPos::LineCol(pair) => Some(pair),
ReportPos::None => None,
}
}
}
/// A line/column pair.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LineCol {
/// The 0-based line.
line: usize,
/// The 0-based column.
col: usize,
}
impl LineCol {
/// Constructs the line/column pair from 0-based indices.
pub fn zero_based(line: usize, col: usize) -> Self {
Self { line, col }
}
/// Constructs the line/column pair from 1-based numbers.
pub fn one_based(line: usize, col: usize) -> Self {
Self {
line: line.saturating_sub(1),
col: col.saturating_sub(1),
}
}
/// Returns the 0-based line/column indices.
pub fn indices(&self) -> (usize, usize) {
(self.line, self.col)
}
/// Returns the 1-based line/column numbers.
pub fn numbers(&self) -> (usize, usize) {
(self.line + 1, self.col + 1)
}
}
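// An illustrative sketch of how a data loader uses these types (assumed
// usage, mirroring `format_toml_error` elsewhere in this commit):
//
//     let pos = error.span().map(ReportPos::Range).unwrap_or_default();
//     data.err_in_text(pos, "failed to parse TOML", error.message())
//
// `err_in_text` points the diagnostic into the external file when the data
// was loaded from a path and is valid utf-8; otherwise it falls back to
// reporting at the load call site, like `err_in_bytes`.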
/// Format a user-facing error message for an XML-like file format.
pub fn format_xml_like_error(
format: &str,

View File

@ -1,10 +1,10 @@
use ecow::EcoVec;
use typst_syntax::Spanned;
use crate::diag::{bail, SourceDiagnostic, SourceResult};
use crate::diag::{bail, LineCol, ReportPos, SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos};
use crate::loading::{DataSource, Load, Loaded, Readable};
/// Reads structured data from a CSV file.
///
@ -176,7 +176,9 @@ fn format_csv_error(
})
.unwrap_or(LineCol::one_based(line, 1).into());
match err.kind() {
::csv::ErrorKind::Utf8 { .. } => data.err_in_text(pos, msg, "file is not valid utf-8"),
::csv::ErrorKind::Utf8 { .. } => {
data.err_in_text(pos, msg, "file is not valid utf-8")
}
::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
let err =
format!("found {len} instead of {expected_len} fields in line {line}");

View File

@ -1,10 +1,10 @@
use ecow::eco_format;
use typst_syntax::Spanned;
use crate::diag::{At, SourceResult};
use crate::diag::{At, LineCol, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
use crate::loading::{DataSource, LineCol, Load, Readable};
use crate::loading::{DataSource, Load, Readable};
/// Reads structured data from a JSON file.
///

View File

@ -16,9 +16,8 @@ mod xml_;
mod yaml_;
use comemo::Tracked;
use ecow::{eco_vec, EcoString, EcoVec};
use ecow::EcoString;
use typst_syntax::{FileId, Span, Spanned};
use utf8_iter::ErrorReportingUtf8Chars;
pub use self::cbor_::*;
pub use self::csv_::*;
@ -28,7 +27,7 @@ pub use self::toml_::*;
pub use self::xml_::*;
pub use self::yaml_::*;
use crate::diag::{error, At, FileError, SourceDiagnostic, SourceResult};
use crate::diag::{At, FileError, SourceResult};
use crate::foundations::OneOrMultiple;
use crate::foundations::{cast, Bytes, Scope, Str};
use crate::World;
@ -129,6 +128,7 @@ pub struct Loaded {
}
impl Loaded {
/// FIXME: remove this?
pub fn dummy() -> Self {
Loaded::new(
typst_syntax::Spanned::new(LoadSource::Bytes, Span::detached()),
@ -142,50 +142,16 @@ impl Loaded {
pub fn as_str(&self) -> SourceResult<&str> {
self.bytes.as_str().map_err(|err| {
// TODO: should the error even be reported in the file if it's possibly binary?
let start = err.valid_up_to();
let end = start + err.error_len().unwrap_or(0);
self.err_in_text(start..end, "failed to convert to string", FileError::from(err))
// Always report this error in the source file.
self.err_in_bytes(
start..end,
"failed to convert to string",
FileError::from(err),
)
})
}
/// Report an error, possibly in an external file.
pub fn err_in_text(
&self,
pos: impl Into<ReportPos>,
msg: impl std::fmt::Display,
error: impl std::fmt::Display,
) -> EcoVec<SourceDiagnostic> {
let pos = pos.into();
let error = match self.source.v {
LoadSource::Path(file_id) => {
if let Some(range) = pos.range(self.bytes.as_slice()) {
let span = Span::from_range(file_id, range);
return eco_vec!(error!(span, "{msg} ({error})"));
}
// Either there was no range provided, or resolving the range
// from the line/column failed. If present report the possibly
// wrong line/column anyway.
let span = Span::from_range(file_id, 0..self.bytes.len());
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
let (line, col) = pair.numbers();
error!(span, "{msg} ({error} at {line}:{col})")
} else {
error!(span, "{msg} ({error})")
}
}
LoadSource::Bytes => {
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
let (line, col) = pair.numbers();
error!(self.source.span, "{msg} ({error} at {line}:{col})")
} else {
error!(self.source.span, "{msg} ({error})")
}
}
};
eco_vec![error]
}
}
/// A loaded [`DataSource`].
@ -195,142 +161,6 @@ pub enum LoadSource {
Bytes,
}
#[derive(Debug, Default)]
pub enum ReportPos {
/// Contains the range, and the 0-based line/column.
Full(std::ops::Range<usize>, LineCol),
/// Contains the range.
Range(std::ops::Range<usize>),
/// Contains the 0-based line/column.
LineCol(LineCol),
#[default]
None,
}
impl From<std::ops::Range<usize>> for ReportPos {
fn from(value: std::ops::Range<usize>) -> Self {
Self::Range(value)
}
}
impl From<LineCol> for ReportPos {
fn from(value: LineCol) -> Self {
Self::LineCol(value)
}
}
impl ReportPos {
fn range(&self, bytes: &[u8]) -> Option<std::ops::Range<usize>> {
match self {
ReportPos::Full(range, _) => Some(range.clone()),
ReportPos::Range(range) => Some(range.clone()),
&ReportPos::LineCol(pair) => pair.byte_pos(bytes).map(|i| i..i),
ReportPos::None => None,
}
}
fn line_col(&self, bytes: &[u8]) -> Option<LineCol> {
match self {
&ReportPos::Full(_, pair) => Some(pair),
ReportPos::Range(range) => LineCol::from_byte_pos(range.start, bytes),
&ReportPos::LineCol(pair) => Some(pair),
ReportPos::None => None,
}
}
}
#[derive(Clone, Copy, Debug)]
pub struct LineCol {
/// The 0-based line.
line: usize,
/// The 0-based column.
col: usize,
}
impl LineCol {
/// Constructs the line/column pair from 0-based indices.
pub fn zero_based(line: usize, col: usize) -> Self {
Self { line, col }
}
/// Constructs the line/column pair from 1-based numbers.
pub fn one_based(line: usize, col: usize) -> Self {
Self {
line: line.saturating_sub(1),
col: col.saturating_sub(1),
}
}
pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
let bytes = &bytes[..pos];
let mut line = 0;
let line_start = memchr::memchr_iter(b'\n', bytes)
.inspect(|_| line += 1)
.last()
.map(|i| i + 1)
.unwrap_or(bytes.len());
// Try to compute a column even if the string isn't valid utf-8.
let col = ErrorReportingUtf8Chars::new(&bytes[line_start..]).count();
Some(LineCol::zero_based(line, col))
}
pub fn byte_pos(&self, bytes: &[u8]) -> Option<usize> {
let line_offset = if let Some(idx) = self.line.checked_sub(1) {
memchr::memchr_iter(b'\n', bytes).nth(idx).map(|i| i + 1)?
} else {
0
};
let col_offset = col_offset(line_offset, self.col, bytes)?;
let pos = line_offset + col_offset;
Some(pos)
}
pub fn byte_range(
range: std::ops::Range<Self>,
bytes: &[u8],
) -> Option<std::ops::Range<usize>> {
let mut line_iter = memchr::memchr_iter(b'\n', bytes);
let start_line_offset = if let Some(idx) = range.start.line.checked_sub(1) {
line_iter.nth(idx).map(|i| i + 1)?
} else {
0
};
let line_delta = range.end.line - range.start.line;
let end_line_offset = if let Some(idx) = line_delta.checked_sub(1) {
line_iter.nth(idx).map(|i| i + 1)?
} else {
start_line_offset
};
let start_col_offset = col_offset(start_line_offset, range.start.col, bytes)?;
let end_col_offset = col_offset(end_line_offset, range.end.col, bytes)?;
let start = start_line_offset + start_col_offset;
let end = end_line_offset + end_col_offset;
Some(start..end)
}
pub fn numbers(&self) -> (usize, usize) {
(self.line + 1, self.col + 1)
}
}
fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option<usize> {
let line = &bytes[line_offset..];
// TODO: streaming-utf8 decoding ignore invalid characters
// might neeed to update error reporting too (use utf8_iter)
if let Some(idx) = col.checked_sub(1) {
// Try to compute position even if the string isn't valid utf-8.
let mut iter = ErrorReportingUtf8Chars::new(line);
_ = iter.nth(idx)?;
Some(line.len() - iter.as_slice().len())
} else {
Some(0)
}
}
/// A value that can be read from a file.
#[derive(Debug, Clone, PartialEq, Hash)]
pub enum Readable {

View File

@ -1,10 +1,10 @@
use ecow::{eco_format, EcoVec};
use typst_syntax::Spanned;
use crate::diag::{At, SourceDiagnostic, SourceResult};
use crate::diag::{At, ReportPos, SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
use crate::loading::{Loaded, DataSource, Load, Readable, ReportPos};
use crate::loading::{DataSource, Load, Loaded, Readable};
/// Reads structured data from a TOML file.
///
@ -69,7 +69,10 @@ impl toml {
}
/// Format the user-facing TOML error message.
fn format_toml_error(data: &Loaded, error: ::toml::de::Error) -> EcoVec<SourceDiagnostic> {
fn format_toml_error(
data: &Loaded,
error: ::toml::de::Error,
) -> EcoVec<SourceDiagnostic> {
let pos = error.span().map(ReportPos::Range).unwrap_or_default();
data.err_in_text(pos, "failed to parse TOML", error.message())
}

View File

@ -5,7 +5,7 @@ use typst_syntax::Spanned;
use crate::diag::{format_xml_like_error, SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
use crate::loading::{Loaded, DataSource, Load, Readable};
use crate::loading::{DataSource, Load, Loaded, Readable};
/// Reads structured data from an XML file.
///

View File

@ -1,10 +1,10 @@
use ecow::{eco_format, EcoVec};
use typst_syntax::Spanned;
use crate::diag::{At, SourceDiagnostic, SourceResult};
use crate::diag::{At, LineCol, ReportPos, SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{func, scope, Str, Value};
use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos};
use crate::loading::{DataSource, Load, Loaded, Readable};
/// Reads structured data from a YAML file.
///

View File

@ -20,7 +20,8 @@ use typst_syntax::{Span, Spanned};
use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr};
use crate::diag::{
bail, error, At, HintedStrResult, SourceDiagnostic, SourceResult, StrResult,
bail, error, At, HintedStrResult, ReportPos, SourceDiagnostic, SourceResult,
StrResult,
};
use crate::engine::{Engine, Sink};
use crate::foundations::{
@ -33,7 +34,7 @@ use crate::layout::{
BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem,
Sides, Sizing, TrackSizings,
};
use crate::loading::{format_yaml_error, Loaded, DataSource, Load, LoadSource, ReportPos};
use crate::loading::{format_yaml_error, DataSource, Load, LoadSource, Loaded};
use crate::model::{
CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem,
Url,
@ -480,7 +481,9 @@ impl CslStyle {
typst_utils::hash128(&(TypeId::of::<Bytes>(), data)),
)))
})
.map_err(|err| data.err_in_text(ReportPos::None, "failed to load CSL style", err))
.map_err(|err| {
data.err_in_text(ReportPos::None, "failed to load CSL style", err)
})
}
/// Get the underlying independent style.

View File

@ -11,7 +11,7 @@ use typst_utils::ManuallyHash;
use unicode_segmentation::UnicodeSegmentation;
use super::Lang;
use crate::diag::{SourceDiagnostic, SourceResult};
use crate::diag::{LineCol, ReportPos, SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{
cast, elem, scope, Content, Derived, NativeElement, OneOrMultiple, Packed, PlainText,
@ -19,7 +19,7 @@ use crate::foundations::{
};
use crate::html::{tag, HtmlElem};
use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
use crate::loading::{DataSource, LineCol, Load, Loaded, ReportPos};
use crate::loading::{DataSource, Load, Loaded};
use crate::model::{Figurable, ParElem};
use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize};
use crate::visualize::Color;

View File

@ -15,6 +15,7 @@ readme = { workspace = true }
[dependencies]
typst-timing = { workspace = true }
typst-utils = { workspace = true }
comemo = { workspace = true }
ecow = { workspace = true }
serde = { workspace = true }
toml = { workspace = true }

View File

@ -7,6 +7,7 @@ mod file;
mod highlight;
mod kind;
mod lexer;
mod lines;
mod node;
mod parser;
mod path;
@ -22,6 +23,7 @@ pub use self::lexer::{
is_id_continue, is_id_start, is_ident, is_newline, is_valid_label_literal_id,
link_prefix, split_newlines,
};
pub use self::lines::Lines;
pub use self::node::{LinkedChildren, LinkedNode, Side, SyntaxError, SyntaxNode};
pub use self::parser::{parse, parse_code, parse_math};
pub use self::path::VirtualPath;

View File

@ -0,0 +1,407 @@
use std::hash::{Hash, Hasher};
use std::iter::zip;
use std::ops::Range;
use std::str::Utf8Error;
use std::sync::Arc;
use crate::is_newline;
/// A piece of text and precomputed metadata about its lines.
#[derive(Clone)]
pub struct Lines<S>(Arc<Repr<S>>);
#[derive(Clone)]
struct Repr<S> {
lines: Vec<Line>,
str: S,
}
/// Metadata about a line.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Line {
/// The UTF-8 byte offset where the line starts.
byte_idx: usize,
/// The UTF-16 codepoint offset where the line starts.
utf16_idx: usize,
}
impl<S: AsRef<str>> Lines<S> {
/// TODO: memoize this?
pub fn new(str: S) -> Self {
let lines = lines(str.as_ref());
Lines(Arc::new(Repr { lines, str }))
}
pub fn text(&self) -> &str {
self.0.str.as_ref()
}
/// Get the length of the text in UTF-8 encoded bytes.
pub fn len_bytes(&self) -> usize {
self.0.str.as_ref().len()
}
/// Get the length of the text in UTF-16 code units.
pub fn len_utf16(&self) -> usize {
let last = self.0.lines.last().unwrap();
last.utf16_idx + len_utf16(&self.text()[last.byte_idx..])
}
/// Get the length of the text in lines.
pub fn len_lines(&self) -> usize {
self.0.lines.len()
}
/// Return the index of the UTF-16 code unit at the byte index.
pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
let line_idx = self.byte_to_line(byte_idx)?;
let line = self.0.lines.get(line_idx)?;
let head = self.text().get(line.byte_idx..byte_idx)?;
Some(line.utf16_idx + len_utf16(head))
}
/// Return the index of the line that contains the given byte index.
pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
(byte_idx <= self.text().len()).then(|| {
match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
Ok(i) => i,
Err(i) => i - 1,
}
})
}
/// Return the index of the column at the byte index.
///
/// The column is defined as the number of characters in the line before the
/// byte index.
pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
let line = self.byte_to_line(byte_idx)?;
let start = self.line_to_byte(line)?;
let head = self.text().get(start..byte_idx)?;
Some(head.chars().count())
}
/// Return the index of the line and column at the byte index.
pub fn byte_to_line_column(&self, byte_idx: usize) -> Option<(usize, usize)> {
let line = self.byte_to_line(byte_idx)?;
let start = self.line_to_byte(line)?;
let head = self.text().get(start..byte_idx)?;
let col = head.chars().count();
Some((line, col))
}
/// Return the byte index at the UTF-16 code unit.
pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
let line = self.0.lines.get(
match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
Ok(i) => i,
Err(i) => i - 1,
},
)?;
let text = self.text();
let mut k = line.utf16_idx;
for (i, c) in text[line.byte_idx..].char_indices() {
if k >= utf16_idx {
return Some(line.byte_idx + i);
}
k += c.len_utf16();
}
(k == utf16_idx).then_some(text.len())
}
/// Return the byte position at which the given line starts.
pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
self.0.lines.get(line_idx).map(|line| line.byte_idx)
}
/// Return the range which encloses the given line.
pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
let start = self.line_to_byte(line_idx)?;
let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text().len());
Some(start..end)
}
/// Return the byte index of the given (line, column) pair.
///
/// The column defines the number of characters to go beyond the start of
/// the line.
pub fn line_column_to_byte(
&self,
line_idx: usize,
column_idx: usize,
) -> Option<usize> {
let range = self.line_to_range(line_idx)?;
let line = self.text().get(range.clone())?;
let mut chars = line.chars();
for _ in 0..column_idx {
chars.next();
}
Some(range.start + (line.len() - chars.as_str().len()))
}
}
impl Lines<String> {
/// Tries to interpret the bytes as UTF-8 and compute their line metadata.
#[comemo::memoize]
pub fn from_bytes(bytes: &[u8]) -> Result<Lines<String>, Utf8Error> {
let str = std::str::from_utf8(bytes)?;
Ok(Lines::new(str.to_string()))
}
/// Fully replace the text.
///
/// This performs a naive (suffix/prefix-based) diff of the old and new text
/// to produce the smallest single edit that transforms old into new and
/// then calls [`edit`](Self::edit) with it.
///
/// Returns whether any changes were made.
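///
/// An illustrative example of the intended behavior (a sketch added for
/// clarity, not taken from the original sources):
///
/// ```
/// # use typst_syntax::Lines;
/// let mut lines = Lines::new(String::from("hello\nworld"));
/// // The prefix/suffix diff narrows the edit down to replacing "world".
/// assert!(lines.replace("hello\nthere"));
/// assert_eq!(lines.text(), "hello\nthere");
/// // Replacing with identical text makes no change.
/// assert!(!lines.replace("hello\nthere"));
/// ```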
pub fn replace(&mut self, new: &str) -> bool {
let Some((prefix, suffix)) = self.replacement_range(new) else {
return false;
};
let old = self.text();
let replace = prefix..old.len() - suffix;
let with = &new[prefix..new.len() - suffix];
self.edit(replace, with);
true
}
/// Returns the common prefix and suffix lengths.
/// Returns [`None`] if the old and new strings are equal.
pub fn replacement_range(&self, new: &str) -> Option<(usize, usize)> {
let old = self.text();
let mut prefix =
zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
if prefix == old.len() && prefix == new.len() {
return None;
}
while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
prefix -= 1;
}
let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
.take_while(|(x, y)| x == y)
.count();
while !old.is_char_boundary(old.len() - suffix)
|| !new.is_char_boundary(new.len() - suffix)
{
suffix += 1;
}
Some((prefix, suffix))
}
/// Edit the text by replacing the given range.
///
/// The method panics if the `replace` range is out of bounds.
#[track_caller]
pub fn edit(&mut self, replace: Range<usize>, with: &str) {
let start_byte = replace.start;
let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
let line = self.byte_to_line(start_byte).unwrap();
let inner = Arc::make_mut(&mut self.0);
// Update the text itself.
inner.str.replace_range(replace.clone(), with);
// Remove invalidated line starts.
inner.lines.truncate(line + 1);
// Handle adjoining of \r and \n.
if inner.str[..start_byte].ends_with('\r') && with.starts_with('\n') {
inner.lines.pop();
}
// Recalculate the line starts after the edit.
inner
.lines
.extend(lines_from(start_byte, start_utf16, &inner.str[start_byte..]));
}
}
/// Create a line vector.
fn lines(text: &str) -> Vec<Line> {
std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
.chain(lines_from(0, 0, text))
.collect()
}
/// Compute a line iterator from an offset.
fn lines_from(
byte_offset: usize,
utf16_offset: usize,
text: &str,
) -> impl Iterator<Item = Line> + '_ {
let mut s = unscanny::Scanner::new(text);
let mut utf16_idx = utf16_offset;
std::iter::from_fn(move || {
s.eat_until(|c: char| {
utf16_idx += c.len_utf16();
is_newline(c)
});
if s.done() {
return None;
}
if s.eat() == Some('\r') && s.eat_if('\n') {
utf16_idx += 1;
}
Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
})
}
/// The number of code units this string would use if it were encoded in
/// UTF-16. This runs in linear time.
fn len_utf16(string: &str) -> usize {
string.chars().map(char::len_utf16).sum()
}
#[cfg(test)]
mod tests {
use super::*;
const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
#[test]
fn test_source_file_new() {
let lines = Lines::new(TEST);
assert_eq!(
lines.0.lines,
[
Line { byte_idx: 0, utf16_idx: 0 },
Line { byte_idx: 7, utf16_idx: 6 },
Line { byte_idx: 15, utf16_idx: 12 },
Line { byte_idx: 18, utf16_idx: 15 },
]
);
}
#[test]
fn test_source_file_pos_to_line() {
let lines = Lines::new(TEST);
assert_eq!(lines.byte_to_line(0), Some(0));
assert_eq!(lines.byte_to_line(2), Some(0));
assert_eq!(lines.byte_to_line(6), Some(0));
assert_eq!(lines.byte_to_line(7), Some(1));
assert_eq!(lines.byte_to_line(8), Some(1));
assert_eq!(lines.byte_to_line(12), Some(1));
assert_eq!(lines.byte_to_line(21), Some(3));
assert_eq!(lines.byte_to_line(22), None);
}
#[test]
fn test_source_file_pos_to_column() {
let lines = Lines::new(TEST);
assert_eq!(lines.byte_to_column(0), Some(0));
assert_eq!(lines.byte_to_column(2), Some(1));
assert_eq!(lines.byte_to_column(6), Some(5));
assert_eq!(lines.byte_to_column(7), Some(0));
assert_eq!(lines.byte_to_column(8), Some(1));
assert_eq!(lines.byte_to_column(12), Some(2));
}
#[test]
fn test_source_file_utf16() {
#[track_caller]
fn roundtrip(lines: &Lines<&str>, byte_idx: usize, utf16_idx: usize) {
let middle = lines.byte_to_utf16(byte_idx).unwrap();
let result = lines.utf16_to_byte(middle).unwrap();
assert_eq!(middle, utf16_idx);
assert_eq!(result, byte_idx);
}
let lines = Lines::new(TEST);
roundtrip(&lines, 0, 0);
roundtrip(&lines, 2, 1);
roundtrip(&lines, 3, 2);
roundtrip(&lines, 8, 7);
roundtrip(&lines, 12, 9);
roundtrip(&lines, 21, 18);
assert_eq!(lines.byte_to_utf16(22), None);
assert_eq!(lines.utf16_to_byte(19), None);
}
#[test]
fn test_source_file_roundtrip() {
#[track_caller]
fn roundtrip(lines: &Lines<&str>, byte_idx: usize) {
let line = lines.byte_to_line(byte_idx).unwrap();
let column = lines.byte_to_column(byte_idx).unwrap();
let result = lines.line_column_to_byte(line, column).unwrap();
assert_eq!(result, byte_idx);
}
let lines = Lines::new(TEST);
roundtrip(&lines, 0);
roundtrip(&lines, 7);
roundtrip(&lines, 12);
roundtrip(&lines, 21);
}
#[test]
fn test_source_file_edit() {
// This tests only the non-parser parts. The reparsing itself is
// tested separately.
#[track_caller]
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
let reference = Lines::new(after);
let mut edited = Lines::new(prev.to_string());
edited.edit(range.clone(), with);
assert_eq!(edited.text(), reference.text());
assert_eq!(edited.0.lines, reference.0.lines);
let mut replaced = Lines::new(prev.to_string());
replaced.replace(&{
let mut s = prev.to_string();
s.replace_range(range, with);
s
});
assert_eq!(replaced.text(), reference.text());
assert_eq!(replaced.0.lines, reference.0.lines);
}
// Test inserting at the beginning.
test("abc\n", 0..0, "hi\n", "hi\nabc\n");
test("\nabc", 0..0, "hi\r", "hi\r\nabc");
// Test editing in the middle.
test(TEST, 4..16, "", "ä\tc❌i\rjkl");
// Test appending.
test("abc\ndef", 7..7, "hi", "abc\ndefhi");
test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
// Test appending with adjoining \r and \n.
test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
// Test removing everything.
test(TEST, 0..21, "", "");
}
}
impl<S: Hash> Hash for Lines<S> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.0.str.hash(state);
}
}
impl<S: AsRef<str>> AsRef<str> for Lines<S> {
fn as_ref(&self) -> &str {
self.0.str.as_ref()
}
}

View File

@ -2,14 +2,14 @@
use std::fmt::{self, Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::iter::zip;
use std::ops::Range;
use std::sync::Arc;
use typst_utils::LazyHash;
use crate::lines::Lines;
use crate::reparser::reparse;
use crate::{is_newline, parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
use crate::{parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
/// A source file.
///
@ -24,9 +24,8 @@ pub struct Source(Arc<Repr>);
#[derive(Clone)]
struct Repr {
id: FileId,
text: LazyHash<String>,
root: LazyHash<SyntaxNode>,
lines: Vec<Line>,
lines: LazyHash<Lines<String>>,
}
impl Source {
@ -37,8 +36,7 @@ impl Source {
root.numberize(id, Span::FULL).unwrap();
Self(Arc::new(Repr {
id,
lines: lines(&text),
text: LazyHash::new(text),
lines: LazyHash::new(Lines::new(text)),
root: LazyHash::new(root),
}))
}
@ -58,9 +56,14 @@ impl Source {
self.0.id
}
/// The line metadata of the source.
pub fn lines(&self) -> Lines<String> {
Lines::clone(&self.0.lines)
}
/// The whole source as a string slice.
pub fn text(&self) -> &str {
&self.0.text
&self.0.lines.text()
}
/// Slice out the part of the source code enclosed by the range.
@ -77,29 +80,12 @@ impl Source {
/// Returns the range in the new source that was ultimately reparsed.
pub fn replace(&mut self, new: &str) -> Range<usize> {
let _scope = typst_timing::TimingScope::new("replace source");
let old = self.text();
let mut prefix =
zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
if prefix == old.len() && prefix == new.len() {
let Some((prefix, suffix)) = self.0.lines.replacement_range(new) else {
return 0..0;
}
while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
prefix -= 1;
}
let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
.take_while(|(x, y)| x == y)
.count();
while !old.is_char_boundary(old.len() - suffix)
|| !new.is_char_boundary(new.len() - suffix)
{
suffix += 1;
}
};
let old = self.text();
let replace = prefix..old.len() - suffix;
let with = &new[prefix..new.len() - suffix];
self.edit(replace, with)
@ -112,48 +98,28 @@ impl Source {
/// The method panics if the `replace` range is out of bounds.
#[track_caller]
pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> {
let start_byte = replace.start;
let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
let line = self.byte_to_line(start_byte).unwrap();
let inner = Arc::make_mut(&mut self.0);
// Update the text itself.
inner.text.replace_range(replace.clone(), with);
// Remove invalidated line starts.
inner.lines.truncate(line + 1);
// Handle adjoining of \r and \n.
if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') {
inner.lines.pop();
}
// Recalculate the line starts after the edit.
inner.lines.extend(lines_from(
start_byte,
start_utf16,
&inner.text[start_byte..],
));
// Update the text and lines.
inner.lines.edit(replace.clone(), with);
// Incrementally reparse the replaced range.
reparse(&mut inner.root, &inner.text, replace, with.len())
reparse(&mut inner.root, inner.lines.text(), replace, with.len())
}
/// Get the length of the file in UTF-8 encoded bytes.
pub fn len_bytes(&self) -> usize {
self.text().len()
self.0.lines.len_bytes()
}
/// Get the length of the file in UTF-16 code units.
pub fn len_utf16(&self) -> usize {
let last = self.0.lines.last().unwrap();
last.utf16_idx + len_utf16(&self.0.text[last.byte_idx..])
self.0.lines.len_utf16()
}
/// Get the length of the file in lines.
pub fn len_lines(&self) -> usize {
self.0.lines.len()
self.0.lines.len_lines()
}
/// Find the node with the given span.
@ -171,85 +137,6 @@ impl Source {
pub fn range(&self, span: Span) -> Option<Range<usize>> {
Some(self.find(span)?.range())
}
/// Return the index of the UTF-16 code unit at the byte index.
pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
let line_idx = self.byte_to_line(byte_idx)?;
let line = self.0.lines.get(line_idx)?;
let head = self.0.text.get(line.byte_idx..byte_idx)?;
Some(line.utf16_idx + len_utf16(head))
}
/// Return the index of the line that contains the given byte index.
pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
(byte_idx <= self.0.text.len()).then(|| {
match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
Ok(i) => i,
Err(i) => i - 1,
}
})
}
/// Return the index of the column at the byte index.
///
/// The column is defined as the number of characters in the line before the
/// byte index.
pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
let line = self.byte_to_line(byte_idx)?;
let start = self.line_to_byte(line)?;
let head = self.get(start..byte_idx)?;
Some(head.chars().count())
}
/// Return the byte index at the UTF-16 code unit.
pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
let line = self.0.lines.get(
match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
Ok(i) => i,
Err(i) => i - 1,
},
)?;
let mut k = line.utf16_idx;
for (i, c) in self.0.text[line.byte_idx..].char_indices() {
if k >= utf16_idx {
return Some(line.byte_idx + i);
}
k += c.len_utf16();
}
(k == utf16_idx).then_some(self.0.text.len())
}
/// Return the byte position at which the given line starts.
pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
self.0.lines.get(line_idx).map(|line| line.byte_idx)
}
/// Return the range which encloses the given line.
pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
let start = self.line_to_byte(line_idx)?;
let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len());
Some(start..end)
}
/// Return the byte index of the given (line, column) pair.
///
/// The column defines the number of characters to go beyond the start of
/// the line.
pub fn line_column_to_byte(
&self,
line_idx: usize,
column_idx: usize,
) -> Option<usize> {
let range = self.line_to_range(line_idx)?;
let line = self.get(range.clone())?;
let mut chars = line.chars();
for _ in 0..column_idx {
chars.next();
}
Some(range.start + (line.len() - chars.as_str().len()))
}
}
impl Debug for Source {
@ -261,7 +148,7 @@ impl Debug for Source {
impl Hash for Source {
fn hash<H: Hasher>(&self, state: &mut H) {
self.0.id.hash(state);
self.0.text.hash(state);
self.0.lines.hash(state);
self.0.root.hash(state);
}
}
@ -271,176 +158,3 @@ impl AsRef<str> for Source {
self.text()
}
}
/// Metadata about a line.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
struct Line {
/// The UTF-8 byte offset where the line starts.
byte_idx: usize,
/// The UTF-16 codepoint offset where the line starts.
utf16_idx: usize,
}
/// Create a line vector.
fn lines(text: &str) -> Vec<Line> {
std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
.chain(lines_from(0, 0, text))
.collect()
}
/// Compute a line iterator from an offset.
fn lines_from(
byte_offset: usize,
utf16_offset: usize,
text: &str,
) -> impl Iterator<Item = Line> + '_ {
let mut s = unscanny::Scanner::new(text);
let mut utf16_idx = utf16_offset;
std::iter::from_fn(move || {
s.eat_until(|c: char| {
utf16_idx += c.len_utf16();
is_newline(c)
});
if s.done() {
return None;
}
if s.eat() == Some('\r') && s.eat_if('\n') {
utf16_idx += 1;
}
Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
})
}
/// The number of code units this string would use if it was encoded in
/// UTF16. This runs in linear time.
fn len_utf16(string: &str) -> usize {
string.chars().map(char::len_utf16).sum()
}
#[cfg(test)]
mod tests {
use super::*;
const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
#[test]
fn test_source_file_new() {
let source = Source::detached(TEST);
assert_eq!(
source.0.lines,
[
Line { byte_idx: 0, utf16_idx: 0 },
Line { byte_idx: 7, utf16_idx: 6 },
Line { byte_idx: 15, utf16_idx: 12 },
Line { byte_idx: 18, utf16_idx: 15 },
]
);
}
#[test]
fn test_source_file_pos_to_line() {
let source = Source::detached(TEST);
assert_eq!(source.byte_to_line(0), Some(0));
assert_eq!(source.byte_to_line(2), Some(0));
assert_eq!(source.byte_to_line(6), Some(0));
assert_eq!(source.byte_to_line(7), Some(1));
assert_eq!(source.byte_to_line(8), Some(1));
assert_eq!(source.byte_to_line(12), Some(1));
assert_eq!(source.byte_to_line(21), Some(3));
assert_eq!(source.byte_to_line(22), None);
}
#[test]
fn test_source_file_pos_to_column() {
let source = Source::detached(TEST);
assert_eq!(source.byte_to_column(0), Some(0));
assert_eq!(source.byte_to_column(2), Some(1));
assert_eq!(source.byte_to_column(6), Some(5));
assert_eq!(source.byte_to_column(7), Some(0));
assert_eq!(source.byte_to_column(8), Some(1));
assert_eq!(source.byte_to_column(12), Some(2));
}
#[test]
fn test_source_file_utf16() {
#[track_caller]
fn roundtrip(source: &Source, byte_idx: usize, utf16_idx: usize) {
let middle = source.byte_to_utf16(byte_idx).unwrap();
let result = source.utf16_to_byte(middle).unwrap();
assert_eq!(middle, utf16_idx);
assert_eq!(result, byte_idx);
}
let source = Source::detached(TEST);
roundtrip(&source, 0, 0);
roundtrip(&source, 2, 1);
roundtrip(&source, 3, 2);
roundtrip(&source, 8, 7);
roundtrip(&source, 12, 9);
roundtrip(&source, 21, 18);
assert_eq!(source.byte_to_utf16(22), None);
assert_eq!(source.utf16_to_byte(19), None);
}
#[test]
fn test_source_file_roundtrip() {
#[track_caller]
fn roundtrip(source: &Source, byte_idx: usize) {
let line = source.byte_to_line(byte_idx).unwrap();
let column = source.byte_to_column(byte_idx).unwrap();
let result = source.line_column_to_byte(line, column).unwrap();
assert_eq!(result, byte_idx);
}
let source = Source::detached(TEST);
roundtrip(&source, 0);
roundtrip(&source, 7);
roundtrip(&source, 12);
roundtrip(&source, 21);
}
#[test]
fn test_source_file_edit() {
// This tests only the non-parser parts. The reparsing itself is
// tested separately.
#[track_caller]
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
let reference = Source::detached(after);
let mut edited = Source::detached(prev);
edited.edit(range.clone(), with);
assert_eq!(edited.text(), reference.text());
assert_eq!(edited.0.lines, reference.0.lines);
let mut replaced = Source::detached(prev);
replaced.replace(&{
let mut s = prev.to_string();
s.replace_range(range, with);
s
});
assert_eq!(replaced.text(), reference.text());
assert_eq!(replaced.0.lines, reference.0.lines);
}
// Test inserting at the beginning.
test("abc\n", 0..0, "hi\n", "hi\nabc\n");
test("\nabc", 0..0, "hi\r", "hi\r\nabc");
// Test editing in the middle.
test(TEST, 4..16, "", "ä\tc❌i\rjkl");
// Test appending.
test("abc\ndef", 7..7, "hi", "abc\ndefhi");
test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
// Test appending with adjoining \r and \n.
test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
// Test removing everything.
test(TEST, 0..21, "", "");
}
}

View File

@ -6,9 +6,11 @@ use std::str::FromStr;
use std::sync::LazyLock;
use ecow::{eco_format, EcoString};
use typst::loading::LineCol;
use typst::diag::LineCol;
use typst_syntax::package::PackageVersion;
use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
use typst_syntax::{
is_id_continue, is_ident, is_newline, FileId, Lines, Source, VirtualPath,
};
use unscanny::Scanner;
use crate::world::{read, system_path};
@ -426,11 +428,17 @@ impl<'a> Parser<'a> {
}
let start = self.parse_line_col()?;
let lines = Lines::from_bytes(text.as_ref())
.expect("errors should only be annotated in files that are human readable (valid utf-8)");
let range = if self.s.eat_if('-') {
let end = self.parse_line_col()?;
LineCol::byte_range(start..end, &text)
let (line, col) = start.indices();
let start = lines.line_column_to_byte(line, col);
let (line, col) = end.indices();
let end = lines.line_column_to_byte(line, col);
Option::zip(start, end).map(|(a, b)| a..b)
} else {
start.byte_pos(&text).map(|i| i..i)
let (line, col) = start.indices();
lines.line_column_to_byte(line, col).map(|i| i..i)
};
if range.is_none() {
self.error("range is out of bounds");
@ -484,13 +492,13 @@ impl<'a> Parser<'a> {
let line_idx = (line_idx_in_test + comments).checked_add_signed(line_delta)?;
let column_idx = if column < 0 {
// Negative column index is from the back.
let range = source.line_to_range(line_idx)?;
let range = source.lines().line_to_range(line_idx)?;
text[range].chars().count().saturating_add_signed(column)
} else {
usize::try_from(column).ok()?.checked_sub(1)?
};
source.line_column_to_byte(line_idx, column_idx)
source.lines().line_column_to_byte(line_idx, column_idx)
}
/// Parse a number.

View File

@ -7,11 +7,10 @@ use tiny_skia as sk;
use typst::diag::{SourceDiagnostic, Warned};
use typst::html::HtmlDocument;
use typst::layout::{Abs, Frame, FrameItem, PagedDocument, Transform};
use typst::loading::LineCol;
use typst::visualize::Color;
use typst::{Document, World, WorldExt};
use typst_pdf::PdfOptions;
use typst_syntax::FileId;
use typst_syntax::{FileId, Lines};
use crate::collect::{Attr, FileSize, NoteKind, Test};
use crate::logger::TestResult;
@ -329,12 +328,12 @@ impl<'a> Runner<'a> {
fn format_pos(&self, file: FileId, pos: usize) -> String {
let res = if file != self.test.source.id() {
let bytes = self.world.file(file).unwrap();
LineCol::from_byte_pos(pos, &bytes).map(|l| l.numbers())
let lines = Lines::from_bytes(&bytes).unwrap();
lines.byte_to_line_column(pos).map(|(line, col)| (line + 1, col + 1))
} else {
let line = self.test.source.byte_to_line(pos).map(|l| l + 1);
let col = (self.test.source.byte_to_column(pos))
.map(|c| self.test.pos.line + c + 1);
Option::zip(line, col)
(self.test.source.lines())
.byte_to_line_column(pos)
.map(|(line, col)| (line + 1, col + 1))
};
let Some((line, col)) = res else {
return "oob".into();