diff --git a/Cargo.lock b/Cargo.lock index 2778ec489..64276b901 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3112,7 +3112,6 @@ dependencies = [ "unicode-segmentation", "unscanny", "usvg", - "utf8_iter", "wasmi", "xmlwriter", ] @@ -3201,6 +3200,7 @@ dependencies = [ name = "typst-syntax" version = "0.13.1" dependencies = [ + "comemo", "ecow", "serde", "toml", diff --git a/Cargo.toml b/Cargo.toml index b548245fa..b4890e3c1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,7 +135,6 @@ unicode-segmentation = "1" unscanny = "0.1" ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] } usvg = { version = "0.45", default-features = false, features = ["text"] } -utf8_iter = "1.0.4" walkdir = "2" wasmi = "0.40.0" web-sys = "0.3" diff --git a/crates/typst-cli/src/compile.rs b/crates/typst-cli/src/compile.rs index 4edb4c323..207bb7d09 100644 --- a/crates/typst-cli/src/compile.rs +++ b/crates/typst-cli/src/compile.rs @@ -16,7 +16,7 @@ use typst::diag::{ use typst::foundations::{Datetime, Smart}; use typst::html::HtmlDocument; use typst::layout::{Frame, Page, PageRanges, PagedDocument}; -use typst::syntax::{FileId, Source, Span}; +use typst::syntax::{FileId, Lines, Span}; use typst::WorldExt; use typst_pdf::{PdfOptions, PdfStandards, Timestamp}; @@ -696,7 +696,7 @@ fn label(world: &SystemWorld, span: Span) -> Option> { impl<'a> codespan_reporting::files::Files<'a> for SystemWorld { type FileId = FileId; type Name = String; - type Source = Source; + type Source = Lines; fn name(&'a self, id: FileId) -> CodespanResult { let vpath = id.vpath(); diff --git a/crates/typst-cli/src/timings.rs b/crates/typst-cli/src/timings.rs index 9f017dc12..3d10bbc67 100644 --- a/crates/typst-cli/src/timings.rs +++ b/crates/typst-cli/src/timings.rs @@ -85,6 +85,6 @@ fn resolve_span(world: &SystemWorld, span: Span) -> Option<(String, u32)> { let id = span.id()?; let source = world.source(id).ok()?; let range = source.range(span)?; - let line = 
source.byte_to_line(range.start)?; + let line = source.lines().byte_to_line(range.start)?; Some((format!("{id:?}"), line as u32 + 1)) } diff --git a/crates/typst-cli/src/world.rs b/crates/typst-cli/src/world.rs index 2da03d4d5..be3526ad2 100644 --- a/crates/typst-cli/src/world.rs +++ b/crates/typst-cli/src/world.rs @@ -9,7 +9,7 @@ use ecow::{eco_format, EcoString}; use parking_lot::Mutex; use typst::diag::{FileError, FileResult}; use typst::foundations::{Bytes, Datetime, Dict, IntoValue}; -use typst::syntax::{FileId, Source, VirtualPath}; +use typst::syntax::{FileId, Lines, Source, VirtualPath}; use typst::text::{Font, FontBook}; use typst::utils::LazyHash; use typst::{Library, World}; @@ -183,8 +183,18 @@ impl SystemWorld { /// Lookup a source file by id. #[track_caller] - pub fn lookup(&self, id: FileId) -> Source { - self.source(id).expect("file id does not point to any source file") + pub fn lookup(&self, id: FileId) -> Lines { + self.slot(id, |slot| { + if let Some(source) = slot.source.get() { + let source = source.as_ref().expect("file is not valid"); + source.lines() + } else if let Some(bytes) = slot.file.get() { + let bytes = bytes.as_ref().expect("file is not valid"); + Lines::from_bytes(bytes.as_slice()).expect("file is not valid utf-8") + } else { + panic!("file id does not point to any source file"); + } + }) } } @@ -339,6 +349,11 @@ impl SlotCell { self.accessed = false; } + /// Gets the contents of the cell. + fn get(&self) -> Option<&FileResult> { + self.data.as_ref() + } + /// Gets the contents of the cell or initialize them. 
fn get_or_init( &mut self, diff --git a/crates/typst-library/Cargo.toml b/crates/typst-library/Cargo.toml index f4b219882..b210637a8 100644 --- a/crates/typst-library/Cargo.toml +++ b/crates/typst-library/Cargo.toml @@ -66,7 +66,6 @@ unicode-normalization = { workspace = true } unicode-segmentation = { workspace = true } unscanny = { workspace = true } usvg = { workspace = true } -utf8_iter = { workspace = true } wasmi = { workspace = true } xmlwriter = { workspace = true } diff --git a/crates/typst-library/src/diag.rs b/crates/typst-library/src/diag.rs index 47c2b6b50..1d8018afd 100644 --- a/crates/typst-library/src/diag.rs +++ b/crates/typst-library/src/diag.rs @@ -9,10 +9,10 @@ use std::string::FromUtf8Error; use comemo::Tracked; use ecow::{eco_vec, EcoVec}; use typst_syntax::package::{PackageSpec, PackageVersion}; -use typst_syntax::{Span, Spanned, SyntaxError}; +use typst_syntax::{Lines, Span, Spanned, SyntaxError}; use crate::engine::Engine; -use crate::loading::{Loaded, LineCol}; +use crate::loading::{LoadSource, Loaded}; use crate::{World, WorldExt}; /// Early-return with a [`StrResult`] or [`SourceResult`]. @@ -569,6 +569,144 @@ impl From for EcoString { } } +impl Loaded { + /// Report an error, possibly in an external file. + pub fn err_in_text( + &self, + pos: impl Into, + msg: impl std::fmt::Display, + error: impl std::fmt::Display, + ) -> EcoVec { + let lines = Lines::from_bytes(&self.bytes); + match (self.source.v, lines) { + // Only report an error in an external file, + // if it is human readable (valid utf-8). + (LoadSource::Path(file_id), Ok(lines)) => { + let pos = pos.into(); + if let Some(range) = pos.range(&lines) { + let span = Span::from_range(file_id, range); + return eco_vec!(error!(span, "{msg} ({error})")); + } + + // Either `ReportPos::None` was provided, or resolving the range + // from the line/column failed. If present report the possibly + // wrong line/column in the error message anyway. 
+ let span = Span::from_range(file_id, 0..self.bytes.len()); + let error = if let Some(pair) = pos.line_col(&lines) { + let (line, col) = pair.numbers(); + error!(span, "{msg} ({error} at {line}:{col})") + } else { + error!(span, "{msg} ({error})") + }; + eco_vec![error] + } + _ => self.err_in_bytes(pos, msg, error), + } + } + + /// Report an error, possibly in an external file. + pub fn err_in_bytes( + &self, + pos: impl Into, + msg: impl std::fmt::Display, + error: impl std::fmt::Display, + ) -> EcoVec { + let pos = pos.into(); + let result = Lines::from_bytes(&self.bytes).ok().and_then(|l| pos.line_col(&l)); + let error = if let Some(pair) = result { + let (line, col) = pair.numbers(); + error!(self.source.span, "{msg} ({error} at {line}:{col})") + } else { + error!(self.source.span, "{msg} ({error})") + }; + eco_vec![error] + } +} + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub enum ReportPos { + /// Contains the range, and the 0-based line/column. + Full(std::ops::Range, LineCol), + /// Contains the range. + Range(std::ops::Range), + /// Contains the 0-based line/column. 
+ LineCol(LineCol), + #[default] + None, +} + +impl From> for ReportPos { + fn from(value: std::ops::Range) -> Self { + Self::Range(value) + } +} + +impl From for ReportPos { + fn from(value: LineCol) -> Self { + Self::LineCol(value) + } +} + +impl ReportPos { + fn range(&self, lines: &Lines) -> Option> { + match self { + ReportPos::Full(range, _) => Some(range.clone()), + ReportPos::Range(range) => Some(range.clone()), + &ReportPos::LineCol(pair) => { + let i = lines.line_column_to_byte(pair.line, pair.col)?; + Some(i..i) + } + ReportPos::None => None, + } + } + + fn line_col(&self, lines: &Lines) -> Option { + match self { + &ReportPos::Full(_, pair) => Some(pair), + ReportPos::Range(range) => { + let (line, col) = lines.byte_to_line_column(range.start)?; + Some(LineCol::zero_based(line, col)) + } + &ReportPos::LineCol(pair) => Some(pair), + ReportPos::None => None, + } + } +} + +/// A line/column pair. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct LineCol { + /// The 0-based line. + line: usize, + /// The 0-based column. + col: usize, +} + +impl LineCol { + /// Constructs the line/column pair from 0-based indices. + pub fn zero_based(line: usize, col: usize) -> Self { + Self { line, col } + } + + /// Constructs the line/column pair from 1-based numbers. + pub fn one_based(line: usize, col: usize) -> Self { + Self { + line: line.saturating_sub(1), + col: col.saturating_sub(1), + } + } + + /// Returns the 0-based line/column indices. + pub fn indices(&self) -> (usize, usize) { + (self.line, self.col) + } + + /// Returns the 1-based line/column numbers. + pub fn numbers(&self) -> (usize, usize) { + (self.line + 1, self.col + 1) + } +} + /// Format a user-facing error message for an XML-like file format. 
pub fn format_xml_like_error( format: &str, diff --git a/crates/typst-library/src/loading/csv.rs b/crates/typst-library/src/loading/csv.rs index ca7f6408b..4c7f103aa 100644 --- a/crates/typst-library/src/loading/csv.rs +++ b/crates/typst-library/src/loading/csv.rs @@ -1,10 +1,10 @@ use ecow::EcoVec; use typst_syntax::Spanned; -use crate::diag::{bail, SourceDiagnostic, SourceResult}; +use crate::diag::{bail, LineCol, ReportPos, SourceDiagnostic, SourceResult}; use crate::engine::Engine; use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value}; -use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos}; +use crate::loading::{DataSource, Load, Loaded, Readable}; /// Reads structured data from a CSV file. /// @@ -176,7 +176,9 @@ fn format_csv_error( }) .unwrap_or(LineCol::one_based(line, 1).into()); match err.kind() { - ::csv::ErrorKind::Utf8 { .. } => data.err_in_text(pos, msg, "file is not valid utf-8"), + ::csv::ErrorKind::Utf8 { .. } => { + data.err_in_text(pos, msg, "file is not valid utf-8") + } ::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => { let err = format!("found {len} instead of {expected_len} fields in line {line}"); diff --git a/crates/typst-library/src/loading/json.rs b/crates/typst-library/src/loading/json.rs index c2204c69e..a6a2f5b59 100644 --- a/crates/typst-library/src/loading/json.rs +++ b/crates/typst-library/src/loading/json.rs @@ -1,10 +1,10 @@ use ecow::eco_format; use typst_syntax::Spanned; -use crate::diag::{At, SourceResult}; +use crate::diag::{At, LineCol, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::{DataSource, LineCol, Load, Readable}; +use crate::loading::{DataSource, Load, Readable}; /// Reads structured data from a JSON file. 
/// diff --git a/crates/typst-library/src/loading/mod.rs b/crates/typst-library/src/loading/mod.rs index d9ba15526..53d5e9290 100644 --- a/crates/typst-library/src/loading/mod.rs +++ b/crates/typst-library/src/loading/mod.rs @@ -16,9 +16,8 @@ mod xml_; mod yaml_; use comemo::Tracked; -use ecow::{eco_vec, EcoString, EcoVec}; +use ecow::EcoString; use typst_syntax::{FileId, Span, Spanned}; -use utf8_iter::ErrorReportingUtf8Chars; pub use self::cbor_::*; pub use self::csv_::*; @@ -28,7 +27,7 @@ pub use self::toml_::*; pub use self::xml_::*; pub use self::yaml_::*; -use crate::diag::{error, At, FileError, SourceDiagnostic, SourceResult}; +use crate::diag::{At, FileError, SourceResult}; use crate::foundations::OneOrMultiple; use crate::foundations::{cast, Bytes, Scope, Str}; use crate::World; @@ -129,6 +128,7 @@ pub struct Loaded { } impl Loaded { + /// FIXME: remove this? pub fn dummy() -> Self { Loaded::new( typst_syntax::Spanned::new(LoadSource::Bytes, Span::detached()), @@ -142,50 +142,16 @@ impl Loaded { pub fn as_str(&self) -> SourceResult<&str> { self.bytes.as_str().map_err(|err| { - // TODO: should the error even be reported in the file if it's possibly binary? let start = err.valid_up_to(); let end = start + err.error_len().unwrap_or(0); - self.err_in_text(start..end, "failed to convert to string", FileError::from(err)) + // always report this error in the source file. + self.err_in_bytes( + start..end, + "failed to convert to string", + FileError::from(err), + ) }) } - - /// Report an error, possibly in an external file. 
- pub fn err_in_text( - &self, - pos: impl Into, - msg: impl std::fmt::Display, - error: impl std::fmt::Display, - ) -> EcoVec { - let pos = pos.into(); - let error = match self.source.v { - LoadSource::Path(file_id) => { - if let Some(range) = pos.range(self.bytes.as_slice()) { - let span = Span::from_range(file_id, range); - return eco_vec!(error!(span, "{msg} ({error})")); - } - - // Either there was no range provided, or resolving the range - // from the line/column failed. If present report the possibly - // wrong line/column anyway. - let span = Span::from_range(file_id, 0..self.bytes.len()); - if let Some(pair) = pos.line_col(self.bytes.as_slice()) { - let (line, col) = pair.numbers(); - error!(span, "{msg} ({error} at {line}:{col})") - } else { - error!(span, "{msg} ({error})") - } - } - LoadSource::Bytes => { - if let Some(pair) = pos.line_col(self.bytes.as_slice()) { - let (line, col) = pair.numbers(); - error!(self.source.span, "{msg} ({error} at {line}:{col})") - } else { - error!(self.source.span, "{msg} ({error})") - } - } - }; - eco_vec![error] - } } /// A loaded [`DataSource`]. @@ -195,142 +161,6 @@ pub enum LoadSource { Bytes, } -#[derive(Debug, Default)] -pub enum ReportPos { - /// Contains the range, and the 0-based line/column. - Full(std::ops::Range, LineCol), - /// Contains the range. - Range(std::ops::Range), - /// Contains the 0-based line/column. 
- LineCol(LineCol), - #[default] - None, -} - -impl From> for ReportPos { - fn from(value: std::ops::Range) -> Self { - Self::Range(value) - } -} - -impl From for ReportPos { - fn from(value: LineCol) -> Self { - Self::LineCol(value) - } -} - -impl ReportPos { - fn range(&self, bytes: &[u8]) -> Option> { - match self { - ReportPos::Full(range, _) => Some(range.clone()), - ReportPos::Range(range) => Some(range.clone()), - &ReportPos::LineCol(pair) => pair.byte_pos(bytes).map(|i| i..i), - ReportPos::None => None, - } - } - - fn line_col(&self, bytes: &[u8]) -> Option { - match self { - &ReportPos::Full(_, pair) => Some(pair), - ReportPos::Range(range) => LineCol::from_byte_pos(range.start, bytes), - &ReportPos::LineCol(pair) => Some(pair), - ReportPos::None => None, - } - } -} - -#[derive(Clone, Copy, Debug)] -pub struct LineCol { - /// The 0-based line. - line: usize, - /// The 0-based column. - col: usize, -} - -impl LineCol { - /// Constructs the line/column pair from 0-based indices. - pub fn zero_based(line: usize, col: usize) -> Self { - Self { line, col } - } - - /// Constructs the line/column pair from 1-based numbers. - pub fn one_based(line: usize, col: usize) -> Self { - Self { - line: line.saturating_sub(1), - col: col.saturating_sub(1), - } - } - - pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option { - let bytes = &bytes[..pos]; - let mut line = 0; - let line_start = memchr::memchr_iter(b'\n', bytes) - .inspect(|_| line += 1) - .last() - .map(|i| i + 1) - .unwrap_or(bytes.len()); - - // Try to compute a column even if the string isn't valid utf-8. - let col = ErrorReportingUtf8Chars::new(&bytes[line_start..]).count(); - Some(LineCol::zero_based(line, col)) - } - - pub fn byte_pos(&self, bytes: &[u8]) -> Option { - let line_offset = if let Some(idx) = self.line.checked_sub(1) { - memchr::memchr_iter(b'\n', bytes).nth(idx).map(|i| i + 1)? 
- } else { - 0 - }; - - let col_offset = col_offset(line_offset, self.col, bytes)?; - let pos = line_offset + col_offset; - Some(pos) - } - - pub fn byte_range( - range: std::ops::Range, - bytes: &[u8], - ) -> Option> { - let mut line_iter = memchr::memchr_iter(b'\n', bytes); - let start_line_offset = if let Some(idx) = range.start.line.checked_sub(1) { - line_iter.nth(idx).map(|i| i + 1)? - } else { - 0 - }; - let line_delta = range.end.line - range.start.line; - let end_line_offset = if let Some(idx) = line_delta.checked_sub(1) { - line_iter.nth(idx).map(|i| i + 1)? - } else { - start_line_offset - }; - - let start_col_offset = col_offset(start_line_offset, range.start.col, bytes)?; - let end_col_offset = col_offset(end_line_offset, range.end.col, bytes)?; - - let start = start_line_offset + start_col_offset; - let end = end_line_offset + end_col_offset; - Some(start..end) - } - - pub fn numbers(&self) -> (usize, usize) { - (self.line + 1, self.col + 1) - } -} - -fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option { - let line = &bytes[line_offset..]; - // TODO: streaming-utf8 decoding ignore invalid characters - // might neeed to update error reporting too (use utf8_iter) - if let Some(idx) = col.checked_sub(1) { - // Try to compute position even if the string isn't valid utf-8. - let mut iter = ErrorReportingUtf8Chars::new(line); - _ = iter.nth(idx)?; - Some(line.len() - iter.as_slice().len()) - } else { - Some(0) - } -} - /// A value that can be read from a file. 
#[derive(Debug, Clone, PartialEq, Hash)] pub enum Readable { diff --git a/crates/typst-library/src/loading/toml.rs b/crates/typst-library/src/loading/toml.rs index 294bf05a1..f20de9064 100644 --- a/crates/typst-library/src/loading/toml.rs +++ b/crates/typst-library/src/loading/toml.rs @@ -1,10 +1,10 @@ use ecow::{eco_format, EcoVec}; use typst_syntax::Spanned; -use crate::diag::{At, SourceDiagnostic, SourceResult}; +use crate::diag::{At, ReportPos, SourceDiagnostic, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::{Loaded, DataSource, Load, Readable, ReportPos}; +use crate::loading::{DataSource, Load, Loaded, Readable}; /// Reads structured data from a TOML file. /// @@ -69,7 +69,10 @@ impl toml { } /// Format the user-facing TOML error message. -fn format_toml_error(data: &Loaded, error: ::toml::de::Error) -> EcoVec { +fn format_toml_error( + data: &Loaded, + error: ::toml::de::Error, +) -> EcoVec { let pos = error.span().map(ReportPos::Range).unwrap_or_default(); data.err_in_text(pos, "failed to parse TOML", error.message()) } diff --git a/crates/typst-library/src/loading/xml.rs b/crates/typst-library/src/loading/xml.rs index 54fc9062e..170a29b29 100644 --- a/crates/typst-library/src/loading/xml.rs +++ b/crates/typst-library/src/loading/xml.rs @@ -5,7 +5,7 @@ use typst_syntax::Spanned; use crate::diag::{format_xml_like_error, SourceDiagnostic, SourceResult}; use crate::engine::Engine; use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value}; -use crate::loading::{Loaded, DataSource, Load, Readable}; +use crate::loading::{DataSource, Load, Loaded, Readable}; /// Reads structured data from an XML file. 
/// diff --git a/crates/typst-library/src/loading/yaml.rs b/crates/typst-library/src/loading/yaml.rs index 4732d87b7..4acce2e0f 100644 --- a/crates/typst-library/src/loading/yaml.rs +++ b/crates/typst-library/src/loading/yaml.rs @@ -1,10 +1,10 @@ use ecow::{eco_format, EcoVec}; use typst_syntax::Spanned; -use crate::diag::{At, SourceDiagnostic, SourceResult}; +use crate::diag::{At, LineCol, ReportPos, SourceDiagnostic, SourceResult}; use crate::engine::Engine; use crate::foundations::{func, scope, Str, Value}; -use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos}; +use crate::loading::{DataSource, Load, Loaded, Readable}; /// Reads structured data from a YAML file. /// diff --git a/crates/typst-library/src/model/bibliography.rs b/crates/typst-library/src/model/bibliography.rs index e4fdb3eab..4b8235663 100644 --- a/crates/typst-library/src/model/bibliography.rs +++ b/crates/typst-library/src/model/bibliography.rs @@ -20,7 +20,8 @@ use typst_syntax::{Span, Spanned}; use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr}; use crate::diag::{ - bail, error, At, HintedStrResult, SourceDiagnostic, SourceResult, StrResult, + bail, error, At, HintedStrResult, ReportPos, SourceDiagnostic, SourceResult, + StrResult, }; use crate::engine::{Engine, Sink}; use crate::foundations::{ @@ -33,7 +34,7 @@ use crate::layout::{ BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem, Sides, Sizing, TrackSizings, }; -use crate::loading::{format_yaml_error, Loaded, DataSource, Load, LoadSource, ReportPos}; +use crate::loading::{format_yaml_error, DataSource, Load, LoadSource, Loaded}; use crate::model::{ CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem, Url, @@ -480,7 +481,9 @@ impl CslStyle { typst_utils::hash128(&(TypeId::of::(), data)), ))) }) - .map_err(|err| data.err_in_text(ReportPos::None, "failed to load CSL style", err)) + .map_err(|err| { + data.err_in_text(ReportPos::None, "failed to load 
CSL style", err) + }) } /// Get the underlying independent style. diff --git a/crates/typst-library/src/text/raw.rs b/crates/typst-library/src/text/raw.rs index 2e11bbfa7..8ce920b21 100644 --- a/crates/typst-library/src/text/raw.rs +++ b/crates/typst-library/src/text/raw.rs @@ -11,7 +11,7 @@ use typst_utils::ManuallyHash; use unicode_segmentation::UnicodeSegmentation; use super::Lang; -use crate::diag::{SourceDiagnostic, SourceResult}; +use crate::diag::{LineCol, ReportPos, SourceDiagnostic, SourceResult}; use crate::engine::Engine; use crate::foundations::{ cast, elem, scope, Content, Derived, NativeElement, OneOrMultiple, Packed, PlainText, @@ -19,7 +19,7 @@ use crate::foundations::{ }; use crate::html::{tag, HtmlElem}; use crate::layout::{BlockBody, BlockElem, Em, HAlignment}; -use crate::loading::{DataSource, LineCol, Load, Loaded, ReportPos}; +use crate::loading::{DataSource, Load, Loaded}; use crate::model::{Figurable, ParElem}; use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize}; use crate::visualize::Color; diff --git a/crates/typst-syntax/Cargo.toml b/crates/typst-syntax/Cargo.toml index 263595bd4..c20f6a087 100644 --- a/crates/typst-syntax/Cargo.toml +++ b/crates/typst-syntax/Cargo.toml @@ -15,6 +15,7 @@ readme = { workspace = true } [dependencies] typst-timing = { workspace = true } typst-utils = { workspace = true } +comemo = { workspace = true } ecow = { workspace = true } serde = { workspace = true } toml = { workspace = true } diff --git a/crates/typst-syntax/src/lib.rs b/crates/typst-syntax/src/lib.rs index 5e7b710fc..1249f88e9 100644 --- a/crates/typst-syntax/src/lib.rs +++ b/crates/typst-syntax/src/lib.rs @@ -7,6 +7,7 @@ mod file; mod highlight; mod kind; mod lexer; +mod lines; mod node; mod parser; mod path; @@ -22,6 +23,7 @@ pub use self::lexer::{ is_id_continue, is_id_start, is_ident, is_newline, is_valid_label_literal_id, link_prefix, split_newlines, }; +pub use self::lines::Lines; pub use 
self::node::{LinkedChildren, LinkedNode, Side, SyntaxError, SyntaxNode}; pub use self::parser::{parse, parse_code, parse_math}; pub use self::path::VirtualPath; diff --git a/crates/typst-syntax/src/lines.rs b/crates/typst-syntax/src/lines.rs new file mode 100644 index 000000000..99275ad28 --- /dev/null +++ b/crates/typst-syntax/src/lines.rs @@ -0,0 +1,407 @@ +use std::hash::{Hash, Hasher}; +use std::iter::zip; +use std::ops::Range; +use std::str::Utf8Error; +use std::sync::Arc; + +use crate::is_newline; + +/// Line metadata for a text: the text itself plus the offsets where its lines start. +#[derive(Clone)] +pub struct Lines(Arc>); + +#[derive(Clone)] +struct Repr { + lines: Vec, + str: S, +} + +/// Metadata about a line. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct Line { + /// The UTF-8 byte offset where the line starts. + byte_idx: usize, + /// The UTF-16 code unit offset where the line starts. + utf16_idx: usize, +} + +impl> Lines { + /// Creates the line metadata for the given text. TODO: memoize this? + pub fn new(str: S) -> Self { + let lines = lines(str.as_ref()); + Lines(Arc::new(Repr { lines, str })) + } + + pub fn text(&self) -> &str { + self.0.str.as_ref() + } + + /// Get the length of the file in UTF-8 encoded bytes. + pub fn len_bytes(&self) -> usize { + self.0.str.as_ref().len() + } + + /// Get the length of the file in UTF-16 code units. + pub fn len_utf16(&self) -> usize { + let last = self.0.lines.last().unwrap(); + last.utf16_idx + len_utf16(&self.text()[last.byte_idx..]) + } + + /// Get the length of the file in lines. + pub fn len_lines(&self) -> usize { + self.0.lines.len() + } + + /// Return the index of the UTF-16 code unit at the byte index. + pub fn byte_to_utf16(&self, byte_idx: usize) -> Option { + let line_idx = self.byte_to_line(byte_idx)?; + let line = self.0.lines.get(line_idx)?; + let head = self.text().get(line.byte_idx..byte_idx)?; + Some(line.utf16_idx + len_utf16(head)) + } + + /// Return the index of the line that contains the given byte index. 
+ pub fn byte_to_line(&self, byte_idx: usize) -> Option { + (byte_idx <= self.text().len()).then(|| { + match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) { + Ok(i) => i, + Err(i) => i - 1, + } + }) + } + + /// Return the index of the column at the byte index. + /// + /// The column is defined as the number of characters in the line before the + /// byte index. + pub fn byte_to_column(&self, byte_idx: usize) -> Option { + let line = self.byte_to_line(byte_idx)?; + let start = self.line_to_byte(line)?; + let head = self.text().get(start..byte_idx)?; + Some(head.chars().count()) + } + + /// Return the index of the line and column at the byte index. + pub fn byte_to_line_column(&self, byte_idx: usize) -> Option<(usize, usize)> { + let line = self.byte_to_line(byte_idx)?; + let start = self.line_to_byte(line)?; + let head = self.text().get(start..byte_idx)?; + let col = head.chars().count(); + Some((line, col)) + } + + /// Return the byte index at the UTF-16 code unit. + pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option { + let line = self.0.lines.get( + match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) { + Ok(i) => i, + Err(i) => i - 1, + }, + )?; + + let text = self.text(); + let mut k = line.utf16_idx; + for (i, c) in text[line.byte_idx..].char_indices() { + if k >= utf16_idx { + return Some(line.byte_idx + i); + } + k += c.len_utf16(); + } + + (k == utf16_idx).then_some(text.len()) + } + + /// Return the byte position at which the given line starts. + pub fn line_to_byte(&self, line_idx: usize) -> Option { + self.0.lines.get(line_idx).map(|line| line.byte_idx) + } + + /// Return the range which encloses the given line. + pub fn line_to_range(&self, line_idx: usize) -> Option> { + let start = self.line_to_byte(line_idx)?; + let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text().len()); + Some(start..end) + } + + /// Return the byte index of the given (line, column) pair. 
+ /// + /// The column defines the number of characters to go beyond the start of + /// the line. + pub fn line_column_to_byte( + &self, + line_idx: usize, + column_idx: usize, + ) -> Option { + let range = self.line_to_range(line_idx)?; + let line = self.text().get(range.clone())?; + let mut chars = line.chars(); + for _ in 0..column_idx { + chars.next(); + } + Some(range.start + (line.len() - chars.as_str().len())) + } +} + +impl Lines { + /// Tries to build line metadata from the bytes; fails if they are not valid UTF-8. + #[comemo::memoize] + pub fn from_bytes(bytes: &[u8]) -> Result, Utf8Error> { + let str = std::str::from_utf8(bytes)?; + Ok(Lines::new(str.to_string())) + } + + /// Fully replace the source text. + /// + /// This performs a naive (suffix/prefix-based) diff of the old and new text + /// to produce the smallest single edit that transforms old into new and + /// then calls [`edit`](Self::edit) with it. + /// + /// Returns whether any changes were made. + pub fn replace(&mut self, new: &str) -> bool { + let Some((prefix, suffix)) = self.replacement_range(new) else { + return false; + }; + + let old = self.text(); + let replace = prefix..old.len() - suffix; + let with = &new[prefix..new.len() - suffix]; + self.edit(replace, with); + + true + } + + /// Returns the byte lengths of the prefix and suffix shared by the old and new text. + /// Returns [`None`] if the old and new strings are equal. 
+ pub fn replacement_range(&self, new: &str) -> Option<(usize, usize)> { + let old = self.text(); + + let mut prefix = + zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count(); + + if prefix == old.len() && prefix == new.len() { + return None; + } + + while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) { + prefix -= 1; + } + + let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev()) + .take_while(|(x, y)| x == y) + .count(); + + while !old.is_char_boundary(old.len() - suffix) + || !new.is_char_boundary(new.len() - suffix) + { + suffix += 1; + } + + Some((prefix, suffix)) + } + + /// Edit the source file by replacing the given range. + /// + /// Returns the range in the new source that was ultimately reparsed. + /// + /// The method panics if the `replace` range is out of bounds. + #[track_caller] + pub fn edit(&mut self, replace: Range, with: &str) { + let start_byte = replace.start; + let start_utf16 = self.byte_to_utf16(start_byte).unwrap(); + let line = self.byte_to_line(start_byte).unwrap(); + + let inner = Arc::make_mut(&mut self.0); + + // Update the text itself. + inner.str.replace_range(replace.clone(), with); + + // Remove invalidated line starts. + inner.lines.truncate(line + 1); + + // Handle adjoining of \r and \n. + if inner.str[..start_byte].ends_with('\r') && with.starts_with('\n') { + inner.lines.pop(); + } + + // Recalculate the line starts after the edit. + inner + .lines + .extend(lines_from(start_byte, start_utf16, &inner.str[start_byte..])); + } +} + +/// Create a line vector. +fn lines(text: &str) -> Vec { + std::iter::once(Line { byte_idx: 0, utf16_idx: 0 }) + .chain(lines_from(0, 0, text)) + .collect() +} + +/// Compute a line iterator from an offset. 
+fn lines_from( + byte_offset: usize, + utf16_offset: usize, + text: &str, +) -> impl Iterator + '_ { + let mut s = unscanny::Scanner::new(text); + let mut utf16_idx = utf16_offset; + + std::iter::from_fn(move || { + s.eat_until(|c: char| { + utf16_idx += c.len_utf16(); + is_newline(c) + }); + + if s.done() { + return None; + } + + if s.eat() == Some('\r') && s.eat_if('\n') { + utf16_idx += 1; + } + + Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx }) + }) +} + +/// The number of code units this string would use if it was encoded in +/// UTF16. This runs in linear time. +fn len_utf16(string: &str) -> usize { + string.chars().map(char::len_utf16).sum() +} + +#[cfg(test)] +mod tests { + use super::*; + + const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl"; + + #[test] + fn test_source_file_new() { + let lines = Lines::new(TEST); + assert_eq!( + lines.0.lines, + [ + Line { byte_idx: 0, utf16_idx: 0 }, + Line { byte_idx: 7, utf16_idx: 6 }, + Line { byte_idx: 15, utf16_idx: 12 }, + Line { byte_idx: 18, utf16_idx: 15 }, + ] + ); + } + + #[test] + fn test_source_file_pos_to_line() { + let lines = Lines::new(TEST); + assert_eq!(lines.byte_to_line(0), Some(0)); + assert_eq!(lines.byte_to_line(2), Some(0)); + assert_eq!(lines.byte_to_line(6), Some(0)); + assert_eq!(lines.byte_to_line(7), Some(1)); + assert_eq!(lines.byte_to_line(8), Some(1)); + assert_eq!(lines.byte_to_line(12), Some(1)); + assert_eq!(lines.byte_to_line(21), Some(3)); + assert_eq!(lines.byte_to_line(22), None); + } + + #[test] + fn test_source_file_pos_to_column() { + let lines = Lines::new(TEST); + assert_eq!(lines.byte_to_column(0), Some(0)); + assert_eq!(lines.byte_to_column(2), Some(1)); + assert_eq!(lines.byte_to_column(6), Some(5)); + assert_eq!(lines.byte_to_column(7), Some(0)); + assert_eq!(lines.byte_to_column(8), Some(1)); + assert_eq!(lines.byte_to_column(12), Some(2)); + } + + #[test] + fn test_source_file_utf16() { + #[track_caller] + fn roundtrip(lines: &Lines<&str>, byte_idx: usize, 
utf16_idx: usize) { + let middle = lines.byte_to_utf16(byte_idx).unwrap(); + let result = lines.utf16_to_byte(middle).unwrap(); + assert_eq!(middle, utf16_idx); + assert_eq!(result, byte_idx); + } + + let lines = Lines::new(TEST); + roundtrip(&lines, 0, 0); + roundtrip(&lines, 2, 1); + roundtrip(&lines, 3, 2); + roundtrip(&lines, 8, 7); + roundtrip(&lines, 12, 9); + roundtrip(&lines, 21, 18); + assert_eq!(lines.byte_to_utf16(22), None); + assert_eq!(lines.utf16_to_byte(19), None); + } + + #[test] + fn test_source_file_roundtrip() { + #[track_caller] + fn roundtrip(lines: &Lines<&str>, byte_idx: usize) { + let line = lines.byte_to_line(byte_idx).unwrap(); + let column = lines.byte_to_column(byte_idx).unwrap(); + let result = lines.line_column_to_byte(line, column).unwrap(); + assert_eq!(result, byte_idx); + } + + let lines = Lines::new(TEST); + roundtrip(&lines, 0); + roundtrip(&lines, 7); + roundtrip(&lines, 12); + roundtrip(&lines, 21); + } + + #[test] + fn test_source_file_edit() { + // This tests only the non-parser parts. The reparsing itself is + // tested separately. + #[track_caller] + fn test(prev: &str, range: Range, with: &str, after: &str) { + let reference = Lines::new(after); + + let mut edited = Lines::new(prev.to_string()); + edited.edit(range.clone(), with); + assert_eq!(edited.text(), reference.text()); + assert_eq!(edited.0.lines, reference.0.lines); + + let mut replaced = Lines::new(prev.to_string()); + replaced.replace(&{ + let mut s = prev.to_string(); + s.replace_range(range, with); + s + }); + assert_eq!(replaced.text(), reference.text()); + assert_eq!(replaced.0.lines, reference.0.lines); + } + + // Test inserting at the beginning. + test("abc\n", 0..0, "hi\n", "hi\nabc\n"); + test("\nabc", 0..0, "hi\r", "hi\r\nabc"); + + // Test editing in the middle. + test(TEST, 4..16, "❌", "ä\tc❌i\rjkl"); + + // Test appending. 
+ test("abc\ndef", 7..7, "hi", "abc\ndefhi"); + test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi"); + + // Test appending with adjoining \r and \n. + test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi"); + + // Test removing everything. + test(TEST, 0..21, "", ""); + } +} + +impl Hash for Lines { + fn hash(&self, state: &mut H) { + self.0.str.hash(state); + } +} + +impl> AsRef for Lines { + fn as_ref(&self) -> &str { + self.0.str.as_ref() + } +} diff --git a/crates/typst-syntax/src/source.rs b/crates/typst-syntax/src/source.rs index 6ff94c73f..e4a3982b7 100644 --- a/crates/typst-syntax/src/source.rs +++ b/crates/typst-syntax/src/source.rs @@ -2,14 +2,14 @@ use std::fmt::{self, Debug, Formatter}; use std::hash::{Hash, Hasher}; -use std::iter::zip; use std::ops::Range; use std::sync::Arc; use typst_utils::LazyHash; +use crate::lines::Lines; use crate::reparser::reparse; -use crate::{is_newline, parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath}; +use crate::{parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath}; /// A source file. /// @@ -24,9 +24,8 @@ pub struct Source(Arc); #[derive(Clone)] struct Repr { id: FileId, - text: LazyHash, root: LazyHash, - lines: Vec, + lines: LazyHash>, } impl Source { @@ -37,8 +36,7 @@ impl Source { root.numberize(id, Span::FULL).unwrap(); Self(Arc::new(Repr { id, - lines: lines(&text), - text: LazyHash::new(text), + lines: LazyHash::new(Lines::new(text)), root: LazyHash::new(root), })) } @@ -58,9 +56,14 @@ impl Source { self.0.id } + /// The whole source as a string slice. + pub fn lines(&self) -> Lines { + Lines::clone(&self.0.lines) + } + /// The whole source as a string slice. pub fn text(&self) -> &str { - &self.0.text + &self.0.lines.text() } /// Slice out the part of the source code enclosed by the range. @@ -77,29 +80,12 @@ impl Source { /// Returns the range in the new source that was ultimately reparsed. 
pub fn replace(&mut self, new: &str) -> Range { let _scope = typst_timing::TimingScope::new("replace source"); - let old = self.text(); - let mut prefix = - zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count(); - - if prefix == old.len() && prefix == new.len() { + let Some((prefix, suffix)) = self.0.lines.replacement_range(new) else { return 0..0; - } - - while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) { - prefix -= 1; - } - - let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev()) - .take_while(|(x, y)| x == y) - .count(); - - while !old.is_char_boundary(old.len() - suffix) - || !new.is_char_boundary(new.len() - suffix) - { - suffix += 1; - } + }; + let old = self.text(); let replace = prefix..old.len() - suffix; let with = &new[prefix..new.len() - suffix]; self.edit(replace, with) @@ -112,48 +98,28 @@ impl Source { /// The method panics if the `replace` range is out of bounds. #[track_caller] pub fn edit(&mut self, replace: Range, with: &str) -> Range { - let start_byte = replace.start; - let start_utf16 = self.byte_to_utf16(start_byte).unwrap(); - let line = self.byte_to_line(start_byte).unwrap(); - let inner = Arc::make_mut(&mut self.0); - // Update the text itself. - inner.text.replace_range(replace.clone(), with); - - // Remove invalidated line starts. - inner.lines.truncate(line + 1); - - // Handle adjoining of \r and \n. - if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') { - inner.lines.pop(); - } - - // Recalculate the line starts after the edit. - inner.lines.extend(lines_from( - start_byte, - start_utf16, - &inner.text[start_byte..], - )); + // Update the text and lines. + inner.lines.edit(replace.clone(), with); // Incrementally reparse the replaced range. - reparse(&mut inner.root, &inner.text, replace, with.len()) + reparse(&mut inner.root, inner.lines.text(), replace, with.len()) } /// Get the length of the file in UTF-8 encoded bytes. 
pub fn len_bytes(&self) -> usize { - self.text().len() + self.0.lines.len_bytes() } /// Get the length of the file in UTF-16 code units. pub fn len_utf16(&self) -> usize { - let last = self.0.lines.last().unwrap(); - last.utf16_idx + len_utf16(&self.0.text[last.byte_idx..]) + self.0.lines.len_utf16() } /// Get the length of the file in lines. pub fn len_lines(&self) -> usize { - self.0.lines.len() + self.0.lines.len_lines() } /// Find the node with the given span. @@ -171,85 +137,6 @@ impl Source { pub fn range(&self, span: Span) -> Option> { Some(self.find(span)?.range()) } - - /// Return the index of the UTF-16 code unit at the byte index. - pub fn byte_to_utf16(&self, byte_idx: usize) -> Option { - let line_idx = self.byte_to_line(byte_idx)?; - let line = self.0.lines.get(line_idx)?; - let head = self.0.text.get(line.byte_idx..byte_idx)?; - Some(line.utf16_idx + len_utf16(head)) - } - - /// Return the index of the line that contains the given byte index. - pub fn byte_to_line(&self, byte_idx: usize) -> Option { - (byte_idx <= self.0.text.len()).then(|| { - match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) { - Ok(i) => i, - Err(i) => i - 1, - } - }) - } - - /// Return the index of the column at the byte index. - /// - /// The column is defined as the number of characters in the line before the - /// byte index. - pub fn byte_to_column(&self, byte_idx: usize) -> Option { - let line = self.byte_to_line(byte_idx)?; - let start = self.line_to_byte(line)?; - let head = self.get(start..byte_idx)?; - Some(head.chars().count()) - } - - /// Return the byte index at the UTF-16 code unit. 
- pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option { - let line = self.0.lines.get( - match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) { - Ok(i) => i, - Err(i) => i - 1, - }, - )?; - - let mut k = line.utf16_idx; - for (i, c) in self.0.text[line.byte_idx..].char_indices() { - if k >= utf16_idx { - return Some(line.byte_idx + i); - } - k += c.len_utf16(); - } - - (k == utf16_idx).then_some(self.0.text.len()) - } - - /// Return the byte position at which the given line starts. - pub fn line_to_byte(&self, line_idx: usize) -> Option { - self.0.lines.get(line_idx).map(|line| line.byte_idx) - } - - /// Return the range which encloses the given line. - pub fn line_to_range(&self, line_idx: usize) -> Option> { - let start = self.line_to_byte(line_idx)?; - let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len()); - Some(start..end) - } - - /// Return the byte index of the given (line, column) pair. - /// - /// The column defines the number of characters to go beyond the start of - /// the line. - pub fn line_column_to_byte( - &self, - line_idx: usize, - column_idx: usize, - ) -> Option { - let range = self.line_to_range(line_idx)?; - let line = self.get(range.clone())?; - let mut chars = line.chars(); - for _ in 0..column_idx { - chars.next(); - } - Some(range.start + (line.len() - chars.as_str().len())) - } } impl Debug for Source { @@ -261,7 +148,7 @@ impl Debug for Source { impl Hash for Source { fn hash(&self, state: &mut H) { self.0.id.hash(state); - self.0.text.hash(state); + self.0.lines.hash(state); self.0.root.hash(state); } } @@ -271,176 +158,3 @@ impl AsRef for Source { self.text() } } - -/// Metadata about a line. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -struct Line { - /// The UTF-8 byte offset where the line starts. - byte_idx: usize, - /// The UTF-16 codepoint offset where the line starts. - utf16_idx: usize, -} - -/// Create a line vector. 
-fn lines(text: &str) -> Vec { - std::iter::once(Line { byte_idx: 0, utf16_idx: 0 }) - .chain(lines_from(0, 0, text)) - .collect() -} - -/// Compute a line iterator from an offset. -fn lines_from( - byte_offset: usize, - utf16_offset: usize, - text: &str, -) -> impl Iterator + '_ { - let mut s = unscanny::Scanner::new(text); - let mut utf16_idx = utf16_offset; - - std::iter::from_fn(move || { - s.eat_until(|c: char| { - utf16_idx += c.len_utf16(); - is_newline(c) - }); - - if s.done() { - return None; - } - - if s.eat() == Some('\r') && s.eat_if('\n') { - utf16_idx += 1; - } - - Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx }) - }) -} - -/// The number of code units this string would use if it was encoded in -/// UTF16. This runs in linear time. -fn len_utf16(string: &str) -> usize { - string.chars().map(char::len_utf16).sum() -} - -#[cfg(test)] -mod tests { - use super::*; - - const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl"; - - #[test] - fn test_source_file_new() { - let source = Source::detached(TEST); - assert_eq!( - source.0.lines, - [ - Line { byte_idx: 0, utf16_idx: 0 }, - Line { byte_idx: 7, utf16_idx: 6 }, - Line { byte_idx: 15, utf16_idx: 12 }, - Line { byte_idx: 18, utf16_idx: 15 }, - ] - ); - } - - #[test] - fn test_source_file_pos_to_line() { - let source = Source::detached(TEST); - assert_eq!(source.byte_to_line(0), Some(0)); - assert_eq!(source.byte_to_line(2), Some(0)); - assert_eq!(source.byte_to_line(6), Some(0)); - assert_eq!(source.byte_to_line(7), Some(1)); - assert_eq!(source.byte_to_line(8), Some(1)); - assert_eq!(source.byte_to_line(12), Some(1)); - assert_eq!(source.byte_to_line(21), Some(3)); - assert_eq!(source.byte_to_line(22), None); - } - - #[test] - fn test_source_file_pos_to_column() { - let source = Source::detached(TEST); - assert_eq!(source.byte_to_column(0), Some(0)); - assert_eq!(source.byte_to_column(2), Some(1)); - assert_eq!(source.byte_to_column(6), Some(5)); - assert_eq!(source.byte_to_column(7), Some(0)); - 
assert_eq!(source.byte_to_column(8), Some(1)); - assert_eq!(source.byte_to_column(12), Some(2)); - } - - #[test] - fn test_source_file_utf16() { - #[track_caller] - fn roundtrip(source: &Source, byte_idx: usize, utf16_idx: usize) { - let middle = source.byte_to_utf16(byte_idx).unwrap(); - let result = source.utf16_to_byte(middle).unwrap(); - assert_eq!(middle, utf16_idx); - assert_eq!(result, byte_idx); - } - - let source = Source::detached(TEST); - roundtrip(&source, 0, 0); - roundtrip(&source, 2, 1); - roundtrip(&source, 3, 2); - roundtrip(&source, 8, 7); - roundtrip(&source, 12, 9); - roundtrip(&source, 21, 18); - assert_eq!(source.byte_to_utf16(22), None); - assert_eq!(source.utf16_to_byte(19), None); - } - - #[test] - fn test_source_file_roundtrip() { - #[track_caller] - fn roundtrip(source: &Source, byte_idx: usize) { - let line = source.byte_to_line(byte_idx).unwrap(); - let column = source.byte_to_column(byte_idx).unwrap(); - let result = source.line_column_to_byte(line, column).unwrap(); - assert_eq!(result, byte_idx); - } - - let source = Source::detached(TEST); - roundtrip(&source, 0); - roundtrip(&source, 7); - roundtrip(&source, 12); - roundtrip(&source, 21); - } - - #[test] - fn test_source_file_edit() { - // This tests only the non-parser parts. The reparsing itself is - // tested separately. - #[track_caller] - fn test(prev: &str, range: Range, with: &str, after: &str) { - let reference = Source::detached(after); - - let mut edited = Source::detached(prev); - edited.edit(range.clone(), with); - assert_eq!(edited.text(), reference.text()); - assert_eq!(edited.0.lines, reference.0.lines); - - let mut replaced = Source::detached(prev); - replaced.replace(&{ - let mut s = prev.to_string(); - s.replace_range(range, with); - s - }); - assert_eq!(replaced.text(), reference.text()); - assert_eq!(replaced.0.lines, reference.0.lines); - } - - // Test inserting at the beginning. 
- test("abc\n", 0..0, "hi\n", "hi\nabc\n"); - test("\nabc", 0..0, "hi\r", "hi\r\nabc"); - - // Test editing in the middle. - test(TEST, 4..16, "❌", "ä\tc❌i\rjkl"); - - // Test appending. - test("abc\ndef", 7..7, "hi", "abc\ndefhi"); - test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi"); - - // Test appending with adjoining \r and \n. - test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi"); - - // Test removing everything. - test(TEST, 0..21, "", ""); - } -} diff --git a/tests/src/collect.rs b/tests/src/collect.rs index c72747e2c..f8e1722ce 100644 --- a/tests/src/collect.rs +++ b/tests/src/collect.rs @@ -6,9 +6,11 @@ use std::str::FromStr; use std::sync::LazyLock; use ecow::{eco_format, EcoString}; -use typst::loading::LineCol; +use typst::diag::LineCol; use typst_syntax::package::PackageVersion; -use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath}; +use typst_syntax::{ + is_id_continue, is_ident, is_newline, FileId, Lines, Source, VirtualPath, +}; use unscanny::Scanner; use crate::world::{read, system_path}; @@ -426,11 +428,17 @@ impl<'a> Parser<'a> { } let start = self.parse_line_col()?; + let lines = Lines::from_bytes(text.as_ref()).expect("Errors shouldn't be annotated for files that aren't human readable (not valid utf-8)"); let range = if self.s.eat_if('-') { let end = self.parse_line_col()?; - LineCol::byte_range(start..end, &text) + let (line, col) = start.indices(); + let start = lines.line_column_to_byte(line, col); + let (line, col) = end.indices(); + let end = lines.line_column_to_byte(line, col); + Option::zip(start, end).map(|(a, b)| a..b) } else { - start.byte_pos(&text).map(|i| i..i) + let (line, col) = start.indices(); + lines.line_column_to_byte(line, col).map(|i| i..i) }; if range.is_none() { self.error("range is out of bounds"); @@ -484,13 +492,13 @@ impl<'a> Parser<'a> { let line_idx = (line_idx_in_test + comments).checked_add_signed(line_delta)?; let column_idx = if column < 0 { // Negative column index is from the 
back. - let range = source.line_to_range(line_idx)?; + let range = source.lines().line_to_range(line_idx)?; text[range].chars().count().saturating_add_signed(column) } else { usize::try_from(column).ok()?.checked_sub(1)? }; - source.line_column_to_byte(line_idx, column_idx) + source.lines().line_column_to_byte(line_idx, column_idx) } /// Parse a number. diff --git a/tests/src/run.rs b/tests/src/run.rs index 259349ad6..07d5a859c 100644 --- a/tests/src/run.rs +++ b/tests/src/run.rs @@ -7,11 +7,10 @@ use tiny_skia as sk; use typst::diag::{SourceDiagnostic, Warned}; use typst::html::HtmlDocument; use typst::layout::{Abs, Frame, FrameItem, PagedDocument, Transform}; -use typst::loading::LineCol; use typst::visualize::Color; use typst::{Document, World, WorldExt}; use typst_pdf::PdfOptions; -use typst_syntax::FileId; +use typst_syntax::{FileId, Lines}; use crate::collect::{Attr, FileSize, NoteKind, Test}; use crate::logger::TestResult; @@ -329,12 +328,12 @@ impl<'a> Runner<'a> { fn format_pos(&self, file: FileId, pos: usize) -> String { let res = if file != self.test.source.id() { let bytes = self.world.file(file).unwrap(); - LineCol::from_byte_pos(pos, &bytes).map(|l| l.numbers()) + let lines = Lines::from_bytes(&bytes).unwrap(); + lines.byte_to_line_column(pos).map(|(line, col)| (line + 1, col + 1)) } else { - let line = self.test.source.byte_to_line(pos).map(|l| l + 1); - let col = (self.test.source.byte_to_column(pos)) - .map(|c| self.test.pos.line + c + 1); - Option::zip(line, col) + (self.test.source.lines()) + .byte_to_line_column(pos) + .map(|(line, col)| (line + 1, col + 1)) }; let Some((line, col)) = res else { return "oob".into();