Mirror of https://github.com/typst/typst (synced 2025-08-24 19:54:14 +08:00)
refactor: factor out a general Lines struct from Source
This commit is contained in:
parent 2e2f646f2a
commit e5d8f02554
Cargo.lock (generated): 2 changed lines
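
The gist of the refactor, as a reading aid for the hunks below: the line/column bookkeeping that used to live inside `Source` moves into a standalone, publicly re-exported `Lines<S>` type in `typst-syntax`, so the CLI, the test world, and the data-loading diagnostics can index plain strings or loaded bytes without going through a `Source`. A minimal sketch of the resulting API (illustrative only; the string literal is made up):

```rust
use typst_syntax::Lines;

fn sketch() {
    // Line metadata over an arbitrary string, independent of any Source.
    let lines = Lines::new("hello\nwörld\n".to_string());
    assert_eq!(lines.len_lines(), 3);

    // Byte offset 9 sits on line 1 ("wörld"), column 2 (both 0-based)...
    assert_eq!(lines.byte_to_line_column(9), Some((1, 2)));

    // ...and the mapping goes back the other way.
    assert_eq!(lines.line_column_to_byte(1, 2), Some(9));
}
```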
@@ -3112,7 +3112,6 @@ dependencies = [
  "unicode-segmentation",
  "unscanny",
  "usvg",
- "utf8_iter",
  "wasmi",
  "xmlwriter",
 ]
@@ -3201,6 +3200,7 @@ dependencies = [
 name = "typst-syntax"
 version = "0.13.1"
 dependencies = [
+ "comemo",
  "ecow",
  "serde",
  "toml",
@@ -135,7 +135,6 @@ unicode-segmentation = "1"
 unscanny = "0.1"
 ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
 usvg = { version = "0.45", default-features = false, features = ["text"] }
-utf8_iter = "1.0.4"
 walkdir = "2"
 wasmi = "0.40.0"
 web-sys = "0.3"
@@ -16,7 +16,7 @@ use typst::diag::{
 use typst::foundations::{Datetime, Smart};
 use typst::html::HtmlDocument;
 use typst::layout::{Frame, Page, PageRanges, PagedDocument};
-use typst::syntax::{FileId, Source, Span};
+use typst::syntax::{FileId, Lines, Span};
 use typst::WorldExt;
 use typst_pdf::{PdfOptions, PdfStandards, Timestamp};
 
@@ -696,7 +696,7 @@ fn label(world: &SystemWorld, span: Span) -> Option<Label<FileId>> {
 impl<'a> codespan_reporting::files::Files<'a> for SystemWorld {
     type FileId = FileId;
     type Name = String;
-    type Source = Source;
+    type Source = Lines<String>;
 
     fn name(&'a self, id: FileId) -> CodespanResult<Self::Name> {
         let vpath = id.vpath();
@@ -85,6 +85,6 @@ fn resolve_span(world: &SystemWorld, span: Span) -> Option<(String, u32)> {
     let id = span.id()?;
     let source = world.source(id).ok()?;
     let range = source.range(span)?;
-    let line = source.byte_to_line(range.start)?;
+    let line = source.lines().byte_to_line(range.start)?;
     Some((format!("{id:?}"), line as u32 + 1))
 }
@@ -9,7 +9,7 @@ use ecow::{eco_format, EcoString};
 use parking_lot::Mutex;
 use typst::diag::{FileError, FileResult};
 use typst::foundations::{Bytes, Datetime, Dict, IntoValue};
-use typst::syntax::{FileId, Source, VirtualPath};
+use typst::syntax::{FileId, Lines, Source, VirtualPath};
 use typst::text::{Font, FontBook};
 use typst::utils::LazyHash;
 use typst::{Library, World};
@@ -183,8 +183,18 @@ impl SystemWorld {
 
     /// Lookup a source file by id.
     #[track_caller]
-    pub fn lookup(&self, id: FileId) -> Source {
-        self.source(id).expect("file id does not point to any source file")
+    pub fn lookup(&self, id: FileId) -> Lines<String> {
+        self.slot(id, |slot| {
+            if let Some(source) = slot.source.get() {
+                let source = source.as_ref().expect("file is not valid");
+                source.lines()
+            } else if let Some(bytes) = slot.file.get() {
+                let bytes = bytes.as_ref().expect("file is not valid");
+                Lines::from_bytes(bytes.as_slice()).expect("file is not valid utf-8")
+            } else {
+                panic!("file id does not point to any source file");
+            }
+        })
     }
 }
 
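
With `lookup` now returning `Lines<String>`, the codespan-reporting integration above can resolve positions in any file the world has loaded, not only parsed Typst sources. A hedged sketch of a caller (function name and error handling are placeholders, not part of the diff):

```rust
// Sketch: map a byte offset in a file to a 1-based line number for display.
fn display_line(world: &SystemWorld, id: FileId, byte_idx: usize) -> Option<usize> {
    let lines = world.lookup(id);             // Lines<String> for source or raw bytes
    let line = lines.byte_to_line(byte_idx)?; // 0-based line index
    Some(line + 1)                            // 1-based for user-facing output
}
```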
@@ -339,6 +349,11 @@ impl<T: Clone> SlotCell<T> {
         self.accessed = false;
     }
 
+    /// Gets the contents of the cell.
+    fn get(&self) -> Option<&FileResult<T>> {
+        self.data.as_ref()
+    }
+
     /// Gets the contents of the cell or initialize them.
     fn get_or_init(
         &mut self,
@@ -66,7 +66,6 @@ unicode-normalization = { workspace = true }
 unicode-segmentation = { workspace = true }
 unscanny = { workspace = true }
 usvg = { workspace = true }
-utf8_iter = { workspace = true }
 wasmi = { workspace = true }
 xmlwriter = { workspace = true }
 
@@ -9,10 +9,10 @@ use std::string::FromUtf8Error;
 use comemo::Tracked;
 use ecow::{eco_vec, EcoVec};
 use typst_syntax::package::{PackageSpec, PackageVersion};
-use typst_syntax::{Span, Spanned, SyntaxError};
+use typst_syntax::{Lines, Span, Spanned, SyntaxError};
 
 use crate::engine::Engine;
-use crate::loading::{Loaded, LineCol};
+use crate::loading::{LoadSource, Loaded};
 use crate::{World, WorldExt};
 
 /// Early-return with a [`StrResult`] or [`SourceResult`].
@@ -569,6 +569,144 @@ impl From<PackageError> for EcoString {
     }
 }
 
+impl Loaded {
+    /// Report an error, possibly in an external file.
+    pub fn err_in_text(
+        &self,
+        pos: impl Into<ReportPos>,
+        msg: impl std::fmt::Display,
+        error: impl std::fmt::Display,
+    ) -> EcoVec<SourceDiagnostic> {
+        let lines = Lines::from_bytes(&self.bytes);
+        match (self.source.v, lines) {
+            // Only report an error in an external file,
+            // if it is human readable (valid utf-8).
+            (LoadSource::Path(file_id), Ok(lines)) => {
+                let pos = pos.into();
+                if let Some(range) = pos.range(&lines) {
+                    let span = Span::from_range(file_id, range);
+                    return eco_vec!(error!(span, "{msg} ({error})"));
+                }
+
+                // Either `ReportPos::None` was provided, or resolving the range
+                // from the line/column failed. If present report the possibly
+                // wrong line/column in the error message anyway.
+                let span = Span::from_range(file_id, 0..self.bytes.len());
+                let error = if let Some(pair) = pos.line_col(&lines) {
+                    let (line, col) = pair.numbers();
+                    error!(span, "{msg} ({error} at {line}:{col})")
+                } else {
+                    error!(span, "{msg} ({error})")
+                };
+                eco_vec![error]
+            }
+            _ => self.err_in_bytes(pos, msg, error),
+        }
+    }
+
+    /// Report an error, possibly in an external file.
+    pub fn err_in_bytes(
+        &self,
+        pos: impl Into<ReportPos>,
+        msg: impl std::fmt::Display,
+        error: impl std::fmt::Display,
+    ) -> EcoVec<SourceDiagnostic> {
+        let pos = pos.into();
+        let result = Lines::from_bytes(&self.bytes).ok().and_then(|l| pos.line_col(&l));
+        let error = if let Some(pair) = result {
+            let (line, col) = pair.numbers();
+            error!(self.source.span, "{msg} ({error} at {line}:{col})")
+        } else {
+            error!(self.source.span, "{msg} ({error})")
+        };
+        eco_vec![error]
+    }
+}
+
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
+pub enum ReportPos {
+    /// Contains the range, and the 0-based line/column.
+    Full(std::ops::Range<usize>, LineCol),
+    /// Contains the range.
+    Range(std::ops::Range<usize>),
+    /// Contains the 0-based line/column.
+    LineCol(LineCol),
+    #[default]
+    None,
+}
+
+impl From<std::ops::Range<usize>> for ReportPos {
+    fn from(value: std::ops::Range<usize>) -> Self {
+        Self::Range(value)
+    }
+}
+
+impl From<LineCol> for ReportPos {
+    fn from(value: LineCol) -> Self {
+        Self::LineCol(value)
+    }
+}
+
+impl ReportPos {
+    fn range(&self, lines: &Lines<String>) -> Option<std::ops::Range<usize>> {
+        match self {
+            ReportPos::Full(range, _) => Some(range.clone()),
+            ReportPos::Range(range) => Some(range.clone()),
+            &ReportPos::LineCol(pair) => {
+                let i = lines.line_column_to_byte(pair.line, pair.col)?;
+                Some(i..i)
+            }
+            ReportPos::None => None,
+        }
+    }
+
+    fn line_col(&self, lines: &Lines<String>) -> Option<LineCol> {
+        match self {
+            &ReportPos::Full(_, pair) => Some(pair),
+            ReportPos::Range(range) => {
+                let (line, col) = lines.byte_to_line_column(range.start)?;
+                Some(LineCol::zero_based(line, col))
+            }
+            &ReportPos::LineCol(pair) => Some(pair),
+            ReportPos::None => None,
+        }
+    }
+}
+
+/// A line/column pair.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct LineCol {
+    /// The 0-based line.
+    line: usize,
+    /// The 0-based column.
+    col: usize,
+}
+
+impl LineCol {
+    /// Constructs the line/column pair from 0-based indices.
+    pub fn zero_based(line: usize, col: usize) -> Self {
+        Self { line, col }
+    }
+
+    /// Constructs the line/column pair from 1-based numbers.
+    pub fn one_based(line: usize, col: usize) -> Self {
+        Self {
+            line: line.saturating_sub(1),
+            col: col.saturating_sub(1),
+        }
+    }
+
+    /// Returns the 0-based line/column indices.
+    pub fn indices(&self) -> (usize, usize) {
+        (self.line, self.col)
+    }
+
+    /// Returns the 1-based line/column numbers.
+    pub fn numbers(&self) -> (usize, usize) {
+        (self.line + 1, self.col + 1)
+    }
+}
+
 /// Format a user-facing error message for an XML-like file format.
 pub fn format_xml_like_error(
     format: &str,
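
The point of `ReportPos` is that a loader can pass along whatever position information it has — a byte range, a line/column pair, both, or nothing — and `err_in_text`/`err_in_bytes` resolve it against `Lines`. A small standalone sketch of the conversions (the import path is an assumption; both types are public in this diff):

```rust
use typst_library::diag::{LineCol, ReportPos}; // assumed path

fn sketch() {
    // A ReportPos can be built from a byte range or from a line/column pair.
    let _from_range: ReportPos = (10..14).into();
    let _from_pair: ReportPos = LineCol::one_based(3, 7).into();

    // LineCol stores 0-based indices and exposes both views.
    let pair = LineCol::one_based(3, 7);
    assert_eq!(pair.indices(), (2, 6));
    assert_eq!(pair.numbers(), (3, 7));
}
```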
@@ -1,10 +1,10 @@
 use ecow::EcoVec;
 use typst_syntax::Spanned;
 
-use crate::diag::{bail, SourceDiagnostic, SourceResult};
+use crate::diag::{bail, LineCol, ReportPos, SourceDiagnostic, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
-use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos};
+use crate::loading::{DataSource, Load, Loaded, Readable};
 
 /// Reads structured data from a CSV file.
 ///
|
|||||||
})
|
})
|
||||||
.unwrap_or(LineCol::one_based(line, 1).into());
|
.unwrap_or(LineCol::one_based(line, 1).into());
|
||||||
match err.kind() {
|
match err.kind() {
|
||||||
::csv::ErrorKind::Utf8 { .. } => data.err_in_text(pos, msg, "file is not valid utf-8"),
|
::csv::ErrorKind::Utf8 { .. } => {
|
||||||
|
data.err_in_text(pos, msg, "file is not valid utf-8")
|
||||||
|
}
|
||||||
::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
|
::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
|
||||||
let err =
|
let err =
|
||||||
format!("found {len} instead of {expected_len} fields in line {line}");
|
format!("found {len} instead of {expected_len} fields in line {line}");
|
||||||
|
@@ -1,10 +1,10 @@
 use ecow::eco_format;
 use typst_syntax::Spanned;
 
-use crate::diag::{At, SourceResult};
+use crate::diag::{At, LineCol, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{func, scope, Str, Value};
-use crate::loading::{DataSource, LineCol, Load, Readable};
+use crate::loading::{DataSource, Load, Readable};
 
 /// Reads structured data from a JSON file.
 ///
@@ -16,9 +16,8 @@ mod xml_;
 mod yaml_;
 
 use comemo::Tracked;
-use ecow::{eco_vec, EcoString, EcoVec};
+use ecow::EcoString;
 use typst_syntax::{FileId, Span, Spanned};
-use utf8_iter::ErrorReportingUtf8Chars;
 
 pub use self::cbor_::*;
 pub use self::csv_::*;
@@ -28,7 +27,7 @@ pub use self::toml_::*;
 pub use self::xml_::*;
 pub use self::yaml_::*;
 
-use crate::diag::{error, At, FileError, SourceDiagnostic, SourceResult};
+use crate::diag::{At, FileError, SourceResult};
 use crate::foundations::OneOrMultiple;
 use crate::foundations::{cast, Bytes, Scope, Str};
 use crate::World;
@@ -129,6 +128,7 @@ pub struct Loaded {
 }
 
 impl Loaded {
+    /// FIXME: remove this?
     pub fn dummy() -> Self {
         Loaded::new(
             typst_syntax::Spanned::new(LoadSource::Bytes, Span::detached()),
|
|||||||
|
|
||||||
pub fn as_str(&self) -> SourceResult<&str> {
|
pub fn as_str(&self) -> SourceResult<&str> {
|
||||||
self.bytes.as_str().map_err(|err| {
|
self.bytes.as_str().map_err(|err| {
|
||||||
// TODO: should the error even be reported in the file if it's possibly binary?
|
|
||||||
let start = err.valid_up_to();
|
let start = err.valid_up_to();
|
||||||
let end = start + err.error_len().unwrap_or(0);
|
let end = start + err.error_len().unwrap_or(0);
|
||||||
self.err_in_text(start..end, "failed to convert to string", FileError::from(err))
|
// always report this error in the source file.
|
||||||
|
self.err_in_bytes(
|
||||||
|
start..end,
|
||||||
|
"failed to convert to string",
|
||||||
|
FileError::from(err),
|
||||||
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Report an error, possibly in an external file.
|
|
||||||
pub fn err_in_text(
|
|
||||||
&self,
|
|
||||||
pos: impl Into<ReportPos>,
|
|
||||||
msg: impl std::fmt::Display,
|
|
||||||
error: impl std::fmt::Display,
|
|
||||||
) -> EcoVec<SourceDiagnostic> {
|
|
||||||
let pos = pos.into();
|
|
||||||
let error = match self.source.v {
|
|
||||||
LoadSource::Path(file_id) => {
|
|
||||||
if let Some(range) = pos.range(self.bytes.as_slice()) {
|
|
||||||
let span = Span::from_range(file_id, range);
|
|
||||||
return eco_vec!(error!(span, "{msg} ({error})"));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Either there was no range provided, or resolving the range
|
|
||||||
// from the line/column failed. If present report the possibly
|
|
||||||
// wrong line/column anyway.
|
|
||||||
let span = Span::from_range(file_id, 0..self.bytes.len());
|
|
||||||
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
|
|
||||||
let (line, col) = pair.numbers();
|
|
||||||
error!(span, "{msg} ({error} at {line}:{col})")
|
|
||||||
} else {
|
|
||||||
error!(span, "{msg} ({error})")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LoadSource::Bytes => {
|
|
||||||
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
|
|
||||||
let (line, col) = pair.numbers();
|
|
||||||
error!(self.source.span, "{msg} ({error} at {line}:{col})")
|
|
||||||
} else {
|
|
||||||
error!(self.source.span, "{msg} ({error})")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
eco_vec![error]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A loaded [`DataSource`].
|
/// A loaded [`DataSource`].
|
||||||
@@ -195,142 +161,6 @@ pub enum LoadSource {
     Bytes,
 }
 
-#[derive(Debug, Default)]
-pub enum ReportPos {
-    /// Contains the range, and the 0-based line/column.
-    Full(std::ops::Range<usize>, LineCol),
-    /// Contains the range.
-    Range(std::ops::Range<usize>),
-    /// Contains the 0-based line/column.
-    LineCol(LineCol),
-    #[default]
-    None,
-}
-
-impl From<std::ops::Range<usize>> for ReportPos {
-    fn from(value: std::ops::Range<usize>) -> Self {
-        Self::Range(value)
-    }
-}
-
-impl From<LineCol> for ReportPos {
-    fn from(value: LineCol) -> Self {
-        Self::LineCol(value)
-    }
-}
-
-impl ReportPos {
-    fn range(&self, bytes: &[u8]) -> Option<std::ops::Range<usize>> {
-        match self {
-            ReportPos::Full(range, _) => Some(range.clone()),
-            ReportPos::Range(range) => Some(range.clone()),
-            &ReportPos::LineCol(pair) => pair.byte_pos(bytes).map(|i| i..i),
-            ReportPos::None => None,
-        }
-    }
-
-    fn line_col(&self, bytes: &[u8]) -> Option<LineCol> {
-        match self {
-            &ReportPos::Full(_, pair) => Some(pair),
-            ReportPos::Range(range) => LineCol::from_byte_pos(range.start, bytes),
-            &ReportPos::LineCol(pair) => Some(pair),
-            ReportPos::None => None,
-        }
-    }
-}
-
-#[derive(Clone, Copy, Debug)]
-pub struct LineCol {
-    /// The 0-based line.
-    line: usize,
-    /// The 0-based column.
-    col: usize,
-}
-
-impl LineCol {
-    /// Constructs the line/column pair from 0-based indices.
-    pub fn zero_based(line: usize, col: usize) -> Self {
-        Self { line, col }
-    }
-
-    /// Constructs the line/column pair from 1-based numbers.
-    pub fn one_based(line: usize, col: usize) -> Self {
-        Self {
-            line: line.saturating_sub(1),
-            col: col.saturating_sub(1),
-        }
-    }
-
-    pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
-        let bytes = &bytes[..pos];
-        let mut line = 0;
-        let line_start = memchr::memchr_iter(b'\n', bytes)
-            .inspect(|_| line += 1)
-            .last()
-            .map(|i| i + 1)
-            .unwrap_or(bytes.len());
-
-        // Try to compute a column even if the string isn't valid utf-8.
-        let col = ErrorReportingUtf8Chars::new(&bytes[line_start..]).count();
-        Some(LineCol::zero_based(line, col))
-    }
-
-    pub fn byte_pos(&self, bytes: &[u8]) -> Option<usize> {
-        let line_offset = if let Some(idx) = self.line.checked_sub(1) {
-            memchr::memchr_iter(b'\n', bytes).nth(idx).map(|i| i + 1)?
-        } else {
-            0
-        };
-
-        let col_offset = col_offset(line_offset, self.col, bytes)?;
-        let pos = line_offset + col_offset;
-        Some(pos)
-    }
-
-    pub fn byte_range(
-        range: std::ops::Range<Self>,
-        bytes: &[u8],
-    ) -> Option<std::ops::Range<usize>> {
-        let mut line_iter = memchr::memchr_iter(b'\n', bytes);
-        let start_line_offset = if let Some(idx) = range.start.line.checked_sub(1) {
-            line_iter.nth(idx).map(|i| i + 1)?
-        } else {
-            0
-        };
-        let line_delta = range.end.line - range.start.line;
-        let end_line_offset = if let Some(idx) = line_delta.checked_sub(1) {
-            line_iter.nth(idx).map(|i| i + 1)?
-        } else {
-            start_line_offset
-        };
-
-        let start_col_offset = col_offset(start_line_offset, range.start.col, bytes)?;
-        let end_col_offset = col_offset(end_line_offset, range.end.col, bytes)?;
-
-        let start = start_line_offset + start_col_offset;
-        let end = end_line_offset + end_col_offset;
-        Some(start..end)
-    }
-
-    pub fn numbers(&self) -> (usize, usize) {
-        (self.line + 1, self.col + 1)
-    }
-}
-
-fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option<usize> {
-    let line = &bytes[line_offset..];
-    // TODO: streaming-utf8 decoding ignore invalid characters
-    // might neeed to update error reporting too (use utf8_iter)
-    if let Some(idx) = col.checked_sub(1) {
-        // Try to compute position even if the string isn't valid utf-8.
-        let mut iter = ErrorReportingUtf8Chars::new(line);
-        _ = iter.nth(idx)?;
-        Some(line.len() - iter.as_slice().len())
-    } else {
-        Some(0)
-    }
-}
-
 /// A value that can be read from a file.
 #[derive(Debug, Clone, PartialEq, Hash)]
 pub enum Readable {
@@ -1,10 +1,10 @@
 use ecow::{eco_format, EcoVec};
 use typst_syntax::Spanned;
 
-use crate::diag::{At, SourceDiagnostic, SourceResult};
+use crate::diag::{At, ReportPos, SourceDiagnostic, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{func, scope, Str, Value};
-use crate::loading::{Loaded, DataSource, Load, Readable, ReportPos};
+use crate::loading::{DataSource, Load, Loaded, Readable};
 
 /// Reads structured data from a TOML file.
 ///
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Format the user-facing TOML error message.
|
/// Format the user-facing TOML error message.
|
||||||
fn format_toml_error(data: &Loaded, error: ::toml::de::Error) -> EcoVec<SourceDiagnostic> {
|
fn format_toml_error(
|
||||||
|
data: &Loaded,
|
||||||
|
error: ::toml::de::Error,
|
||||||
|
) -> EcoVec<SourceDiagnostic> {
|
||||||
let pos = error.span().map(ReportPos::Range).unwrap_or_default();
|
let pos = error.span().map(ReportPos::Range).unwrap_or_default();
|
||||||
data.err_in_text(pos, "failed to parse TOML", error.message())
|
data.err_in_text(pos, "failed to parse TOML", error.message())
|
||||||
}
|
}
|
||||||
|
@@ -5,7 +5,7 @@ use typst_syntax::Spanned;
 use crate::diag::{format_xml_like_error, SourceDiagnostic, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
-use crate::loading::{Loaded, DataSource, Load, Readable};
+use crate::loading::{DataSource, Load, Loaded, Readable};
 
 /// Reads structured data from an XML file.
 ///
@@ -1,10 +1,10 @@
 use ecow::{eco_format, EcoVec};
 use typst_syntax::Spanned;
 
-use crate::diag::{At, SourceDiagnostic, SourceResult};
+use crate::diag::{At, LineCol, ReportPos, SourceDiagnostic, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{func, scope, Str, Value};
-use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos};
+use crate::loading::{DataSource, Load, Loaded, Readable};
 
 /// Reads structured data from a YAML file.
 ///
@@ -20,7 +20,8 @@ use typst_syntax::{Span, Spanned};
 use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr};
 
 use crate::diag::{
-    bail, error, At, HintedStrResult, SourceDiagnostic, SourceResult, StrResult,
+    bail, error, At, HintedStrResult, ReportPos, SourceDiagnostic, SourceResult,
+    StrResult,
 };
 use crate::engine::{Engine, Sink};
 use crate::foundations::{
@@ -33,7 +34,7 @@ use crate::layout::{
     BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem,
     Sides, Sizing, TrackSizings,
 };
-use crate::loading::{format_yaml_error, Loaded, DataSource, Load, LoadSource, ReportPos};
+use crate::loading::{format_yaml_error, DataSource, Load, LoadSource, Loaded};
 use crate::model::{
     CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem,
     Url,
@@ -480,7 +481,9 @@ impl CslStyle {
                 typst_utils::hash128(&(TypeId::of::<Bytes>(), data)),
             )))
         })
-        .map_err(|err| data.err_in_text(ReportPos::None, "failed to load CSL style", err))
+        .map_err(|err| {
+            data.err_in_text(ReportPos::None, "failed to load CSL style", err)
+        })
     }
 
     /// Get the underlying independent style.
@@ -11,7 +11,7 @@ use typst_utils::ManuallyHash;
 use unicode_segmentation::UnicodeSegmentation;
 
 use super::Lang;
-use crate::diag::{SourceDiagnostic, SourceResult};
+use crate::diag::{LineCol, ReportPos, SourceDiagnostic, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{
     cast, elem, scope, Content, Derived, NativeElement, OneOrMultiple, Packed, PlainText,
@@ -19,7 +19,7 @@ use crate::foundations::{
 };
 use crate::html::{tag, HtmlElem};
 use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
-use crate::loading::{DataSource, LineCol, Load, Loaded, ReportPos};
+use crate::loading::{DataSource, Load, Loaded};
 use crate::model::{Figurable, ParElem};
 use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize};
 use crate::visualize::Color;
@@ -15,6 +15,7 @@ readme = { workspace = true }
 [dependencies]
 typst-timing = { workspace = true }
 typst-utils = { workspace = true }
+comemo = { workspace = true }
 ecow = { workspace = true }
 serde = { workspace = true }
 toml = { workspace = true }
@@ -7,6 +7,7 @@ mod file;
 mod highlight;
 mod kind;
 mod lexer;
+mod lines;
 mod node;
 mod parser;
 mod path;
@@ -22,6 +23,7 @@ pub use self::lexer::{
     is_id_continue, is_id_start, is_ident, is_newline, is_valid_label_literal_id,
     link_prefix, split_newlines,
 };
+pub use self::lines::Lines;
 pub use self::node::{LinkedChildren, LinkedNode, Side, SyntaxError, SyntaxNode};
 pub use self::parser::{parse, parse_code, parse_math};
 pub use self::path::VirtualPath;

crates/typst-syntax/src/lines.rs (new file, 407 lines)
@@ -0,0 +1,407 @@
+use std::hash::{Hash, Hasher};
+use std::iter::zip;
+use std::ops::Range;
+use std::str::Utf8Error;
+use std::sync::Arc;
+
+use crate::is_newline;
+
+/// Metadata about lines.
+#[derive(Clone)]
+pub struct Lines<S>(Arc<Repr<S>>);
+
+#[derive(Clone)]
+struct Repr<S> {
+    lines: Vec<Line>,
+    str: S,
+}
+
+/// Metadata about a line.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct Line {
+    /// The UTF-8 byte offset where the line starts.
+    byte_idx: usize,
+    /// The UTF-16 codepoint offset where the line starts.
+    utf16_idx: usize,
+}
+
+impl<S: AsRef<str>> Lines<S> {
+    /// TODO: memoize this?
+    pub fn new(str: S) -> Self {
+        let lines = lines(str.as_ref());
+        Lines(Arc::new(Repr { lines, str }))
+    }
+
+    pub fn text(&self) -> &str {
+        self.0.str.as_ref()
+    }
+
+    /// Get the length of the file in UTF-8 encoded bytes.
+    pub fn len_bytes(&self) -> usize {
+        self.0.str.as_ref().len()
+    }
+
+    /// Get the length of the file in UTF-16 code units.
+    pub fn len_utf16(&self) -> usize {
+        let last = self.0.lines.last().unwrap();
+        last.utf16_idx + len_utf16(&self.text()[last.byte_idx..])
+    }
+
+    /// Get the length of the file in lines.
+    pub fn len_lines(&self) -> usize {
+        self.0.lines.len()
+    }
+
+    /// Return the index of the UTF-16 code unit at the byte index.
+    pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
+        let line_idx = self.byte_to_line(byte_idx)?;
+        let line = self.0.lines.get(line_idx)?;
+        let head = self.text().get(line.byte_idx..byte_idx)?;
+        Some(line.utf16_idx + len_utf16(head))
+    }
+
+    /// Return the index of the line that contains the given byte index.
+    pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
+        (byte_idx <= self.text().len()).then(|| {
+            match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
+                Ok(i) => i,
+                Err(i) => i - 1,
+            }
+        })
+    }
+
+    /// Return the index of the column at the byte index.
+    ///
+    /// The column is defined as the number of characters in the line before the
+    /// byte index.
+    pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
+        let line = self.byte_to_line(byte_idx)?;
+        let start = self.line_to_byte(line)?;
+        let head = self.text().get(start..byte_idx)?;
+        Some(head.chars().count())
+    }
+
+    /// Return the index of the line and column at the byte index.
+    pub fn byte_to_line_column(&self, byte_idx: usize) -> Option<(usize, usize)> {
+        let line = self.byte_to_line(byte_idx)?;
+        let start = self.line_to_byte(line)?;
+        let head = self.text().get(start..byte_idx)?;
+        let col = head.chars().count();
+        Some((line, col))
+    }
+
+    /// Return the byte index at the UTF-16 code unit.
+    pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
+        let line = self.0.lines.get(
+            match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
+                Ok(i) => i,
+                Err(i) => i - 1,
+            },
+        )?;
+
+        let text = self.text();
+        let mut k = line.utf16_idx;
+        for (i, c) in text[line.byte_idx..].char_indices() {
+            if k >= utf16_idx {
+                return Some(line.byte_idx + i);
+            }
+            k += c.len_utf16();
+        }
+
+        (k == utf16_idx).then_some(text.len())
+    }
+
+    /// Return the byte position at which the given line starts.
+    pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
+        self.0.lines.get(line_idx).map(|line| line.byte_idx)
+    }
+
+    /// Return the range which encloses the given line.
+    pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
+        let start = self.line_to_byte(line_idx)?;
+        let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text().len());
+        Some(start..end)
+    }
+
+    /// Return the byte index of the given (line, column) pair.
+    ///
+    /// The column defines the number of characters to go beyond the start of
+    /// the line.
+    pub fn line_column_to_byte(
+        &self,
+        line_idx: usize,
+        column_idx: usize,
+    ) -> Option<usize> {
+        let range = self.line_to_range(line_idx)?;
+        let line = self.text().get(range.clone())?;
+        let mut chars = line.chars();
+        for _ in 0..column_idx {
+            chars.next();
+        }
+        Some(range.start + (line.len() - chars.as_str().len()))
+    }
+}
+
+impl Lines<String> {
+    /// Tries to convert the bytes
+    #[comemo::memoize]
+    pub fn from_bytes(bytes: &[u8]) -> Result<Lines<String>, Utf8Error> {
+        let str = std::str::from_utf8(bytes)?;
+        Ok(Lines::new(str.to_string()))
+    }
+
+    /// Fully replace the source text.
+    ///
+    /// This performs a naive (suffix/prefix-based) diff of the old and new text
+    /// to produce the smallest single edit that transforms old into new and
+    /// then calls [`edit`](Self::edit) with it.
+    ///
+    /// Returns whether any changes were made.
+    pub fn replace(&mut self, new: &str) -> bool {
+        let Some((prefix, suffix)) = self.replacement_range(new) else {
+            return false;
+        };
+
+        let old = self.text();
+        let replace = prefix..old.len() - suffix;
+        let with = &new[prefix..new.len() - suffix];
+        self.edit(replace, with);
+
+        true
+    }
+
+    /// Returns the common prefix and suffix lengths.
+    /// Returns [`None`] if the old and new strings are equal.
+    pub fn replacement_range(&self, new: &str) -> Option<(usize, usize)> {
+        let old = self.text();
+
+        let mut prefix =
+            zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
+
+        if prefix == old.len() && prefix == new.len() {
+            return None;
+        }
+
+        while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
+            prefix -= 1;
+        }
+
+        let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
+            .take_while(|(x, y)| x == y)
+            .count();
+
+        while !old.is_char_boundary(old.len() - suffix)
+            || !new.is_char_boundary(new.len() - suffix)
+        {
+            suffix += 1;
+        }
+
+        Some((prefix, suffix))
+    }
+
+    /// Edit the source file by replacing the given range.
+    ///
+    /// Returns the range in the new source that was ultimately reparsed.
+    ///
+    /// The method panics if the `replace` range is out of bounds.
+    #[track_caller]
+    pub fn edit(&mut self, replace: Range<usize>, with: &str) {
+        let start_byte = replace.start;
+        let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
+        let line = self.byte_to_line(start_byte).unwrap();
+
+        let inner = Arc::make_mut(&mut self.0);
+
+        // Update the text itself.
+        inner.str.replace_range(replace.clone(), with);
+
+        // Remove invalidated line starts.
+        inner.lines.truncate(line + 1);
+
+        // Handle adjoining of \r and \n.
+        if inner.str[..start_byte].ends_with('\r') && with.starts_with('\n') {
+            inner.lines.pop();
+        }
+
+        // Recalculate the line starts after the edit.
+        inner
+            .lines
+            .extend(lines_from(start_byte, start_utf16, &inner.str[start_byte..]));
+    }
+}
+
+/// Create a line vector.
+fn lines(text: &str) -> Vec<Line> {
+    std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
+        .chain(lines_from(0, 0, text))
+        .collect()
+}
+
+/// Compute a line iterator from an offset.
+fn lines_from(
+    byte_offset: usize,
+    utf16_offset: usize,
+    text: &str,
+) -> impl Iterator<Item = Line> + '_ {
+    let mut s = unscanny::Scanner::new(text);
+    let mut utf16_idx = utf16_offset;
+
+    std::iter::from_fn(move || {
+        s.eat_until(|c: char| {
+            utf16_idx += c.len_utf16();
+            is_newline(c)
+        });
+
+        if s.done() {
+            return None;
+        }
+
+        if s.eat() == Some('\r') && s.eat_if('\n') {
+            utf16_idx += 1;
+        }
+
+        Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
+    })
+}
+
+/// The number of code units this string would use if it was encoded in
+/// UTF16. This runs in linear time.
+fn len_utf16(string: &str) -> usize {
+    string.chars().map(char::len_utf16).sum()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
+
+    #[test]
+    fn test_source_file_new() {
+        let lines = Lines::new(TEST);
+        assert_eq!(
+            lines.0.lines,
+            [
+                Line { byte_idx: 0, utf16_idx: 0 },
+                Line { byte_idx: 7, utf16_idx: 6 },
+                Line { byte_idx: 15, utf16_idx: 12 },
+                Line { byte_idx: 18, utf16_idx: 15 },
+            ]
+        );
+    }
+
+    #[test]
+    fn test_source_file_pos_to_line() {
+        let lines = Lines::new(TEST);
+        assert_eq!(lines.byte_to_line(0), Some(0));
+        assert_eq!(lines.byte_to_line(2), Some(0));
+        assert_eq!(lines.byte_to_line(6), Some(0));
+        assert_eq!(lines.byte_to_line(7), Some(1));
+        assert_eq!(lines.byte_to_line(8), Some(1));
+        assert_eq!(lines.byte_to_line(12), Some(1));
+        assert_eq!(lines.byte_to_line(21), Some(3));
+        assert_eq!(lines.byte_to_line(22), None);
+    }
+
+    #[test]
+    fn test_source_file_pos_to_column() {
+        let lines = Lines::new(TEST);
+        assert_eq!(lines.byte_to_column(0), Some(0));
+        assert_eq!(lines.byte_to_column(2), Some(1));
+        assert_eq!(lines.byte_to_column(6), Some(5));
+        assert_eq!(lines.byte_to_column(7), Some(0));
+        assert_eq!(lines.byte_to_column(8), Some(1));
+        assert_eq!(lines.byte_to_column(12), Some(2));
+    }
+
+    #[test]
+    fn test_source_file_utf16() {
+        #[track_caller]
+        fn roundtrip(lines: &Lines<&str>, byte_idx: usize, utf16_idx: usize) {
+            let middle = lines.byte_to_utf16(byte_idx).unwrap();
+            let result = lines.utf16_to_byte(middle).unwrap();
+            assert_eq!(middle, utf16_idx);
+            assert_eq!(result, byte_idx);
+        }
+
+        let lines = Lines::new(TEST);
+        roundtrip(&lines, 0, 0);
+        roundtrip(&lines, 2, 1);
+        roundtrip(&lines, 3, 2);
+        roundtrip(&lines, 8, 7);
+        roundtrip(&lines, 12, 9);
+        roundtrip(&lines, 21, 18);
+        assert_eq!(lines.byte_to_utf16(22), None);
+        assert_eq!(lines.utf16_to_byte(19), None);
+    }
+
+    #[test]
+    fn test_source_file_roundtrip() {
+        #[track_caller]
+        fn roundtrip(lines: &Lines<&str>, byte_idx: usize) {
+            let line = lines.byte_to_line(byte_idx).unwrap();
+            let column = lines.byte_to_column(byte_idx).unwrap();
+            let result = lines.line_column_to_byte(line, column).unwrap();
+            assert_eq!(result, byte_idx);
+        }
+
+        let lines = Lines::new(TEST);
+        roundtrip(&lines, 0);
+        roundtrip(&lines, 7);
+        roundtrip(&lines, 12);
+        roundtrip(&lines, 21);
+    }
+
+    #[test]
+    fn test_source_file_edit() {
+        // This tests only the non-parser parts. The reparsing itself is
+        // tested separately.
+        #[track_caller]
+        fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
+            let reference = Lines::new(after);
+
+            let mut edited = Lines::new(prev.to_string());
+            edited.edit(range.clone(), with);
+            assert_eq!(edited.text(), reference.text());
+            assert_eq!(edited.0.lines, reference.0.lines);
+
+            let mut replaced = Lines::new(prev.to_string());
+            replaced.replace(&{
+                let mut s = prev.to_string();
+                s.replace_range(range, with);
+                s
+            });
+            assert_eq!(replaced.text(), reference.text());
+            assert_eq!(replaced.0.lines, reference.0.lines);
+        }
+
+        // Test inserting at the beginning.
+        test("abc\n", 0..0, "hi\n", "hi\nabc\n");
+        test("\nabc", 0..0, "hi\r", "hi\r\nabc");
+
+        // Test editing in the middle.
+        test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
+
+        // Test appending.
+        test("abc\ndef", 7..7, "hi", "abc\ndefhi");
+        test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
+
+        // Test appending with adjoining \r and \n.
+        test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
+
+        // Test removing everything.
+        test(TEST, 0..21, "", "");
+    }
+}
+
+impl<S: Hash> Hash for Lines<S> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.0.str.hash(state);
+    }
+}
+
+impl<S: AsRef<str>> AsRef<str> for Lines<S> {
+    fn as_ref(&self) -> &str {
+        self.0.str.as_ref()
+    }
+}
@@ -2,14 +2,14 @@
 
 use std::fmt::{self, Debug, Formatter};
 use std::hash::{Hash, Hasher};
-use std::iter::zip;
 use std::ops::Range;
 use std::sync::Arc;
 
 use typst_utils::LazyHash;
 
+use crate::lines::Lines;
 use crate::reparser::reparse;
-use crate::{is_newline, parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
+use crate::{parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
 
 /// A source file.
 ///
@@ -24,9 +24,8 @@ pub struct Source(Arc<Repr>);
 #[derive(Clone)]
 struct Repr {
     id: FileId,
-    text: LazyHash<String>,
     root: LazyHash<SyntaxNode>,
-    lines: Vec<Line>,
+    lines: LazyHash<Lines<String>>,
 }
 
 impl Source {
@@ -37,8 +36,7 @@ impl Source {
         root.numberize(id, Span::FULL).unwrap();
         Self(Arc::new(Repr {
             id,
-            lines: lines(&text),
-            text: LazyHash::new(text),
+            lines: LazyHash::new(Lines::new(text)),
             root: LazyHash::new(root),
         }))
     }
@@ -58,9 +56,14 @@ impl Source {
         self.0.id
     }
 
+    /// The whole source as a string slice.
+    pub fn lines(&self) -> Lines<String> {
+        Lines::clone(&self.0.lines)
+    }
+
     /// The whole source as a string slice.
     pub fn text(&self) -> &str {
-        &self.0.text
+        &self.0.lines.text()
     }
 
     /// Slice out the part of the source code enclosed by the range.
@@ -77,29 +80,12 @@ impl Source {
     /// Returns the range in the new source that was ultimately reparsed.
     pub fn replace(&mut self, new: &str) -> Range<usize> {
         let _scope = typst_timing::TimingScope::new("replace source");
-        let old = self.text();
 
-        let mut prefix =
-            zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
-
-        if prefix == old.len() && prefix == new.len() {
+        let Some((prefix, suffix)) = self.0.lines.replacement_range(new) else {
             return 0..0;
-        }
-
-        while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
-            prefix -= 1;
-        }
-
-        let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
-            .take_while(|(x, y)| x == y)
-            .count();
-
-        while !old.is_char_boundary(old.len() - suffix)
-            || !new.is_char_boundary(new.len() - suffix)
-        {
-            suffix += 1;
-        }
+        };
 
+        let old = self.text();
         let replace = prefix..old.len() - suffix;
         let with = &new[prefix..new.len() - suffix];
         self.edit(replace, with)
@@ -112,48 +98,28 @@ impl Source {
     /// The method panics if the `replace` range is out of bounds.
     #[track_caller]
    pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> {
-        let start_byte = replace.start;
-        let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
-        let line = self.byte_to_line(start_byte).unwrap();
-
         let inner = Arc::make_mut(&mut self.0);
 
-        // Update the text itself.
-        inner.text.replace_range(replace.clone(), with);
+        // Update the text and lines.
+        inner.lines.edit(replace.clone(), with);
 
-        // Remove invalidated line starts.
-        inner.lines.truncate(line + 1);
-
-        // Handle adjoining of \r and \n.
-        if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') {
-            inner.lines.pop();
-        }
-
-        // Recalculate the line starts after the edit.
-        inner.lines.extend(lines_from(
-            start_byte,
-            start_utf16,
-            &inner.text[start_byte..],
-        ));
-
         // Incrementally reparse the replaced range.
-        reparse(&mut inner.root, &inner.text, replace, with.len())
+        reparse(&mut inner.root, inner.lines.text(), replace, with.len())
     }
 
     /// Get the length of the file in UTF-8 encoded bytes.
     pub fn len_bytes(&self) -> usize {
-        self.text().len()
+        self.0.lines.len_bytes()
     }
 
     /// Get the length of the file in UTF-16 code units.
     pub fn len_utf16(&self) -> usize {
-        let last = self.0.lines.last().unwrap();
-        last.utf16_idx + len_utf16(&self.0.text[last.byte_idx..])
+        self.0.lines.len_utf16()
     }
 
     /// Get the length of the file in lines.
     pub fn len_lines(&self) -> usize {
-        self.0.lines.len()
+        self.0.lines.len_lines()
    }
 
     /// Find the node with the given span.
@@ -171,85 +137,6 @@ impl Source {
     pub fn range(&self, span: Span) -> Option<Range<usize>> {
         Some(self.find(span)?.range())
     }
-
-    /// Return the index of the UTF-16 code unit at the byte index.
-    pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
-        let line_idx = self.byte_to_line(byte_idx)?;
-        let line = self.0.lines.get(line_idx)?;
-        let head = self.0.text.get(line.byte_idx..byte_idx)?;
-        Some(line.utf16_idx + len_utf16(head))
-    }
-
-    /// Return the index of the line that contains the given byte index.
-    pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
-        (byte_idx <= self.0.text.len()).then(|| {
-            match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
-                Ok(i) => i,
-                Err(i) => i - 1,
-            }
-        })
-    }
-
-    /// Return the index of the column at the byte index.
-    ///
-    /// The column is defined as the number of characters in the line before the
-    /// byte index.
-    pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
-        let line = self.byte_to_line(byte_idx)?;
-        let start = self.line_to_byte(line)?;
-        let head = self.get(start..byte_idx)?;
-        Some(head.chars().count())
-    }
-
-    /// Return the byte index at the UTF-16 code unit.
-    pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
-        let line = self.0.lines.get(
-            match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
-                Ok(i) => i,
-                Err(i) => i - 1,
-            },
-        )?;
-
-        let mut k = line.utf16_idx;
-        for (i, c) in self.0.text[line.byte_idx..].char_indices() {
-            if k >= utf16_idx {
-                return Some(line.byte_idx + i);
-            }
-            k += c.len_utf16();
-        }
-
-        (k == utf16_idx).then_some(self.0.text.len())
-    }
-
-    /// Return the byte position at which the given line starts.
-    pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
-        self.0.lines.get(line_idx).map(|line| line.byte_idx)
-    }
-
-    /// Return the range which encloses the given line.
-    pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
-        let start = self.line_to_byte(line_idx)?;
-        let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len());
-        Some(start..end)
-    }
-
-    /// Return the byte index of the given (line, column) pair.
-    ///
-    /// The column defines the number of characters to go beyond the start of
-    /// the line.
-    pub fn line_column_to_byte(
-        &self,
-        line_idx: usize,
-        column_idx: usize,
-    ) -> Option<usize> {
-        let range = self.line_to_range(line_idx)?;
-        let line = self.get(range.clone())?;
-        let mut chars = line.chars();
-        for _ in 0..column_idx {
-            chars.next();
-        }
-        Some(range.start + (line.len() - chars.as_str().len()))
-    }
 }
 
 impl Debug for Source {
@@ -261,7 +148,7 @@ impl Debug for Source {
 impl Hash for Source {
     fn hash<H: Hasher>(&self, state: &mut H) {
         self.0.id.hash(state);
-        self.0.text.hash(state);
+        self.0.lines.hash(state);
         self.0.root.hash(state);
     }
 }
@ -271,176 +158,3 @@ impl AsRef<str> for Source {
         self.text()
     }
 }
-
-/// Metadata about a line.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-struct Line {
-    /// The UTF-8 byte offset where the line starts.
-    byte_idx: usize,
-    /// The UTF-16 codepoint offset where the line starts.
-    utf16_idx: usize,
-}
-
-/// Create a line vector.
-fn lines(text: &str) -> Vec<Line> {
-    std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
-        .chain(lines_from(0, 0, text))
-        .collect()
-}
-
-/// Compute a line iterator from an offset.
-fn lines_from(
-    byte_offset: usize,
-    utf16_offset: usize,
-    text: &str,
-) -> impl Iterator<Item = Line> + '_ {
-    let mut s = unscanny::Scanner::new(text);
-    let mut utf16_idx = utf16_offset;
-
-    std::iter::from_fn(move || {
-        s.eat_until(|c: char| {
-            utf16_idx += c.len_utf16();
-            is_newline(c)
-        });
-
-        if s.done() {
-            return None;
-        }
-
-        if s.eat() == Some('\r') && s.eat_if('\n') {
-            utf16_idx += 1;
-        }
-
-        Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
-    })
-}
-
-/// The number of code units this string would use if it was encoded in
-/// UTF16. This runs in linear time.
-fn len_utf16(string: &str) -> usize {
-    string.chars().map(char::len_utf16).sum()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
-
-    #[test]
-    fn test_source_file_new() {
-        let source = Source::detached(TEST);
-        assert_eq!(
-            source.0.lines,
-            [
-                Line { byte_idx: 0, utf16_idx: 0 },
-                Line { byte_idx: 7, utf16_idx: 6 },
-                Line { byte_idx: 15, utf16_idx: 12 },
-                Line { byte_idx: 18, utf16_idx: 15 },
-            ]
-        );
-    }
-
-    #[test]
-    fn test_source_file_pos_to_line() {
-        let source = Source::detached(TEST);
-        assert_eq!(source.byte_to_line(0), Some(0));
-        assert_eq!(source.byte_to_line(2), Some(0));
-        assert_eq!(source.byte_to_line(6), Some(0));
-        assert_eq!(source.byte_to_line(7), Some(1));
-        assert_eq!(source.byte_to_line(8), Some(1));
-        assert_eq!(source.byte_to_line(12), Some(1));
-        assert_eq!(source.byte_to_line(21), Some(3));
-        assert_eq!(source.byte_to_line(22), None);
-    }
-
-    #[test]
-    fn test_source_file_pos_to_column() {
-        let source = Source::detached(TEST);
-        assert_eq!(source.byte_to_column(0), Some(0));
-        assert_eq!(source.byte_to_column(2), Some(1));
-        assert_eq!(source.byte_to_column(6), Some(5));
-        assert_eq!(source.byte_to_column(7), Some(0));
-        assert_eq!(source.byte_to_column(8), Some(1));
-        assert_eq!(source.byte_to_column(12), Some(2));
-    }
-
-    #[test]
-    fn test_source_file_utf16() {
-        #[track_caller]
-        fn roundtrip(source: &Source, byte_idx: usize, utf16_idx: usize) {
-            let middle = source.byte_to_utf16(byte_idx).unwrap();
-            let result = source.utf16_to_byte(middle).unwrap();
-            assert_eq!(middle, utf16_idx);
-            assert_eq!(result, byte_idx);
-        }
-
-        let source = Source::detached(TEST);
-        roundtrip(&source, 0, 0);
-        roundtrip(&source, 2, 1);
-        roundtrip(&source, 3, 2);
-        roundtrip(&source, 8, 7);
-        roundtrip(&source, 12, 9);
-        roundtrip(&source, 21, 18);
-        assert_eq!(source.byte_to_utf16(22), None);
-        assert_eq!(source.utf16_to_byte(19), None);
-    }
-
-    #[test]
-    fn test_source_file_roundtrip() {
-        #[track_caller]
-        fn roundtrip(source: &Source, byte_idx: usize) {
-            let line = source.byte_to_line(byte_idx).unwrap();
-            let column = source.byte_to_column(byte_idx).unwrap();
-            let result = source.line_column_to_byte(line, column).unwrap();
-            assert_eq!(result, byte_idx);
-        }
-
-        let source = Source::detached(TEST);
-        roundtrip(&source, 0);
-        roundtrip(&source, 7);
-        roundtrip(&source, 12);
-        roundtrip(&source, 21);
-    }
-
-    #[test]
-    fn test_source_file_edit() {
-        // This tests only the non-parser parts. The reparsing itself is
-        // tested separately.
-        #[track_caller]
-        fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
-            let reference = Source::detached(after);
-
-            let mut edited = Source::detached(prev);
-            edited.edit(range.clone(), with);
-            assert_eq!(edited.text(), reference.text());
-            assert_eq!(edited.0.lines, reference.0.lines);
-
-            let mut replaced = Source::detached(prev);
-            replaced.replace(&{
-                let mut s = prev.to_string();
-                s.replace_range(range, with);
-                s
-            });
-            assert_eq!(replaced.text(), reference.text());
-            assert_eq!(replaced.0.lines, reference.0.lines);
-        }
-
-        // Test inserting at the beginning.
-        test("abc\n", 0..0, "hi\n", "hi\nabc\n");
-        test("\nabc", 0..0, "hi\r", "hi\r\nabc");
-
-        // Test editing in the middle.
-        test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
-
-        // Test appending.
-        test("abc\ndef", 7..7, "hi", "abc\ndefhi");
-        test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
-
-        // Test appending with adjoining \r and \n.
-        test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
-
-        // Test removing everything.
-        test(TEST, 0..21, "", "");
-    }
-}
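The removed `Line` records keep a byte offset and a UTF-16 offset per line. Extending the earlier sketch with the UTF-16 side, a hedged standalone version of that bookkeeping looks as follows (`LineStart` and `line_table` are hypothetical names; only `\n`, `\r`, and `\r\n` are handled, unlike the removed `is_newline`-based scanner).

/// One entry per line, as in the removed `Line` struct.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LineStart {
    /// UTF-8 byte offset where the line starts.
    byte_idx: usize,
    /// UTF-16 code unit offset where the line starts.
    utf16_idx: usize,
}

/// Walk the text once, tracking both offsets.
fn line_table(text: &str) -> Vec<LineStart> {
    let mut out = vec![LineStart { byte_idx: 0, utf16_idx: 0 }];
    let mut utf16_idx = 0;
    let mut iter = text.char_indices().peekable();
    while let Some((i, c)) = iter.next() {
        utf16_idx += c.len_utf16();
        let mut next_byte = i + c.len_utf8();
        let newline = match c {
            '\n' => true,
            '\r' => {
                // "\r\n" counts as a single line break.
                if let Some(&(j, '\n')) = iter.peek() {
                    iter.next();
                    utf16_idx += 1;
                    next_byte = j + 1;
                }
                true
            }
            _ => false,
        };
        if newline {
            out.push(LineStart { byte_idx: next_byte, utf16_idx });
        }
    }
    out
}

On the `TEST` constant above this produces `(0, 0)`, `(7, 6)`, `(15, 12)`, and `(18, 15)`, matching the offsets asserted in `test_source_file_new`.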
@ -6,9 +6,11 @@ use std::str::FromStr;
 use std::sync::LazyLock;

 use ecow::{eco_format, EcoString};
-use typst::loading::LineCol;
+use typst::diag::LineCol;
 use typst_syntax::package::PackageVersion;
-use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
+use typst_syntax::{
+    is_id_continue, is_ident, is_newline, FileId, Lines, Source, VirtualPath,
+};
 use unscanny::Scanner;

 use crate::world::{read, system_path};
@ -426,11 +428,17 @@ impl<'a> Parser<'a> {
         }

         let start = self.parse_line_col()?;
+        let lines = Lines::from_bytes(text.as_ref()).expect("Errors shouldn't be annotated for files that aren't human readable (not valid utf-8)");
         let range = if self.s.eat_if('-') {
             let end = self.parse_line_col()?;
-            LineCol::byte_range(start..end, &text)
+            let (line, col) = start.indices();
+            let start = lines.line_column_to_byte(line, col);
+            let (line, col) = end.indices();
+            let end = lines.line_column_to_byte(line, col);
+            Option::zip(start, end).map(|(a, b)| a..b)
         } else {
-            start.byte_pos(&text).map(|i| i..i)
+            let (line, col) = start.indices();
+            lines.line_column_to_byte(line, col).map(|i| i..i)
         };
         if range.is_none() {
             self.error("range is out of bounds");
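A hedged usage sketch of the API this hunk switches to. Only what the diff itself shows is assumed: `Lines::from_bytes` fails on input that is not valid UTF-8, `LineCol::indices()` yields zero-based line/column indices, and `line_column_to_byte` returns `Option<usize>`; exact signatures are not guaranteed, and `annotation_byte_range` is a hypothetical helper.

use std::ops::Range;

use typst_syntax::Lines;

/// Resolve a zero-based (line, column) span to a byte range, as the parser
/// above does for annotation ranges. `start` and `end` stand in for the
/// values produced by `LineCol::indices()`.
fn annotation_byte_range(
    bytes: &[u8],
    start: (usize, usize),
    end: (usize, usize),
) -> Option<Range<usize>> {
    // As in the diff: annotated files are expected to be valid UTF-8.
    let lines = Lines::from_bytes(bytes).expect("annotated file must be valid UTF-8");
    let s = lines.line_column_to_byte(start.0, start.1);
    let e = lines.line_column_to_byte(end.0, end.1);
    Option::zip(s, e).map(|(a, b)| a..b)
}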
@ -484,13 +492,13 @@ impl<'a> Parser<'a> {
         let line_idx = (line_idx_in_test + comments).checked_add_signed(line_delta)?;
         let column_idx = if column < 0 {
             // Negative column index is from the back.
-            let range = source.line_to_range(line_idx)?;
+            let range = source.lines().line_to_range(line_idx)?;
             text[range].chars().count().saturating_add_signed(column)
         } else {
             usize::try_from(column).ok()?.checked_sub(1)?
         };

-        source.line_column_to_byte(line_idx, column_idx)
+        source.lines().line_column_to_byte(line_idx, column_idx)
     }

     /// Parse a number.
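The column convention used by the harness above, as a tiny standalone sketch (hypothetical helper name): positive columns are 1-based from the line start, negative columns count characters back from the end of the line.

/// Resolve a possibly negative column specification to a zero-based
/// character index within `line_text` (the full text of the line).
fn resolve_column_index(line_text: &str, column: isize) -> Option<usize> {
    if column < 0 {
        // Count back from the end of the line.
        Some(line_text.chars().count().saturating_add_signed(column))
    } else {
        // 1-based from the start of the line.
        usize::try_from(column).ok()?.checked_sub(1)
    }
}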
@ -7,11 +7,10 @@ use tiny_skia as sk;
 use typst::diag::{SourceDiagnostic, Warned};
 use typst::html::HtmlDocument;
 use typst::layout::{Abs, Frame, FrameItem, PagedDocument, Transform};
-use typst::loading::LineCol;
 use typst::visualize::Color;
 use typst::{Document, World, WorldExt};
 use typst_pdf::PdfOptions;
-use typst_syntax::FileId;
+use typst_syntax::{FileId, Lines};

 use crate::collect::{Attr, FileSize, NoteKind, Test};
 use crate::logger::TestResult;
@ -329,12 +328,12 @@ impl<'a> Runner<'a> {
     fn format_pos(&self, file: FileId, pos: usize) -> String {
         let res = if file != self.test.source.id() {
             let bytes = self.world.file(file).unwrap();
-            LineCol::from_byte_pos(pos, &bytes).map(|l| l.numbers())
+            let lines = Lines::from_bytes(&bytes).unwrap();
+            lines.byte_to_line_column(pos).map(|(line, col)| (line + 1, col + 1))
         } else {
-            let line = self.test.source.byte_to_line(pos).map(|l| l + 1);
-            let col = (self.test.source.byte_to_column(pos))
-                .map(|c| self.test.pos.line + c + 1);
-            Option::zip(line, col)
+            (self.test.source.lines())
+                .byte_to_line_column(pos)
+                .map(|(line, col)| (line + 1, col + 1))
         };
         let Some((line, col)) = res else {
             return "oob".into();
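A hedged sketch of the position formatting above, assuming — from the `+ 1` mapping in the diff — that `byte_to_line_column` returns zero-based `Option<(usize, usize)>`; the `Lines<String>` parameterization and the `human_pos` helper are likewise assumptions, not part of the commit.

use typst_syntax::Lines;

/// Format a byte offset as a 1-based "line:column" string, or "oob" if the
/// offset lies outside the file.
fn human_pos(lines: &Lines<String>, byte_idx: usize) -> String {
    match lines.byte_to_line_column(byte_idx) {
        Some((line, col)) => format!("{}:{}", line + 1, col + 1),
        None => "oob".into(),
    }
}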