Mirror of https://github.com/typst/typst, synced 2025-08-24 19:54:14 +08:00

refactor: factor out a general Lines struct from Source

parent 2e2f646f2a
commit e5d8f02554

2 Cargo.lock (generated)
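The commit factors the line-start bookkeeping (UTF-8 byte and UTF-16 offsets of each line) out of `Source` into a standalone, reusable `Lines<S>` type in `typst-syntax`, so that raw byte buffers loaded at runtime (via `world.file(..)`, CSV/TOML/YAML data, and the like) can also get line/column positions in error reports. As orientation before the diff, here is a minimal usage sketch based only on methods added in this commit; the snippet is illustrative and not part of the diff itself:

    use typst_syntax::Lines;

    // Build line metadata for any string-like buffer.
    let lines = Lines::new(String::from("hello\nworld"));
    assert_eq!(lines.len_lines(), 2);

    // Map a byte offset to a 0-based (line, column) pair and back again.
    let (line, col) = lines.byte_to_line_column(7).unwrap();
    assert_eq!((line, col), (1, 1));
    assert_eq!(lines.line_column_to_byte(line, col), Some(7));

    // Raw bytes work too, as long as they are valid UTF-8.
    let loaded = Lines::from_bytes(b"a\nb").expect("valid utf-8");
    assert_eq!(loaded.len_lines(), 2);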
@@ -3112,7 +3112,6 @@ dependencies = [
"unicode-segmentation",
"unscanny",
"usvg",
"utf8_iter",
"wasmi",
"xmlwriter",
]

@@ -3201,6 +3200,7 @@ dependencies = [
name = "typst-syntax"
version = "0.13.1"
dependencies = [
"comemo",
"ecow",
"serde",
"toml",

@@ -135,7 +135,6 @@ unicode-segmentation = "1"
unscanny = "0.1"
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
usvg = { version = "0.45", default-features = false, features = ["text"] }
utf8_iter = "1.0.4"
walkdir = "2"
wasmi = "0.40.0"
web-sys = "0.3"

@@ -16,7 +16,7 @@ use typst::diag::{
use typst::foundations::{Datetime, Smart};
use typst::html::HtmlDocument;
use typst::layout::{Frame, Page, PageRanges, PagedDocument};
use typst::syntax::{FileId, Source, Span};
use typst::syntax::{FileId, Lines, Span};
use typst::WorldExt;
use typst_pdf::{PdfOptions, PdfStandards, Timestamp};

@@ -696,7 +696,7 @@ fn label(world: &SystemWorld, span: Span) -> Option<Label<FileId>> {
impl<'a> codespan_reporting::files::Files<'a> for SystemWorld {
type FileId = FileId;
type Name = String;
type Source = Source;
type Source = Lines<String>;

fn name(&'a self, id: FileId) -> CodespanResult<Self::Name> {
let vpath = id.vpath();

@@ -85,6 +85,6 @@ fn resolve_span(world: &SystemWorld, span: Span) -> Option<(String, u32)> {
let id = span.id()?;
let source = world.source(id).ok()?;
let range = source.range(span)?;
let line = source.byte_to_line(range.start)?;
let line = source.lines().byte_to_line(range.start)?;
Some((format!("{id:?}"), line as u32 + 1))
}

@@ -9,7 +9,7 @@ use ecow::{eco_format, EcoString};
use parking_lot::Mutex;
use typst::diag::{FileError, FileResult};
use typst::foundations::{Bytes, Datetime, Dict, IntoValue};
use typst::syntax::{FileId, Source, VirtualPath};
use typst::syntax::{FileId, Lines, Source, VirtualPath};
use typst::text::{Font, FontBook};
use typst::utils::LazyHash;
use typst::{Library, World};
@@ -183,8 +183,18 @@ impl SystemWorld {

/// Lookup a source file by id.
#[track_caller]
pub fn lookup(&self, id: FileId) -> Source {
self.source(id).expect("file id does not point to any source file")
pub fn lookup(&self, id: FileId) -> Lines<String> {
self.slot(id, |slot| {
if let Some(source) = slot.source.get() {
let source = source.as_ref().expect("file is not valid");
source.lines()
} else if let Some(bytes) = slot.file.get() {
let bytes = bytes.as_ref().expect("file is not valid");
Lines::from_bytes(bytes.as_slice()).expect("file is not valid utf-8")
} else {
panic!("file id does not point to any source file");
}
})
}
}

@@ -339,6 +349,11 @@ impl<T: Clone> SlotCell<T> {
self.accessed = false;
}

/// Gets the contents of the cell.
fn get(&self) -> Option<&FileResult<T>> {
self.data.as_ref()
}

/// Gets the contents of the cell or initialize them.
fn get_or_init(
&mut self,

@@ -66,7 +66,6 @@ unicode-normalization = { workspace = true }
unicode-segmentation = { workspace = true }
unscanny = { workspace = true }
usvg = { workspace = true }
utf8_iter = { workspace = true }
wasmi = { workspace = true }
xmlwriter = { workspace = true }

@@ -9,10 +9,10 @@ use std::string::FromUtf8Error;
use comemo::Tracked;
use ecow::{eco_vec, EcoVec};
use typst_syntax::package::{PackageSpec, PackageVersion};
use typst_syntax::{Span, Spanned, SyntaxError};
use typst_syntax::{Lines, Span, Spanned, SyntaxError};

use crate::engine::Engine;
use crate::loading::{Loaded, LineCol};
use crate::loading::{LoadSource, Loaded};
use crate::{World, WorldExt};

/// Early-return with a [`StrResult`] or [`SourceResult`].
@@ -569,6 +569,144 @@ impl From<PackageError> for EcoString {
}
}

impl Loaded {
/// Report an error, possibly in an external file.
pub fn err_in_text(
&self,
pos: impl Into<ReportPos>,
msg: impl std::fmt::Display,
error: impl std::fmt::Display,
) -> EcoVec<SourceDiagnostic> {
let lines = Lines::from_bytes(&self.bytes);
match (self.source.v, lines) {
// Only report an error in an external file,
// if it is human readable (valid utf-8).
(LoadSource::Path(file_id), Ok(lines)) => {
let pos = pos.into();
if let Some(range) = pos.range(&lines) {
let span = Span::from_range(file_id, range);
return eco_vec!(error!(span, "{msg} ({error})"));
}

// Either `ReportPos::None` was provided, or resolving the range
// from the line/column failed. If present report the possibly
// wrong line/column in the error message anyway.
let span = Span::from_range(file_id, 0..self.bytes.len());
let error = if let Some(pair) = pos.line_col(&lines) {
let (line, col) = pair.numbers();
error!(span, "{msg} ({error} at {line}:{col})")
} else {
error!(span, "{msg} ({error})")
};
eco_vec![error]
}
_ => self.err_in_bytes(pos, msg, error),
}
}

/// Report an error, possibly in an external file.
pub fn err_in_bytes(
&self,
pos: impl Into<ReportPos>,
msg: impl std::fmt::Display,
error: impl std::fmt::Display,
) -> EcoVec<SourceDiagnostic> {
let pos = pos.into();
let result = Lines::from_bytes(&self.bytes).ok().and_then(|l| pos.line_col(&l));
let error = if let Some(pair) = result {
let (line, col) = pair.numbers();
error!(self.source.span, "{msg} ({error} at {line}:{col})")
} else {
error!(self.source.span, "{msg} ({error})")
};
eco_vec![error]
}
}

#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum ReportPos {
/// Contains the range, and the 0-based line/column.
Full(std::ops::Range<usize>, LineCol),
/// Contains the range.
Range(std::ops::Range<usize>),
/// Contains the 0-based line/column.
LineCol(LineCol),
#[default]
None,
}

impl From<std::ops::Range<usize>> for ReportPos {
fn from(value: std::ops::Range<usize>) -> Self {
Self::Range(value)
}
}

impl From<LineCol> for ReportPos {
fn from(value: LineCol) -> Self {
Self::LineCol(value)
}
}

impl ReportPos {
fn range(&self, lines: &Lines<String>) -> Option<std::ops::Range<usize>> {
match self {
ReportPos::Full(range, _) => Some(range.clone()),
ReportPos::Range(range) => Some(range.clone()),
&ReportPos::LineCol(pair) => {
let i = lines.line_column_to_byte(pair.line, pair.col)?;
Some(i..i)
}
ReportPos::None => None,
}
}

fn line_col(&self, lines: &Lines<String>) -> Option<LineCol> {
match self {
&ReportPos::Full(_, pair) => Some(pair),
ReportPos::Range(range) => {
let (line, col) = lines.byte_to_line_column(range.start)?;
Some(LineCol::zero_based(line, col))
}
&ReportPos::LineCol(pair) => Some(pair),
ReportPos::None => None,
}
}
}

/// A line/column pair.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LineCol {
/// The 0-based line.
line: usize,
/// The 0-based column.
col: usize,
}

impl LineCol {
/// Constructs the line/column pair from 0-based indices.
pub fn zero_based(line: usize, col: usize) -> Self {
Self { line, col }
}

/// Constructs the line/column pair from 1-based numbers.
pub fn one_based(line: usize, col: usize) -> Self {
Self {
line: line.saturating_sub(1),
col: col.saturating_sub(1),
}
}

/// Returns the 0-based line/column indices.
pub fn indices(&self) -> (usize, usize) {
(self.line, self.col)
}

/// Returns the 1-based line/column numbers.
pub fn numbers(&self) -> (usize, usize) {
(self.line + 1, self.col + 1)
}
}

/// Format a user-facing error message for an XML-like file format.
pub fn format_xml_like_error(
format: &str,

@ -1,10 +1,10 @@
|
||||
use ecow::EcoVec;
|
||||
use typst_syntax::Spanned;
|
||||
|
||||
use crate::diag::{bail, SourceDiagnostic, SourceResult};
|
||||
use crate::diag::{bail, LineCol, ReportPos, SourceDiagnostic, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
|
||||
use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos};
|
||||
use crate::loading::{DataSource, Load, Loaded, Readable};
|
||||
|
||||
/// Reads structured data from a CSV file.
|
||||
///
|
||||
@ -176,7 +176,9 @@ fn format_csv_error(
|
||||
})
|
||||
.unwrap_or(LineCol::one_based(line, 1).into());
|
||||
match err.kind() {
|
||||
::csv::ErrorKind::Utf8 { .. } => data.err_in_text(pos, msg, "file is not valid utf-8"),
|
||||
::csv::ErrorKind::Utf8 { .. } => {
|
||||
data.err_in_text(pos, msg, "file is not valid utf-8")
|
||||
}
|
||||
::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
|
||||
let err =
|
||||
format!("found {len} instead of {expected_len} fields in line {line}");
|
||||
|
@ -1,10 +1,10 @@
|
||||
use ecow::eco_format;
|
||||
use typst_syntax::Spanned;
|
||||
|
||||
use crate::diag::{At, SourceResult};
|
||||
use crate::diag::{At, LineCol, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{func, scope, Str, Value};
|
||||
use crate::loading::{DataSource, LineCol, Load, Readable};
|
||||
use crate::loading::{DataSource, Load, Readable};
|
||||
|
||||
/// Reads structured data from a JSON file.
|
||||
///
|
||||
|
@ -16,9 +16,8 @@ mod xml_;
|
||||
mod yaml_;
|
||||
|
||||
use comemo::Tracked;
|
||||
use ecow::{eco_vec, EcoString, EcoVec};
|
||||
use ecow::EcoString;
|
||||
use typst_syntax::{FileId, Span, Spanned};
|
||||
use utf8_iter::ErrorReportingUtf8Chars;
|
||||
|
||||
pub use self::cbor_::*;
|
||||
pub use self::csv_::*;
|
||||
@ -28,7 +27,7 @@ pub use self::toml_::*;
|
||||
pub use self::xml_::*;
|
||||
pub use self::yaml_::*;
|
||||
|
||||
use crate::diag::{error, At, FileError, SourceDiagnostic, SourceResult};
|
||||
use crate::diag::{At, FileError, SourceResult};
|
||||
use crate::foundations::OneOrMultiple;
|
||||
use crate::foundations::{cast, Bytes, Scope, Str};
|
||||
use crate::World;
|
||||
@ -129,6 +128,7 @@ pub struct Loaded {
|
||||
}
|
||||
|
||||
impl Loaded {
|
||||
/// FIXME: remove this?
|
||||
pub fn dummy() -> Self {
|
||||
Loaded::new(
|
||||
typst_syntax::Spanned::new(LoadSource::Bytes, Span::detached()),
|
||||
@ -142,50 +142,16 @@ impl Loaded {
|
||||
|
||||
pub fn as_str(&self) -> SourceResult<&str> {
|
||||
self.bytes.as_str().map_err(|err| {
|
||||
// TODO: should the error even be reported in the file if it's possibly binary?
|
||||
let start = err.valid_up_to();
|
||||
let end = start + err.error_len().unwrap_or(0);
|
||||
self.err_in_text(start..end, "failed to convert to string", FileError::from(err))
|
||||
// always report this error in the source file.
|
||||
self.err_in_bytes(
|
||||
start..end,
|
||||
"failed to convert to string",
|
||||
FileError::from(err),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Report an error, possibly in an external file.
|
||||
pub fn err_in_text(
|
||||
&self,
|
||||
pos: impl Into<ReportPos>,
|
||||
msg: impl std::fmt::Display,
|
||||
error: impl std::fmt::Display,
|
||||
) -> EcoVec<SourceDiagnostic> {
|
||||
let pos = pos.into();
|
||||
let error = match self.source.v {
|
||||
LoadSource::Path(file_id) => {
|
||||
if let Some(range) = pos.range(self.bytes.as_slice()) {
|
||||
let span = Span::from_range(file_id, range);
|
||||
return eco_vec!(error!(span, "{msg} ({error})"));
|
||||
}
|
||||
|
||||
// Either there was no range provided, or resolving the range
|
||||
// from the line/column failed. If present report the possibly
|
||||
// wrong line/column anyway.
|
||||
let span = Span::from_range(file_id, 0..self.bytes.len());
|
||||
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
|
||||
let (line, col) = pair.numbers();
|
||||
error!(span, "{msg} ({error} at {line}:{col})")
|
||||
} else {
|
||||
error!(span, "{msg} ({error})")
|
||||
}
|
||||
}
|
||||
LoadSource::Bytes => {
|
||||
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
|
||||
let (line, col) = pair.numbers();
|
||||
error!(self.source.span, "{msg} ({error} at {line}:{col})")
|
||||
} else {
|
||||
error!(self.source.span, "{msg} ({error})")
|
||||
}
|
||||
}
|
||||
};
|
||||
eco_vec![error]
|
||||
}
|
||||
}
|
||||
|
||||
/// A loaded [`DataSource`].
|
||||
@ -195,142 +161,6 @@ pub enum LoadSource {
|
||||
Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub enum ReportPos {
|
||||
/// Contains the range, and the 0-based line/column.
|
||||
Full(std::ops::Range<usize>, LineCol),
|
||||
/// Contains the range.
|
||||
Range(std::ops::Range<usize>),
|
||||
/// Contains the 0-based line/column.
|
||||
LineCol(LineCol),
|
||||
#[default]
|
||||
None,
|
||||
}
|
||||
|
||||
impl From<std::ops::Range<usize>> for ReportPos {
|
||||
fn from(value: std::ops::Range<usize>) -> Self {
|
||||
Self::Range(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LineCol> for ReportPos {
|
||||
fn from(value: LineCol) -> Self {
|
||||
Self::LineCol(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReportPos {
|
||||
fn range(&self, bytes: &[u8]) -> Option<std::ops::Range<usize>> {
|
||||
match self {
|
||||
ReportPos::Full(range, _) => Some(range.clone()),
|
||||
ReportPos::Range(range) => Some(range.clone()),
|
||||
&ReportPos::LineCol(pair) => pair.byte_pos(bytes).map(|i| i..i),
|
||||
ReportPos::None => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn line_col(&self, bytes: &[u8]) -> Option<LineCol> {
|
||||
match self {
|
||||
&ReportPos::Full(_, pair) => Some(pair),
|
||||
ReportPos::Range(range) => LineCol::from_byte_pos(range.start, bytes),
|
||||
&ReportPos::LineCol(pair) => Some(pair),
|
||||
ReportPos::None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct LineCol {
|
||||
/// The 0-based line.
|
||||
line: usize,
|
||||
/// The 0-based column.
|
||||
col: usize,
|
||||
}
|
||||
|
||||
impl LineCol {
|
||||
/// Constructs the line/column pair from 0-based indices.
|
||||
pub fn zero_based(line: usize, col: usize) -> Self {
|
||||
Self { line, col }
|
||||
}
|
||||
|
||||
/// Constructs the line/column pair from 1-based numbers.
|
||||
pub fn one_based(line: usize, col: usize) -> Self {
|
||||
Self {
|
||||
line: line.saturating_sub(1),
|
||||
col: col.saturating_sub(1),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
|
||||
let bytes = &bytes[..pos];
|
||||
let mut line = 0;
|
||||
let line_start = memchr::memchr_iter(b'\n', bytes)
|
||||
.inspect(|_| line += 1)
|
||||
.last()
|
||||
.map(|i| i + 1)
|
||||
.unwrap_or(bytes.len());
|
||||
|
||||
// Try to compute a column even if the string isn't valid utf-8.
|
||||
let col = ErrorReportingUtf8Chars::new(&bytes[line_start..]).count();
|
||||
Some(LineCol::zero_based(line, col))
|
||||
}
|
||||
|
||||
pub fn byte_pos(&self, bytes: &[u8]) -> Option<usize> {
|
||||
let line_offset = if let Some(idx) = self.line.checked_sub(1) {
|
||||
memchr::memchr_iter(b'\n', bytes).nth(idx).map(|i| i + 1)?
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let col_offset = col_offset(line_offset, self.col, bytes)?;
|
||||
let pos = line_offset + col_offset;
|
||||
Some(pos)
|
||||
}
|
||||
|
||||
pub fn byte_range(
|
||||
range: std::ops::Range<Self>,
|
||||
bytes: &[u8],
|
||||
) -> Option<std::ops::Range<usize>> {
|
||||
let mut line_iter = memchr::memchr_iter(b'\n', bytes);
|
||||
let start_line_offset = if let Some(idx) = range.start.line.checked_sub(1) {
|
||||
line_iter.nth(idx).map(|i| i + 1)?
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let line_delta = range.end.line - range.start.line;
|
||||
let end_line_offset = if let Some(idx) = line_delta.checked_sub(1) {
|
||||
line_iter.nth(idx).map(|i| i + 1)?
|
||||
} else {
|
||||
start_line_offset
|
||||
};
|
||||
|
||||
let start_col_offset = col_offset(start_line_offset, range.start.col, bytes)?;
|
||||
let end_col_offset = col_offset(end_line_offset, range.end.col, bytes)?;
|
||||
|
||||
let start = start_line_offset + start_col_offset;
|
||||
let end = end_line_offset + end_col_offset;
|
||||
Some(start..end)
|
||||
}
|
||||
|
||||
pub fn numbers(&self) -> (usize, usize) {
|
||||
(self.line + 1, self.col + 1)
|
||||
}
|
||||
}
|
||||
|
||||
fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option<usize> {
|
||||
let line = &bytes[line_offset..];
|
||||
// TODO: stream UTF-8 decoding and ignore invalid characters;
// might need to update error reporting too (use utf8_iter)
|
||||
if let Some(idx) = col.checked_sub(1) {
|
||||
// Try to compute position even if the string isn't valid utf-8.
|
||||
let mut iter = ErrorReportingUtf8Chars::new(line);
|
||||
_ = iter.nth(idx)?;
|
||||
Some(line.len() - iter.as_slice().len())
|
||||
} else {
|
||||
Some(0)
|
||||
}
|
||||
}
|
||||
|
||||
/// A value that can be read from a file.
|
||||
#[derive(Debug, Clone, PartialEq, Hash)]
|
||||
pub enum Readable {
|
||||
|
@ -1,10 +1,10 @@
|
||||
use ecow::{eco_format, EcoVec};
|
||||
use typst_syntax::Spanned;
|
||||
|
||||
use crate::diag::{At, SourceDiagnostic, SourceResult};
|
||||
use crate::diag::{At, ReportPos, SourceDiagnostic, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{func, scope, Str, Value};
|
||||
use crate::loading::{Loaded, DataSource, Load, Readable, ReportPos};
|
||||
use crate::loading::{DataSource, Load, Loaded, Readable};
|
||||
|
||||
/// Reads structured data from a TOML file.
|
||||
///
|
||||
@ -69,7 +69,10 @@ impl toml {
|
||||
}
|
||||
|
||||
/// Format the user-facing TOML error message.
|
||||
fn format_toml_error(data: &Loaded, error: ::toml::de::Error) -> EcoVec<SourceDiagnostic> {
|
||||
fn format_toml_error(
|
||||
data: &Loaded,
|
||||
error: ::toml::de::Error,
|
||||
) -> EcoVec<SourceDiagnostic> {
|
||||
let pos = error.span().map(ReportPos::Range).unwrap_or_default();
|
||||
data.err_in_text(pos, "failed to parse TOML", error.message())
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ use typst_syntax::Spanned;
|
||||
use crate::diag::{format_xml_like_error, SourceDiagnostic, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
|
||||
use crate::loading::{Loaded, DataSource, Load, Readable};
|
||||
use crate::loading::{DataSource, Load, Loaded, Readable};
|
||||
|
||||
/// Reads structured data from an XML file.
|
||||
///
|
||||
|
@ -1,10 +1,10 @@
|
||||
use ecow::{eco_format, EcoVec};
|
||||
use typst_syntax::Spanned;
|
||||
|
||||
use crate::diag::{At, SourceDiagnostic, SourceResult};
|
||||
use crate::diag::{At, LineCol, ReportPos, SourceDiagnostic, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{func, scope, Str, Value};
|
||||
use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos};
|
||||
use crate::loading::{DataSource, Load, Loaded, Readable};
|
||||
|
||||
/// Reads structured data from a YAML file.
|
||||
///
|
||||
|
@ -20,7 +20,8 @@ use typst_syntax::{Span, Spanned};
|
||||
use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr};
|
||||
|
||||
use crate::diag::{
|
||||
bail, error, At, HintedStrResult, SourceDiagnostic, SourceResult, StrResult,
|
||||
bail, error, At, HintedStrResult, ReportPos, SourceDiagnostic, SourceResult,
|
||||
StrResult,
|
||||
};
|
||||
use crate::engine::{Engine, Sink};
|
||||
use crate::foundations::{
|
||||
@ -33,7 +34,7 @@ use crate::layout::{
|
||||
BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem,
|
||||
Sides, Sizing, TrackSizings,
|
||||
};
|
||||
use crate::loading::{format_yaml_error, Loaded, DataSource, Load, LoadSource, ReportPos};
|
||||
use crate::loading::{format_yaml_error, DataSource, Load, LoadSource, Loaded};
|
||||
use crate::model::{
|
||||
CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem,
|
||||
Url,
|
||||
@ -480,7 +481,9 @@ impl CslStyle {
|
||||
typst_utils::hash128(&(TypeId::of::<Bytes>(), data)),
|
||||
)))
|
||||
})
|
||||
.map_err(|err| data.err_in_text(ReportPos::None, "failed to load CSL style", err))
|
||||
.map_err(|err| {
|
||||
data.err_in_text(ReportPos::None, "failed to load CSL style", err)
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the underlying independent style.
|
||||
|
@ -11,7 +11,7 @@ use typst_utils::ManuallyHash;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
use super::Lang;
|
||||
use crate::diag::{SourceDiagnostic, SourceResult};
|
||||
use crate::diag::{LineCol, ReportPos, SourceDiagnostic, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{
|
||||
cast, elem, scope, Content, Derived, NativeElement, OneOrMultiple, Packed, PlainText,
|
||||
@ -19,7 +19,7 @@ use crate::foundations::{
|
||||
};
|
||||
use crate::html::{tag, HtmlElem};
|
||||
use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
|
||||
use crate::loading::{DataSource, LineCol, Load, Loaded, ReportPos};
|
||||
use crate::loading::{DataSource, Load, Loaded};
|
||||
use crate::model::{Figurable, ParElem};
|
||||
use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize};
|
||||
use crate::visualize::Color;
|
||||
|
@@ -15,6 +15,7 @@ readme = { workspace = true }
[dependencies]
typst-timing = { workspace = true }
typst-utils = { workspace = true }
comemo = { workspace = true }
ecow = { workspace = true }
serde = { workspace = true }
toml = { workspace = true }

@@ -7,6 +7,7 @@ mod file;
mod highlight;
mod kind;
mod lexer;
mod lines;
mod node;
mod parser;
mod path;
@@ -22,6 +23,7 @@ pub use self::lexer::{
is_id_continue, is_id_start, is_ident, is_newline, is_valid_label_literal_id,
link_prefix, split_newlines,
};
pub use self::lines::Lines;
pub use self::node::{LinkedChildren, LinkedNode, Side, SyntaxError, SyntaxNode};
pub use self::parser::{parse, parse_code, parse_math};
pub use self::path::VirtualPath;

crates/typst-syntax/src/lines.rs (new file, 407 lines)
@@ -0,0 +1,407 @@
use std::hash::{Hash, Hasher};
|
||||
use std::iter::zip;
|
||||
use std::ops::Range;
|
||||
use std::str::Utf8Error;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::is_newline;
|
||||
|
||||
/// Metadata about lines.
|
||||
#[derive(Clone)]
|
||||
pub struct Lines<S>(Arc<Repr<S>>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Repr<S> {
|
||||
lines: Vec<Line>,
|
||||
str: S,
|
||||
}
|
||||
|
||||
/// Metadata about a line.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
pub struct Line {
|
||||
/// The UTF-8 byte offset where the line starts.
|
||||
byte_idx: usize,
|
||||
/// The UTF-16 codepoint offset where the line starts.
|
||||
utf16_idx: usize,
|
||||
}
|
||||
|
||||
impl<S: AsRef<str>> Lines<S> {
|
||||
/// TODO: memoize this?
|
||||
pub fn new(str: S) -> Self {
|
||||
let lines = lines(str.as_ref());
|
||||
Lines(Arc::new(Repr { lines, str }))
|
||||
}
|
||||
|
||||
pub fn text(&self) -> &str {
|
||||
self.0.str.as_ref()
|
||||
}
|
||||
|
||||
/// Get the length of the file in UTF-8 encoded bytes.
|
||||
pub fn len_bytes(&self) -> usize {
|
||||
self.0.str.as_ref().len()
|
||||
}
|
||||
|
||||
/// Get the length of the file in UTF-16 code units.
|
||||
pub fn len_utf16(&self) -> usize {
|
||||
let last = self.0.lines.last().unwrap();
|
||||
last.utf16_idx + len_utf16(&self.text()[last.byte_idx..])
|
||||
}
|
||||
|
||||
/// Get the length of the file in lines.
|
||||
pub fn len_lines(&self) -> usize {
|
||||
self.0.lines.len()
|
||||
}
|
||||
|
||||
/// Return the index of the UTF-16 code unit at the byte index.
|
||||
pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
|
||||
let line_idx = self.byte_to_line(byte_idx)?;
|
||||
let line = self.0.lines.get(line_idx)?;
|
||||
let head = self.text().get(line.byte_idx..byte_idx)?;
|
||||
Some(line.utf16_idx + len_utf16(head))
|
||||
}
|
||||
|
||||
/// Return the index of the line that contains the given byte index.
|
||||
pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
|
||||
(byte_idx <= self.text().len()).then(|| {
|
||||
match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
|
||||
Ok(i) => i,
|
||||
Err(i) => i - 1,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the index of the column at the byte index.
|
||||
///
|
||||
/// The column is defined as the number of characters in the line before the
|
||||
/// byte index.
|
||||
pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
|
||||
let line = self.byte_to_line(byte_idx)?;
|
||||
let start = self.line_to_byte(line)?;
|
||||
let head = self.text().get(start..byte_idx)?;
|
||||
Some(head.chars().count())
|
||||
}
|
||||
|
||||
/// Return the index of the line and column at the byte index.
|
||||
pub fn byte_to_line_column(&self, byte_idx: usize) -> Option<(usize, usize)> {
|
||||
let line = self.byte_to_line(byte_idx)?;
|
||||
let start = self.line_to_byte(line)?;
|
||||
let head = self.text().get(start..byte_idx)?;
|
||||
let col = head.chars().count();
|
||||
Some((line, col))
|
||||
}
|
||||
|
||||
/// Return the byte index at the UTF-16 code unit.
|
||||
pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
|
||||
let line = self.0.lines.get(
|
||||
match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
|
||||
Ok(i) => i,
|
||||
Err(i) => i - 1,
|
||||
},
|
||||
)?;
|
||||
|
||||
let text = self.text();
|
||||
let mut k = line.utf16_idx;
|
||||
for (i, c) in text[line.byte_idx..].char_indices() {
|
||||
if k >= utf16_idx {
|
||||
return Some(line.byte_idx + i);
|
||||
}
|
||||
k += c.len_utf16();
|
||||
}
|
||||
|
||||
(k == utf16_idx).then_some(text.len())
|
||||
}
|
||||
|
||||
/// Return the byte position at which the given line starts.
|
||||
pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
|
||||
self.0.lines.get(line_idx).map(|line| line.byte_idx)
|
||||
}
|
||||
|
||||
/// Return the range which encloses the given line.
|
||||
pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
|
||||
let start = self.line_to_byte(line_idx)?;
|
||||
let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text().len());
|
||||
Some(start..end)
|
||||
}
|
||||
|
||||
/// Return the byte index of the given (line, column) pair.
|
||||
///
|
||||
/// The column defines the number of characters to go beyond the start of
|
||||
/// the line.
|
||||
pub fn line_column_to_byte(
|
||||
&self,
|
||||
line_idx: usize,
|
||||
column_idx: usize,
|
||||
) -> Option<usize> {
|
||||
let range = self.line_to_range(line_idx)?;
|
||||
let line = self.text().get(range.clone())?;
|
||||
let mut chars = line.chars();
|
||||
for _ in 0..column_idx {
|
||||
chars.next();
|
||||
}
|
||||
Some(range.start + (line.len() - chars.as_str().len()))
|
||||
}
|
||||
}
|
||||
|
||||
impl Lines<String> {
|
||||
/// Tries to decode the bytes as UTF-8 and compute their line metadata.
|
||||
#[comemo::memoize]
|
||||
pub fn from_bytes(bytes: &[u8]) -> Result<Lines<String>, Utf8Error> {
|
||||
let str = std::str::from_utf8(bytes)?;
|
||||
Ok(Lines::new(str.to_string()))
|
||||
}
|
||||
|
||||
/// Fully replace the source text.
|
||||
///
|
||||
/// This performs a naive (suffix/prefix-based) diff of the old and new text
|
||||
/// to produce the smallest single edit that transforms old into new and
|
||||
/// then calls [`edit`](Self::edit) with it.
|
||||
///
|
||||
/// Returns whether any changes were made.
|
||||
pub fn replace(&mut self, new: &str) -> bool {
|
||||
let Some((prefix, suffix)) = self.replacement_range(new) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let old = self.text();
|
||||
let replace = prefix..old.len() - suffix;
|
||||
let with = &new[prefix..new.len() - suffix];
|
||||
self.edit(replace, with);
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns the common prefix and suffix lengths.
|
||||
/// Returns [`None`] if the old and new strings are equal.
|
||||
pub fn replacement_range(&self, new: &str) -> Option<(usize, usize)> {
|
||||
let old = self.text();
|
||||
|
||||
let mut prefix =
|
||||
zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
|
||||
|
||||
if prefix == old.len() && prefix == new.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
|
||||
prefix -= 1;
|
||||
}
|
||||
|
||||
let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
|
||||
.take_while(|(x, y)| x == y)
|
||||
.count();
|
||||
|
||||
while !old.is_char_boundary(old.len() - suffix)
|
||||
|| !new.is_char_boundary(new.len() - suffix)
|
||||
{
|
||||
suffix += 1;
|
||||
}
|
||||
|
||||
Some((prefix, suffix))
|
||||
}
|
||||
|
||||
/// Edit the text by replacing the given range and update the line metadata.
///
/// The method panics if the `replace` range is out of bounds.
|
||||
#[track_caller]
|
||||
pub fn edit(&mut self, replace: Range<usize>, with: &str) {
|
||||
let start_byte = replace.start;
|
||||
let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
|
||||
let line = self.byte_to_line(start_byte).unwrap();
|
||||
|
||||
let inner = Arc::make_mut(&mut self.0);
|
||||
|
||||
// Update the text itself.
|
||||
inner.str.replace_range(replace.clone(), with);
|
||||
|
||||
// Remove invalidated line starts.
|
||||
inner.lines.truncate(line + 1);
|
||||
|
||||
// Handle adjoining of \r and \n.
|
||||
if inner.str[..start_byte].ends_with('\r') && with.starts_with('\n') {
|
||||
inner.lines.pop();
|
||||
}
|
||||
|
||||
// Recalculate the line starts after the edit.
|
||||
inner
|
||||
.lines
|
||||
.extend(lines_from(start_byte, start_utf16, &inner.str[start_byte..]));
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a line vector.
|
||||
fn lines(text: &str) -> Vec<Line> {
|
||||
std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
|
||||
.chain(lines_from(0, 0, text))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Compute a line iterator from an offset.
|
||||
fn lines_from(
|
||||
byte_offset: usize,
|
||||
utf16_offset: usize,
|
||||
text: &str,
|
||||
) -> impl Iterator<Item = Line> + '_ {
|
||||
let mut s = unscanny::Scanner::new(text);
|
||||
let mut utf16_idx = utf16_offset;
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
s.eat_until(|c: char| {
|
||||
utf16_idx += c.len_utf16();
|
||||
is_newline(c)
|
||||
});
|
||||
|
||||
if s.done() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if s.eat() == Some('\r') && s.eat_if('\n') {
|
||||
utf16_idx += 1;
|
||||
}
|
||||
|
||||
Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
|
||||
})
|
||||
}
|
||||
|
||||
/// The number of code units this string would use if it was encoded in
|
||||
/// UTF16. This runs in linear time.
|
||||
fn len_utf16(string: &str) -> usize {
|
||||
string.chars().map(char::len_utf16).sum()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
|
||||
|
||||
#[test]
|
||||
fn test_source_file_new() {
|
||||
let lines = Lines::new(TEST);
|
||||
assert_eq!(
|
||||
lines.0.lines,
|
||||
[
|
||||
Line { byte_idx: 0, utf16_idx: 0 },
|
||||
Line { byte_idx: 7, utf16_idx: 6 },
|
||||
Line { byte_idx: 15, utf16_idx: 12 },
|
||||
Line { byte_idx: 18, utf16_idx: 15 },
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_pos_to_line() {
|
||||
let lines = Lines::new(TEST);
|
||||
assert_eq!(lines.byte_to_line(0), Some(0));
|
||||
assert_eq!(lines.byte_to_line(2), Some(0));
|
||||
assert_eq!(lines.byte_to_line(6), Some(0));
|
||||
assert_eq!(lines.byte_to_line(7), Some(1));
|
||||
assert_eq!(lines.byte_to_line(8), Some(1));
|
||||
assert_eq!(lines.byte_to_line(12), Some(1));
|
||||
assert_eq!(lines.byte_to_line(21), Some(3));
|
||||
assert_eq!(lines.byte_to_line(22), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_pos_to_column() {
|
||||
let lines = Lines::new(TEST);
|
||||
assert_eq!(lines.byte_to_column(0), Some(0));
|
||||
assert_eq!(lines.byte_to_column(2), Some(1));
|
||||
assert_eq!(lines.byte_to_column(6), Some(5));
|
||||
assert_eq!(lines.byte_to_column(7), Some(0));
|
||||
assert_eq!(lines.byte_to_column(8), Some(1));
|
||||
assert_eq!(lines.byte_to_column(12), Some(2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_utf16() {
|
||||
#[track_caller]
|
||||
fn roundtrip(lines: &Lines<&str>, byte_idx: usize, utf16_idx: usize) {
|
||||
let middle = lines.byte_to_utf16(byte_idx).unwrap();
|
||||
let result = lines.utf16_to_byte(middle).unwrap();
|
||||
assert_eq!(middle, utf16_idx);
|
||||
assert_eq!(result, byte_idx);
|
||||
}
|
||||
|
||||
let lines = Lines::new(TEST);
|
||||
roundtrip(&lines, 0, 0);
|
||||
roundtrip(&lines, 2, 1);
|
||||
roundtrip(&lines, 3, 2);
|
||||
roundtrip(&lines, 8, 7);
|
||||
roundtrip(&lines, 12, 9);
|
||||
roundtrip(&lines, 21, 18);
|
||||
assert_eq!(lines.byte_to_utf16(22), None);
|
||||
assert_eq!(lines.utf16_to_byte(19), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_roundtrip() {
|
||||
#[track_caller]
|
||||
fn roundtrip(lines: &Lines<&str>, byte_idx: usize) {
|
||||
let line = lines.byte_to_line(byte_idx).unwrap();
|
||||
let column = lines.byte_to_column(byte_idx).unwrap();
|
||||
let result = lines.line_column_to_byte(line, column).unwrap();
|
||||
assert_eq!(result, byte_idx);
|
||||
}
|
||||
|
||||
let lines = Lines::new(TEST);
|
||||
roundtrip(&lines, 0);
|
||||
roundtrip(&lines, 7);
|
||||
roundtrip(&lines, 12);
|
||||
roundtrip(&lines, 21);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_edit() {
|
||||
// This tests only the non-parser parts. The reparsing itself is
|
||||
// tested separately.
|
||||
#[track_caller]
|
||||
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
|
||||
let reference = Lines::new(after);
|
||||
|
||||
let mut edited = Lines::new(prev.to_string());
|
||||
edited.edit(range.clone(), with);
|
||||
assert_eq!(edited.text(), reference.text());
|
||||
assert_eq!(edited.0.lines, reference.0.lines);
|
||||
|
||||
let mut replaced = Lines::new(prev.to_string());
|
||||
replaced.replace(&{
|
||||
let mut s = prev.to_string();
|
||||
s.replace_range(range, with);
|
||||
s
|
||||
});
|
||||
assert_eq!(replaced.text(), reference.text());
|
||||
assert_eq!(replaced.0.lines, reference.0.lines);
|
||||
}
|
||||
|
||||
// Test inserting at the beginning.
|
||||
test("abc\n", 0..0, "hi\n", "hi\nabc\n");
|
||||
test("\nabc", 0..0, "hi\r", "hi\r\nabc");
|
||||
|
||||
// Test editing in the middle.
|
||||
test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
|
||||
|
||||
// Test appending.
|
||||
test("abc\ndef", 7..7, "hi", "abc\ndefhi");
|
||||
test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
|
||||
|
||||
// Test appending with adjoining \r and \n.
|
||||
test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
|
||||
|
||||
// Test removing everything.
|
||||
test(TEST, 0..21, "", "");
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Hash> Hash for Lines<S> {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.0.str.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsRef<str>> AsRef<str> for Lines<S> {
|
||||
fn as_ref(&self) -> &str {
|
||||
self.0.str.as_ref()
|
||||
}
|
||||
}
|
@ -2,14 +2,14 @@
|
||||
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::iter::zip;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use typst_utils::LazyHash;
|
||||
|
||||
use crate::lines::Lines;
|
||||
use crate::reparser::reparse;
|
||||
use crate::{is_newline, parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
|
||||
use crate::{parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
|
||||
|
||||
/// A source file.
|
||||
///
|
||||
@ -24,9 +24,8 @@ pub struct Source(Arc<Repr>);
|
||||
#[derive(Clone)]
|
||||
struct Repr {
|
||||
id: FileId,
|
||||
text: LazyHash<String>,
|
||||
root: LazyHash<SyntaxNode>,
|
||||
lines: Vec<Line>,
|
||||
lines: LazyHash<Lines<String>>,
|
||||
}
|
||||
|
||||
impl Source {
|
||||
@ -37,8 +36,7 @@ impl Source {
|
||||
root.numberize(id, Span::FULL).unwrap();
|
||||
Self(Arc::new(Repr {
|
||||
id,
|
||||
lines: lines(&text),
|
||||
text: LazyHash::new(text),
|
||||
lines: LazyHash::new(Lines::new(text)),
|
||||
root: LazyHash::new(root),
|
||||
}))
|
||||
}
|
||||
@ -58,9 +56,14 @@ impl Source {
|
||||
self.0.id
|
||||
}
|
||||
|
||||
/// Metadata about the lines of the source file.
|
||||
pub fn lines(&self) -> Lines<String> {
|
||||
Lines::clone(&self.0.lines)
|
||||
}
|
||||
|
||||
/// The whole source as a string slice.
|
||||
pub fn text(&self) -> &str {
|
||||
&self.0.text
|
||||
&self.0.lines.text()
|
||||
}
|
||||
|
||||
/// Slice out the part of the source code enclosed by the range.
|
||||
@ -77,29 +80,12 @@ impl Source {
|
||||
/// Returns the range in the new source that was ultimately reparsed.
|
||||
pub fn replace(&mut self, new: &str) -> Range<usize> {
|
||||
let _scope = typst_timing::TimingScope::new("replace source");
|
||||
let old = self.text();
|
||||
|
||||
let mut prefix =
|
||||
zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
|
||||
|
||||
if prefix == old.len() && prefix == new.len() {
|
||||
let Some((prefix, suffix)) = self.0.lines.replacement_range(new) else {
|
||||
return 0..0;
|
||||
}
|
||||
|
||||
while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
|
||||
prefix -= 1;
|
||||
}
|
||||
|
||||
let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
|
||||
.take_while(|(x, y)| x == y)
|
||||
.count();
|
||||
|
||||
while !old.is_char_boundary(old.len() - suffix)
|
||||
|| !new.is_char_boundary(new.len() - suffix)
|
||||
{
|
||||
suffix += 1;
|
||||
}
|
||||
};
|
||||
|
||||
let old = self.text();
|
||||
let replace = prefix..old.len() - suffix;
|
||||
let with = &new[prefix..new.len() - suffix];
|
||||
self.edit(replace, with)
|
||||
@ -112,48 +98,28 @@ impl Source {
|
||||
/// The method panics if the `replace` range is out of bounds.
|
||||
#[track_caller]
|
||||
pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> {
|
||||
let start_byte = replace.start;
|
||||
let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
|
||||
let line = self.byte_to_line(start_byte).unwrap();
|
||||
|
||||
let inner = Arc::make_mut(&mut self.0);
|
||||
|
||||
// Update the text itself.
|
||||
inner.text.replace_range(replace.clone(), with);
|
||||
|
||||
// Remove invalidated line starts.
|
||||
inner.lines.truncate(line + 1);
|
||||
|
||||
// Handle adjoining of \r and \n.
|
||||
if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') {
|
||||
inner.lines.pop();
|
||||
}
|
||||
|
||||
// Recalculate the line starts after the edit.
|
||||
inner.lines.extend(lines_from(
|
||||
start_byte,
|
||||
start_utf16,
|
||||
&inner.text[start_byte..],
|
||||
));
|
||||
// Update the text and lines.
|
||||
inner.lines.edit(replace.clone(), with);
|
||||
|
||||
// Incrementally reparse the replaced range.
|
||||
reparse(&mut inner.root, &inner.text, replace, with.len())
|
||||
reparse(&mut inner.root, inner.lines.text(), replace, with.len())
|
||||
}
|
||||
|
||||
/// Get the length of the file in UTF-8 encoded bytes.
|
||||
pub fn len_bytes(&self) -> usize {
|
||||
self.text().len()
|
||||
self.0.lines.len_bytes()
|
||||
}
|
||||
|
||||
/// Get the length of the file in UTF-16 code units.
|
||||
pub fn len_utf16(&self) -> usize {
|
||||
let last = self.0.lines.last().unwrap();
|
||||
last.utf16_idx + len_utf16(&self.0.text[last.byte_idx..])
|
||||
self.0.lines.len_utf16()
|
||||
}
|
||||
|
||||
/// Get the length of the file in lines.
|
||||
pub fn len_lines(&self) -> usize {
|
||||
self.0.lines.len()
|
||||
self.0.lines.len_lines()
|
||||
}
|
||||
|
||||
/// Find the node with the given span.
|
||||
@ -171,85 +137,6 @@ impl Source {
|
||||
pub fn range(&self, span: Span) -> Option<Range<usize>> {
|
||||
Some(self.find(span)?.range())
|
||||
}
|
||||
|
||||
/// Return the index of the UTF-16 code unit at the byte index.
|
||||
pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
|
||||
let line_idx = self.byte_to_line(byte_idx)?;
|
||||
let line = self.0.lines.get(line_idx)?;
|
||||
let head = self.0.text.get(line.byte_idx..byte_idx)?;
|
||||
Some(line.utf16_idx + len_utf16(head))
|
||||
}
|
||||
|
||||
/// Return the index of the line that contains the given byte index.
|
||||
pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
|
||||
(byte_idx <= self.0.text.len()).then(|| {
|
||||
match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
|
||||
Ok(i) => i,
|
||||
Err(i) => i - 1,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the index of the column at the byte index.
|
||||
///
|
||||
/// The column is defined as the number of characters in the line before the
|
||||
/// byte index.
|
||||
pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
|
||||
let line = self.byte_to_line(byte_idx)?;
|
||||
let start = self.line_to_byte(line)?;
|
||||
let head = self.get(start..byte_idx)?;
|
||||
Some(head.chars().count())
|
||||
}
|
||||
|
||||
/// Return the byte index at the UTF-16 code unit.
|
||||
pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
|
||||
let line = self.0.lines.get(
|
||||
match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
|
||||
Ok(i) => i,
|
||||
Err(i) => i - 1,
|
||||
},
|
||||
)?;
|
||||
|
||||
let mut k = line.utf16_idx;
|
||||
for (i, c) in self.0.text[line.byte_idx..].char_indices() {
|
||||
if k >= utf16_idx {
|
||||
return Some(line.byte_idx + i);
|
||||
}
|
||||
k += c.len_utf16();
|
||||
}
|
||||
|
||||
(k == utf16_idx).then_some(self.0.text.len())
|
||||
}
|
||||
|
||||
/// Return the byte position at which the given line starts.
|
||||
pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
|
||||
self.0.lines.get(line_idx).map(|line| line.byte_idx)
|
||||
}
|
||||
|
||||
/// Return the range which encloses the given line.
|
||||
pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
|
||||
let start = self.line_to_byte(line_idx)?;
|
||||
let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len());
|
||||
Some(start..end)
|
||||
}
|
||||
|
||||
/// Return the byte index of the given (line, column) pair.
|
||||
///
|
||||
/// The column defines the number of characters to go beyond the start of
|
||||
/// the line.
|
||||
pub fn line_column_to_byte(
|
||||
&self,
|
||||
line_idx: usize,
|
||||
column_idx: usize,
|
||||
) -> Option<usize> {
|
||||
let range = self.line_to_range(line_idx)?;
|
||||
let line = self.get(range.clone())?;
|
||||
let mut chars = line.chars();
|
||||
for _ in 0..column_idx {
|
||||
chars.next();
|
||||
}
|
||||
Some(range.start + (line.len() - chars.as_str().len()))
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Source {
|
||||
@ -261,7 +148,7 @@ impl Debug for Source {
|
||||
impl Hash for Source {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.0.id.hash(state);
|
||||
self.0.text.hash(state);
|
||||
self.0.lines.hash(state);
|
||||
self.0.root.hash(state);
|
||||
}
|
||||
}
|
||||
@ -271,176 +158,3 @@ impl AsRef<str> for Source {
|
||||
self.text()
|
||||
}
|
||||
}
|
||||
|
||||
/// Metadata about a line.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
struct Line {
|
||||
/// The UTF-8 byte offset where the line starts.
|
||||
byte_idx: usize,
|
||||
/// The UTF-16 codepoint offset where the line starts.
|
||||
utf16_idx: usize,
|
||||
}
|
||||
|
||||
/// Create a line vector.
|
||||
fn lines(text: &str) -> Vec<Line> {
|
||||
std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
|
||||
.chain(lines_from(0, 0, text))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Compute a line iterator from an offset.
|
||||
fn lines_from(
|
||||
byte_offset: usize,
|
||||
utf16_offset: usize,
|
||||
text: &str,
|
||||
) -> impl Iterator<Item = Line> + '_ {
|
||||
let mut s = unscanny::Scanner::new(text);
|
||||
let mut utf16_idx = utf16_offset;
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
s.eat_until(|c: char| {
|
||||
utf16_idx += c.len_utf16();
|
||||
is_newline(c)
|
||||
});
|
||||
|
||||
if s.done() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if s.eat() == Some('\r') && s.eat_if('\n') {
|
||||
utf16_idx += 1;
|
||||
}
|
||||
|
||||
Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
|
||||
})
|
||||
}
|
||||
|
||||
/// The number of code units this string would use if it was encoded in
|
||||
/// UTF16. This runs in linear time.
|
||||
fn len_utf16(string: &str) -> usize {
|
||||
string.chars().map(char::len_utf16).sum()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
|
||||
|
||||
#[test]
|
||||
fn test_source_file_new() {
|
||||
let source = Source::detached(TEST);
|
||||
assert_eq!(
|
||||
source.0.lines,
|
||||
[
|
||||
Line { byte_idx: 0, utf16_idx: 0 },
|
||||
Line { byte_idx: 7, utf16_idx: 6 },
|
||||
Line { byte_idx: 15, utf16_idx: 12 },
|
||||
Line { byte_idx: 18, utf16_idx: 15 },
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_pos_to_line() {
|
||||
let source = Source::detached(TEST);
|
||||
assert_eq!(source.byte_to_line(0), Some(0));
|
||||
assert_eq!(source.byte_to_line(2), Some(0));
|
||||
assert_eq!(source.byte_to_line(6), Some(0));
|
||||
assert_eq!(source.byte_to_line(7), Some(1));
|
||||
assert_eq!(source.byte_to_line(8), Some(1));
|
||||
assert_eq!(source.byte_to_line(12), Some(1));
|
||||
assert_eq!(source.byte_to_line(21), Some(3));
|
||||
assert_eq!(source.byte_to_line(22), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_pos_to_column() {
|
||||
let source = Source::detached(TEST);
|
||||
assert_eq!(source.byte_to_column(0), Some(0));
|
||||
assert_eq!(source.byte_to_column(2), Some(1));
|
||||
assert_eq!(source.byte_to_column(6), Some(5));
|
||||
assert_eq!(source.byte_to_column(7), Some(0));
|
||||
assert_eq!(source.byte_to_column(8), Some(1));
|
||||
assert_eq!(source.byte_to_column(12), Some(2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_utf16() {
|
||||
#[track_caller]
|
||||
fn roundtrip(source: &Source, byte_idx: usize, utf16_idx: usize) {
|
||||
let middle = source.byte_to_utf16(byte_idx).unwrap();
|
||||
let result = source.utf16_to_byte(middle).unwrap();
|
||||
assert_eq!(middle, utf16_idx);
|
||||
assert_eq!(result, byte_idx);
|
||||
}
|
||||
|
||||
let source = Source::detached(TEST);
|
||||
roundtrip(&source, 0, 0);
|
||||
roundtrip(&source, 2, 1);
|
||||
roundtrip(&source, 3, 2);
|
||||
roundtrip(&source, 8, 7);
|
||||
roundtrip(&source, 12, 9);
|
||||
roundtrip(&source, 21, 18);
|
||||
assert_eq!(source.byte_to_utf16(22), None);
|
||||
assert_eq!(source.utf16_to_byte(19), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_roundtrip() {
|
||||
#[track_caller]
|
||||
fn roundtrip(source: &Source, byte_idx: usize) {
|
||||
let line = source.byte_to_line(byte_idx).unwrap();
|
||||
let column = source.byte_to_column(byte_idx).unwrap();
|
||||
let result = source.line_column_to_byte(line, column).unwrap();
|
||||
assert_eq!(result, byte_idx);
|
||||
}
|
||||
|
||||
let source = Source::detached(TEST);
|
||||
roundtrip(&source, 0);
|
||||
roundtrip(&source, 7);
|
||||
roundtrip(&source, 12);
|
||||
roundtrip(&source, 21);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_edit() {
|
||||
// This tests only the non-parser parts. The reparsing itself is
|
||||
// tested separately.
|
||||
#[track_caller]
|
||||
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
|
||||
let reference = Source::detached(after);
|
||||
|
||||
let mut edited = Source::detached(prev);
|
||||
edited.edit(range.clone(), with);
|
||||
assert_eq!(edited.text(), reference.text());
|
||||
assert_eq!(edited.0.lines, reference.0.lines);
|
||||
|
||||
let mut replaced = Source::detached(prev);
|
||||
replaced.replace(&{
|
||||
let mut s = prev.to_string();
|
||||
s.replace_range(range, with);
|
||||
s
|
||||
});
|
||||
assert_eq!(replaced.text(), reference.text());
|
||||
assert_eq!(replaced.0.lines, reference.0.lines);
|
||||
}
|
||||
|
||||
// Test inserting at the beginning.
|
||||
test("abc\n", 0..0, "hi\n", "hi\nabc\n");
|
||||
test("\nabc", 0..0, "hi\r", "hi\r\nabc");
|
||||
|
||||
// Test editing in the middle.
|
||||
test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
|
||||
|
||||
// Test appending.
|
||||
test("abc\ndef", 7..7, "hi", "abc\ndefhi");
|
||||
test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
|
||||
|
||||
// Test appending with adjoining \r and \n.
|
||||
test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
|
||||
|
||||
// Test removing everything.
|
||||
test(TEST, 0..21, "", "");
|
||||
}
|
||||
}
|
||||
|
@ -6,9 +6,11 @@ use std::str::FromStr;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use ecow::{eco_format, EcoString};
|
||||
use typst::loading::LineCol;
|
||||
use typst::diag::LineCol;
|
||||
use typst_syntax::package::PackageVersion;
|
||||
use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
|
||||
use typst_syntax::{
|
||||
is_id_continue, is_ident, is_newline, FileId, Lines, Source, VirtualPath,
|
||||
};
|
||||
use unscanny::Scanner;
|
||||
|
||||
use crate::world::{read, system_path};
|
||||
@ -426,11 +428,17 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
|
||||
let start = self.parse_line_col()?;
|
||||
let lines = Lines::from_bytes(text.as_ref()).expect("Errors shouldn't be annotated for files that aren't human readable (not valid utf-8)");
|
||||
let range = if self.s.eat_if('-') {
|
||||
let end = self.parse_line_col()?;
|
||||
LineCol::byte_range(start..end, &text)
|
||||
let (line, col) = start.indices();
|
||||
let start = lines.line_column_to_byte(line, col);
|
||||
let (line, col) = end.indices();
|
||||
let end = lines.line_column_to_byte(line, col);
|
||||
Option::zip(start, end).map(|(a, b)| a..b)
|
||||
} else {
|
||||
start.byte_pos(&text).map(|i| i..i)
|
||||
let (line, col) = start.indices();
|
||||
lines.line_column_to_byte(line, col).map(|i| i..i)
|
||||
};
|
||||
if range.is_none() {
|
||||
self.error("range is out of bounds");
|
||||
@ -484,13 +492,13 @@ impl<'a> Parser<'a> {
|
||||
let line_idx = (line_idx_in_test + comments).checked_add_signed(line_delta)?;
|
||||
let column_idx = if column < 0 {
|
||||
// Negative column index is from the back.
|
||||
let range = source.line_to_range(line_idx)?;
|
||||
let range = source.lines().line_to_range(line_idx)?;
|
||||
text[range].chars().count().saturating_add_signed(column)
|
||||
} else {
|
||||
usize::try_from(column).ok()?.checked_sub(1)?
|
||||
};
|
||||
|
||||
source.line_column_to_byte(line_idx, column_idx)
|
||||
source.lines().line_column_to_byte(line_idx, column_idx)
|
||||
}
|
||||
|
||||
/// Parse a number.
|
||||
|
@ -7,11 +7,10 @@ use tiny_skia as sk;
|
||||
use typst::diag::{SourceDiagnostic, Warned};
|
||||
use typst::html::HtmlDocument;
|
||||
use typst::layout::{Abs, Frame, FrameItem, PagedDocument, Transform};
|
||||
use typst::loading::LineCol;
|
||||
use typst::visualize::Color;
|
||||
use typst::{Document, World, WorldExt};
|
||||
use typst_pdf::PdfOptions;
|
||||
use typst_syntax::FileId;
|
||||
use typst_syntax::{FileId, Lines};
|
||||
|
||||
use crate::collect::{Attr, FileSize, NoteKind, Test};
|
||||
use crate::logger::TestResult;
|
||||
@ -329,12 +328,12 @@ impl<'a> Runner<'a> {
|
||||
fn format_pos(&self, file: FileId, pos: usize) -> String {
|
||||
let res = if file != self.test.source.id() {
|
||||
let bytes = self.world.file(file).unwrap();
|
||||
LineCol::from_byte_pos(pos, &bytes).map(|l| l.numbers())
|
||||
let lines = Lines::from_bytes(&bytes).unwrap();
|
||||
lines.byte_to_line_column(pos).map(|(line, col)| (line + 1, col + 1))
|
||||
} else {
|
||||
let line = self.test.source.byte_to_line(pos).map(|l| l + 1);
|
||||
let col = (self.test.source.byte_to_column(pos))
|
||||
.map(|c| self.test.pos.line + c + 1);
|
||||
Option::zip(line, col)
|
||||
(self.test.source.lines())
|
||||
.byte_to_line_column(pos)
|
||||
.map(|(line, col)| (line + 1, col + 1))
|
||||
};
|
||||
let Some((line, col)) = res else {
|
||||
return "oob".into();
|
||||