mirror of
https://github.com/typst/typst
synced 2025-08-24 19:54:14 +08:00
feat: try to report line and column in files that contain invalid utf-8
This commit is contained in:
parent
e5d8f02554
commit
23f1c86b84
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3112,6 +3112,7 @@ dependencies = [
|
|||||||
"unicode-segmentation",
|
"unicode-segmentation",
|
||||||
"unscanny",
|
"unscanny",
|
||||||
"usvg",
|
"usvg",
|
||||||
|
"utf8_iter",
|
||||||
"wasmi",
|
"wasmi",
|
||||||
"xmlwriter",
|
"xmlwriter",
|
||||||
]
|
]
|
||||||
|
@ -135,6 +135,7 @@ unicode-segmentation = "1"
|
|||||||
unscanny = "0.1"
|
unscanny = "0.1"
|
||||||
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
|
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
|
||||||
usvg = { version = "0.45", default-features = false, features = ["text"] }
|
usvg = { version = "0.45", default-features = false, features = ["text"] }
|
||||||
|
utf8_iter = "1.0.4"
|
||||||
walkdir = "2"
|
walkdir = "2"
|
||||||
wasmi = "0.40.0"
|
wasmi = "0.40.0"
|
||||||
web-sys = "0.3"
|
web-sys = "0.3"
|
||||||
|
@ -66,6 +66,7 @@ unicode-normalization = { workspace = true }
|
|||||||
unicode-segmentation = { workspace = true }
|
unicode-segmentation = { workspace = true }
|
||||||
unscanny = { workspace = true }
|
unscanny = { workspace = true }
|
||||||
usvg = { workspace = true }
|
usvg = { workspace = true }
|
||||||
|
utf8_iter = { workspace = true }
|
||||||
wasmi = { workspace = true }
|
wasmi = { workspace = true }
|
||||||
xmlwriter = { workspace = true }
|
xmlwriter = { workspace = true }
|
||||||
|
|
||||||
|
@ -10,6 +10,7 @@ use comemo::Tracked;
|
|||||||
use ecow::{eco_vec, EcoVec};
|
use ecow::{eco_vec, EcoVec};
|
||||||
use typst_syntax::package::{PackageSpec, PackageVersion};
|
use typst_syntax::package::{PackageSpec, PackageVersion};
|
||||||
use typst_syntax::{Lines, Span, Spanned, SyntaxError};
|
use typst_syntax::{Lines, Span, Spanned, SyntaxError};
|
||||||
|
use utf8_iter::ErrorReportingUtf8Chars;
|
||||||
|
|
||||||
use crate::engine::Engine;
|
use crate::engine::Engine;
|
||||||
use crate::loading::{LoadSource, Loaded};
|
use crate::loading::{LoadSource, Loaded};
|
||||||
@ -577,12 +578,12 @@ impl Loaded {
|
|||||||
msg: impl std::fmt::Display,
|
msg: impl std::fmt::Display,
|
||||||
error: impl std::fmt::Display,
|
error: impl std::fmt::Display,
|
||||||
) -> EcoVec<SourceDiagnostic> {
|
) -> EcoVec<SourceDiagnostic> {
|
||||||
|
let pos = pos.into();
|
||||||
let lines = Lines::from_bytes(&self.bytes);
|
let lines = Lines::from_bytes(&self.bytes);
|
||||||
match (self.source.v, lines) {
|
match (self.source.v, lines) {
|
||||||
// Only report an error in an external file,
|
// Only report an error in an external file,
|
||||||
// if it is human readable (valid utf-8).
|
// if it is human readable (valid utf-8).
|
||||||
(LoadSource::Path(file_id), Ok(lines)) => {
|
(LoadSource::Path(file_id), Ok(lines)) => {
|
||||||
let pos = pos.into();
|
|
||||||
if let Some(range) = pos.range(&lines) {
|
if let Some(range) = pos.range(&lines) {
|
||||||
let span = Span::from_range(file_id, range);
|
let span = Span::from_range(file_id, range);
|
||||||
return eco_vec!(error!(span, "{msg} ({error})"));
|
return eco_vec!(error!(span, "{msg} ({error})"));
|
||||||
@ -600,20 +601,28 @@ impl Loaded {
|
|||||||
};
|
};
|
||||||
eco_vec![error]
|
eco_vec![error]
|
||||||
}
|
}
|
||||||
_ => self.err_in_bytes(pos, msg, error),
|
(_, Ok(lines)) => {
|
||||||
|
let error = if let Some(pair) = pos.line_col(&lines) {
|
||||||
|
let (line, col) = pair.numbers();
|
||||||
|
error!(self.source.span, "{msg} ({error} at {line}:{col})")
|
||||||
|
} else {
|
||||||
|
error!(self.source.span, "{msg} ({error})")
|
||||||
|
};
|
||||||
|
eco_vec![error]
|
||||||
|
}
|
||||||
|
_ => self.err_in_invalid_text(pos, msg, error),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Report an error, possibly in an external file.
|
/// Report an error, possibly in an external file.
|
||||||
pub fn err_in_bytes(
|
pub fn err_in_invalid_text(
|
||||||
&self,
|
&self,
|
||||||
pos: impl Into<ReportPos>,
|
pos: impl Into<ReportPos>,
|
||||||
msg: impl std::fmt::Display,
|
msg: impl std::fmt::Display,
|
||||||
error: impl std::fmt::Display,
|
error: impl std::fmt::Display,
|
||||||
) -> EcoVec<SourceDiagnostic> {
|
) -> EcoVec<SourceDiagnostic> {
|
||||||
let pos = pos.into();
|
let pos = pos.into();
|
||||||
let result = Lines::from_bytes(&self.bytes).ok().and_then(|l| pos.line_col(&l));
|
let error = if let Some(pair) = pos.try_line_col(&self.bytes) {
|
||||||
let error = if let Some(pair) = result {
|
|
||||||
let (line, col) = pair.numbers();
|
let (line, col) = pair.numbers();
|
||||||
error!(self.source.span, "{msg} ({error} at {line}:{col})")
|
error!(self.source.span, "{msg} ({error} at {line}:{col})")
|
||||||
} else {
|
} else {
|
||||||
@ -671,6 +680,17 @@ impl ReportPos {
|
|||||||
ReportPos::None => None,
|
ReportPos::None => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Either get the the line/column pair, or try to compute it from possibly
|
||||||
|
/// invalid utf-8 data.
|
||||||
|
fn try_line_col(&self, bytes: &[u8]) -> Option<LineCol> {
|
||||||
|
match self {
|
||||||
|
&ReportPos::Full(_, pair) => Some(pair),
|
||||||
|
ReportPos::Range(range) => LineCol::try_from_byte_pos(range.start, bytes),
|
||||||
|
&ReportPos::LineCol(pair) => Some(pair),
|
||||||
|
ReportPos::None => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A line/column pair.
|
/// A line/column pair.
|
||||||
@ -696,6 +716,20 @@ impl LineCol {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Try to compute a line/column pair from possibly invalid utf-8 data.
|
||||||
|
pub fn try_from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
|
||||||
|
let bytes = &bytes[..pos];
|
||||||
|
let mut line = 0;
|
||||||
|
let line_start = memchr::memchr_iter(b'\n', bytes)
|
||||||
|
.inspect(|_| line += 1)
|
||||||
|
.last()
|
||||||
|
.map(|i| i + 1)
|
||||||
|
.unwrap_or(bytes.len());
|
||||||
|
|
||||||
|
let col = ErrorReportingUtf8Chars::new(&bytes[line_start..]).count();
|
||||||
|
Some(LineCol::zero_based(line, col))
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the 0-based line/column indices.
|
/// Returns the 0-based line/column indices.
|
||||||
pub fn indices(&self) -> (usize, usize) {
|
pub fn indices(&self) -> (usize, usize) {
|
||||||
(self.line, self.col)
|
(self.line, self.col)
|
||||||
|
@ -145,7 +145,7 @@ impl Loaded {
|
|||||||
let start = err.valid_up_to();
|
let start = err.valid_up_to();
|
||||||
let end = start + err.error_len().unwrap_or(0);
|
let end = start + err.error_len().unwrap_or(0);
|
||||||
// always report this error in the source file.
|
// always report this error in the source file.
|
||||||
self.err_in_bytes(
|
self.err_in_invalid_text(
|
||||||
start..end,
|
start..end,
|
||||||
"failed to convert to string",
|
"failed to convert to string",
|
||||||
FileError::from(err),
|
FileError::from(err),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user