mirror of
https://github.com/typst/typst
synced 2025-08-24 03:34:14 +08:00
feat: try to report line and column in files that contain invalid utf-8
This commit is contained in:
parent
e5d8f02554
commit
23f1c86b84
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3112,6 +3112,7 @@ dependencies = [
|
||||
"unicode-segmentation",
|
||||
"unscanny",
|
||||
"usvg",
|
||||
"utf8_iter",
|
||||
"wasmi",
|
||||
"xmlwriter",
|
||||
]
|
||||
|
@ -135,6 +135,7 @@ unicode-segmentation = "1"
|
||||
unscanny = "0.1"
|
||||
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
|
||||
usvg = { version = "0.45", default-features = false, features = ["text"] }
|
||||
utf8_iter = "1.0.4"
|
||||
walkdir = "2"
|
||||
wasmi = "0.40.0"
|
||||
web-sys = "0.3"
|
||||
|
@ -66,6 +66,7 @@ unicode-normalization = { workspace = true }
|
||||
unicode-segmentation = { workspace = true }
|
||||
unscanny = { workspace = true }
|
||||
usvg = { workspace = true }
|
||||
utf8_iter = { workspace = true }
|
||||
wasmi = { workspace = true }
|
||||
xmlwriter = { workspace = true }
|
||||
|
||||
|
@ -10,6 +10,7 @@ use comemo::Tracked;
|
||||
use ecow::{eco_vec, EcoVec};
|
||||
use typst_syntax::package::{PackageSpec, PackageVersion};
|
||||
use typst_syntax::{Lines, Span, Spanned, SyntaxError};
|
||||
use utf8_iter::ErrorReportingUtf8Chars;
|
||||
|
||||
use crate::engine::Engine;
|
||||
use crate::loading::{LoadSource, Loaded};
|
||||
@ -577,12 +578,12 @@ impl Loaded {
|
||||
msg: impl std::fmt::Display,
|
||||
error: impl std::fmt::Display,
|
||||
) -> EcoVec<SourceDiagnostic> {
|
||||
let pos = pos.into();
|
||||
let lines = Lines::from_bytes(&self.bytes);
|
||||
match (self.source.v, lines) {
|
||||
// Only report an error in an external file,
|
||||
// if it is human readable (valid utf-8).
|
||||
(LoadSource::Path(file_id), Ok(lines)) => {
|
||||
let pos = pos.into();
|
||||
if let Some(range) = pos.range(&lines) {
|
||||
let span = Span::from_range(file_id, range);
|
||||
return eco_vec!(error!(span, "{msg} ({error})"));
|
||||
@ -600,20 +601,28 @@ impl Loaded {
|
||||
};
|
||||
eco_vec![error]
|
||||
}
|
||||
_ => self.err_in_bytes(pos, msg, error),
|
||||
(_, Ok(lines)) => {
|
||||
let error = if let Some(pair) = pos.line_col(&lines) {
|
||||
let (line, col) = pair.numbers();
|
||||
error!(self.source.span, "{msg} ({error} at {line}:{col})")
|
||||
} else {
|
||||
error!(self.source.span, "{msg} ({error})")
|
||||
};
|
||||
eco_vec![error]
|
||||
}
|
||||
_ => self.err_in_invalid_text(pos, msg, error),
|
||||
}
|
||||
}
|
||||
|
||||
/// Report an error, possibly in an external file.
|
||||
pub fn err_in_bytes(
|
||||
pub fn err_in_invalid_text(
|
||||
&self,
|
||||
pos: impl Into<ReportPos>,
|
||||
msg: impl std::fmt::Display,
|
||||
error: impl std::fmt::Display,
|
||||
) -> EcoVec<SourceDiagnostic> {
|
||||
let pos = pos.into();
|
||||
let result = Lines::from_bytes(&self.bytes).ok().and_then(|l| pos.line_col(&l));
|
||||
let error = if let Some(pair) = result {
|
||||
let error = if let Some(pair) = pos.try_line_col(&self.bytes) {
|
||||
let (line, col) = pair.numbers();
|
||||
error!(self.source.span, "{msg} ({error} at {line}:{col})")
|
||||
} else {
|
||||
@ -671,6 +680,17 @@ impl ReportPos {
|
||||
ReportPos::None => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Either get the the line/column pair, or try to compute it from possibly
|
||||
/// invalid utf-8 data.
|
||||
fn try_line_col(&self, bytes: &[u8]) -> Option<LineCol> {
|
||||
match self {
|
||||
&ReportPos::Full(_, pair) => Some(pair),
|
||||
ReportPos::Range(range) => LineCol::try_from_byte_pos(range.start, bytes),
|
||||
&ReportPos::LineCol(pair) => Some(pair),
|
||||
ReportPos::None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A line/column pair.
|
||||
@ -696,6 +716,20 @@ impl LineCol {
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to compute a line/column pair from possibly invalid utf-8 data.
|
||||
pub fn try_from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
|
||||
let bytes = &bytes[..pos];
|
||||
let mut line = 0;
|
||||
let line_start = memchr::memchr_iter(b'\n', bytes)
|
||||
.inspect(|_| line += 1)
|
||||
.last()
|
||||
.map(|i| i + 1)
|
||||
.unwrap_or(bytes.len());
|
||||
|
||||
let col = ErrorReportingUtf8Chars::new(&bytes[line_start..]).count();
|
||||
Some(LineCol::zero_based(line, col))
|
||||
}
|
||||
|
||||
/// Returns the 0-based line/column indices.
|
||||
pub fn indices(&self) -> (usize, usize) {
|
||||
(self.line, self.col)
|
||||
|
@ -145,7 +145,7 @@ impl Loaded {
|
||||
let start = err.valid_up_to();
|
||||
let end = start + err.error_len().unwrap_or(0);
|
||||
// always report this error in the source file.
|
||||
self.err_in_bytes(
|
||||
self.err_in_invalid_text(
|
||||
start..end,
|
||||
"failed to convert to string",
|
||||
FileError::from(err),
|
||||
|
Loading…
x
Reference in New Issue
Block a user