mirror of
https://github.com/typst/typst
synced 2025-08-24 03:34:14 +08:00
experiment with utf8_iter
This commit is contained in:
parent
a79120b668
commit
3879d0826a
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3112,6 +3112,7 @@ dependencies = [
|
||||
"unicode-segmentation",
|
||||
"unscanny",
|
||||
"usvg",
|
||||
"utf8_iter",
|
||||
"wasmi",
|
||||
"xmlwriter",
|
||||
]
|
||||
|
@ -135,6 +135,7 @@ unicode-segmentation = "1"
|
||||
unscanny = "0.1"
|
||||
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
|
||||
usvg = { version = "0.45", default-features = false, features = ["text"] }
|
||||
utf8_iter = "1.0.4"
|
||||
walkdir = "2"
|
||||
wasmi = "0.40.0"
|
||||
web-sys = "0.3"
|
||||
|
@ -66,6 +66,7 @@ unicode-normalization = { workspace = true }
|
||||
unicode-segmentation = { workspace = true }
|
||||
unscanny = { workspace = true }
|
||||
usvg = { workspace = true }
|
||||
utf8_iter = { workspace = true }
|
||||
wasmi = { workspace = true }
|
||||
xmlwriter = { workspace = true }
|
||||
|
||||
|
@ -18,6 +18,7 @@ mod yaml_;
|
||||
use comemo::Tracked;
|
||||
use ecow::{eco_vec, EcoString, EcoVec};
|
||||
use typst_syntax::{FileId, Span, Spanned};
|
||||
use utf8_iter::ErrorReportingUtf8Chars;
|
||||
|
||||
pub use self::cbor_::*;
|
||||
pub use self::csv_::*;
|
||||
@ -260,8 +261,6 @@ impl LineCol {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this function should only return None if the position is out of
|
||||
// bounds not if there is invalid utf-8
|
||||
pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
|
||||
let bytes = &bytes[..pos];
|
||||
let mut line = 0;
|
||||
@ -270,10 +269,9 @@ impl LineCol {
|
||||
.last()
|
||||
.map(|i| i + 1)
|
||||
.unwrap_or(bytes.len());
|
||||
// TODO: use streaming UTF-8 decoding that ignores invalid characters
|
||||
// might need to update error reporting too (use utf8_iter)
|
||||
let str = std::str::from_utf8(&bytes[line_start..]).ok()?;
|
||||
let col = str.chars().count();
|
||||
|
||||
// Try to compute a column even if the string isn't valid utf-8.
|
||||
let col = ErrorReportingUtf8Chars::new(&bytes[line_start..]).count();
|
||||
Some(LineCol::zero_based(line, col))
|
||||
}
|
||||
|
||||
@ -319,18 +317,15 @@ impl LineCol {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this function should only return None if the position is out of
|
||||
// bounds not if there is invalid utf-8
|
||||
fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option<usize> {
|
||||
let line = &bytes[line_offset..];
|
||||
// TODO: use streaming UTF-8 decoding that ignores invalid characters
|
||||
// might need to update error reporting too (use utf8_iter)
|
||||
|
||||
// validate the whole line, so it can be displayed
|
||||
let len = memchr::memchr(b'\n', line).unwrap_or(line.len());
|
||||
let str = std::str::from_utf8(&line[..len]).ok()?;
|
||||
if let Some(idx) = col.checked_sub(1) {
|
||||
str.char_indices().nth(idx).map(|(i, c)| i + c.len_utf8())
|
||||
// Try to compute position even if the string isn't valid utf-8.
|
||||
let mut iter = ErrorReportingUtf8Chars::new(line);
|
||||
_ = iter.nth(idx)?;
|
||||
Some(line.len() - iter.as_slice().len())
|
||||
} else {
|
||||
Some(0)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user