mirror of
https://github.com/typst/typst
synced 2025-08-24 19:54:14 +08:00
experiment with utf8_iter
This commit is contained in:
parent
a79120b668
commit
3879d0826a
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -3112,6 +3112,7 @@ dependencies = [
|
|||||||
"unicode-segmentation",
|
"unicode-segmentation",
|
||||||
"unscanny",
|
"unscanny",
|
||||||
"usvg",
|
"usvg",
|
||||||
|
"utf8_iter",
|
||||||
"wasmi",
|
"wasmi",
|
||||||
"xmlwriter",
|
"xmlwriter",
|
||||||
]
|
]
|
||||||
|
@ -135,6 +135,7 @@ unicode-segmentation = "1"
|
|||||||
unscanny = "0.1"
|
unscanny = "0.1"
|
||||||
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
|
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
|
||||||
usvg = { version = "0.45", default-features = false, features = ["text"] }
|
usvg = { version = "0.45", default-features = false, features = ["text"] }
|
||||||
|
utf8_iter = "1.0.4"
|
||||||
walkdir = "2"
|
walkdir = "2"
|
||||||
wasmi = "0.40.0"
|
wasmi = "0.40.0"
|
||||||
web-sys = "0.3"
|
web-sys = "0.3"
|
||||||
|
@ -66,6 +66,7 @@ unicode-normalization = { workspace = true }
|
|||||||
unicode-segmentation = { workspace = true }
|
unicode-segmentation = { workspace = true }
|
||||||
unscanny = { workspace = true }
|
unscanny = { workspace = true }
|
||||||
usvg = { workspace = true }
|
usvg = { workspace = true }
|
||||||
|
utf8_iter = { workspace = true }
|
||||||
wasmi = { workspace = true }
|
wasmi = { workspace = true }
|
||||||
xmlwriter = { workspace = true }
|
xmlwriter = { workspace = true }
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@ mod yaml_;
|
|||||||
use comemo::Tracked;
|
use comemo::Tracked;
|
||||||
use ecow::{eco_vec, EcoString, EcoVec};
|
use ecow::{eco_vec, EcoString, EcoVec};
|
||||||
use typst_syntax::{FileId, Span, Spanned};
|
use typst_syntax::{FileId, Span, Spanned};
|
||||||
|
use utf8_iter::ErrorReportingUtf8Chars;
|
||||||
|
|
||||||
pub use self::cbor_::*;
|
pub use self::cbor_::*;
|
||||||
pub use self::csv_::*;
|
pub use self::csv_::*;
|
||||||
@ -260,8 +261,6 @@ impl LineCol {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: this function should only return None if the position is out of
|
|
||||||
// bounds not if there is invalid utf-8
|
|
||||||
pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
|
pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
|
||||||
let bytes = &bytes[..pos];
|
let bytes = &bytes[..pos];
|
||||||
let mut line = 0;
|
let mut line = 0;
|
||||||
@ -270,10 +269,9 @@ impl LineCol {
|
|||||||
.last()
|
.last()
|
||||||
.map(|i| i + 1)
|
.map(|i| i + 1)
|
||||||
.unwrap_or(bytes.len());
|
.unwrap_or(bytes.len());
|
||||||
// TODO: streaming-utf8 decoding ignore invalid characters
|
|
||||||
// might need to update error reporting too (use utf8_iter)
|
// Try to compute a column even if the string isn't valid utf-8.
|
||||||
let str = std::str::from_utf8(&bytes[line_start..]).ok()?;
|
let col = ErrorReportingUtf8Chars::new(&bytes[line_start..]).count();
|
||||||
let col = str.chars().count();
|
|
||||||
Some(LineCol::zero_based(line, col))
|
Some(LineCol::zero_based(line, col))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -319,18 +317,15 @@ impl LineCol {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: this function should only return None if the position is out of
|
|
||||||
// bounds not if there is invalid utf-8
|
|
||||||
fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option<usize> {
|
fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option<usize> {
|
||||||
let line = &bytes[line_offset..];
|
let line = &bytes[line_offset..];
|
||||||
// TODO: streaming-utf8 decoding ignore invalid characters
|
// TODO: streaming-utf8 decoding ignore invalid characters
|
||||||
// might need to update error reporting too (use utf8_iter)
|
// might need to update error reporting too (use utf8_iter)
|
||||||
|
|
||||||
// validate the whole line, so it can be displayed
|
|
||||||
let len = memchr::memchr(b'\n', line).unwrap_or(line.len());
|
|
||||||
let str = std::str::from_utf8(&line[..len]).ok()?;
|
|
||||||
if let Some(idx) = col.checked_sub(1) {
|
if let Some(idx) = col.checked_sub(1) {
|
||||||
str.char_indices().nth(idx).map(|(i, c)| i + c.len_utf8())
|
// Try to compute position even if the string isn't valid utf-8.
|
||||||
|
let mut iter = ErrorReportingUtf8Chars::new(line);
|
||||||
|
_ = iter.nth(idx)?;
|
||||||
|
Some(line.len() - iter.as_slice().len())
|
||||||
} else {
|
} else {
|
||||||
Some(0)
|
Some(0)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user