Mirror of https://github.com/typst/typst (synced 2025-08-24 19:54:14 +08:00)
refactor: factor out a general Lines struct from Source
This commit is contained in:
parent 2e2f646f2a
commit e5d8f02554
Cargo.lock (generated): 2 changed lines
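
The gist of the refactor, as a reading aid for the hunks below: the line/column bookkeeping that used to live inside `Source` moves into a standalone, publicly re-exported `Lines<S>` type in `typst-syntax`, so the CLI, the test world, and the data-loading diagnostics can index plain strings or loaded bytes without going through a `Source`. A minimal sketch of the resulting API (illustrative only; the string literal is made up):

```rust
use typst_syntax::Lines;

fn sketch() {
    // Line metadata over an arbitrary string, independent of any Source.
    let lines = Lines::new("hello\nwörld\n".to_string());
    assert_eq!(lines.len_lines(), 3);

    // Byte offset 9 sits on line 1 ("wörld"), column 2 (both 0-based)...
    assert_eq!(lines.byte_to_line_column(9), Some((1, 2)));

    // ...and the mapping goes back the other way.
    assert_eq!(lines.line_column_to_byte(1, 2), Some(9));
}
```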
@@ -3112,7 +3112,6 @@ dependencies = [
  "unicode-segmentation",
  "unscanny",
  "usvg",
- "utf8_iter",
  "wasmi",
  "xmlwriter",
 ]
@@ -3201,6 +3200,7 @@ dependencies = [
 name = "typst-syntax"
 version = "0.13.1"
 dependencies = [
+ "comemo",
  "ecow",
  "serde",
  "toml",
@@ -135,7 +135,6 @@ unicode-segmentation = "1"
 unscanny = "0.1"
 ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
 usvg = { version = "0.45", default-features = false, features = ["text"] }
-utf8_iter = "1.0.4"
 walkdir = "2"
 wasmi = "0.40.0"
 web-sys = "0.3"
@@ -16,7 +16,7 @@ use typst::diag::{
 use typst::foundations::{Datetime, Smart};
 use typst::html::HtmlDocument;
 use typst::layout::{Frame, Page, PageRanges, PagedDocument};
-use typst::syntax::{FileId, Source, Span};
+use typst::syntax::{FileId, Lines, Span};
 use typst::WorldExt;
 use typst_pdf::{PdfOptions, PdfStandards, Timestamp};
 
@@ -696,7 +696,7 @@ fn label(world: &SystemWorld, span: Span) -> Option<Label<FileId>> {
 impl<'a> codespan_reporting::files::Files<'a> for SystemWorld {
     type FileId = FileId;
     type Name = String;
-    type Source = Source;
+    type Source = Lines<String>;
 
     fn name(&'a self, id: FileId) -> CodespanResult<Self::Name> {
         let vpath = id.vpath();
@@ -85,6 +85,6 @@ fn resolve_span(world: &SystemWorld, span: Span) -> Option<(String, u32)> {
     let id = span.id()?;
     let source = world.source(id).ok()?;
     let range = source.range(span)?;
-    let line = source.byte_to_line(range.start)?;
+    let line = source.lines().byte_to_line(range.start)?;
     Some((format!("{id:?}"), line as u32 + 1))
 }
@@ -9,7 +9,7 @@ use ecow::{eco_format, EcoString};
 use parking_lot::Mutex;
 use typst::diag::{FileError, FileResult};
 use typst::foundations::{Bytes, Datetime, Dict, IntoValue};
-use typst::syntax::{FileId, Source, VirtualPath};
+use typst::syntax::{FileId, Lines, Source, VirtualPath};
 use typst::text::{Font, FontBook};
 use typst::utils::LazyHash;
 use typst::{Library, World};
@@ -183,8 +183,18 @@ impl SystemWorld {
 
     /// Lookup a source file by id.
     #[track_caller]
-    pub fn lookup(&self, id: FileId) -> Source {
-        self.source(id).expect("file id does not point to any source file")
+    pub fn lookup(&self, id: FileId) -> Lines<String> {
+        self.slot(id, |slot| {
+            if let Some(source) = slot.source.get() {
+                let source = source.as_ref().expect("file is not valid");
+                source.lines()
+            } else if let Some(bytes) = slot.file.get() {
+                let bytes = bytes.as_ref().expect("file is not valid");
+                Lines::from_bytes(bytes.as_slice()).expect("file is not valid utf-8")
+            } else {
+                panic!("file id does not point to any source file");
+            }
+        })
     }
 }
 
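
With `lookup` now returning `Lines<String>`, the codespan-reporting integration above can resolve positions in any file the world has loaded, not only parsed Typst sources. A hedged sketch of a caller (function name and error handling are placeholders, not part of the diff):

```rust
// Sketch: map a byte offset in a file to a 1-based line number for display.
fn display_line(world: &SystemWorld, id: FileId, byte_idx: usize) -> Option<usize> {
    let lines = world.lookup(id);             // Lines<String> for source or raw bytes
    let line = lines.byte_to_line(byte_idx)?; // 0-based line index
    Some(line + 1)                            // 1-based for user-facing output
}
```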
@@ -339,6 +349,11 @@ impl<T: Clone> SlotCell<T> {
         self.accessed = false;
     }
 
+    /// Gets the contents of the cell.
+    fn get(&self) -> Option<&FileResult<T>> {
+        self.data.as_ref()
+    }
+
     /// Gets the contents of the cell or initialize them.
     fn get_or_init(
         &mut self,
@@ -66,7 +66,6 @@ unicode-normalization = { workspace = true }
 unicode-segmentation = { workspace = true }
 unscanny = { workspace = true }
 usvg = { workspace = true }
-utf8_iter = { workspace = true }
 wasmi = { workspace = true }
 xmlwriter = { workspace = true }
 
@@ -9,10 +9,10 @@ use std::string::FromUtf8Error;
 use comemo::Tracked;
 use ecow::{eco_vec, EcoVec};
 use typst_syntax::package::{PackageSpec, PackageVersion};
-use typst_syntax::{Span, Spanned, SyntaxError};
+use typst_syntax::{Lines, Span, Spanned, SyntaxError};
 
 use crate::engine::Engine;
-use crate::loading::{Loaded, LineCol};
+use crate::loading::{LoadSource, Loaded};
 use crate::{World, WorldExt};
 
 /// Early-return with a [`StrResult`] or [`SourceResult`].
@@ -569,6 +569,144 @@ impl From<PackageError> for EcoString {
     }
 }
 
+impl Loaded {
+    /// Report an error, possibly in an external file.
+    pub fn err_in_text(
+        &self,
+        pos: impl Into<ReportPos>,
+        msg: impl std::fmt::Display,
+        error: impl std::fmt::Display,
+    ) -> EcoVec<SourceDiagnostic> {
+        let lines = Lines::from_bytes(&self.bytes);
+        match (self.source.v, lines) {
+            // Only report an error in an external file,
+            // if it is human readable (valid utf-8).
+            (LoadSource::Path(file_id), Ok(lines)) => {
+                let pos = pos.into();
+                if let Some(range) = pos.range(&lines) {
+                    let span = Span::from_range(file_id, range);
+                    return eco_vec!(error!(span, "{msg} ({error})"));
+                }
+
+                // Either `ReportPos::None` was provided, or resolving the range
+                // from the line/column failed. If present report the possibly
+                // wrong line/column in the error message anyway.
+                let span = Span::from_range(file_id, 0..self.bytes.len());
+                let error = if let Some(pair) = pos.line_col(&lines) {
+                    let (line, col) = pair.numbers();
+                    error!(span, "{msg} ({error} at {line}:{col})")
+                } else {
+                    error!(span, "{msg} ({error})")
+                };
+                eco_vec![error]
+            }
+            _ => self.err_in_bytes(pos, msg, error),
+        }
+    }
+
+    /// Report an error, possibly in an external file.
+    pub fn err_in_bytes(
+        &self,
+        pos: impl Into<ReportPos>,
+        msg: impl std::fmt::Display,
+        error: impl std::fmt::Display,
+    ) -> EcoVec<SourceDiagnostic> {
+        let pos = pos.into();
+        let result = Lines::from_bytes(&self.bytes).ok().and_then(|l| pos.line_col(&l));
+        let error = if let Some(pair) = result {
+            let (line, col) = pair.numbers();
+            error!(self.source.span, "{msg} ({error} at {line}:{col})")
+        } else {
+            error!(self.source.span, "{msg} ({error})")
+        };
+        eco_vec![error]
+    }
+}
+
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
+pub enum ReportPos {
+    /// Contains the range, and the 0-based line/column.
+    Full(std::ops::Range<usize>, LineCol),
+    /// Contains the range.
+    Range(std::ops::Range<usize>),
+    /// Contains the 0-based line/column.
+    LineCol(LineCol),
+    #[default]
+    None,
+}
+
+impl From<std::ops::Range<usize>> for ReportPos {
+    fn from(value: std::ops::Range<usize>) -> Self {
+        Self::Range(value)
+    }
+}
+
+impl From<LineCol> for ReportPos {
+    fn from(value: LineCol) -> Self {
+        Self::LineCol(value)
+    }
+}
+
+impl ReportPos {
+    fn range(&self, lines: &Lines<String>) -> Option<std::ops::Range<usize>> {
+        match self {
+            ReportPos::Full(range, _) => Some(range.clone()),
+            ReportPos::Range(range) => Some(range.clone()),
+            &ReportPos::LineCol(pair) => {
+                let i = lines.line_column_to_byte(pair.line, pair.col)?;
+                Some(i..i)
+            }
+            ReportPos::None => None,
+        }
+    }
+
+    fn line_col(&self, lines: &Lines<String>) -> Option<LineCol> {
+        match self {
+            &ReportPos::Full(_, pair) => Some(pair),
+            ReportPos::Range(range) => {
+                let (line, col) = lines.byte_to_line_column(range.start)?;
+                Some(LineCol::zero_based(line, col))
+            }
+            &ReportPos::LineCol(pair) => Some(pair),
+            ReportPos::None => None,
+        }
+    }
+}
+
+/// A line/column pair.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct LineCol {
+    /// The 0-based line.
+    line: usize,
+    /// The 0-based column.
+    col: usize,
+}
+
+impl LineCol {
+    /// Constructs the line/column pair from 0-based indices.
+    pub fn zero_based(line: usize, col: usize) -> Self {
+        Self { line, col }
+    }
+
+    /// Constructs the line/column pair from 1-based numbers.
+    pub fn one_based(line: usize, col: usize) -> Self {
+        Self {
+            line: line.saturating_sub(1),
+            col: col.saturating_sub(1),
+        }
+    }
+
+    /// Returns the 0-based line/column indices.
+    pub fn indices(&self) -> (usize, usize) {
+        (self.line, self.col)
+    }
+
+    /// Returns the 1-based line/column numbers.
+    pub fn numbers(&self) -> (usize, usize) {
+        (self.line + 1, self.col + 1)
+    }
+}
+
 /// Format a user-facing error message for an XML-like file format.
 pub fn format_xml_like_error(
     format: &str,
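
The point of `ReportPos` is that a loader can pass along whatever position information it has — a byte range, a line/column pair, both, or nothing — and `err_in_text`/`err_in_bytes` resolve it against `Lines`. A small standalone sketch of the conversions (the import path is an assumption; both types are public in this diff):

```rust
use typst_library::diag::{LineCol, ReportPos}; // assumed path

fn sketch() {
    // A ReportPos can be built from a byte range or from a line/column pair.
    let _from_range: ReportPos = (10..14).into();
    let _from_pair: ReportPos = LineCol::one_based(3, 7).into();

    // LineCol stores 0-based indices and exposes both views.
    let pair = LineCol::one_based(3, 7);
    assert_eq!(pair.indices(), (2, 6));
    assert_eq!(pair.numbers(), (3, 7));
}
```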
@@ -1,10 +1,10 @@
 use ecow::EcoVec;
 use typst_syntax::Spanned;
 
-use crate::diag::{bail, SourceDiagnostic, SourceResult};
+use crate::diag::{bail, LineCol, ReportPos, SourceDiagnostic, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
-use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos};
+use crate::loading::{DataSource, Load, Loaded, Readable};
 
 /// Reads structured data from a CSV file.
 ///
|
|||||||
})
|
})
|
||||||
.unwrap_or(LineCol::one_based(line, 1).into());
|
.unwrap_or(LineCol::one_based(line, 1).into());
|
||||||
match err.kind() {
|
match err.kind() {
|
||||||
::csv::ErrorKind::Utf8 { .. } => data.err_in_text(pos, msg, "file is not valid utf-8"),
|
::csv::ErrorKind::Utf8 { .. } => {
|
||||||
|
data.err_in_text(pos, msg, "file is not valid utf-8")
|
||||||
|
}
|
||||||
::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
|
::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
|
||||||
let err =
|
let err =
|
||||||
format!("found {len} instead of {expected_len} fields in line {line}");
|
format!("found {len} instead of {expected_len} fields in line {line}");
|
||||||
|
@@ -1,10 +1,10 @@
 use ecow::eco_format;
 use typst_syntax::Spanned;
 
-use crate::diag::{At, SourceResult};
+use crate::diag::{At, LineCol, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{func, scope, Str, Value};
-use crate::loading::{DataSource, LineCol, Load, Readable};
+use crate::loading::{DataSource, Load, Readable};
 
 /// Reads structured data from a JSON file.
 ///
@@ -16,9 +16,8 @@ mod xml_;
 mod yaml_;
 
 use comemo::Tracked;
-use ecow::{eco_vec, EcoString, EcoVec};
+use ecow::EcoString;
 use typst_syntax::{FileId, Span, Spanned};
-use utf8_iter::ErrorReportingUtf8Chars;
 
 pub use self::cbor_::*;
 pub use self::csv_::*;
@@ -28,7 +27,7 @@ pub use self::toml_::*;
 pub use self::xml_::*;
 pub use self::yaml_::*;
 
-use crate::diag::{error, At, FileError, SourceDiagnostic, SourceResult};
+use crate::diag::{At, FileError, SourceResult};
 use crate::foundations::OneOrMultiple;
 use crate::foundations::{cast, Bytes, Scope, Str};
 use crate::World;
@@ -129,6 +128,7 @@ pub struct Loaded {
 }
 
 impl Loaded {
+    /// FIXME: remove this?
     pub fn dummy() -> Self {
         Loaded::new(
             typst_syntax::Spanned::new(LoadSource::Bytes, Span::detached()),
|
|||||||
|
|
||||||
pub fn as_str(&self) -> SourceResult<&str> {
|
pub fn as_str(&self) -> SourceResult<&str> {
|
||||||
self.bytes.as_str().map_err(|err| {
|
self.bytes.as_str().map_err(|err| {
|
||||||
// TODO: should the error even be reported in the file if it's possibly binary?
|
|
||||||
let start = err.valid_up_to();
|
let start = err.valid_up_to();
|
||||||
let end = start + err.error_len().unwrap_or(0);
|
let end = start + err.error_len().unwrap_or(0);
|
||||||
self.err_in_text(start..end, "failed to convert to string", FileError::from(err))
|
// always report this error in the source file.
|
||||||
|
self.err_in_bytes(
|
||||||
|
start..end,
|
||||||
|
"failed to convert to string",
|
||||||
|
FileError::from(err),
|
||||||
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Report an error, possibly in an external file.
|
|
||||||
pub fn err_in_text(
|
|
||||||
&self,
|
|
||||||
pos: impl Into<ReportPos>,
|
|
||||||
msg: impl std::fmt::Display,
|
|
||||||
error: impl std::fmt::Display,
|
|
||||||
) -> EcoVec<SourceDiagnostic> {
|
|
||||||
let pos = pos.into();
|
|
||||||
let error = match self.source.v {
|
|
||||||
LoadSource::Path(file_id) => {
|
|
||||||
if let Some(range) = pos.range(self.bytes.as_slice()) {
|
|
||||||
let span = Span::from_range(file_id, range);
|
|
||||||
return eco_vec!(error!(span, "{msg} ({error})"));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Either there was no range provided, or resolving the range
|
|
||||||
// from the line/column failed. If present report the possibly
|
|
||||||
// wrong line/column anyway.
|
|
||||||
let span = Span::from_range(file_id, 0..self.bytes.len());
|
|
||||||
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
|
|
||||||
let (line, col) = pair.numbers();
|
|
||||||
error!(span, "{msg} ({error} at {line}:{col})")
|
|
||||||
} else {
|
|
||||||
error!(span, "{msg} ({error})")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LoadSource::Bytes => {
|
|
||||||
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
|
|
||||||
let (line, col) = pair.numbers();
|
|
||||||
error!(self.source.span, "{msg} ({error} at {line}:{col})")
|
|
||||||
} else {
|
|
||||||
error!(self.source.span, "{msg} ({error})")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
eco_vec![error]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A loaded [`DataSource`].
|
/// A loaded [`DataSource`].
|
||||||
@@ -195,142 +161,6 @@ pub enum LoadSource {
     Bytes,
 }
 
-#[derive(Debug, Default)]
-pub enum ReportPos {
-    /// Contains the range, and the 0-based line/column.
-    Full(std::ops::Range<usize>, LineCol),
-    /// Contains the range.
-    Range(std::ops::Range<usize>),
-    /// Contains the 0-based line/column.
-    LineCol(LineCol),
-    #[default]
-    None,
-}
-
-impl From<std::ops::Range<usize>> for ReportPos {
-    fn from(value: std::ops::Range<usize>) -> Self {
-        Self::Range(value)
-    }
-}
-
-impl From<LineCol> for ReportPos {
-    fn from(value: LineCol) -> Self {
-        Self::LineCol(value)
-    }
-}
-
-impl ReportPos {
-    fn range(&self, bytes: &[u8]) -> Option<std::ops::Range<usize>> {
-        match self {
-            ReportPos::Full(range, _) => Some(range.clone()),
-            ReportPos::Range(range) => Some(range.clone()),
-            &ReportPos::LineCol(pair) => pair.byte_pos(bytes).map(|i| i..i),
-            ReportPos::None => None,
-        }
-    }
-
-    fn line_col(&self, bytes: &[u8]) -> Option<LineCol> {
-        match self {
-            &ReportPos::Full(_, pair) => Some(pair),
-            ReportPos::Range(range) => LineCol::from_byte_pos(range.start, bytes),
-            &ReportPos::LineCol(pair) => Some(pair),
-            ReportPos::None => None,
-        }
-    }
-}
-
-#[derive(Clone, Copy, Debug)]
-pub struct LineCol {
-    /// The 0-based line.
-    line: usize,
-    /// The 0-based column.
-    col: usize,
-}
-
-impl LineCol {
-    /// Constructs the line/column pair from 0-based indices.
-    pub fn zero_based(line: usize, col: usize) -> Self {
-        Self { line, col }
-    }
-
-    /// Constructs the line/column pair from 1-based numbers.
-    pub fn one_based(line: usize, col: usize) -> Self {
-        Self {
-            line: line.saturating_sub(1),
-            col: col.saturating_sub(1),
-        }
-    }
-
-    pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
-        let bytes = &bytes[..pos];
-        let mut line = 0;
-        let line_start = memchr::memchr_iter(b'\n', bytes)
-            .inspect(|_| line += 1)
-            .last()
-            .map(|i| i + 1)
-            .unwrap_or(bytes.len());
-
-        // Try to compute a column even if the string isn't valid utf-8.
-        let col = ErrorReportingUtf8Chars::new(&bytes[line_start..]).count();
-        Some(LineCol::zero_based(line, col))
-    }
-
-    pub fn byte_pos(&self, bytes: &[u8]) -> Option<usize> {
-        let line_offset = if let Some(idx) = self.line.checked_sub(1) {
-            memchr::memchr_iter(b'\n', bytes).nth(idx).map(|i| i + 1)?
-        } else {
-            0
-        };
-
-        let col_offset = col_offset(line_offset, self.col, bytes)?;
-        let pos = line_offset + col_offset;
-        Some(pos)
-    }
-
-    pub fn byte_range(
-        range: std::ops::Range<Self>,
-        bytes: &[u8],
-    ) -> Option<std::ops::Range<usize>> {
-        let mut line_iter = memchr::memchr_iter(b'\n', bytes);
-        let start_line_offset = if let Some(idx) = range.start.line.checked_sub(1) {
-            line_iter.nth(idx).map(|i| i + 1)?
-        } else {
-            0
-        };
-        let line_delta = range.end.line - range.start.line;
-        let end_line_offset = if let Some(idx) = line_delta.checked_sub(1) {
-            line_iter.nth(idx).map(|i| i + 1)?
-        } else {
-            start_line_offset
-        };
-
-        let start_col_offset = col_offset(start_line_offset, range.start.col, bytes)?;
-        let end_col_offset = col_offset(end_line_offset, range.end.col, bytes)?;
-
-        let start = start_line_offset + start_col_offset;
-        let end = end_line_offset + end_col_offset;
-        Some(start..end)
-    }
-
-    pub fn numbers(&self) -> (usize, usize) {
-        (self.line + 1, self.col + 1)
-    }
-}
-
-fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option<usize> {
-    let line = &bytes[line_offset..];
-    // TODO: streaming-utf8 decoding ignore invalid characters
-    // might neeed to update error reporting too (use utf8_iter)
-    if let Some(idx) = col.checked_sub(1) {
-        // Try to compute position even if the string isn't valid utf-8.
-        let mut iter = ErrorReportingUtf8Chars::new(line);
-        _ = iter.nth(idx)?;
-        Some(line.len() - iter.as_slice().len())
-    } else {
-        Some(0)
-    }
-}
-
 /// A value that can be read from a file.
 #[derive(Debug, Clone, PartialEq, Hash)]
 pub enum Readable {
@@ -1,10 +1,10 @@
 use ecow::{eco_format, EcoVec};
 use typst_syntax::Spanned;
 
-use crate::diag::{At, SourceDiagnostic, SourceResult};
+use crate::diag::{At, ReportPos, SourceDiagnostic, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{func, scope, Str, Value};
-use crate::loading::{Loaded, DataSource, Load, Readable, ReportPos};
+use crate::loading::{DataSource, Load, Loaded, Readable};
 
 /// Reads structured data from a TOML file.
 ///
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Format the user-facing TOML error message.
|
/// Format the user-facing TOML error message.
|
||||||
fn format_toml_error(data: &Loaded, error: ::toml::de::Error) -> EcoVec<SourceDiagnostic> {
|
fn format_toml_error(
|
||||||
|
data: &Loaded,
|
||||||
|
error: ::toml::de::Error,
|
||||||
|
) -> EcoVec<SourceDiagnostic> {
|
||||||
let pos = error.span().map(ReportPos::Range).unwrap_or_default();
|
let pos = error.span().map(ReportPos::Range).unwrap_or_default();
|
||||||
data.err_in_text(pos, "failed to parse TOML", error.message())
|
data.err_in_text(pos, "failed to parse TOML", error.message())
|
||||||
}
|
}
|
||||||
|
@@ -5,7 +5,7 @@ use typst_syntax::Spanned;
 use crate::diag::{format_xml_like_error, SourceDiagnostic, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
-use crate::loading::{Loaded, DataSource, Load, Readable};
+use crate::loading::{DataSource, Load, Loaded, Readable};
 
 /// Reads structured data from an XML file.
 ///
@@ -1,10 +1,10 @@
 use ecow::{eco_format, EcoVec};
 use typst_syntax::Spanned;
 
-use crate::diag::{At, SourceDiagnostic, SourceResult};
+use crate::diag::{At, LineCol, ReportPos, SourceDiagnostic, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{func, scope, Str, Value};
-use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos};
+use crate::loading::{DataSource, Load, Loaded, Readable};
 
 /// Reads structured data from a YAML file.
 ///
@@ -20,7 +20,8 @@ use typst_syntax::{Span, Spanned};
 use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr};
 
 use crate::diag::{
-    bail, error, At, HintedStrResult, SourceDiagnostic, SourceResult, StrResult,
+    bail, error, At, HintedStrResult, ReportPos, SourceDiagnostic, SourceResult,
+    StrResult,
 };
 use crate::engine::{Engine, Sink};
 use crate::foundations::{
@@ -33,7 +34,7 @@ use crate::layout::{
     BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem,
     Sides, Sizing, TrackSizings,
 };
-use crate::loading::{format_yaml_error, Loaded, DataSource, Load, LoadSource, ReportPos};
+use crate::loading::{format_yaml_error, DataSource, Load, LoadSource, Loaded};
 use crate::model::{
     CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem,
     Url,
@@ -480,7 +481,9 @@ impl CslStyle {
                 typst_utils::hash128(&(TypeId::of::<Bytes>(), data)),
             )))
         })
-        .map_err(|err| data.err_in_text(ReportPos::None, "failed to load CSL style", err))
+        .map_err(|err| {
+            data.err_in_text(ReportPos::None, "failed to load CSL style", err)
+        })
     }
 
     /// Get the underlying independent style.
@@ -11,7 +11,7 @@ use typst_utils::ManuallyHash;
 use unicode_segmentation::UnicodeSegmentation;
 
 use super::Lang;
-use crate::diag::{SourceDiagnostic, SourceResult};
+use crate::diag::{LineCol, ReportPos, SourceDiagnostic, SourceResult};
 use crate::engine::Engine;
 use crate::foundations::{
     cast, elem, scope, Content, Derived, NativeElement, OneOrMultiple, Packed, PlainText,
@@ -19,7 +19,7 @@ use crate::foundations::{
 };
 use crate::html::{tag, HtmlElem};
 use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
-use crate::loading::{DataSource, LineCol, Load, Loaded, ReportPos};
+use crate::loading::{DataSource, Load, Loaded};
 use crate::model::{Figurable, ParElem};
 use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize};
 use crate::visualize::Color;
@@ -15,6 +15,7 @@ readme = { workspace = true }
 [dependencies]
 typst-timing = { workspace = true }
 typst-utils = { workspace = true }
+comemo = { workspace = true }
 ecow = { workspace = true }
 serde = { workspace = true }
 toml = { workspace = true }
@@ -7,6 +7,7 @@ mod file;
 mod highlight;
 mod kind;
 mod lexer;
+mod lines;
 mod node;
 mod parser;
 mod path;
@@ -22,6 +23,7 @@ pub use self::lexer::{
     is_id_continue, is_id_start, is_ident, is_newline, is_valid_label_literal_id,
     link_prefix, split_newlines,
 };
+pub use self::lines::Lines;
 pub use self::node::{LinkedChildren, LinkedNode, Side, SyntaxError, SyntaxNode};
 pub use self::parser::{parse, parse_code, parse_math};
 pub use self::path::VirtualPath;

crates/typst-syntax/src/lines.rs (new file, 407 lines)
@@ -0,0 +1,407 @@
+use std::hash::{Hash, Hasher};
+use std::iter::zip;
+use std::ops::Range;
+use std::str::Utf8Error;
+use std::sync::Arc;
+
+use crate::is_newline;
+
+/// Metadata about lines.
+#[derive(Clone)]
+pub struct Lines<S>(Arc<Repr<S>>);
+
+#[derive(Clone)]
+struct Repr<S> {
+    lines: Vec<Line>,
+    str: S,
+}
+
+/// Metadata about a line.
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub struct Line {
+    /// The UTF-8 byte offset where the line starts.
+    byte_idx: usize,
+    /// The UTF-16 codepoint offset where the line starts.
+    utf16_idx: usize,
+}
+
+impl<S: AsRef<str>> Lines<S> {
+    /// TODO: memoize this?
+    pub fn new(str: S) -> Self {
+        let lines = lines(str.as_ref());
+        Lines(Arc::new(Repr { lines, str }))
+    }
+
+    pub fn text(&self) -> &str {
+        self.0.str.as_ref()
+    }
+
+    /// Get the length of the file in UTF-8 encoded bytes.
+    pub fn len_bytes(&self) -> usize {
+        self.0.str.as_ref().len()
+    }
+
+    /// Get the length of the file in UTF-16 code units.
+    pub fn len_utf16(&self) -> usize {
+        let last = self.0.lines.last().unwrap();
+        last.utf16_idx + len_utf16(&self.text()[last.byte_idx..])
+    }
+
+    /// Get the length of the file in lines.
+    pub fn len_lines(&self) -> usize {
+        self.0.lines.len()
+    }
+
+    /// Return the index of the UTF-16 code unit at the byte index.
+    pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
+        let line_idx = self.byte_to_line(byte_idx)?;
+        let line = self.0.lines.get(line_idx)?;
+        let head = self.text().get(line.byte_idx..byte_idx)?;
+        Some(line.utf16_idx + len_utf16(head))
+    }
+
+    /// Return the index of the line that contains the given byte index.
+    pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
+        (byte_idx <= self.text().len()).then(|| {
+            match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
+                Ok(i) => i,
+                Err(i) => i - 1,
+            }
+        })
+    }
+
+    /// Return the index of the column at the byte index.
+    ///
+    /// The column is defined as the number of characters in the line before the
+    /// byte index.
+    pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
+        let line = self.byte_to_line(byte_idx)?;
+        let start = self.line_to_byte(line)?;
+        let head = self.text().get(start..byte_idx)?;
+        Some(head.chars().count())
+    }
+
+    /// Return the index of the line and column at the byte index.
+    pub fn byte_to_line_column(&self, byte_idx: usize) -> Option<(usize, usize)> {
+        let line = self.byte_to_line(byte_idx)?;
+        let start = self.line_to_byte(line)?;
+        let head = self.text().get(start..byte_idx)?;
+        let col = head.chars().count();
+        Some((line, col))
+    }
+
+    /// Return the byte index at the UTF-16 code unit.
+    pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
+        let line = self.0.lines.get(
+            match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
+                Ok(i) => i,
+                Err(i) => i - 1,
+            },
+        )?;
+
+        let text = self.text();
+        let mut k = line.utf16_idx;
+        for (i, c) in text[line.byte_idx..].char_indices() {
+            if k >= utf16_idx {
+                return Some(line.byte_idx + i);
+            }
+            k += c.len_utf16();
+        }
+
+        (k == utf16_idx).then_some(text.len())
+    }
+
+    /// Return the byte position at which the given line starts.
+    pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
+        self.0.lines.get(line_idx).map(|line| line.byte_idx)
+    }
+
+    /// Return the range which encloses the given line.
+    pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
+        let start = self.line_to_byte(line_idx)?;
+        let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text().len());
+        Some(start..end)
+    }
+
+    /// Return the byte index of the given (line, column) pair.
+    ///
+    /// The column defines the number of characters to go beyond the start of
+    /// the line.
+    pub fn line_column_to_byte(
+        &self,
+        line_idx: usize,
+        column_idx: usize,
+    ) -> Option<usize> {
+        let range = self.line_to_range(line_idx)?;
+        let line = self.text().get(range.clone())?;
+        let mut chars = line.chars();
+        for _ in 0..column_idx {
+            chars.next();
+        }
+        Some(range.start + (line.len() - chars.as_str().len()))
+    }
+}
+
+impl Lines<String> {
+    /// Tries to convert the bytes
+    #[comemo::memoize]
+    pub fn from_bytes(bytes: &[u8]) -> Result<Lines<String>, Utf8Error> {
+        let str = std::str::from_utf8(bytes)?;
+        Ok(Lines::new(str.to_string()))
+    }
+
+    /// Fully replace the source text.
+    ///
+    /// This performs a naive (suffix/prefix-based) diff of the old and new text
+    /// to produce the smallest single edit that transforms old into new and
+    /// then calls [`edit`](Self::edit) with it.
+    ///
+    /// Returns whether any changes were made.
+    pub fn replace(&mut self, new: &str) -> bool {
+        let Some((prefix, suffix)) = self.replacement_range(new) else {
+            return false;
+        };
+
+        let old = self.text();
+        let replace = prefix..old.len() - suffix;
+        let with = &new[prefix..new.len() - suffix];
+        self.edit(replace, with);
+
+        true
+    }
+
+    /// Returns the common prefix and suffix lengths.
+    /// Returns [`None`] if the old and new strings are equal.
+    pub fn replacement_range(&self, new: &str) -> Option<(usize, usize)> {
+        let old = self.text();
+
+        let mut prefix =
+            zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
+
+        if prefix == old.len() && prefix == new.len() {
+            return None;
+        }
+
+        while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
+            prefix -= 1;
+        }
+
+        let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
+            .take_while(|(x, y)| x == y)
+            .count();
+
+        while !old.is_char_boundary(old.len() - suffix)
+            || !new.is_char_boundary(new.len() - suffix)
+        {
+            suffix += 1;
+        }
+
+        Some((prefix, suffix))
+    }
+
+    /// Edit the source file by replacing the given range.
+    ///
+    /// Returns the range in the new source that was ultimately reparsed.
+    ///
+    /// The method panics if the `replace` range is out of bounds.
+    #[track_caller]
+    pub fn edit(&mut self, replace: Range<usize>, with: &str) {
+        let start_byte = replace.start;
+        let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
+        let line = self.byte_to_line(start_byte).unwrap();
+
+        let inner = Arc::make_mut(&mut self.0);
+
+        // Update the text itself.
+        inner.str.replace_range(replace.clone(), with);
+
+        // Remove invalidated line starts.
+        inner.lines.truncate(line + 1);
+
+        // Handle adjoining of \r and \n.
+        if inner.str[..start_byte].ends_with('\r') && with.starts_with('\n') {
+            inner.lines.pop();
+        }
+
+        // Recalculate the line starts after the edit.
+        inner
+            .lines
+            .extend(lines_from(start_byte, start_utf16, &inner.str[start_byte..]));
+    }
+}
+
+/// Create a line vector.
+fn lines(text: &str) -> Vec<Line> {
+    std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
+        .chain(lines_from(0, 0, text))
+        .collect()
+}
+
+/// Compute a line iterator from an offset.
+fn lines_from(
+    byte_offset: usize,
+    utf16_offset: usize,
+    text: &str,
+) -> impl Iterator<Item = Line> + '_ {
+    let mut s = unscanny::Scanner::new(text);
+    let mut utf16_idx = utf16_offset;
+
+    std::iter::from_fn(move || {
+        s.eat_until(|c: char| {
+            utf16_idx += c.len_utf16();
+            is_newline(c)
+        });
+
+        if s.done() {
+            return None;
+        }
+
+        if s.eat() == Some('\r') && s.eat_if('\n') {
+            utf16_idx += 1;
+        }
+
+        Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
+    })
+}
+
+/// The number of code units this string would use if it was encoded in
+/// UTF16. This runs in linear time.
+fn len_utf16(string: &str) -> usize {
+    string.chars().map(char::len_utf16).sum()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
+
+    #[test]
+    fn test_source_file_new() {
+        let lines = Lines::new(TEST);
+        assert_eq!(
+            lines.0.lines,
+            [
+                Line { byte_idx: 0, utf16_idx: 0 },
+                Line { byte_idx: 7, utf16_idx: 6 },
+                Line { byte_idx: 15, utf16_idx: 12 },
+                Line { byte_idx: 18, utf16_idx: 15 },
+            ]
+        );
+    }
+
+    #[test]
+    fn test_source_file_pos_to_line() {
+        let lines = Lines::new(TEST);
+        assert_eq!(lines.byte_to_line(0), Some(0));
+        assert_eq!(lines.byte_to_line(2), Some(0));
+        assert_eq!(lines.byte_to_line(6), Some(0));
+        assert_eq!(lines.byte_to_line(7), Some(1));
+        assert_eq!(lines.byte_to_line(8), Some(1));
+        assert_eq!(lines.byte_to_line(12), Some(1));
+        assert_eq!(lines.byte_to_line(21), Some(3));
+        assert_eq!(lines.byte_to_line(22), None);
+    }
+
+    #[test]
+    fn test_source_file_pos_to_column() {
+        let lines = Lines::new(TEST);
+        assert_eq!(lines.byte_to_column(0), Some(0));
+        assert_eq!(lines.byte_to_column(2), Some(1));
+        assert_eq!(lines.byte_to_column(6), Some(5));
+        assert_eq!(lines.byte_to_column(7), Some(0));
+        assert_eq!(lines.byte_to_column(8), Some(1));
+        assert_eq!(lines.byte_to_column(12), Some(2));
+    }
+
+    #[test]
+    fn test_source_file_utf16() {
+        #[track_caller]
+        fn roundtrip(lines: &Lines<&str>, byte_idx: usize, utf16_idx: usize) {
+            let middle = lines.byte_to_utf16(byte_idx).unwrap();
+            let result = lines.utf16_to_byte(middle).unwrap();
+            assert_eq!(middle, utf16_idx);
+            assert_eq!(result, byte_idx);
+        }
+
+        let lines = Lines::new(TEST);
+        roundtrip(&lines, 0, 0);
+        roundtrip(&lines, 2, 1);
+        roundtrip(&lines, 3, 2);
+        roundtrip(&lines, 8, 7);
+        roundtrip(&lines, 12, 9);
+        roundtrip(&lines, 21, 18);
+        assert_eq!(lines.byte_to_utf16(22), None);
+        assert_eq!(lines.utf16_to_byte(19), None);
+    }
+
+    #[test]
+    fn test_source_file_roundtrip() {
+        #[track_caller]
+        fn roundtrip(lines: &Lines<&str>, byte_idx: usize) {
+            let line = lines.byte_to_line(byte_idx).unwrap();
+            let column = lines.byte_to_column(byte_idx).unwrap();
+            let result = lines.line_column_to_byte(line, column).unwrap();
+            assert_eq!(result, byte_idx);
+        }
+
+        let lines = Lines::new(TEST);
+        roundtrip(&lines, 0);
+        roundtrip(&lines, 7);
+        roundtrip(&lines, 12);
+        roundtrip(&lines, 21);
+    }
+
+    #[test]
+    fn test_source_file_edit() {
+        // This tests only the non-parser parts. The reparsing itself is
+        // tested separately.
+        #[track_caller]
+        fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
+            let reference = Lines::new(after);
+
+            let mut edited = Lines::new(prev.to_string());
+            edited.edit(range.clone(), with);
+            assert_eq!(edited.text(), reference.text());
+            assert_eq!(edited.0.lines, reference.0.lines);
+
+            let mut replaced = Lines::new(prev.to_string());
+            replaced.replace(&{
+                let mut s = prev.to_string();
+                s.replace_range(range, with);
+                s
+            });
+            assert_eq!(replaced.text(), reference.text());
+            assert_eq!(replaced.0.lines, reference.0.lines);
+        }
+
+        // Test inserting at the beginning.
+        test("abc\n", 0..0, "hi\n", "hi\nabc\n");
+        test("\nabc", 0..0, "hi\r", "hi\r\nabc");
+
+        // Test editing in the middle.
+        test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
+
+        // Test appending.
+        test("abc\ndef", 7..7, "hi", "abc\ndefhi");
+        test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
+
+        // Test appending with adjoining \r and \n.
+        test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
+
+        // Test removing everything.
+        test(TEST, 0..21, "", "");
+    }
+}
+
+impl<S: Hash> Hash for Lines<S> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.0.str.hash(state);
+    }
+}
+
+impl<S: AsRef<str>> AsRef<str> for Lines<S> {
+    fn as_ref(&self) -> &str {
+        self.0.str.as_ref()
+    }
+}
@@ -2,14 +2,14 @@
 
 use std::fmt::{self, Debug, Formatter};
 use std::hash::{Hash, Hasher};
-use std::iter::zip;
 use std::ops::Range;
 use std::sync::Arc;
 
 use typst_utils::LazyHash;
 
+use crate::lines::Lines;
 use crate::reparser::reparse;
-use crate::{is_newline, parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
+use crate::{parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
 
 /// A source file.
 ///
@@ -24,9 +24,8 @@ pub struct Source(Arc<Repr>);
 #[derive(Clone)]
 struct Repr {
     id: FileId,
-    text: LazyHash<String>,
     root: LazyHash<SyntaxNode>,
-    lines: Vec<Line>,
+    lines: LazyHash<Lines<String>>,
 }
 
 impl Source {
@@ -37,8 +36,7 @@ impl Source {
         root.numberize(id, Span::FULL).unwrap();
         Self(Arc::new(Repr {
             id,
-            lines: lines(&text),
-            text: LazyHash::new(text),
+            lines: LazyHash::new(Lines::new(text)),
             root: LazyHash::new(root),
         }))
     }
@@ -58,9 +56,14 @@ impl Source {
         self.0.id
     }
 
+    /// The whole source as a string slice.
+    pub fn lines(&self) -> Lines<String> {
+        Lines::clone(&self.0.lines)
+    }
+
     /// The whole source as a string slice.
     pub fn text(&self) -> &str {
-        &self.0.text
+        &self.0.lines.text()
     }
 
     /// Slice out the part of the source code enclosed by the range.
@@ -77,29 +80,12 @@ impl Source {
     /// Returns the range in the new source that was ultimately reparsed.
     pub fn replace(&mut self, new: &str) -> Range<usize> {
         let _scope = typst_timing::TimingScope::new("replace source");
-        let old = self.text();
 
-        let mut prefix =
-            zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
-
-        if prefix == old.len() && prefix == new.len() {
+        let Some((prefix, suffix)) = self.0.lines.replacement_range(new) else {
             return 0..0;
-        }
-
-        while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
-            prefix -= 1;
-        }
-
-        let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
-            .take_while(|(x, y)| x == y)
-            .count();
-
-        while !old.is_char_boundary(old.len() - suffix)
-            || !new.is_char_boundary(new.len() - suffix)
-        {
-            suffix += 1;
-        }
+        };
 
+        let old = self.text();
         let replace = prefix..old.len() - suffix;
         let with = &new[prefix..new.len() - suffix];
         self.edit(replace, with)
@@ -112,48 +98,28 @@ impl Source {
     /// The method panics if the `replace` range is out of bounds.
     #[track_caller]
    pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> {
-        let start_byte = replace.start;
-        let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
-        let line = self.byte_to_line(start_byte).unwrap();
-
         let inner = Arc::make_mut(&mut self.0);
 
-        // Update the text itself.
-        inner.text.replace_range(replace.clone(), with);
+        // Update the text and lines.
+        inner.lines.edit(replace.clone(), with);
 
-        // Remove invalidated line starts.
-        inner.lines.truncate(line + 1);
-
-        // Handle adjoining of \r and \n.
-        if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') {
-            inner.lines.pop();
-        }
-
-        // Recalculate the line starts after the edit.
-        inner.lines.extend(lines_from(
-            start_byte,
-            start_utf16,
-            &inner.text[start_byte..],
-        ));
-
         // Incrementally reparse the replaced range.
-        reparse(&mut inner.root, &inner.text, replace, with.len())
+        reparse(&mut inner.root, inner.lines.text(), replace, with.len())
     }
 
     /// Get the length of the file in UTF-8 encoded bytes.
     pub fn len_bytes(&self) -> usize {
-        self.text().len()
+        self.0.lines.len_bytes()
     }
 
     /// Get the length of the file in UTF-16 code units.
     pub fn len_utf16(&self) -> usize {
-        let last = self.0.lines.last().unwrap();
-        last.utf16_idx + len_utf16(&self.0.text[last.byte_idx..])
+        self.0.lines.len_utf16()
     }
 
     /// Get the length of the file in lines.
     pub fn len_lines(&self) -> usize {
-        self.0.lines.len()
+        self.0.lines.len_lines()
    }
 
     /// Find the node with the given span.
@@ -171,85 +137,6 @@ impl Source {
     pub fn range(&self, span: Span) -> Option<Range<usize>> {
         Some(self.find(span)?.range())
     }
-
-    /// Return the index of the UTF-16 code unit at the byte index.
-    pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
-        let line_idx = self.byte_to_line(byte_idx)?;
-        let line = self.0.lines.get(line_idx)?;
-        let head = self.0.text.get(line.byte_idx..byte_idx)?;
-        Some(line.utf16_idx + len_utf16(head))
-    }
-
-    /// Return the index of the line that contains the given byte index.
-    pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
-        (byte_idx <= self.0.text.len()).then(|| {
-            match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
-                Ok(i) => i,
-                Err(i) => i - 1,
-            }
-        })
-    }
-
-    /// Return the index of the column at the byte index.
-    ///
-    /// The column is defined as the number of characters in the line before the
-    /// byte index.
-    pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
-        let line = self.byte_to_line(byte_idx)?;
-        let start = self.line_to_byte(line)?;
-        let head = self.get(start..byte_idx)?;
-        Some(head.chars().count())
-    }
-
-    /// Return the byte index at the UTF-16 code unit.
-    pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
-        let line = self.0.lines.get(
-            match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
-                Ok(i) => i,
-                Err(i) => i - 1,
-            },
-        )?;
-
-        let mut k = line.utf16_idx;
-        for (i, c) in self.0.text[line.byte_idx..].char_indices() {
-            if k >= utf16_idx {
-                return Some(line.byte_idx + i);
-            }
-            k += c.len_utf16();
-        }
-
-        (k == utf16_idx).then_some(self.0.text.len())
-    }
-
-    /// Return the byte position at which the given line starts.
-    pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
-        self.0.lines.get(line_idx).map(|line| line.byte_idx)
-    }
-
-    /// Return the range which encloses the given line.
-    pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
-        let start = self.line_to_byte(line_idx)?;
-        let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len());
-        Some(start..end)
-    }
-
-    /// Return the byte index of the given (line, column) pair.
-    ///
-    /// The column defines the number of characters to go beyond the start of
-    /// the line.
-    pub fn line_column_to_byte(
-        &self,
-        line_idx: usize,
-        column_idx: usize,
-    ) -> Option<usize> {
-        let range = self.line_to_range(line_idx)?;
-        let line = self.get(range.clone())?;
-        let mut chars = line.chars();
-        for _ in 0..column_idx {
-            chars.next();
-        }
-        Some(range.start + (line.len() - chars.as_str().len()))
-    }
 }
 
 impl Debug for Source {
@@ -261,7 +148,7 @@ impl Debug for Source {
 impl Hash for Source {
     fn hash<H: Hasher>(&self, state: &mut H) {
         self.0.id.hash(state);
-        self.0.text.hash(state);
+        self.0.lines.hash(state);
         self.0.root.hash(state);
     }
 }
@ -271,176 +158,3 @@ impl AsRef<str> for Source {
         self.text()
     }
 }
-
-/// Metadata about a line.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-struct Line {
-    /// The UTF-8 byte offset where the line starts.
-    byte_idx: usize,
-    /// The UTF-16 codepoint offset where the line starts.
-    utf16_idx: usize,
-}
-
-/// Create a line vector.
-fn lines(text: &str) -> Vec<Line> {
-    std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
-        .chain(lines_from(0, 0, text))
-        .collect()
-}
-
-/// Compute a line iterator from an offset.
-fn lines_from(
-    byte_offset: usize,
-    utf16_offset: usize,
-    text: &str,
-) -> impl Iterator<Item = Line> + '_ {
-    let mut s = unscanny::Scanner::new(text);
-    let mut utf16_idx = utf16_offset;
-
-    std::iter::from_fn(move || {
-        s.eat_until(|c: char| {
-            utf16_idx += c.len_utf16();
-            is_newline(c)
-        });
-
-        if s.done() {
-            return None;
-        }
-
-        if s.eat() == Some('\r') && s.eat_if('\n') {
-            utf16_idx += 1;
-        }
-
-        Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
-    })
-}
-
-/// The number of code units this string would use if it was encoded in
-/// UTF16. This runs in linear time.
-fn len_utf16(string: &str) -> usize {
-    string.chars().map(char::len_utf16).sum()
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
-
-    #[test]
-    fn test_source_file_new() {
-        let source = Source::detached(TEST);
-        assert_eq!(
-            source.0.lines,
-            [
-                Line { byte_idx: 0, utf16_idx: 0 },
-                Line { byte_idx: 7, utf16_idx: 6 },
-                Line { byte_idx: 15, utf16_idx: 12 },
-                Line { byte_idx: 18, utf16_idx: 15 },
-            ]
-        );
-    }
-
-    #[test]
-    fn test_source_file_pos_to_line() {
-        let source = Source::detached(TEST);
-        assert_eq!(source.byte_to_line(0), Some(0));
-        assert_eq!(source.byte_to_line(2), Some(0));
-        assert_eq!(source.byte_to_line(6), Some(0));
-        assert_eq!(source.byte_to_line(7), Some(1));
-        assert_eq!(source.byte_to_line(8), Some(1));
-        assert_eq!(source.byte_to_line(12), Some(1));
-        assert_eq!(source.byte_to_line(21), Some(3));
-        assert_eq!(source.byte_to_line(22), None);
-    }
-
-    #[test]
-    fn test_source_file_pos_to_column() {
-        let source = Source::detached(TEST);
-        assert_eq!(source.byte_to_column(0), Some(0));
-        assert_eq!(source.byte_to_column(2), Some(1));
-        assert_eq!(source.byte_to_column(6), Some(5));
-        assert_eq!(source.byte_to_column(7), Some(0));
-        assert_eq!(source.byte_to_column(8), Some(1));
-        assert_eq!(source.byte_to_column(12), Some(2));
-    }
-
-    #[test]
-    fn test_source_file_utf16() {
-        #[track_caller]
-        fn roundtrip(source: &Source, byte_idx: usize, utf16_idx: usize) {
-            let middle = source.byte_to_utf16(byte_idx).unwrap();
-            let result = source.utf16_to_byte(middle).unwrap();
-            assert_eq!(middle, utf16_idx);
-            assert_eq!(result, byte_idx);
-        }
-
-        let source = Source::detached(TEST);
-        roundtrip(&source, 0, 0);
-        roundtrip(&source, 2, 1);
-        roundtrip(&source, 3, 2);
-        roundtrip(&source, 8, 7);
-        roundtrip(&source, 12, 9);
-        roundtrip(&source, 21, 18);
-        assert_eq!(source.byte_to_utf16(22), None);
-        assert_eq!(source.utf16_to_byte(19), None);
-    }
-
-    #[test]
-    fn test_source_file_roundtrip() {
-        #[track_caller]
-        fn roundtrip(source: &Source, byte_idx: usize) {
-            let line = source.byte_to_line(byte_idx).unwrap();
-            let column = source.byte_to_column(byte_idx).unwrap();
-            let result = source.line_column_to_byte(line, column).unwrap();
-            assert_eq!(result, byte_idx);
-        }
-
-        let source = Source::detached(TEST);
-        roundtrip(&source, 0);
-        roundtrip(&source, 7);
-        roundtrip(&source, 12);
-        roundtrip(&source, 21);
-    }
-
-    #[test]
-    fn test_source_file_edit() {
-        // This tests only the non-parser parts. The reparsing itself is
-        // tested separately.
-        #[track_caller]
-        fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
-            let reference = Source::detached(after);
-
-            let mut edited = Source::detached(prev);
-            edited.edit(range.clone(), with);
-            assert_eq!(edited.text(), reference.text());
-            assert_eq!(edited.0.lines, reference.0.lines);
-
-            let mut replaced = Source::detached(prev);
-            replaced.replace(&{
-                let mut s = prev.to_string();
-                s.replace_range(range, with);
-                s
-            });
-            assert_eq!(replaced.text(), reference.text());
-            assert_eq!(replaced.0.lines, reference.0.lines);
-        }
-
-        // Test inserting at the beginning.
-        test("abc\n", 0..0, "hi\n", "hi\nabc\n");
-        test("\nabc", 0..0, "hi\r", "hi\r\nabc");
-
-        // Test editing in the middle.
-        test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
-
-        // Test appending.
-        test("abc\ndef", 7..7, "hi", "abc\ndefhi");
-        test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
-
-        // Test appending with adjoining \r and \n.
-        test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
-
-        // Test removing everything.
-        test(TEST, 0..21, "", "");
-    }
-}
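The removed `Line` records keep a byte offset and a UTF-16 offset per line. Extending the earlier sketch with the UTF-16 side, a hedged standalone version of that bookkeeping looks as follows (`LineStart` and `line_table` are hypothetical names; only `\n`, `\r`, and `\r\n` are handled, unlike the removed `is_newline`-based scanner).

/// One entry per line, as in the removed `Line` struct.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LineStart {
    /// UTF-8 byte offset where the line starts.
    byte_idx: usize,
    /// UTF-16 code unit offset where the line starts.
    utf16_idx: usize,
}

/// Walk the text once, tracking both offsets.
fn line_table(text: &str) -> Vec<LineStart> {
    let mut out = vec![LineStart { byte_idx: 0, utf16_idx: 0 }];
    let mut utf16_idx = 0;
    let mut iter = text.char_indices().peekable();
    while let Some((i, c)) = iter.next() {
        utf16_idx += c.len_utf16();
        let mut next_byte = i + c.len_utf8();
        let newline = match c {
            '\n' => true,
            '\r' => {
                // "\r\n" counts as a single line break.
                if let Some(&(j, '\n')) = iter.peek() {
                    iter.next();
                    utf16_idx += 1;
                    next_byte = j + 1;
                }
                true
            }
            _ => false,
        };
        if newline {
            out.push(LineStart { byte_idx: next_byte, utf16_idx });
        }
    }
    out
}

On the `TEST` constant above this produces `(0, 0)`, `(7, 6)`, `(15, 12)`, and `(18, 15)`, matching the offsets asserted in `test_source_file_new`.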
@ -6,9 +6,11 @@ use std::str::FromStr;
 use std::sync::LazyLock;

 use ecow::{eco_format, EcoString};
-use typst::loading::LineCol;
+use typst::diag::LineCol;
 use typst_syntax::package::PackageVersion;
-use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
+use typst_syntax::{
+    is_id_continue, is_ident, is_newline, FileId, Lines, Source, VirtualPath,
+};
 use unscanny::Scanner;

 use crate::world::{read, system_path};
@ -426,11 +428,17 @@ impl<'a> Parser<'a> {
         }

         let start = self.parse_line_col()?;
+        let lines = Lines::from_bytes(text.as_ref()).expect("Errors shouldn't be annotated for files that aren't human readable (not valid utf-8)");
         let range = if self.s.eat_if('-') {
             let end = self.parse_line_col()?;
-            LineCol::byte_range(start..end, &text)
+            let (line, col) = start.indices();
+            let start = lines.line_column_to_byte(line, col);
+            let (line, col) = end.indices();
+            let end = lines.line_column_to_byte(line, col);
+            Option::zip(start, end).map(|(a, b)| a..b)
         } else {
-            start.byte_pos(&text).map(|i| i..i)
+            let (line, col) = start.indices();
+            lines.line_column_to_byte(line, col).map(|i| i..i)
         };
         if range.is_none() {
             self.error("range is out of bounds");
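A hedged usage sketch of the API this hunk switches to. Only what the diff itself shows is assumed: `Lines::from_bytes` fails on input that is not valid UTF-8, `LineCol::indices()` yields zero-based line/column indices, and `line_column_to_byte` returns `Option<usize>`; exact signatures are not guaranteed, and `annotation_byte_range` is a hypothetical helper.

use std::ops::Range;

use typst_syntax::Lines;

/// Resolve a zero-based (line, column) span to a byte range, as the parser
/// above does for annotation ranges. `start` and `end` stand in for the
/// values produced by `LineCol::indices()`.
fn annotation_byte_range(
    bytes: &[u8],
    start: (usize, usize),
    end: (usize, usize),
) -> Option<Range<usize>> {
    // As in the diff: annotated files are expected to be valid UTF-8.
    let lines = Lines::from_bytes(bytes).expect("annotated file must be valid UTF-8");
    let s = lines.line_column_to_byte(start.0, start.1);
    let e = lines.line_column_to_byte(end.0, end.1);
    Option::zip(s, e).map(|(a, b)| a..b)
}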
@ -484,13 +492,13 @@ impl<'a> Parser<'a> {
         let line_idx = (line_idx_in_test + comments).checked_add_signed(line_delta)?;
         let column_idx = if column < 0 {
             // Negative column index is from the back.
-            let range = source.line_to_range(line_idx)?;
+            let range = source.lines().line_to_range(line_idx)?;
             text[range].chars().count().saturating_add_signed(column)
         } else {
             usize::try_from(column).ok()?.checked_sub(1)?
         };

-        source.line_column_to_byte(line_idx, column_idx)
+        source.lines().line_column_to_byte(line_idx, column_idx)
     }

     /// Parse a number.
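The column convention used by the harness above, as a tiny standalone sketch (hypothetical helper name): positive columns are 1-based from the line start, negative columns count characters back from the end of the line.

/// Resolve a possibly negative column specification to a zero-based
/// character index within `line_text` (the full text of the line).
fn resolve_column_index(line_text: &str, column: isize) -> Option<usize> {
    if column < 0 {
        // Count back from the end of the line.
        Some(line_text.chars().count().saturating_add_signed(column))
    } else {
        // 1-based from the start of the line.
        usize::try_from(column).ok()?.checked_sub(1)
    }
}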
@ -7,11 +7,10 @@ use tiny_skia as sk;
 use typst::diag::{SourceDiagnostic, Warned};
 use typst::html::HtmlDocument;
 use typst::layout::{Abs, Frame, FrameItem, PagedDocument, Transform};
-use typst::loading::LineCol;
 use typst::visualize::Color;
 use typst::{Document, World, WorldExt};
 use typst_pdf::PdfOptions;
-use typst_syntax::FileId;
+use typst_syntax::{FileId, Lines};

 use crate::collect::{Attr, FileSize, NoteKind, Test};
 use crate::logger::TestResult;
@ -329,12 +328,12 @@ impl<'a> Runner<'a> {
     fn format_pos(&self, file: FileId, pos: usize) -> String {
         let res = if file != self.test.source.id() {
             let bytes = self.world.file(file).unwrap();
-            LineCol::from_byte_pos(pos, &bytes).map(|l| l.numbers())
+            let lines = Lines::from_bytes(&bytes).unwrap();
+            lines.byte_to_line_column(pos).map(|(line, col)| (line + 1, col + 1))
         } else {
-            let line = self.test.source.byte_to_line(pos).map(|l| l + 1);
-            let col = (self.test.source.byte_to_column(pos))
-                .map(|c| self.test.pos.line + c + 1);
-            Option::zip(line, col)
+            (self.test.source.lines())
+                .byte_to_line_column(pos)
+                .map(|(line, col)| (line + 1, col + 1))
         };
         let Some((line, col)) = res else {
             return "oob".into();
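A hedged sketch of the position formatting above, assuming — from the `+ 1` mapping in the diff — that `byte_to_line_column` returns zero-based `Option<(usize, usize)>`; the `Lines<String>` parameterization and the `human_pos` helper are likewise assumptions, not part of the commit.

use typst_syntax::Lines;

/// Format a byte offset as a 1-based "line:column" string, or "oob" if the
/// offset lies outside the file.
fn human_pos(lines: &Lines<String>, byte_idx: usize) -> String {
    match lines.byte_to_line_column(byte_idx) {
        Some((line, col)) => format!("{}:{}", line + 1, col + 1),
        None => "oob".into(),
    }
}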