Mirror of https://github.com/typst/typst, synced 2025-08-24 19:54:14 +08:00

refactor: factor out a general Lines struct from Source

parent 2e2f646f2a
commit e5d8f02554

2 Cargo.lock (generated)
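The commit factors the line-start bookkeeping (UTF-8 byte and UTF-16 offsets of each line) out of `Source` into a standalone, reusable `Lines<S>` type in `typst-syntax`, so that raw byte buffers loaded at runtime (via `world.file(..)`, CSV/TOML/YAML data, and the like) can also get line/column positions in error reports. As orientation before the diff, here is a minimal usage sketch based only on methods added in this commit; the snippet is illustrative and not part of the diff itself:

    use typst_syntax::Lines;

    // Build line metadata for any string-like buffer.
    let lines = Lines::new(String::from("hello\nworld"));
    assert_eq!(lines.len_lines(), 2);

    // Map a byte offset to a 0-based (line, column) pair and back again.
    let (line, col) = lines.byte_to_line_column(7).unwrap();
    assert_eq!((line, col), (1, 1));
    assert_eq!(lines.line_column_to_byte(line, col), Some(7));

    // Raw bytes work too, as long as they are valid UTF-8.
    let loaded = Lines::from_bytes(b"a\nb").expect("valid utf-8");
    assert_eq!(loaded.len_lines(), 2);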
@@ -3112,7 +3112,6 @@ dependencies = [
"unicode-segmentation",
"unscanny",
"usvg",
"utf8_iter",
"wasmi",
"xmlwriter",
]

@@ -3201,6 +3200,7 @@ dependencies = [
name = "typst-syntax"
version = "0.13.1"
dependencies = [
"comemo",
"ecow",
"serde",
"toml",

@@ -135,7 +135,6 @@ unicode-segmentation = "1"
unscanny = "0.1"
ureq = { version = "2", default-features = false, features = ["native-tls", "gzip", "json"] }
usvg = { version = "0.45", default-features = false, features = ["text"] }
utf8_iter = "1.0.4"
walkdir = "2"
wasmi = "0.40.0"
web-sys = "0.3"

@@ -16,7 +16,7 @@ use typst::diag::{
use typst::foundations::{Datetime, Smart};
use typst::html::HtmlDocument;
use typst::layout::{Frame, Page, PageRanges, PagedDocument};
use typst::syntax::{FileId, Source, Span};
use typst::syntax::{FileId, Lines, Span};
use typst::WorldExt;
use typst_pdf::{PdfOptions, PdfStandards, Timestamp};

@@ -696,7 +696,7 @@ fn label(world: &SystemWorld, span: Span) -> Option<Label<FileId>> {
impl<'a> codespan_reporting::files::Files<'a> for SystemWorld {
type FileId = FileId;
type Name = String;
type Source = Source;
type Source = Lines<String>;

fn name(&'a self, id: FileId) -> CodespanResult<Self::Name> {
let vpath = id.vpath();

@@ -85,6 +85,6 @@ fn resolve_span(world: &SystemWorld, span: Span) -> Option<(String, u32)> {
let id = span.id()?;
let source = world.source(id).ok()?;
let range = source.range(span)?;
let line = source.byte_to_line(range.start)?;
let line = source.lines().byte_to_line(range.start)?;
Some((format!("{id:?}"), line as u32 + 1))
}

@@ -9,7 +9,7 @@ use ecow::{eco_format, EcoString};
use parking_lot::Mutex;
use typst::diag::{FileError, FileResult};
use typst::foundations::{Bytes, Datetime, Dict, IntoValue};
use typst::syntax::{FileId, Source, VirtualPath};
use typst::syntax::{FileId, Lines, Source, VirtualPath};
use typst::text::{Font, FontBook};
use typst::utils::LazyHash;
use typst::{Library, World};
@@ -183,8 +183,18 @@ impl SystemWorld {

/// Lookup a source file by id.
#[track_caller]
pub fn lookup(&self, id: FileId) -> Source {
self.source(id).expect("file id does not point to any source file")
pub fn lookup(&self, id: FileId) -> Lines<String> {
self.slot(id, |slot| {
if let Some(source) = slot.source.get() {
let source = source.as_ref().expect("file is not valid");
source.lines()
} else if let Some(bytes) = slot.file.get() {
let bytes = bytes.as_ref().expect("file is not valid");
Lines::from_bytes(bytes.as_slice()).expect("file is not valid utf-8")
} else {
panic!("file id does not point to any source file");
}
})
}
}

@@ -339,6 +349,11 @@ impl<T: Clone> SlotCell<T> {
self.accessed = false;
}

/// Gets the contents of the cell.
fn get(&self) -> Option<&FileResult<T>> {
self.data.as_ref()
}

/// Gets the contents of the cell or initialize them.
fn get_or_init(
&mut self,

@@ -66,7 +66,6 @@ unicode-normalization = { workspace = true }
unicode-segmentation = { workspace = true }
unscanny = { workspace = true }
usvg = { workspace = true }
utf8_iter = { workspace = true }
wasmi = { workspace = true }
xmlwriter = { workspace = true }

@@ -9,10 +9,10 @@ use std::string::FromUtf8Error;
use comemo::Tracked;
use ecow::{eco_vec, EcoVec};
use typst_syntax::package::{PackageSpec, PackageVersion};
use typst_syntax::{Span, Spanned, SyntaxError};
use typst_syntax::{Lines, Span, Spanned, SyntaxError};

use crate::engine::Engine;
use crate::loading::{Loaded, LineCol};
use crate::loading::{LoadSource, Loaded};
use crate::{World, WorldExt};

/// Early-return with a [`StrResult`] or [`SourceResult`].
@@ -569,6 +569,144 @@ impl From<PackageError> for EcoString {
}
}

impl Loaded {
/// Report an error, possibly in an external file.
pub fn err_in_text(
&self,
pos: impl Into<ReportPos>,
msg: impl std::fmt::Display,
error: impl std::fmt::Display,
) -> EcoVec<SourceDiagnostic> {
let lines = Lines::from_bytes(&self.bytes);
match (self.source.v, lines) {
// Only report an error in an external file,
// if it is human readable (valid utf-8).
(LoadSource::Path(file_id), Ok(lines)) => {
let pos = pos.into();
if let Some(range) = pos.range(&lines) {
let span = Span::from_range(file_id, range);
return eco_vec!(error!(span, "{msg} ({error})"));
}

// Either `ReportPos::None` was provided, or resolving the range
// from the line/column failed. If present report the possibly
// wrong line/column in the error message anyway.
let span = Span::from_range(file_id, 0..self.bytes.len());
let error = if let Some(pair) = pos.line_col(&lines) {
let (line, col) = pair.numbers();
error!(span, "{msg} ({error} at {line}:{col})")
} else {
error!(span, "{msg} ({error})")
};
eco_vec![error]
}
_ => self.err_in_bytes(pos, msg, error),
}
}

/// Report an error, possibly in an external file.
pub fn err_in_bytes(
&self,
pos: impl Into<ReportPos>,
msg: impl std::fmt::Display,
error: impl std::fmt::Display,
) -> EcoVec<SourceDiagnostic> {
let pos = pos.into();
let result = Lines::from_bytes(&self.bytes).ok().and_then(|l| pos.line_col(&l));
let error = if let Some(pair) = result {
let (line, col) = pair.numbers();
error!(self.source.span, "{msg} ({error} at {line}:{col})")
} else {
error!(self.source.span, "{msg} ({error})")
};
eco_vec![error]
}
}

#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum ReportPos {
/// Contains the range, and the 0-based line/column.
Full(std::ops::Range<usize>, LineCol),
/// Contains the range.
Range(std::ops::Range<usize>),
/// Contains the 0-based line/column.
LineCol(LineCol),
#[default]
None,
}

impl From<std::ops::Range<usize>> for ReportPos {
fn from(value: std::ops::Range<usize>) -> Self {
Self::Range(value)
}
}

impl From<LineCol> for ReportPos {
fn from(value: LineCol) -> Self {
Self::LineCol(value)
}
}

impl ReportPos {
fn range(&self, lines: &Lines<String>) -> Option<std::ops::Range<usize>> {
match self {
ReportPos::Full(range, _) => Some(range.clone()),
ReportPos::Range(range) => Some(range.clone()),
&ReportPos::LineCol(pair) => {
let i = lines.line_column_to_byte(pair.line, pair.col)?;
Some(i..i)
}
ReportPos::None => None,
}
}

fn line_col(&self, lines: &Lines<String>) -> Option<LineCol> {
match self {
&ReportPos::Full(_, pair) => Some(pair),
ReportPos::Range(range) => {
let (line, col) = lines.byte_to_line_column(range.start)?;
Some(LineCol::zero_based(line, col))
}
&ReportPos::LineCol(pair) => Some(pair),
ReportPos::None => None,
}
}
}

/// A line/column pair.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LineCol {
/// The 0-based line.
line: usize,
/// The 0-based column.
col: usize,
}

impl LineCol {
/// Constructs the line/column pair from 0-based indices.
pub fn zero_based(line: usize, col: usize) -> Self {
Self { line, col }
}

/// Constructs the line/column pair from 1-based numbers.
pub fn one_based(line: usize, col: usize) -> Self {
Self {
line: line.saturating_sub(1),
col: col.saturating_sub(1),
}
}

/// Returns the 0-based line/column indices.
pub fn indices(&self) -> (usize, usize) {
(self.line, self.col)
}

/// Returns the 1-based line/column numbers.
pub fn numbers(&self) -> (usize, usize) {
(self.line + 1, self.col + 1)
}
}

/// Format a user-facing error message for an XML-like file format.
pub fn format_xml_like_error(
format: &str,

@ -1,10 +1,10 @@
|
||||
use ecow::EcoVec;
|
||||
use typst_syntax::Spanned;
|
||||
|
||||
use crate::diag::{bail, SourceDiagnostic, SourceResult};
|
||||
use crate::diag::{bail, LineCol, ReportPos, SourceDiagnostic, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{cast, func, scope, Array, Dict, IntoValue, Type, Value};
|
||||
use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos};
|
||||
use crate::loading::{DataSource, Load, Loaded, Readable};
|
||||
|
||||
/// Reads structured data from a CSV file.
|
||||
///
|
||||
@ -176,7 +176,9 @@ fn format_csv_error(
|
||||
})
|
||||
.unwrap_or(LineCol::one_based(line, 1).into());
|
||||
match err.kind() {
|
||||
::csv::ErrorKind::Utf8 { .. } => data.err_in_text(pos, msg, "file is not valid utf-8"),
|
||||
::csv::ErrorKind::Utf8 { .. } => {
|
||||
data.err_in_text(pos, msg, "file is not valid utf-8")
|
||||
}
|
||||
::csv::ErrorKind::UnequalLengths { expected_len, len, .. } => {
|
||||
let err =
|
||||
format!("found {len} instead of {expected_len} fields in line {line}");
|
||||
|
@ -1,10 +1,10 @@
|
||||
use ecow::eco_format;
|
||||
use typst_syntax::Spanned;
|
||||
|
||||
use crate::diag::{At, SourceResult};
|
||||
use crate::diag::{At, LineCol, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{func, scope, Str, Value};
|
||||
use crate::loading::{DataSource, LineCol, Load, Readable};
|
||||
use crate::loading::{DataSource, Load, Readable};
|
||||
|
||||
/// Reads structured data from a JSON file.
|
||||
///
|
||||
|
@ -16,9 +16,8 @@ mod xml_;
|
||||
mod yaml_;
|
||||
|
||||
use comemo::Tracked;
|
||||
use ecow::{eco_vec, EcoString, EcoVec};
|
||||
use ecow::EcoString;
|
||||
use typst_syntax::{FileId, Span, Spanned};
|
||||
use utf8_iter::ErrorReportingUtf8Chars;
|
||||
|
||||
pub use self::cbor_::*;
|
||||
pub use self::csv_::*;
|
||||
@ -28,7 +27,7 @@ pub use self::toml_::*;
|
||||
pub use self::xml_::*;
|
||||
pub use self::yaml_::*;
|
||||
|
||||
use crate::diag::{error, At, FileError, SourceDiagnostic, SourceResult};
|
||||
use crate::diag::{At, FileError, SourceResult};
|
||||
use crate::foundations::OneOrMultiple;
|
||||
use crate::foundations::{cast, Bytes, Scope, Str};
|
||||
use crate::World;
|
||||
@ -129,6 +128,7 @@ pub struct Loaded {
|
||||
}
|
||||
|
||||
impl Loaded {
|
||||
/// FIXME: remove this?
|
||||
pub fn dummy() -> Self {
|
||||
Loaded::new(
|
||||
typst_syntax::Spanned::new(LoadSource::Bytes, Span::detached()),
|
||||
@ -142,50 +142,16 @@ impl Loaded {
|
||||
|
||||
pub fn as_str(&self) -> SourceResult<&str> {
|
||||
self.bytes.as_str().map_err(|err| {
|
||||
// TODO: should the error even be reported in the file if it's possibly binary?
|
||||
let start = err.valid_up_to();
|
||||
let end = start + err.error_len().unwrap_or(0);
|
||||
self.err_in_text(start..end, "failed to convert to string", FileError::from(err))
|
||||
// always report this error in the source file.
|
||||
self.err_in_bytes(
|
||||
start..end,
|
||||
"failed to convert to string",
|
||||
FileError::from(err),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Report an error, possibly in an external file.
|
||||
pub fn err_in_text(
|
||||
&self,
|
||||
pos: impl Into<ReportPos>,
|
||||
msg: impl std::fmt::Display,
|
||||
error: impl std::fmt::Display,
|
||||
) -> EcoVec<SourceDiagnostic> {
|
||||
let pos = pos.into();
|
||||
let error = match self.source.v {
|
||||
LoadSource::Path(file_id) => {
|
||||
if let Some(range) = pos.range(self.bytes.as_slice()) {
|
||||
let span = Span::from_range(file_id, range);
|
||||
return eco_vec!(error!(span, "{msg} ({error})"));
|
||||
}
|
||||
|
||||
// Either there was no range provided, or resolving the range
|
||||
// from the line/column failed. If present report the possibly
|
||||
// wrong line/column anyway.
|
||||
let span = Span::from_range(file_id, 0..self.bytes.len());
|
||||
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
|
||||
let (line, col) = pair.numbers();
|
||||
error!(span, "{msg} ({error} at {line}:{col})")
|
||||
} else {
|
||||
error!(span, "{msg} ({error})")
|
||||
}
|
||||
}
|
||||
LoadSource::Bytes => {
|
||||
if let Some(pair) = pos.line_col(self.bytes.as_slice()) {
|
||||
let (line, col) = pair.numbers();
|
||||
error!(self.source.span, "{msg} ({error} at {line}:{col})")
|
||||
} else {
|
||||
error!(self.source.span, "{msg} ({error})")
|
||||
}
|
||||
}
|
||||
};
|
||||
eco_vec![error]
|
||||
}
|
||||
}
|
||||
|
||||
/// A loaded [`DataSource`].
|
||||
@ -195,142 +161,6 @@ pub enum LoadSource {
|
||||
Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub enum ReportPos {
|
||||
/// Contains the range, and the 0-based line/column.
|
||||
Full(std::ops::Range<usize>, LineCol),
|
||||
/// Contains the range.
|
||||
Range(std::ops::Range<usize>),
|
||||
/// Contains the 0-based line/column.
|
||||
LineCol(LineCol),
|
||||
#[default]
|
||||
None,
|
||||
}
|
||||
|
||||
impl From<std::ops::Range<usize>> for ReportPos {
|
||||
fn from(value: std::ops::Range<usize>) -> Self {
|
||||
Self::Range(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LineCol> for ReportPos {
|
||||
fn from(value: LineCol) -> Self {
|
||||
Self::LineCol(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReportPos {
|
||||
fn range(&self, bytes: &[u8]) -> Option<std::ops::Range<usize>> {
|
||||
match self {
|
||||
ReportPos::Full(range, _) => Some(range.clone()),
|
||||
ReportPos::Range(range) => Some(range.clone()),
|
||||
&ReportPos::LineCol(pair) => pair.byte_pos(bytes).map(|i| i..i),
|
||||
ReportPos::None => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn line_col(&self, bytes: &[u8]) -> Option<LineCol> {
|
||||
match self {
|
||||
&ReportPos::Full(_, pair) => Some(pair),
|
||||
ReportPos::Range(range) => LineCol::from_byte_pos(range.start, bytes),
|
||||
&ReportPos::LineCol(pair) => Some(pair),
|
||||
ReportPos::None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct LineCol {
|
||||
/// The 0-based line.
|
||||
line: usize,
|
||||
/// The 0-based column.
|
||||
col: usize,
|
||||
}
|
||||
|
||||
impl LineCol {
|
||||
/// Constructs the line/column pair from 0-based indices.
|
||||
pub fn zero_based(line: usize, col: usize) -> Self {
|
||||
Self { line, col }
|
||||
}
|
||||
|
||||
/// Constructs the line/column pair from 1-based numbers.
|
||||
pub fn one_based(line: usize, col: usize) -> Self {
|
||||
Self {
|
||||
line: line.saturating_sub(1),
|
||||
col: col.saturating_sub(1),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_byte_pos(pos: usize, bytes: &[u8]) -> Option<Self> {
|
||||
let bytes = &bytes[..pos];
|
||||
let mut line = 0;
|
||||
let line_start = memchr::memchr_iter(b'\n', bytes)
|
||||
.inspect(|_| line += 1)
|
||||
.last()
|
||||
.map(|i| i + 1)
|
||||
.unwrap_or(bytes.len());
|
||||
|
||||
// Try to compute a column even if the string isn't valid utf-8.
|
||||
let col = ErrorReportingUtf8Chars::new(&bytes[line_start..]).count();
|
||||
Some(LineCol::zero_based(line, col))
|
||||
}
|
||||
|
||||
pub fn byte_pos(&self, bytes: &[u8]) -> Option<usize> {
|
||||
let line_offset = if let Some(idx) = self.line.checked_sub(1) {
|
||||
memchr::memchr_iter(b'\n', bytes).nth(idx).map(|i| i + 1)?
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let col_offset = col_offset(line_offset, self.col, bytes)?;
|
||||
let pos = line_offset + col_offset;
|
||||
Some(pos)
|
||||
}
|
||||
|
||||
pub fn byte_range(
|
||||
range: std::ops::Range<Self>,
|
||||
bytes: &[u8],
|
||||
) -> Option<std::ops::Range<usize>> {
|
||||
let mut line_iter = memchr::memchr_iter(b'\n', bytes);
|
||||
let start_line_offset = if let Some(idx) = range.start.line.checked_sub(1) {
|
||||
line_iter.nth(idx).map(|i| i + 1)?
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let line_delta = range.end.line - range.start.line;
|
||||
let end_line_offset = if let Some(idx) = line_delta.checked_sub(1) {
|
||||
line_iter.nth(idx).map(|i| i + 1)?
|
||||
} else {
|
||||
start_line_offset
|
||||
};
|
||||
|
||||
let start_col_offset = col_offset(start_line_offset, range.start.col, bytes)?;
|
||||
let end_col_offset = col_offset(end_line_offset, range.end.col, bytes)?;
|
||||
|
||||
let start = start_line_offset + start_col_offset;
|
||||
let end = end_line_offset + end_col_offset;
|
||||
Some(start..end)
|
||||
}
|
||||
|
||||
pub fn numbers(&self) -> (usize, usize) {
|
||||
(self.line + 1, self.col + 1)
|
||||
}
|
||||
}
|
||||
|
||||
fn col_offset(line_offset: usize, col: usize, bytes: &[u8]) -> Option<usize> {
|
||||
let line = &bytes[line_offset..];
|
||||
// TODO: stream UTF-8 decoding and ignore invalid characters;
// might need to update error reporting too (use utf8_iter)
|
||||
if let Some(idx) = col.checked_sub(1) {
|
||||
// Try to compute position even if the string isn't valid utf-8.
|
||||
let mut iter = ErrorReportingUtf8Chars::new(line);
|
||||
_ = iter.nth(idx)?;
|
||||
Some(line.len() - iter.as_slice().len())
|
||||
} else {
|
||||
Some(0)
|
||||
}
|
||||
}
|
||||
|
||||
/// A value that can be read from a file.
|
||||
#[derive(Debug, Clone, PartialEq, Hash)]
|
||||
pub enum Readable {
|
||||
|
@ -1,10 +1,10 @@
|
||||
use ecow::{eco_format, EcoVec};
|
||||
use typst_syntax::Spanned;
|
||||
|
||||
use crate::diag::{At, SourceDiagnostic, SourceResult};
|
||||
use crate::diag::{At, ReportPos, SourceDiagnostic, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{func, scope, Str, Value};
|
||||
use crate::loading::{Loaded, DataSource, Load, Readable, ReportPos};
|
||||
use crate::loading::{DataSource, Load, Loaded, Readable};
|
||||
|
||||
/// Reads structured data from a TOML file.
|
||||
///
|
||||
@ -69,7 +69,10 @@ impl toml {
|
||||
}
|
||||
|
||||
/// Format the user-facing TOML error message.
|
||||
fn format_toml_error(data: &Loaded, error: ::toml::de::Error) -> EcoVec<SourceDiagnostic> {
|
||||
fn format_toml_error(
|
||||
data: &Loaded,
|
||||
error: ::toml::de::Error,
|
||||
) -> EcoVec<SourceDiagnostic> {
|
||||
let pos = error.span().map(ReportPos::Range).unwrap_or_default();
|
||||
data.err_in_text(pos, "failed to parse TOML", error.message())
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ use typst_syntax::Spanned;
|
||||
use crate::diag::{format_xml_like_error, SourceDiagnostic, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
|
||||
use crate::loading::{Loaded, DataSource, Load, Readable};
|
||||
use crate::loading::{DataSource, Load, Loaded, Readable};
|
||||
|
||||
/// Reads structured data from an XML file.
|
||||
///
|
||||
|
@ -1,10 +1,10 @@
|
||||
use ecow::{eco_format, EcoVec};
|
||||
use typst_syntax::Spanned;
|
||||
|
||||
use crate::diag::{At, SourceDiagnostic, SourceResult};
|
||||
use crate::diag::{At, LineCol, ReportPos, SourceDiagnostic, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{func, scope, Str, Value};
|
||||
use crate::loading::{Loaded, DataSource, LineCol, Load, Readable, ReportPos};
|
||||
use crate::loading::{DataSource, Load, Loaded, Readable};
|
||||
|
||||
/// Reads structured data from a YAML file.
|
||||
///
|
||||
|
@ -20,7 +20,8 @@ use typst_syntax::{Span, Spanned};
|
||||
use typst_utils::{Get, ManuallyHash, NonZeroExt, PicoStr};
|
||||
|
||||
use crate::diag::{
|
||||
bail, error, At, HintedStrResult, SourceDiagnostic, SourceResult, StrResult,
|
||||
bail, error, At, HintedStrResult, ReportPos, SourceDiagnostic, SourceResult,
|
||||
StrResult,
|
||||
};
|
||||
use crate::engine::{Engine, Sink};
|
||||
use crate::foundations::{
|
||||
@ -33,7 +34,7 @@ use crate::layout::{
|
||||
BlockBody, BlockElem, Em, GridCell, GridChild, GridElem, GridItem, HElem, PadElem,
|
||||
Sides, Sizing, TrackSizings,
|
||||
};
|
||||
use crate::loading::{format_yaml_error, Loaded, DataSource, Load, LoadSource, ReportPos};
|
||||
use crate::loading::{format_yaml_error, DataSource, Load, LoadSource, Loaded};
|
||||
use crate::model::{
|
||||
CitationForm, CiteGroup, Destination, FootnoteElem, HeadingElem, LinkElem, ParElem,
|
||||
Url,
|
||||
@ -480,7 +481,9 @@ impl CslStyle {
|
||||
typst_utils::hash128(&(TypeId::of::<Bytes>(), data)),
|
||||
)))
|
||||
})
|
||||
.map_err(|err| data.err_in_text(ReportPos::None, "failed to load CSL style", err))
|
||||
.map_err(|err| {
|
||||
data.err_in_text(ReportPos::None, "failed to load CSL style", err)
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the underlying independent style.
|
||||
|
@ -11,7 +11,7 @@ use typst_utils::ManuallyHash;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
use super::Lang;
|
||||
use crate::diag::{SourceDiagnostic, SourceResult};
|
||||
use crate::diag::{LineCol, ReportPos, SourceDiagnostic, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{
|
||||
cast, elem, scope, Content, Derived, NativeElement, OneOrMultiple, Packed, PlainText,
|
||||
@ -19,7 +19,7 @@ use crate::foundations::{
|
||||
};
|
||||
use crate::html::{tag, HtmlElem};
|
||||
use crate::layout::{BlockBody, BlockElem, Em, HAlignment};
|
||||
use crate::loading::{DataSource, LineCol, Load, Loaded, ReportPos};
|
||||
use crate::loading::{DataSource, Load, Loaded};
|
||||
use crate::model::{Figurable, ParElem};
|
||||
use crate::text::{FontFamily, FontList, LinebreakElem, LocalName, TextElem, TextSize};
|
||||
use crate::visualize::Color;
|
||||
|
@@ -15,6 +15,7 @@ readme = { workspace = true }
[dependencies]
typst-timing = { workspace = true }
typst-utils = { workspace = true }
comemo = { workspace = true }
ecow = { workspace = true }
serde = { workspace = true }
toml = { workspace = true }

@@ -7,6 +7,7 @@ mod file;
mod highlight;
mod kind;
mod lexer;
mod lines;
mod node;
mod parser;
mod path;
@@ -22,6 +23,7 @@ pub use self::lexer::{
is_id_continue, is_id_start, is_ident, is_newline, is_valid_label_literal_id,
link_prefix, split_newlines,
};
pub use self::lines::Lines;
pub use self::node::{LinkedChildren, LinkedNode, Side, SyntaxError, SyntaxNode};
pub use self::parser::{parse, parse_code, parse_math};
pub use self::path::VirtualPath;

crates/typst-syntax/src/lines.rs (new file, 407 lines)
@@ -0,0 +1,407 @@
use std::hash::{Hash, Hasher};
|
||||
use std::iter::zip;
|
||||
use std::ops::Range;
|
||||
use std::str::Utf8Error;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::is_newline;
|
||||
|
||||
/// Metadata about lines.
|
||||
#[derive(Clone)]
|
||||
pub struct Lines<S>(Arc<Repr<S>>);
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Repr<S> {
|
||||
lines: Vec<Line>,
|
||||
str: S,
|
||||
}
|
||||
|
||||
/// Metadata about a line.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
pub struct Line {
|
||||
/// The UTF-8 byte offset where the line starts.
|
||||
byte_idx: usize,
|
||||
/// The UTF-16 codepoint offset where the line starts.
|
||||
utf16_idx: usize,
|
||||
}
|
||||
|
||||
impl<S: AsRef<str>> Lines<S> {
|
||||
/// TODO: memoize this?
|
||||
pub fn new(str: S) -> Self {
|
||||
let lines = lines(str.as_ref());
|
||||
Lines(Arc::new(Repr { lines, str }))
|
||||
}
|
||||
|
||||
pub fn text(&self) -> &str {
|
||||
self.0.str.as_ref()
|
||||
}
|
||||
|
||||
/// Get the length of the file in UTF-8 encoded bytes.
|
||||
pub fn len_bytes(&self) -> usize {
|
||||
self.0.str.as_ref().len()
|
||||
}
|
||||
|
||||
/// Get the length of the file in UTF-16 code units.
|
||||
pub fn len_utf16(&self) -> usize {
|
||||
let last = self.0.lines.last().unwrap();
|
||||
last.utf16_idx + len_utf16(&self.text()[last.byte_idx..])
|
||||
}
|
||||
|
||||
/// Get the length of the file in lines.
|
||||
pub fn len_lines(&self) -> usize {
|
||||
self.0.lines.len()
|
||||
}
|
||||
|
||||
/// Return the index of the UTF-16 code unit at the byte index.
|
||||
pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
|
||||
let line_idx = self.byte_to_line(byte_idx)?;
|
||||
let line = self.0.lines.get(line_idx)?;
|
||||
let head = self.text().get(line.byte_idx..byte_idx)?;
|
||||
Some(line.utf16_idx + len_utf16(head))
|
||||
}
|
||||
|
||||
/// Return the index of the line that contains the given byte index.
|
||||
pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
|
||||
(byte_idx <= self.text().len()).then(|| {
|
||||
match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
|
||||
Ok(i) => i,
|
||||
Err(i) => i - 1,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the index of the column at the byte index.
|
||||
///
|
||||
/// The column is defined as the number of characters in the line before the
|
||||
/// byte index.
|
||||
pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
|
||||
let line = self.byte_to_line(byte_idx)?;
|
||||
let start = self.line_to_byte(line)?;
|
||||
let head = self.text().get(start..byte_idx)?;
|
||||
Some(head.chars().count())
|
||||
}
|
||||
|
||||
/// Return the index of the line and column at the byte index.
|
||||
pub fn byte_to_line_column(&self, byte_idx: usize) -> Option<(usize, usize)> {
|
||||
let line = self.byte_to_line(byte_idx)?;
|
||||
let start = self.line_to_byte(line)?;
|
||||
let head = self.text().get(start..byte_idx)?;
|
||||
let col = head.chars().count();
|
||||
Some((line, col))
|
||||
}
|
||||
|
||||
/// Return the byte index at the UTF-16 code unit.
|
||||
pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
|
||||
let line = self.0.lines.get(
|
||||
match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
|
||||
Ok(i) => i,
|
||||
Err(i) => i - 1,
|
||||
},
|
||||
)?;
|
||||
|
||||
let text = self.text();
|
||||
let mut k = line.utf16_idx;
|
||||
for (i, c) in text[line.byte_idx..].char_indices() {
|
||||
if k >= utf16_idx {
|
||||
return Some(line.byte_idx + i);
|
||||
}
|
||||
k += c.len_utf16();
|
||||
}
|
||||
|
||||
(k == utf16_idx).then_some(text.len())
|
||||
}
|
||||
|
||||
/// Return the byte position at which the given line starts.
|
||||
pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
|
||||
self.0.lines.get(line_idx).map(|line| line.byte_idx)
|
||||
}
|
||||
|
||||
/// Return the range which encloses the given line.
|
||||
pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
|
||||
let start = self.line_to_byte(line_idx)?;
|
||||
let end = self.line_to_byte(line_idx + 1).unwrap_or(self.text().len());
|
||||
Some(start..end)
|
||||
}
|
||||
|
||||
/// Return the byte index of the given (line, column) pair.
|
||||
///
|
||||
/// The column defines the number of characters to go beyond the start of
|
||||
/// the line.
|
||||
pub fn line_column_to_byte(
|
||||
&self,
|
||||
line_idx: usize,
|
||||
column_idx: usize,
|
||||
) -> Option<usize> {
|
||||
let range = self.line_to_range(line_idx)?;
|
||||
let line = self.text().get(range.clone())?;
|
||||
let mut chars = line.chars();
|
||||
for _ in 0..column_idx {
|
||||
chars.next();
|
||||
}
|
||||
Some(range.start + (line.len() - chars.as_str().len()))
|
||||
}
|
||||
}
|
||||
|
||||
impl Lines<String> {
|
||||
/// Tries to decode the bytes as UTF-8 and compute their line metadata.
|
||||
#[comemo::memoize]
|
||||
pub fn from_bytes(bytes: &[u8]) -> Result<Lines<String>, Utf8Error> {
|
||||
let str = std::str::from_utf8(bytes)?;
|
||||
Ok(Lines::new(str.to_string()))
|
||||
}
|
||||
|
||||
/// Fully replace the source text.
|
||||
///
|
||||
/// This performs a naive (suffix/prefix-based) diff of the old and new text
|
||||
/// to produce the smallest single edit that transforms old into new and
|
||||
/// then calls [`edit`](Self::edit) with it.
|
||||
///
|
||||
/// Returns whether any changes were made.
|
||||
pub fn replace(&mut self, new: &str) -> bool {
|
||||
let Some((prefix, suffix)) = self.replacement_range(new) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let old = self.text();
|
||||
let replace = prefix..old.len() - suffix;
|
||||
let with = &new[prefix..new.len() - suffix];
|
||||
self.edit(replace, with);
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns the common prefix and suffix lengths.
|
||||
/// Returns [`None`] if the old and new strings are equal.
|
||||
pub fn replacement_range(&self, new: &str) -> Option<(usize, usize)> {
|
||||
let old = self.text();
|
||||
|
||||
let mut prefix =
|
||||
zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
|
||||
|
||||
if prefix == old.len() && prefix == new.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
|
||||
prefix -= 1;
|
||||
}
|
||||
|
||||
let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
|
||||
.take_while(|(x, y)| x == y)
|
||||
.count();
|
||||
|
||||
while !old.is_char_boundary(old.len() - suffix)
|
||||
|| !new.is_char_boundary(new.len() - suffix)
|
||||
{
|
||||
suffix += 1;
|
||||
}
|
||||
|
||||
Some((prefix, suffix))
|
||||
}
|
||||
|
||||
/// Edit the text by replacing the given range and update the line metadata.
///
/// The method panics if the `replace` range is out of bounds.
|
||||
#[track_caller]
|
||||
pub fn edit(&mut self, replace: Range<usize>, with: &str) {
|
||||
let start_byte = replace.start;
|
||||
let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
|
||||
let line = self.byte_to_line(start_byte).unwrap();
|
||||
|
||||
let inner = Arc::make_mut(&mut self.0);
|
||||
|
||||
// Update the text itself.
|
||||
inner.str.replace_range(replace.clone(), with);
|
||||
|
||||
// Remove invalidated line starts.
|
||||
inner.lines.truncate(line + 1);
|
||||
|
||||
// Handle adjoining of \r and \n.
|
||||
if inner.str[..start_byte].ends_with('\r') && with.starts_with('\n') {
|
||||
inner.lines.pop();
|
||||
}
|
||||
|
||||
// Recalculate the line starts after the edit.
|
||||
inner
|
||||
.lines
|
||||
.extend(lines_from(start_byte, start_utf16, &inner.str[start_byte..]));
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a line vector.
|
||||
fn lines(text: &str) -> Vec<Line> {
|
||||
std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
|
||||
.chain(lines_from(0, 0, text))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Compute a line iterator from an offset.
|
||||
fn lines_from(
|
||||
byte_offset: usize,
|
||||
utf16_offset: usize,
|
||||
text: &str,
|
||||
) -> impl Iterator<Item = Line> + '_ {
|
||||
let mut s = unscanny::Scanner::new(text);
|
||||
let mut utf16_idx = utf16_offset;
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
s.eat_until(|c: char| {
|
||||
utf16_idx += c.len_utf16();
|
||||
is_newline(c)
|
||||
});
|
||||
|
||||
if s.done() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if s.eat() == Some('\r') && s.eat_if('\n') {
|
||||
utf16_idx += 1;
|
||||
}
|
||||
|
||||
Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
|
||||
})
|
||||
}
|
||||
|
||||
/// The number of code units this string would use if it was encoded in
|
||||
/// UTF16. This runs in linear time.
|
||||
fn len_utf16(string: &str) -> usize {
|
||||
string.chars().map(char::len_utf16).sum()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
|
||||
|
||||
#[test]
|
||||
fn test_source_file_new() {
|
||||
let lines = Lines::new(TEST);
|
||||
assert_eq!(
|
||||
lines.0.lines,
|
||||
[
|
||||
Line { byte_idx: 0, utf16_idx: 0 },
|
||||
Line { byte_idx: 7, utf16_idx: 6 },
|
||||
Line { byte_idx: 15, utf16_idx: 12 },
|
||||
Line { byte_idx: 18, utf16_idx: 15 },
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_pos_to_line() {
|
||||
let lines = Lines::new(TEST);
|
||||
assert_eq!(lines.byte_to_line(0), Some(0));
|
||||
assert_eq!(lines.byte_to_line(2), Some(0));
|
||||
assert_eq!(lines.byte_to_line(6), Some(0));
|
||||
assert_eq!(lines.byte_to_line(7), Some(1));
|
||||
assert_eq!(lines.byte_to_line(8), Some(1));
|
||||
assert_eq!(lines.byte_to_line(12), Some(1));
|
||||
assert_eq!(lines.byte_to_line(21), Some(3));
|
||||
assert_eq!(lines.byte_to_line(22), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_pos_to_column() {
|
||||
let lines = Lines::new(TEST);
|
||||
assert_eq!(lines.byte_to_column(0), Some(0));
|
||||
assert_eq!(lines.byte_to_column(2), Some(1));
|
||||
assert_eq!(lines.byte_to_column(6), Some(5));
|
||||
assert_eq!(lines.byte_to_column(7), Some(0));
|
||||
assert_eq!(lines.byte_to_column(8), Some(1));
|
||||
assert_eq!(lines.byte_to_column(12), Some(2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_utf16() {
|
||||
#[track_caller]
|
||||
fn roundtrip(lines: &Lines<&str>, byte_idx: usize, utf16_idx: usize) {
|
||||
let middle = lines.byte_to_utf16(byte_idx).unwrap();
|
||||
let result = lines.utf16_to_byte(middle).unwrap();
|
||||
assert_eq!(middle, utf16_idx);
|
||||
assert_eq!(result, byte_idx);
|
||||
}
|
||||
|
||||
let lines = Lines::new(TEST);
|
||||
roundtrip(&lines, 0, 0);
|
||||
roundtrip(&lines, 2, 1);
|
||||
roundtrip(&lines, 3, 2);
|
||||
roundtrip(&lines, 8, 7);
|
||||
roundtrip(&lines, 12, 9);
|
||||
roundtrip(&lines, 21, 18);
|
||||
assert_eq!(lines.byte_to_utf16(22), None);
|
||||
assert_eq!(lines.utf16_to_byte(19), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_roundtrip() {
|
||||
#[track_caller]
|
||||
fn roundtrip(lines: &Lines<&str>, byte_idx: usize) {
|
||||
let line = lines.byte_to_line(byte_idx).unwrap();
|
||||
let column = lines.byte_to_column(byte_idx).unwrap();
|
||||
let result = lines.line_column_to_byte(line, column).unwrap();
|
||||
assert_eq!(result, byte_idx);
|
||||
}
|
||||
|
||||
let lines = Lines::new(TEST);
|
||||
roundtrip(&lines, 0);
|
||||
roundtrip(&lines, 7);
|
||||
roundtrip(&lines, 12);
|
||||
roundtrip(&lines, 21);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_edit() {
|
||||
// This tests only the non-parser parts. The reparsing itself is
|
||||
// tested separately.
|
||||
#[track_caller]
|
||||
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
|
||||
let reference = Lines::new(after);
|
||||
|
||||
let mut edited = Lines::new(prev.to_string());
|
||||
edited.edit(range.clone(), with);
|
||||
assert_eq!(edited.text(), reference.text());
|
||||
assert_eq!(edited.0.lines, reference.0.lines);
|
||||
|
||||
let mut replaced = Lines::new(prev.to_string());
|
||||
replaced.replace(&{
|
||||
let mut s = prev.to_string();
|
||||
s.replace_range(range, with);
|
||||
s
|
||||
});
|
||||
assert_eq!(replaced.text(), reference.text());
|
||||
assert_eq!(replaced.0.lines, reference.0.lines);
|
||||
}
|
||||
|
||||
// Test inserting at the beginning.
|
||||
test("abc\n", 0..0, "hi\n", "hi\nabc\n");
|
||||
test("\nabc", 0..0, "hi\r", "hi\r\nabc");
|
||||
|
||||
// Test editing in the middle.
|
||||
test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
|
||||
|
||||
// Test appending.
|
||||
test("abc\ndef", 7..7, "hi", "abc\ndefhi");
|
||||
test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
|
||||
|
||||
// Test appending with adjoining \r and \n.
|
||||
test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
|
||||
|
||||
// Test removing everything.
|
||||
test(TEST, 0..21, "", "");
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Hash> Hash for Lines<S> {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.0.str.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsRef<str>> AsRef<str> for Lines<S> {
|
||||
fn as_ref(&self) -> &str {
|
||||
self.0.str.as_ref()
|
||||
}
|
||||
}
|
@ -2,14 +2,14 @@
|
||||
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::iter::zip;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use typst_utils::LazyHash;
|
||||
|
||||
use crate::lines::Lines;
|
||||
use crate::reparser::reparse;
|
||||
use crate::{is_newline, parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
|
||||
use crate::{parse, FileId, LinkedNode, Span, SyntaxNode, VirtualPath};
|
||||
|
||||
/// A source file.
|
||||
///
|
||||
@ -24,9 +24,8 @@ pub struct Source(Arc<Repr>);
|
||||
#[derive(Clone)]
|
||||
struct Repr {
|
||||
id: FileId,
|
||||
text: LazyHash<String>,
|
||||
root: LazyHash<SyntaxNode>,
|
||||
lines: Vec<Line>,
|
||||
lines: LazyHash<Lines<String>>,
|
||||
}
|
||||
|
||||
impl Source {
|
||||
@ -37,8 +36,7 @@ impl Source {
|
||||
root.numberize(id, Span::FULL).unwrap();
|
||||
Self(Arc::new(Repr {
|
||||
id,
|
||||
lines: lines(&text),
|
||||
text: LazyHash::new(text),
|
||||
lines: LazyHash::new(Lines::new(text)),
|
||||
root: LazyHash::new(root),
|
||||
}))
|
||||
}
|
||||
@ -58,9 +56,14 @@ impl Source {
|
||||
self.0.id
|
||||
}
|
||||
|
||||
/// Metadata about the lines of the source file.
|
||||
pub fn lines(&self) -> Lines<String> {
|
||||
Lines::clone(&self.0.lines)
|
||||
}
|
||||
|
||||
/// The whole source as a string slice.
|
||||
pub fn text(&self) -> &str {
|
||||
&self.0.text
|
||||
&self.0.lines.text()
|
||||
}
|
||||
|
||||
/// Slice out the part of the source code enclosed by the range.
|
||||
@ -77,29 +80,12 @@ impl Source {
|
||||
/// Returns the range in the new source that was ultimately reparsed.
|
||||
pub fn replace(&mut self, new: &str) -> Range<usize> {
|
||||
let _scope = typst_timing::TimingScope::new("replace source");
|
||||
let old = self.text();
|
||||
|
||||
let mut prefix =
|
||||
zip(old.bytes(), new.bytes()).take_while(|(x, y)| x == y).count();
|
||||
|
||||
if prefix == old.len() && prefix == new.len() {
|
||||
let Some((prefix, suffix)) = self.0.lines.replacement_range(new) else {
|
||||
return 0..0;
|
||||
}
|
||||
|
||||
while !old.is_char_boundary(prefix) || !new.is_char_boundary(prefix) {
|
||||
prefix -= 1;
|
||||
}
|
||||
|
||||
let mut suffix = zip(old[prefix..].bytes().rev(), new[prefix..].bytes().rev())
|
||||
.take_while(|(x, y)| x == y)
|
||||
.count();
|
||||
|
||||
while !old.is_char_boundary(old.len() - suffix)
|
||||
|| !new.is_char_boundary(new.len() - suffix)
|
||||
{
|
||||
suffix += 1;
|
||||
}
|
||||
};
|
||||
|
||||
let old = self.text();
|
||||
let replace = prefix..old.len() - suffix;
|
||||
let with = &new[prefix..new.len() - suffix];
|
||||
self.edit(replace, with)
|
||||
@ -112,48 +98,28 @@ impl Source {
|
||||
/// The method panics if the `replace` range is out of bounds.
|
||||
#[track_caller]
|
||||
pub fn edit(&mut self, replace: Range<usize>, with: &str) -> Range<usize> {
|
||||
let start_byte = replace.start;
|
||||
let start_utf16 = self.byte_to_utf16(start_byte).unwrap();
|
||||
let line = self.byte_to_line(start_byte).unwrap();
|
||||
|
||||
let inner = Arc::make_mut(&mut self.0);
|
||||
|
||||
// Update the text itself.
|
||||
inner.text.replace_range(replace.clone(), with);
|
||||
|
||||
// Remove invalidated line starts.
|
||||
inner.lines.truncate(line + 1);
|
||||
|
||||
// Handle adjoining of \r and \n.
|
||||
if inner.text[..start_byte].ends_with('\r') && with.starts_with('\n') {
|
||||
inner.lines.pop();
|
||||
}
|
||||
|
||||
// Recalculate the line starts after the edit.
|
||||
inner.lines.extend(lines_from(
|
||||
start_byte,
|
||||
start_utf16,
|
||||
&inner.text[start_byte..],
|
||||
));
|
||||
// Update the text and lines.
|
||||
inner.lines.edit(replace.clone(), with);
|
||||
|
||||
// Incrementally reparse the replaced range.
|
||||
reparse(&mut inner.root, &inner.text, replace, with.len())
|
||||
reparse(&mut inner.root, inner.lines.text(), replace, with.len())
|
||||
}
|
||||
|
||||
/// Get the length of the file in UTF-8 encoded bytes.
|
||||
pub fn len_bytes(&self) -> usize {
|
||||
self.text().len()
|
||||
self.0.lines.len_bytes()
|
||||
}
|
||||
|
||||
/// Get the length of the file in UTF-16 code units.
|
||||
pub fn len_utf16(&self) -> usize {
|
||||
let last = self.0.lines.last().unwrap();
|
||||
last.utf16_idx + len_utf16(&self.0.text[last.byte_idx..])
|
||||
self.0.lines.len_utf16()
|
||||
}
|
||||
|
||||
/// Get the length of the file in lines.
|
||||
pub fn len_lines(&self) -> usize {
|
||||
self.0.lines.len()
|
||||
self.0.lines.len_lines()
|
||||
}
|
||||
|
||||
/// Find the node with the given span.
|
||||
@ -171,85 +137,6 @@ impl Source {
|
||||
pub fn range(&self, span: Span) -> Option<Range<usize>> {
|
||||
Some(self.find(span)?.range())
|
||||
}
|
||||
|
||||
/// Return the index of the UTF-16 code unit at the byte index.
|
||||
pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
|
||||
let line_idx = self.byte_to_line(byte_idx)?;
|
||||
let line = self.0.lines.get(line_idx)?;
|
||||
let head = self.0.text.get(line.byte_idx..byte_idx)?;
|
||||
Some(line.utf16_idx + len_utf16(head))
|
||||
}
|
||||
|
||||
/// Return the index of the line that contains the given byte index.
|
||||
pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
|
||||
(byte_idx <= self.0.text.len()).then(|| {
|
||||
match self.0.lines.binary_search_by_key(&byte_idx, |line| line.byte_idx) {
|
||||
Ok(i) => i,
|
||||
Err(i) => i - 1,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the index of the column at the byte index.
|
||||
///
|
||||
/// The column is defined as the number of characters in the line before the
|
||||
/// byte index.
|
||||
pub fn byte_to_column(&self, byte_idx: usize) -> Option<usize> {
|
||||
let line = self.byte_to_line(byte_idx)?;
|
||||
let start = self.line_to_byte(line)?;
|
||||
let head = self.get(start..byte_idx)?;
|
||||
Some(head.chars().count())
|
||||
}
|
||||
|
||||
/// Return the byte index at the UTF-16 code unit.
|
||||
pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
|
||||
let line = self.0.lines.get(
|
||||
match self.0.lines.binary_search_by_key(&utf16_idx, |line| line.utf16_idx) {
|
||||
Ok(i) => i,
|
||||
Err(i) => i - 1,
|
||||
},
|
||||
)?;
|
||||
|
||||
let mut k = line.utf16_idx;
|
||||
for (i, c) in self.0.text[line.byte_idx..].char_indices() {
|
||||
if k >= utf16_idx {
|
||||
return Some(line.byte_idx + i);
|
||||
}
|
||||
k += c.len_utf16();
|
||||
}
|
||||
|
||||
(k == utf16_idx).then_some(self.0.text.len())
|
||||
}
|
||||
|
||||
/// Return the byte position at which the given line starts.
|
||||
pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
|
||||
self.0.lines.get(line_idx).map(|line| line.byte_idx)
|
||||
}
|
||||
|
||||
/// Return the range which encloses the given line.
|
||||
pub fn line_to_range(&self, line_idx: usize) -> Option<Range<usize>> {
|
||||
let start = self.line_to_byte(line_idx)?;
|
||||
let end = self.line_to_byte(line_idx + 1).unwrap_or(self.0.text.len());
|
||||
Some(start..end)
|
||||
}
|
||||
|
||||
/// Return the byte index of the given (line, column) pair.
|
||||
///
|
||||
/// The column defines the number of characters to go beyond the start of
|
||||
/// the line.
|
||||
pub fn line_column_to_byte(
|
||||
&self,
|
||||
line_idx: usize,
|
||||
column_idx: usize,
|
||||
) -> Option<usize> {
|
||||
let range = self.line_to_range(line_idx)?;
|
||||
let line = self.get(range.clone())?;
|
||||
let mut chars = line.chars();
|
||||
for _ in 0..column_idx {
|
||||
chars.next();
|
||||
}
|
||||
Some(range.start + (line.len() - chars.as_str().len()))
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Source {
|
||||
@ -261,7 +148,7 @@ impl Debug for Source {
|
||||
impl Hash for Source {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.0.id.hash(state);
|
||||
self.0.text.hash(state);
|
||||
self.0.lines.hash(state);
|
||||
self.0.root.hash(state);
|
||||
}
|
||||
}
|
||||
@ -271,176 +158,3 @@ impl AsRef<str> for Source {
|
||||
self.text()
|
||||
}
|
||||
}
|
||||
|
||||
/// Metadata about a line.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
struct Line {
|
||||
/// The UTF-8 byte offset where the line starts.
|
||||
byte_idx: usize,
|
||||
/// The UTF-16 codepoint offset where the line starts.
|
||||
utf16_idx: usize,
|
||||
}
|
||||
|
||||
/// Create a line vector.
|
||||
fn lines(text: &str) -> Vec<Line> {
|
||||
std::iter::once(Line { byte_idx: 0, utf16_idx: 0 })
|
||||
.chain(lines_from(0, 0, text))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Compute a line iterator from an offset.
|
||||
fn lines_from(
|
||||
byte_offset: usize,
|
||||
utf16_offset: usize,
|
||||
text: &str,
|
||||
) -> impl Iterator<Item = Line> + '_ {
|
||||
let mut s = unscanny::Scanner::new(text);
|
||||
let mut utf16_idx = utf16_offset;
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
s.eat_until(|c: char| {
|
||||
utf16_idx += c.len_utf16();
|
||||
is_newline(c)
|
||||
});
|
||||
|
||||
if s.done() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if s.eat() == Some('\r') && s.eat_if('\n') {
|
||||
utf16_idx += 1;
|
||||
}
|
||||
|
||||
Some(Line { byte_idx: byte_offset + s.cursor(), utf16_idx })
|
||||
})
|
||||
}
|
||||
|
||||
/// The number of code units this string would use if it was encoded in
|
||||
/// UTF16. This runs in linear time.
|
||||
fn len_utf16(string: &str) -> usize {
|
||||
string.chars().map(char::len_utf16).sum()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
|
||||
|
||||
#[test]
|
||||
fn test_source_file_new() {
|
||||
let source = Source::detached(TEST);
|
||||
assert_eq!(
|
||||
source.0.lines,
|
||||
[
|
||||
Line { byte_idx: 0, utf16_idx: 0 },
|
||||
Line { byte_idx: 7, utf16_idx: 6 },
|
||||
Line { byte_idx: 15, utf16_idx: 12 },
|
||||
Line { byte_idx: 18, utf16_idx: 15 },
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_pos_to_line() {
|
||||
let source = Source::detached(TEST);
|
||||
assert_eq!(source.byte_to_line(0), Some(0));
|
||||
assert_eq!(source.byte_to_line(2), Some(0));
|
||||
assert_eq!(source.byte_to_line(6), Some(0));
|
||||
assert_eq!(source.byte_to_line(7), Some(1));
|
||||
assert_eq!(source.byte_to_line(8), Some(1));
|
||||
assert_eq!(source.byte_to_line(12), Some(1));
|
||||
assert_eq!(source.byte_to_line(21), Some(3));
|
||||
assert_eq!(source.byte_to_line(22), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_pos_to_column() {
|
||||
let source = Source::detached(TEST);
|
||||
assert_eq!(source.byte_to_column(0), Some(0));
|
||||
assert_eq!(source.byte_to_column(2), Some(1));
|
||||
assert_eq!(source.byte_to_column(6), Some(5));
|
||||
assert_eq!(source.byte_to_column(7), Some(0));
|
||||
assert_eq!(source.byte_to_column(8), Some(1));
|
||||
assert_eq!(source.byte_to_column(12), Some(2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_utf16() {
|
||||
#[track_caller]
|
||||
fn roundtrip(source: &Source, byte_idx: usize, utf16_idx: usize) {
|
||||
let middle = source.byte_to_utf16(byte_idx).unwrap();
|
||||
let result = source.utf16_to_byte(middle).unwrap();
|
||||
assert_eq!(middle, utf16_idx);
|
||||
assert_eq!(result, byte_idx);
|
||||
}
|
||||
|
||||
let source = Source::detached(TEST);
|
||||
roundtrip(&source, 0, 0);
|
||||
roundtrip(&source, 2, 1);
|
||||
roundtrip(&source, 3, 2);
|
||||
roundtrip(&source, 8, 7);
|
||||
roundtrip(&source, 12, 9);
|
||||
roundtrip(&source, 21, 18);
|
||||
assert_eq!(source.byte_to_utf16(22), None);
|
||||
assert_eq!(source.utf16_to_byte(19), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_roundtrip() {
|
||||
#[track_caller]
|
||||
fn roundtrip(source: &Source, byte_idx: usize) {
|
||||
let line = source.byte_to_line(byte_idx).unwrap();
|
||||
let column = source.byte_to_column(byte_idx).unwrap();
|
||||
let result = source.line_column_to_byte(line, column).unwrap();
|
||||
assert_eq!(result, byte_idx);
|
||||
}
|
||||
|
||||
let source = Source::detached(TEST);
|
||||
roundtrip(&source, 0);
|
||||
roundtrip(&source, 7);
|
||||
roundtrip(&source, 12);
|
||||
roundtrip(&source, 21);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_source_file_edit() {
|
||||
// This tests only the non-parser parts. The reparsing itself is
|
||||
// tested separately.
|
||||
#[track_caller]
|
||||
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
|
||||
let reference = Source::detached(after);
|
||||
|
||||
let mut edited = Source::detached(prev);
|
||||
edited.edit(range.clone(), with);
|
||||
assert_eq!(edited.text(), reference.text());
|
||||
assert_eq!(edited.0.lines, reference.0.lines);
|
||||
|
||||
let mut replaced = Source::detached(prev);
|
||||
replaced.replace(&{
|
||||
let mut s = prev.to_string();
|
||||
s.replace_range(range, with);
|
||||
s
|
||||
});
|
||||
assert_eq!(replaced.text(), reference.text());
|
||||
assert_eq!(replaced.0.lines, reference.0.lines);
|
||||
}
|
||||
|
||||
// Test inserting at the beginning.
|
||||
test("abc\n", 0..0, "hi\n", "hi\nabc\n");
|
||||
test("\nabc", 0..0, "hi\r", "hi\r\nabc");
|
||||
|
||||
// Test editing in the middle.
|
||||
test(TEST, 4..16, "❌", "ä\tc❌i\rjkl");
|
||||
|
||||
// Test appending.
|
||||
test("abc\ndef", 7..7, "hi", "abc\ndefhi");
|
||||
test("abc\ndef\n", 8..8, "hi", "abc\ndef\nhi");
|
||||
|
||||
// Test appending with adjoining \r and \n.
|
||||
test("abc\ndef\r", 8..8, "\nghi", "abc\ndef\r\nghi");
|
||||
|
||||
// Test removing everything.
|
||||
test(TEST, 0..21, "", "");
|
||||
}
|
||||
}
|
||||
|
@ -6,9 +6,11 @@ use std::str::FromStr;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use ecow::{eco_format, EcoString};
|
||||
use typst::loading::LineCol;
|
||||
use typst::diag::LineCol;
|
||||
use typst_syntax::package::PackageVersion;
|
||||
use typst_syntax::{is_id_continue, is_ident, is_newline, FileId, Source, VirtualPath};
|
||||
use typst_syntax::{
|
||||
is_id_continue, is_ident, is_newline, FileId, Lines, Source, VirtualPath,
|
||||
};
|
||||
use unscanny::Scanner;
|
||||
|
||||
use crate::world::{read, system_path};
|
||||
@ -426,11 +428,17 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
|
||||
let start = self.parse_line_col()?;
|
||||
let lines = Lines::from_bytes(text.as_ref()).expect("Errors shouldn't be annotated for files that aren't human readable (not valid utf-8)");
|
||||
let range = if self.s.eat_if('-') {
|
||||
let end = self.parse_line_col()?;
|
||||
LineCol::byte_range(start..end, &text)
|
||||
let (line, col) = start.indices();
|
||||
let start = lines.line_column_to_byte(line, col);
|
||||
let (line, col) = end.indices();
|
||||
let end = lines.line_column_to_byte(line, col);
|
||||
Option::zip(start, end).map(|(a, b)| a..b)
|
||||
} else {
|
||||
start.byte_pos(&text).map(|i| i..i)
|
||||
let (line, col) = start.indices();
|
||||
lines.line_column_to_byte(line, col).map(|i| i..i)
|
||||
};
|
||||
if range.is_none() {
|
||||
self.error("range is out of bounds");
|
||||
@ -484,13 +492,13 @@ impl<'a> Parser<'a> {
|
||||
let line_idx = (line_idx_in_test + comments).checked_add_signed(line_delta)?;
|
||||
let column_idx = if column < 0 {
|
||||
// Negative column index is from the back.
|
||||
let range = source.line_to_range(line_idx)?;
|
||||
let range = source.lines().line_to_range(line_idx)?;
|
||||
text[range].chars().count().saturating_add_signed(column)
|
||||
} else {
|
||||
usize::try_from(column).ok()?.checked_sub(1)?
|
||||
};
|
||||
|
||||
source.line_column_to_byte(line_idx, column_idx)
|
||||
source.lines().line_column_to_byte(line_idx, column_idx)
|
||||
}
|
||||
|
||||
/// Parse a number.
|
||||
|
@ -7,11 +7,10 @@ use tiny_skia as sk;
|
||||
use typst::diag::{SourceDiagnostic, Warned};
|
||||
use typst::html::HtmlDocument;
|
||||
use typst::layout::{Abs, Frame, FrameItem, PagedDocument, Transform};
|
||||
use typst::loading::LineCol;
|
||||
use typst::visualize::Color;
|
||||
use typst::{Document, World, WorldExt};
|
||||
use typst_pdf::PdfOptions;
|
||||
use typst_syntax::FileId;
|
||||
use typst_syntax::{FileId, Lines};
|
||||
|
||||
use crate::collect::{Attr, FileSize, NoteKind, Test};
|
||||
use crate::logger::TestResult;
|
||||
@ -329,12 +328,12 @@ impl<'a> Runner<'a> {
|
||||
fn format_pos(&self, file: FileId, pos: usize) -> String {
|
||||
let res = if file != self.test.source.id() {
|
||||
let bytes = self.world.file(file).unwrap();
|
||||
LineCol::from_byte_pos(pos, &bytes).map(|l| l.numbers())
|
||||
let lines = Lines::from_bytes(&bytes).unwrap();
|
||||
lines.byte_to_line_column(pos).map(|(line, col)| (line + 1, col + 1))
|
||||
} else {
|
||||
let line = self.test.source.byte_to_line(pos).map(|l| l + 1);
|
||||
let col = (self.test.source.byte_to_column(pos))
|
||||
.map(|c| self.test.pos.line + c + 1);
|
||||
Option::zip(line, col)
|
||||
(self.test.source.lines())
|
||||
.byte_to_line_column(pos)
|
||||
.map(|(line, col)| (line + 1, col + 1))
|
||||
};
|
||||
let Some((line, col)) = res else {
|
||||
return "oob".into();
|
||||